torchcodec 0.8.0__cp311-cp311-win_amd64.whl → 0.8.1__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of torchcodec might be problematic. Click here for more details.
- torchcodec/_core/AVIOTensorContext.cpp +23 -16
- torchcodec/_core/AVIOTensorContext.h +2 -1
- torchcodec/_core/BetaCudaDeviceInterface.cpp +168 -86
- torchcodec/_core/BetaCudaDeviceInterface.h +7 -5
- torchcodec/_core/CMakeLists.txt +1 -19
- torchcodec/_core/CUDACommon.cpp +21 -6
- torchcodec/_core/CUDACommon.h +6 -1
- torchcodec/_core/Cache.h +6 -20
- torchcodec/_core/CpuDeviceInterface.cpp +7 -1
- torchcodec/_core/CpuDeviceInterface.h +4 -1
- torchcodec/_core/CudaDeviceInterface.cpp +19 -11
- torchcodec/_core/CudaDeviceInterface.h +6 -1
- torchcodec/_core/DeviceInterface.h +27 -27
- torchcodec/_core/Encoder.cpp +51 -7
- torchcodec/_core/Encoder.h +12 -1
- torchcodec/_core/FFMPEGCommon.cpp +1 -1
- torchcodec/_core/FFMPEGCommon.h +9 -1
- torchcodec/_core/FilterGraph.cpp +2 -1
- torchcodec/_core/Frame.cpp +5 -0
- torchcodec/_core/Frame.h +1 -1
- torchcodec/_core/NVCUVIDRuntimeLoader.cpp +320 -0
- torchcodec/_core/NVCUVIDRuntimeLoader.h +14 -0
- torchcodec/_core/NVDECCache.cpp +3 -13
- torchcodec/_core/NVDECCache.h +4 -6
- torchcodec/_core/SingleStreamDecoder.cpp +22 -31
- torchcodec/_core/SingleStreamDecoder.h +4 -2
- torchcodec/_core/StreamOptions.h +2 -2
- torchcodec/_core/Transform.cpp +27 -0
- torchcodec/_core/Transform.h +25 -0
- torchcodec/_core/__init__.py +3 -0
- torchcodec/_core/custom_ops.cpp +99 -22
- torchcodec/_core/ops.py +76 -16
- torchcodec/decoders/_video_decoder.py +0 -10
- torchcodec/libtorchcodec_core4.dll +0 -0
- torchcodec/libtorchcodec_core5.dll +0 -0
- torchcodec/libtorchcodec_core6.dll +0 -0
- torchcodec/libtorchcodec_core7.dll +0 -0
- torchcodec/libtorchcodec_core8.dll +0 -0
- torchcodec/libtorchcodec_custom_ops4.dll +0 -0
- torchcodec/libtorchcodec_custom_ops5.dll +0 -0
- torchcodec/libtorchcodec_custom_ops6.dll +0 -0
- torchcodec/libtorchcodec_custom_ops7.dll +0 -0
- torchcodec/libtorchcodec_custom_ops8.dll +0 -0
- torchcodec/libtorchcodec_pybind_ops4.pyd +0 -0
- torchcodec/libtorchcodec_pybind_ops5.pyd +0 -0
- torchcodec/libtorchcodec_pybind_ops6.pyd +0 -0
- torchcodec/libtorchcodec_pybind_ops7.pyd +0 -0
- torchcodec/libtorchcodec_pybind_ops8.pyd +0 -0
- torchcodec/version.py +1 -1
- {torchcodec-0.8.0.dist-info → torchcodec-0.8.1.dist-info}/METADATA +6 -4
- torchcodec-0.8.1.dist-info/RECORD +82 -0
- torchcodec-0.8.0.dist-info/RECORD +0 -80
- {torchcodec-0.8.0.dist-info → torchcodec-0.8.1.dist-info}/WHEEL +0 -0
- {torchcodec-0.8.0.dist-info → torchcodec-0.8.1.dist-info}/licenses/LICENSE +0 -0
- {torchcodec-0.8.0.dist-info → torchcodec-0.8.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,320 @@
|
|
|
1
|
+
// Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
// All rights reserved.
|
|
3
|
+
//
|
|
4
|
+
// This source code is licensed under the BSD-style license found in the
|
|
5
|
+
// LICENSE file in the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
#ifdef FBCODE_CAFFE2
|
|
8
|
+
// No need to do anything on fbcode. NVCUVID is available there, we can take a
|
|
9
|
+
// hard dependency on it.
|
|
10
|
+
// The FBCODE_CAFFE2 macro is defined in the upstream fbcode build of torch, so
|
|
11
|
+
// we can rely on it, that's what torch does too.
|
|
12
|
+
|
|
13
|
+
namespace facebook::torchcodec {
|
|
14
|
+
// fbcode variant: NVCUVID is a hard dependency in this build configuration,
// so there is nothing to load at runtime and the call always reports success.
bool loadNVCUVIDLibrary() {
  constexpr bool kNvcuvidAlwaysAvailable = true;
  return kNvcuvidAlwaysAvailable;
}
|
|
17
|
+
} // namespace facebook::torchcodec
|
|
18
|
+
#else
|
|
19
|
+
|
|
20
|
+
#include "src/torchcodec/_core/NVCUVIDRuntimeLoader.h"
|
|
21
|
+
|
|
22
|
+
#include "src/torchcodec/_core/nvcuvid_include/cuviddec.h"
|
|
23
|
+
#include "src/torchcodec/_core/nvcuvid_include/nvcuvid.h"
|
|
24
|
+
|
|
25
|
+
#include <torch/types.h>
|
|
26
|
+
#include <cstdio>
|
|
27
|
+
#include <mutex>
|
|
28
|
+
|
|
29
|
+
#if defined(WIN64) || defined(_WIN64)
|
|
30
|
+
#include <windows.h>
|
|
31
|
+
typedef HMODULE tHandle;
|
|
32
|
+
#else
|
|
33
|
+
#include <dlfcn.h>
|
|
34
|
+
typedef void* tHandle;
|
|
35
|
+
#endif
|
|
36
|
+
|
|
37
|
+
namespace facebook::torchcodec {
|
|
38
|
+
|
|
39
|
+
/* clang-format off */
|
|
40
|
+
// This file defines the logic to load the NVCUVID library **at runtime**,
|
|
41
|
+
// along with the corresponding NVCUVID functions that we'll need.
|
|
42
|
+
//
|
|
43
|
+
// We do this because we *do not want* to link (statically or dynamically)
|
|
44
|
+
// against libnvcuvid.so: it is not always available on the user's machine! If we
|
|
45
|
+
// were to link against libnvcuvid.so, that would mean that our
|
|
46
|
+
// libtorchcodec_coreN.so would try to look for it when loaded at import time.
|
|
47
|
+
// And if it's not on the user's machine, that causes `import torchcodec` to
|
|
48
|
+
// fail. Source: that's what we did, and we got user reports.
|
|
49
|
+
//
|
|
50
|
+
// So, we don't link against libnvcuvid.so. But we still want to call its
|
|
51
|
+
// functions. So here's how it's done, we'll use cuvidCreateVideoParser as an
|
|
52
|
+
// example, but it works the same for all. We are largely following the
|
|
53
|
+
// instructions from the NVCUVID docs:
|
|
54
|
+
// https://docs.nvidia.com/video-technologies/video-codec-sdk/13.0/nvdec-video-decoder-api-prog-guide/index.html#dynamic-loading-nvidia-components
|
|
55
|
+
//
|
|
56
|
+
// This:
|
|
57
|
+
// typedef CUresult CUDAAPI tcuvidCreateVideoParser(CUvideoparser*, CUVIDPARSERPARAMS*);
|
|
58
|
+
// defines tcuvidCreateVideoParser, which is the *type* of a *function*.
|
|
59
|
+
// We define such a function of that type just below with:
|
|
60
|
+
// static tcuvidCreateVideoParser* dl_cuvidCreateVideoParser = nullptr;
|
|
61
|
+
// "dl" is for "dynamically loaded". For now dl_cuvidCreateVideoParser is
|
|
62
|
+
// nullptr, but later it will be a proper function [pointer] that can be called
|
|
63
|
+
// with dl_cuvidCreateVideoParser(...);
|
|
64
|
+
//
|
|
65
|
+
// For that to happen we need to call loadNVCUVIDLibrary(): in there, we first
|
|
66
|
+
// dlopen(libnvcuvid.so) which loads the .so somewhere in memory. Then we call
|
|
67
|
+
// dlsym(...), which binds dl_cuvidCreateVideoParser to its actual address: it
|
|
68
|
+
// literally sets the value of the dl_cuvidCreateVideoParser pointer to the
|
|
69
|
+
// address of the actual code section. If all went well, by now, we can safely
|
|
70
|
+
// call dl_cuvidCreateVideoParser(...);
|
|
71
|
+
// All of that happens at runtime *after* import time, when the first instance
|
|
72
|
+
// of the Beta CUDA interface is created, i.e. only when the user explicitly
|
|
73
|
+
// requests it.
|
|
74
|
+
//
|
|
75
|
+
// At the bottom of this file we have an `extern "C"` section with function
|
|
76
|
+
// definitions like:
|
|
77
|
+
//
|
|
78
|
+
// CUresult CUDAAPI cuvidCreateVideoParser(
|
|
79
|
+
// CUvideoparser* videoParser,
|
|
80
|
+
// CUVIDPARSERPARAMS* parserParams) {...}
|
|
81
|
+
//
|
|
82
|
+
// These are the actual functions that are compiled against and called by the
|
|
83
|
+
// Beta CUDA interface code. Crucially, these functions' signatures match exactly
|
|
84
|
+
// the NVCUVID functions (as defined in cuviddec.h). Inside of
|
|
85
|
+
// cuvidCreateVideoParser(...) we simply call the dl_cuvidCreateVideoParser
|
|
86
|
+
// function [pointer] that we dynamically loaded earlier.
|
|
87
|
+
//
|
|
88
|
+
// At runtime, within the Beta CUDA interface code we have a fallback mechanism
|
|
89
|
+
// to switch back to the CPU backend if any of the NVCUVID functions are not
|
|
90
|
+
// available, or if libnvcuvid.so itself couldn't be found. This is what FFmpeg
|
|
91
|
+
// does too.
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
// Function pointer types
|
|
95
|
+
typedef CUresult CUDAAPI tcuvidCreateVideoParser(CUvideoparser*, CUVIDPARSERPARAMS*);
|
|
96
|
+
typedef CUresult CUDAAPI tcuvidParseVideoData(CUvideoparser, CUVIDSOURCEDATAPACKET*);
|
|
97
|
+
typedef CUresult CUDAAPI tcuvidDestroyVideoParser(CUvideoparser);
|
|
98
|
+
typedef CUresult CUDAAPI tcuvidGetDecoderCaps(CUVIDDECODECAPS*);
|
|
99
|
+
typedef CUresult CUDAAPI tcuvidCreateDecoder(CUvideodecoder*, CUVIDDECODECREATEINFO*);
|
|
100
|
+
typedef CUresult CUDAAPI tcuvidDestroyDecoder(CUvideodecoder);
|
|
101
|
+
typedef CUresult CUDAAPI tcuvidDecodePicture(CUvideodecoder, CUVIDPICPARAMS*);
|
|
102
|
+
typedef CUresult CUDAAPI tcuvidMapVideoFrame(CUvideodecoder, int, unsigned int*, unsigned int*, CUVIDPROCPARAMS*);
|
|
103
|
+
typedef CUresult CUDAAPI tcuvidUnmapVideoFrame(CUvideodecoder, unsigned int);
|
|
104
|
+
typedef CUresult CUDAAPI tcuvidMapVideoFrame64(CUvideodecoder, int, unsigned long long*, unsigned int*, CUVIDPROCPARAMS*);
|
|
105
|
+
typedef CUresult CUDAAPI tcuvidUnmapVideoFrame64(CUvideodecoder, unsigned long long);
|
|
106
|
+
/* clang-format on */
|
|
107
|
+
|
|
108
|
+
// Global function pointers - will be dynamically loaded
|
|
109
|
+
static tcuvidCreateVideoParser* dl_cuvidCreateVideoParser = nullptr;
|
|
110
|
+
static tcuvidParseVideoData* dl_cuvidParseVideoData = nullptr;
|
|
111
|
+
static tcuvidDestroyVideoParser* dl_cuvidDestroyVideoParser = nullptr;
|
|
112
|
+
static tcuvidGetDecoderCaps* dl_cuvidGetDecoderCaps = nullptr;
|
|
113
|
+
static tcuvidCreateDecoder* dl_cuvidCreateDecoder = nullptr;
|
|
114
|
+
static tcuvidDestroyDecoder* dl_cuvidDestroyDecoder = nullptr;
|
|
115
|
+
static tcuvidDecodePicture* dl_cuvidDecodePicture = nullptr;
|
|
116
|
+
static tcuvidMapVideoFrame* dl_cuvidMapVideoFrame = nullptr;
|
|
117
|
+
static tcuvidUnmapVideoFrame* dl_cuvidUnmapVideoFrame = nullptr;
|
|
118
|
+
static tcuvidMapVideoFrame64* dl_cuvidMapVideoFrame64 = nullptr;
|
|
119
|
+
static tcuvidUnmapVideoFrame64* dl_cuvidUnmapVideoFrame64 = nullptr;
|
|
120
|
+
|
|
121
|
+
static tHandle g_nvcuvid_handle = nullptr;
|
|
122
|
+
static std::mutex g_nvcuvid_mutex;
|
|
123
|
+
|
|
124
|
+
bool isLoaded() {
|
|
125
|
+
return (
|
|
126
|
+
g_nvcuvid_handle && dl_cuvidCreateVideoParser && dl_cuvidParseVideoData &&
|
|
127
|
+
dl_cuvidDestroyVideoParser && dl_cuvidGetDecoderCaps &&
|
|
128
|
+
dl_cuvidCreateDecoder && dl_cuvidDestroyDecoder &&
|
|
129
|
+
dl_cuvidDecodePicture && dl_cuvidMapVideoFrame &&
|
|
130
|
+
dl_cuvidUnmapVideoFrame && dl_cuvidMapVideoFrame64 &&
|
|
131
|
+
dl_cuvidUnmapVideoFrame64);
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
template <typename T>
|
|
135
|
+
T* bindFunction(const char* functionName) {
|
|
136
|
+
#if defined(WIN64) || defined(_WIN64)
|
|
137
|
+
return reinterpret_cast<T*>(GetProcAddress(g_nvcuvid_handle, functionName));
|
|
138
|
+
#else
|
|
139
|
+
return reinterpret_cast<T*>(dlsym(g_nvcuvid_handle, functionName));
|
|
140
|
+
#endif
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
// Opens the NVCUVID shared library and stores its handle in g_nvcuvid_handle.
// This is just dlopen (or its Windows equivalent); it lives in its own
// function to keep the platform #ifdef ugliness in one place.
//
// Returns true when a handle was obtained, false otherwise. Callers are
// expected to hold g_nvcuvid_mutex.
//
// Declared `static` (internal linkage): file-local helper, not part of the
// interface declared in NVCUVIDRuntimeLoader.h.
static bool _loadLibrary() {
#if defined(WIN64) || defined(_WIN64)
  // The DLL name is plain ASCII, so call the narrow-character entry point
  // directly instead of branching on UNICODE to pick a TCHAR literal.
  g_nvcuvid_handle = LoadLibraryA("nvcuvid.dll");
#else
  g_nvcuvid_handle = dlopen("libnvcuvid.so", RTLD_NOW);
  if (g_nvcuvid_handle == nullptr) {
    // Fall back to the versioned soname.
    g_nvcuvid_handle = dlopen("libnvcuvid.so.1", RTLD_NOW);
  }
#endif

  return g_nvcuvid_handle != nullptr;
}
|
|
168
|
+
|
|
169
|
+
bool loadNVCUVIDLibrary() {
|
|
170
|
+
// Loads NVCUVID library and all required function pointers.
|
|
171
|
+
// Returns true on success, false on failure.
|
|
172
|
+
std::lock_guard<std::mutex> lock(g_nvcuvid_mutex);
|
|
173
|
+
|
|
174
|
+
if (isLoaded()) {
|
|
175
|
+
return true;
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
if (!_loadLibrary()) {
|
|
179
|
+
return false;
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
// Load all function pointers. They'll be set to nullptr if not found.
|
|
183
|
+
dl_cuvidCreateVideoParser =
|
|
184
|
+
bindFunction<tcuvidCreateVideoParser>("cuvidCreateVideoParser");
|
|
185
|
+
dl_cuvidParseVideoData =
|
|
186
|
+
bindFunction<tcuvidParseVideoData>("cuvidParseVideoData");
|
|
187
|
+
dl_cuvidDestroyVideoParser =
|
|
188
|
+
bindFunction<tcuvidDestroyVideoParser>("cuvidDestroyVideoParser");
|
|
189
|
+
dl_cuvidGetDecoderCaps =
|
|
190
|
+
bindFunction<tcuvidGetDecoderCaps>("cuvidGetDecoderCaps");
|
|
191
|
+
dl_cuvidCreateDecoder =
|
|
192
|
+
bindFunction<tcuvidCreateDecoder>("cuvidCreateDecoder");
|
|
193
|
+
dl_cuvidDestroyDecoder =
|
|
194
|
+
bindFunction<tcuvidDestroyDecoder>("cuvidDestroyDecoder");
|
|
195
|
+
dl_cuvidDecodePicture =
|
|
196
|
+
bindFunction<tcuvidDecodePicture>("cuvidDecodePicture");
|
|
197
|
+
dl_cuvidMapVideoFrame =
|
|
198
|
+
bindFunction<tcuvidMapVideoFrame>("cuvidMapVideoFrame");
|
|
199
|
+
dl_cuvidUnmapVideoFrame =
|
|
200
|
+
bindFunction<tcuvidUnmapVideoFrame>("cuvidUnmapVideoFrame");
|
|
201
|
+
dl_cuvidMapVideoFrame64 =
|
|
202
|
+
bindFunction<tcuvidMapVideoFrame64>("cuvidMapVideoFrame64");
|
|
203
|
+
dl_cuvidUnmapVideoFrame64 =
|
|
204
|
+
bindFunction<tcuvidUnmapVideoFrame64>("cuvidUnmapVideoFrame64");
|
|
205
|
+
|
|
206
|
+
return isLoaded();
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
} // namespace facebook::torchcodec
|
|
210
|
+
|
|
211
|
+
extern "C" {
|
|
212
|
+
|
|
213
|
+
CUresult CUDAAPI cuvidCreateVideoParser(
|
|
214
|
+
CUvideoparser* videoParser,
|
|
215
|
+
CUVIDPARSERPARAMS* parserParams) {
|
|
216
|
+
TORCH_CHECK(
|
|
217
|
+
facebook::torchcodec::dl_cuvidCreateVideoParser,
|
|
218
|
+
"cuvidCreateVideoParser called but NVCUVID not loaded!");
|
|
219
|
+
return facebook::torchcodec::dl_cuvidCreateVideoParser(
|
|
220
|
+
videoParser, parserParams);
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
CUresult CUDAAPI cuvidParseVideoData(
|
|
224
|
+
CUvideoparser videoParser,
|
|
225
|
+
CUVIDSOURCEDATAPACKET* cuvidPacket) {
|
|
226
|
+
TORCH_CHECK(
|
|
227
|
+
facebook::torchcodec::dl_cuvidParseVideoData,
|
|
228
|
+
"cuvidParseVideoData called but NVCUVID not loaded!");
|
|
229
|
+
return facebook::torchcodec::dl_cuvidParseVideoData(videoParser, cuvidPacket);
|
|
230
|
+
}
|
|
231
|
+
|
|
232
|
+
CUresult CUDAAPI cuvidDestroyVideoParser(CUvideoparser videoParser) {
|
|
233
|
+
TORCH_CHECK(
|
|
234
|
+
facebook::torchcodec::dl_cuvidDestroyVideoParser,
|
|
235
|
+
"cuvidDestroyVideoParser called but NVCUVID not loaded!");
|
|
236
|
+
return facebook::torchcodec::dl_cuvidDestroyVideoParser(videoParser);
|
|
237
|
+
}
|
|
238
|
+
|
|
239
|
+
CUresult CUDAAPI cuvidGetDecoderCaps(CUVIDDECODECAPS* caps) {
|
|
240
|
+
TORCH_CHECK(
|
|
241
|
+
facebook::torchcodec::dl_cuvidGetDecoderCaps,
|
|
242
|
+
"cuvidGetDecoderCaps called but NVCUVID not loaded!");
|
|
243
|
+
return facebook::torchcodec::dl_cuvidGetDecoderCaps(caps);
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
CUresult CUDAAPI cuvidCreateDecoder(
|
|
247
|
+
CUvideodecoder* decoder,
|
|
248
|
+
CUVIDDECODECREATEINFO* decoderParams) {
|
|
249
|
+
TORCH_CHECK(
|
|
250
|
+
facebook::torchcodec::dl_cuvidCreateDecoder,
|
|
251
|
+
"cuvidCreateDecoder called but NVCUVID not loaded!");
|
|
252
|
+
return facebook::torchcodec::dl_cuvidCreateDecoder(decoder, decoderParams);
|
|
253
|
+
}
|
|
254
|
+
|
|
255
|
+
CUresult CUDAAPI cuvidDestroyDecoder(CUvideodecoder decoder) {
|
|
256
|
+
TORCH_CHECK(
|
|
257
|
+
facebook::torchcodec::dl_cuvidDestroyDecoder,
|
|
258
|
+
"cuvidDestroyDecoder called but NVCUVID not loaded!");
|
|
259
|
+
return facebook::torchcodec::dl_cuvidDestroyDecoder(decoder);
|
|
260
|
+
}
|
|
261
|
+
|
|
262
|
+
CUresult CUDAAPI
|
|
263
|
+
cuvidDecodePicture(CUvideodecoder decoder, CUVIDPICPARAMS* picParams) {
|
|
264
|
+
TORCH_CHECK(
|
|
265
|
+
facebook::torchcodec::dl_cuvidDecodePicture,
|
|
266
|
+
"cuvidDecodePicture called but NVCUVID not loaded!");
|
|
267
|
+
return facebook::torchcodec::dl_cuvidDecodePicture(decoder, picParams);
|
|
268
|
+
}
|
|
269
|
+
|
|
270
|
+
#if !defined(__CUVID_DEVPTR64) || defined(__CUVID_INTERNAL)
|
|
271
|
+
// We need to protect the definition of the 32bit versions under the above
|
|
272
|
+
// conditions (see cuviddec.h). Defining them unconditionally would cause
|
|
273
|
+
// conflict compilation errors when cuviddec.h redefines those to the 64bit
|
|
274
|
+
// versions.
|
|
275
|
+
CUresult CUDAAPI cuvidMapVideoFrame(
|
|
276
|
+
CUvideodecoder decoder,
|
|
277
|
+
int pixIndex,
|
|
278
|
+
unsigned int* framePtr,
|
|
279
|
+
unsigned int* pitch,
|
|
280
|
+
CUVIDPROCPARAMS* procParams) {
|
|
281
|
+
TORCH_CHECK(
|
|
282
|
+
facebook::torchcodec::dl_cuvidMapVideoFrame,
|
|
283
|
+
"cuvidMapVideoFrame called but NVCUVID not loaded!");
|
|
284
|
+
return facebook::torchcodec::dl_cuvidMapVideoFrame(
|
|
285
|
+
decoder, pixIndex, framePtr, pitch, procParams);
|
|
286
|
+
}
|
|
287
|
+
|
|
288
|
+
CUresult CUDAAPI
|
|
289
|
+
cuvidUnmapVideoFrame(CUvideodecoder decoder, unsigned int framePtr) {
|
|
290
|
+
TORCH_CHECK(
|
|
291
|
+
facebook::torchcodec::dl_cuvidUnmapVideoFrame,
|
|
292
|
+
"cuvidUnmapVideoFrame called but NVCUVID not loaded!");
|
|
293
|
+
return facebook::torchcodec::dl_cuvidUnmapVideoFrame(decoder, framePtr);
|
|
294
|
+
}
|
|
295
|
+
#endif
|
|
296
|
+
|
|
297
|
+
CUresult CUDAAPI cuvidMapVideoFrame64(
|
|
298
|
+
CUvideodecoder decoder,
|
|
299
|
+
int pixIndex,
|
|
300
|
+
unsigned long long* framePtr,
|
|
301
|
+
unsigned int* pitch,
|
|
302
|
+
CUVIDPROCPARAMS* procParams) {
|
|
303
|
+
TORCH_CHECK(
|
|
304
|
+
facebook::torchcodec::dl_cuvidMapVideoFrame64,
|
|
305
|
+
"cuvidMapVideoFrame64 called but NVCUVID not loaded!");
|
|
306
|
+
return facebook::torchcodec::dl_cuvidMapVideoFrame64(
|
|
307
|
+
decoder, pixIndex, framePtr, pitch, procParams);
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
CUresult CUDAAPI
|
|
311
|
+
cuvidUnmapVideoFrame64(CUvideodecoder decoder, unsigned long long framePtr) {
|
|
312
|
+
TORCH_CHECK(
|
|
313
|
+
facebook::torchcodec::dl_cuvidUnmapVideoFrame64,
|
|
314
|
+
"cuvidUnmapVideoFrame64 called but NVCUVID not loaded!");
|
|
315
|
+
return facebook::torchcodec::dl_cuvidUnmapVideoFrame64(decoder, framePtr);
|
|
316
|
+
}
|
|
317
|
+
|
|
318
|
+
} // extern "C"
|
|
319
|
+
|
|
320
|
+
#endif // FBCODE_CAFFE2
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
// Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
// All rights reserved.
|
|
3
|
+
//
|
|
4
|
+
// This source code is licensed under the BSD-style license found in the
|
|
5
|
+
// LICENSE file in the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
#pragma once
|
|
8
|
+
|
|
9
|
+
namespace facebook::torchcodec {
|
|
10
|
+
|
|
11
|
+
// See note in corresponding cpp file
|
|
12
|
+
bool loadNVCUVIDLibrary();
|
|
13
|
+
|
|
14
|
+
} // namespace facebook::torchcodec
|
torchcodec/_core/NVDECCache.cpp
CHANGED
|
@@ -7,6 +7,7 @@
|
|
|
7
7
|
#include <torch/types.h>
|
|
8
8
|
#include <mutex>
|
|
9
9
|
|
|
10
|
+
#include "src/torchcodec/_core/CUDACommon.h"
|
|
10
11
|
#include "src/torchcodec/_core/FFMPEGCommon.h"
|
|
11
12
|
#include "src/torchcodec/_core/NVDECCache.h"
|
|
12
13
|
|
|
@@ -19,20 +20,9 @@ extern "C" {
|
|
|
19
20
|
|
|
20
21
|
namespace facebook::torchcodec {
|
|
21
22
|
|
|
22
|
-
NVDECCache& NVDECCache::getCache(
|
|
23
|
-
const int MAX_CUDA_GPUS = 128;
|
|
24
|
-
TORCH_CHECK(
|
|
25
|
-
deviceIndex >= -1 && deviceIndex < MAX_CUDA_GPUS,
|
|
26
|
-
"Invalid device index = ",
|
|
27
|
-
deviceIndex);
|
|
23
|
+
NVDECCache& NVDECCache::getCache(const torch::Device& device) {
|
|
28
24
|
static NVDECCache cacheInstances[MAX_CUDA_GPUS];
|
|
29
|
-
|
|
30
|
-
// TODO NVDEC P3: Unify with existing getNonNegativeDeviceIndex()
|
|
31
|
-
TORCH_CHECK(
|
|
32
|
-
cudaGetDevice(&deviceIndex) == cudaSuccess,
|
|
33
|
-
"Failed to get current CUDA device.");
|
|
34
|
-
}
|
|
35
|
-
return cacheInstances[deviceIndex];
|
|
25
|
+
return cacheInstances[getDeviceIndex(device)];
|
|
36
26
|
}
|
|
37
27
|
|
|
38
28
|
UniqueCUvideodecoder NVDECCache::getDecoder(CUVIDEOFORMAT* videoFormat) {
|
torchcodec/_core/NVDECCache.h
CHANGED
|
@@ -11,6 +11,9 @@
|
|
|
11
11
|
#include <mutex>
|
|
12
12
|
|
|
13
13
|
#include <cuda.h>
|
|
14
|
+
#include <torch/types.h>
|
|
15
|
+
|
|
16
|
+
#include "src/torchcodec/_core/NVCUVIDRuntimeLoader.h"
|
|
14
17
|
#include "src/torchcodec/_core/nvcuvid_include/cuviddec.h"
|
|
15
18
|
#include "src/torchcodec/_core/nvcuvid_include/nvcuvid.h"
|
|
16
19
|
|
|
@@ -36,7 +39,7 @@ using UniqueCUvideodecoder =
|
|
|
36
39
|
// per GPU device, and it is accessed through the static getCache() method.
|
|
37
40
|
class NVDECCache {
|
|
38
41
|
public:
|
|
39
|
-
static NVDECCache& getCache(
|
|
42
|
+
static NVDECCache& getCache(const torch::Device& device);
|
|
40
43
|
|
|
41
44
|
// Get decoder from cache - returns nullptr if none available
|
|
42
45
|
UniqueCUvideodecoder getDecoder(CUVIDEOFORMAT* videoFormat);
|
|
@@ -68,11 +71,6 @@ class NVDECCache {
|
|
|
68
71
|
CacheKey(const CacheKey&) = default;
|
|
69
72
|
CacheKey& operator=(const CacheKey&) = default;
|
|
70
73
|
|
|
71
|
-
// TODONVDEC P2: we only implement operator< which is enough for std::map,
|
|
72
|
-
// but:
|
|
73
|
-
// - we should consider using std::unordered_map
|
|
74
|
-
// - we should consider a more sophisticated and potentially less strict
|
|
75
|
-
// cache key comparison logic
|
|
76
74
|
bool operator<(const CacheKey& other) const {
|
|
77
75
|
return std::tie(
|
|
78
76
|
codecType,
|
|
@@ -12,6 +12,7 @@
|
|
|
12
12
|
#include <sstream>
|
|
13
13
|
#include <stdexcept>
|
|
14
14
|
#include <string_view>
|
|
15
|
+
#include "Metadata.h"
|
|
15
16
|
#include "torch/types.h"
|
|
16
17
|
|
|
17
18
|
namespace facebook::torchcodec {
|
|
@@ -429,7 +430,6 @@ void SingleStreamDecoder::addStream(
|
|
|
429
430
|
TORCH_CHECK(
|
|
430
431
|
deviceInterface_ != nullptr,
|
|
431
432
|
"Failed to create device interface. This should never happen, please report.");
|
|
432
|
-
deviceInterface_->initialize(streamInfo.stream, formatContext_);
|
|
433
433
|
|
|
434
434
|
// TODO_CODE_QUALITY it's pretty meh to have a video-specific logic within
|
|
435
435
|
// addStream() which is supposed to be generic
|
|
@@ -441,7 +441,7 @@ void SingleStreamDecoder::addStream(
|
|
|
441
441
|
|
|
442
442
|
AVCodecContext* codecContext = avcodec_alloc_context3(avCodec);
|
|
443
443
|
TORCH_CHECK(codecContext != nullptr);
|
|
444
|
-
streamInfo.codecContext
|
|
444
|
+
streamInfo.codecContext = makeSharedAVCodecContext(codecContext);
|
|
445
445
|
|
|
446
446
|
int retVal = avcodec_parameters_to_context(
|
|
447
447
|
streamInfo.codecContext.get(), streamInfo.stream->codecpar);
|
|
@@ -453,14 +453,19 @@ void SingleStreamDecoder::addStream(
|
|
|
453
453
|
// Note that we must make sure to register the hardware device context
|
|
454
454
|
// with the codec context before calling avcodec_open2(). Otherwise, decoding
|
|
455
455
|
// will happen on the CPU and not the hardware device.
|
|
456
|
-
deviceInterface_->registerHardwareDeviceWithCodec(
|
|
456
|
+
deviceInterface_->registerHardwareDeviceWithCodec(
|
|
457
|
+
streamInfo.codecContext.get());
|
|
457
458
|
retVal = avcodec_open2(streamInfo.codecContext.get(), avCodec, nullptr);
|
|
458
459
|
TORCH_CHECK(retVal >= AVSUCCESS, getFFMPEGErrorStringFromErrorCode(retVal));
|
|
459
460
|
|
|
460
|
-
codecContext->time_base = streamInfo.stream->time_base;
|
|
461
|
+
streamInfo.codecContext->time_base = streamInfo.stream->time_base;
|
|
462
|
+
|
|
463
|
+
// Initialize the device interface with the codec context
|
|
464
|
+
deviceInterface_->initialize(
|
|
465
|
+
streamInfo.stream, formatContext_, streamInfo.codecContext);
|
|
461
466
|
|
|
462
467
|
containerMetadata_.allStreamMetadata[activeStreamIndex_].codecName =
|
|
463
|
-
std::string(avcodec_get_name(codecContext->codec_id));
|
|
468
|
+
std::string(avcodec_get_name(streamInfo.codecContext->codec_id));
|
|
464
469
|
|
|
465
470
|
// We will only need packets from the active stream, so we tell FFmpeg to
|
|
466
471
|
// discard packets from the other streams. Note that av_read_frame() may still
|
|
@@ -523,6 +528,7 @@ void SingleStreamDecoder::addVideoStream(
|
|
|
523
528
|
if (transform->getOutputFrameDims().has_value()) {
|
|
524
529
|
resizedOutputDims_ = transform->getOutputFrameDims().value();
|
|
525
530
|
}
|
|
531
|
+
transform->validate(streamMetadata);
|
|
526
532
|
|
|
527
533
|
// Note that we are claiming ownership of the transform objects passed in to
|
|
528
534
|
// us.
|
|
@@ -1149,8 +1155,6 @@ void SingleStreamDecoder::maybeSeekToBeforeDesiredPts() {
|
|
|
1149
1155
|
getFFMPEGErrorStringFromErrorCode(status));
|
|
1150
1156
|
|
|
1151
1157
|
decodeStats_.numFlushes++;
|
|
1152
|
-
avcodec_flush_buffers(streamInfo.codecContext.get());
|
|
1153
|
-
|
|
1154
1158
|
deviceInterface_->flush();
|
|
1155
1159
|
}
|
|
1156
1160
|
|
|
@@ -1169,24 +1173,16 @@ UniqueAVFrame SingleStreamDecoder::decodeAVFrame(
|
|
|
1169
1173
|
cursorWasJustSet_ = false;
|
|
1170
1174
|
}
|
|
1171
1175
|
|
|
1172
|
-
StreamInfo& streamInfo = streamInfos_[activeStreamIndex_];
|
|
1173
1176
|
UniqueAVFrame avFrame(av_frame_alloc());
|
|
1174
1177
|
AutoAVPacket autoAVPacket;
|
|
1175
1178
|
int status = AVSUCCESS;
|
|
1176
1179
|
bool reachedEOF = false;
|
|
1177
1180
|
|
|
1178
|
-
//
|
|
1179
|
-
//
|
|
1180
|
-
//
|
|
1181
|
-
// receiveFrame and sendPacket could just be calling avcodec_receive_frame and
|
|
1182
|
-
// avcodec_send_packet. This would make the decoding loop even more generic.
|
|
1181
|
+
// The default implementation uses avcodec_receive_frame and
|
|
1182
|
+
// avcodec_send_packet, while specialized interfaces can override for
|
|
1183
|
+
// hardware-specific optimizations.
|
|
1183
1184
|
while (true) {
|
|
1184
|
-
|
|
1185
|
-
status = deviceInterface_->receiveFrame(avFrame);
|
|
1186
|
-
} else {
|
|
1187
|
-
status =
|
|
1188
|
-
avcodec_receive_frame(streamInfo.codecContext.get(), avFrame.get());
|
|
1189
|
-
}
|
|
1185
|
+
status = deviceInterface_->receiveFrame(avFrame);
|
|
1190
1186
|
|
|
1191
1187
|
if (status != AVSUCCESS && status != AVERROR(EAGAIN)) {
|
|
1192
1188
|
// Non-retriable error
|
|
@@ -1222,13 +1218,7 @@ UniqueAVFrame SingleStreamDecoder::decodeAVFrame(
|
|
|
1222
1218
|
|
|
1223
1219
|
if (status == AVERROR_EOF) {
|
|
1224
1220
|
// End of file reached. We must drain the decoder
|
|
1225
|
-
|
|
1226
|
-
status = deviceInterface_->sendEOFPacket();
|
|
1227
|
-
} else {
|
|
1228
|
-
status = avcodec_send_packet(
|
|
1229
|
-
streamInfo.codecContext.get(),
|
|
1230
|
-
/*avpkt=*/nullptr);
|
|
1231
|
-
}
|
|
1221
|
+
status = deviceInterface_->sendEOFPacket();
|
|
1232
1222
|
TORCH_CHECK(
|
|
1233
1223
|
status >= AVSUCCESS,
|
|
1234
1224
|
"Could not flush decoder: ",
|
|
@@ -1253,11 +1243,7 @@ UniqueAVFrame SingleStreamDecoder::decodeAVFrame(
|
|
|
1253
1243
|
|
|
1254
1244
|
// We got a valid packet. Send it to the decoder, and we'll receive it in
|
|
1255
1245
|
// the next iteration.
|
|
1256
|
-
|
|
1257
|
-
status = deviceInterface_->sendPacket(packet);
|
|
1258
|
-
} else {
|
|
1259
|
-
status = avcodec_send_packet(streamInfo.codecContext.get(), packet.get());
|
|
1260
|
-
}
|
|
1246
|
+
status = deviceInterface_->sendPacket(packet);
|
|
1261
1247
|
TORCH_CHECK(
|
|
1262
1248
|
status >= AVSUCCESS,
|
|
1263
1249
|
"Could not push packet to decoder: ",
|
|
@@ -1716,4 +1702,9 @@ double SingleStreamDecoder::getPtsSecondsForFrame(int64_t frameIndex) {
|
|
|
1716
1702
|
streamInfo.allFrames[frameIndex].pts, streamInfo.timeBase);
|
|
1717
1703
|
}
|
|
1718
1704
|
|
|
1705
|
+
std::string SingleStreamDecoder::getDeviceInterfaceDetails() const {
|
|
1706
|
+
TORCH_CHECK(deviceInterface_ != nullptr, "Device interface doesn't exist.");
|
|
1707
|
+
return deviceInterface_->getDetails();
|
|
1708
|
+
}
|
|
1709
|
+
|
|
1719
1710
|
} // namespace facebook::torchcodec
|
|
@@ -186,6 +186,8 @@ class SingleStreamDecoder {
|
|
|
186
186
|
DecodeStats getDecodeStats() const;
|
|
187
187
|
void resetDecodeStats();
|
|
188
188
|
|
|
189
|
+
std::string getDeviceInterfaceDetails() const;
|
|
190
|
+
|
|
189
191
|
private:
|
|
190
192
|
// --------------------------------------------------------------------------
|
|
191
193
|
// STREAMINFO AND ASSOCIATED STRUCTS
|
|
@@ -221,7 +223,7 @@ class SingleStreamDecoder {
|
|
|
221
223
|
AVMediaType avMediaType = AVMEDIA_TYPE_UNKNOWN;
|
|
222
224
|
|
|
223
225
|
AVRational timeBase = {};
|
|
224
|
-
|
|
226
|
+
SharedAVCodecContext codecContext;
|
|
225
227
|
|
|
226
228
|
// The FrameInfo indices we built when scanFileAndUpdateMetadataAndIndex was
|
|
227
229
|
// called.
|
|
@@ -311,7 +313,7 @@ class SingleStreamDecoder {
|
|
|
311
313
|
int streamIndex,
|
|
312
314
|
AVMediaType mediaType,
|
|
313
315
|
const torch::Device& device = torch::kCPU,
|
|
314
|
-
const std::string_view deviceVariant = "
|
|
316
|
+
const std::string_view deviceVariant = "ffmpeg",
|
|
315
317
|
std::optional<int> ffmpegThreadCount = std::nullopt);
|
|
316
318
|
|
|
317
319
|
// Returns the "best" stream index for a given media type. The "best" is
|
torchcodec/_core/StreamOptions.h
CHANGED
|
@@ -41,8 +41,8 @@ struct VideoStreamOptions {
|
|
|
41
41
|
|
|
42
42
|
// By default we use CPU for decoding for both C++ and python users.
|
|
43
43
|
torch::Device device = torch::kCPU;
|
|
44
|
-
// Device variant (e.g., "
|
|
45
|
-
std::string_view deviceVariant = "
|
|
44
|
+
// Device variant (e.g., "ffmpeg", "beta", etc.)
|
|
45
|
+
std::string_view deviceVariant = "ffmpeg";
|
|
46
46
|
|
|
47
47
|
// Encoding options
|
|
48
48
|
// TODO-VideoEncoder: Consider adding other optional fields here
|
torchcodec/_core/Transform.cpp
CHANGED
|
@@ -57,4 +57,31 @@ int ResizeTransform::getSwsFlags() const {
|
|
|
57
57
|
return toSwsInterpolation(interpolationMode_);
|
|
58
58
|
}
|
|
59
59
|
|
|
60
|
+
// Builds a crop whose output size is `dims` and whose crop position is
// (x, y). Negative positions are rejected here; checks against the actual
// frame size happen later in validate().
CropTransform::CropTransform(const FrameDims& dims, int x, int y)
    : outputDims_(dims), x_(x), y_(y) {
  const bool xIsNonNegative = (x >= 0);
  TORCH_CHECK(xIsNonNegative, "Crop x position must be >= 0, got: ", x);

  const bool yIsNonNegative = (y >= 0);
  TORCH_CHECK(yIsNonNegative, "Crop y position must be >= 0, got: ", y);
}
|
|
65
|
+
|
|
66
|
+
std::string CropTransform::getFilterGraphCpu() const {
|
|
67
|
+
return "crop=" + std::to_string(outputDims_.width) + ":" +
|
|
68
|
+
std::to_string(outputDims_.height) + ":" + std::to_string(x_) + ":" +
|
|
69
|
+
std::to_string(y_) + ":exact=1";
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
std::optional<FrameDims> CropTransform::getOutputFrameDims() const {
|
|
73
|
+
return outputDims_;
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
// Checks that the crop rectangle fits inside the stream's frame, using the
// width and height recorded in `streamMetadata`. For each axis, both the crop
// position and the far edge of the crop (position + output size) must not
// exceed the corresponding stream dimension. Any violation fails a
// TORCH_CHECK.
// NOTE(review): the types of streamMetadata.width/height are not visible
// here — confirm how a missing dimension behaves in these comparisons.
void CropTransform::validate(const StreamMetadata& streamMetadata) const {
  TORCH_CHECK(x_ <= streamMetadata.width, "Crop x position out of bounds");
  // This statement previously had no terminating semicolon and only compiled
  // because of how the TORCH_CHECK macro happens to expand.
  TORCH_CHECK(
      x_ + outputDims_.width <= streamMetadata.width,
      "Crop x position out of bounds");
  TORCH_CHECK(y_ <= streamMetadata.height, "Crop y position out of bounds");
  TORCH_CHECK(
      y_ + outputDims_.height <= streamMetadata.height,
      "Crop y position out of bounds");
}
|
|
86
|
+
|
|
60
87
|
} // namespace facebook::torchcodec
|
torchcodec/_core/Transform.h
CHANGED
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
#include <optional>
|
|
10
10
|
#include <string>
|
|
11
11
|
#include "src/torchcodec/_core/Frame.h"
|
|
12
|
+
#include "src/torchcodec/_core/Metadata.h"
|
|
12
13
|
|
|
13
14
|
namespace facebook::torchcodec {
|
|
14
15
|
|
|
@@ -33,6 +34,16 @@ class Transform {
|
|
|
33
34
|
virtual bool isResize() const {
|
|
34
35
|
return false;
|
|
35
36
|
}
|
|
37
|
+
|
|
38
|
+
// The validity of some transforms depends on the characteristics of the
|
|
39
|
+
// AVStream they're being applied to. For example, some transforms will
|
|
40
|
+
// specify coordinates inside a frame, we need to validate that those are
|
|
41
|
+
// within the frame's bounds.
|
|
42
|
+
//
|
|
43
|
+
// Note that the validation function does not return anything. We expect
|
|
44
|
+
// invalid configurations to throw an exception.
|
|
45
|
+
virtual void validate(
|
|
46
|
+
[[maybe_unused]] const StreamMetadata& streamMetadata) const {}
|
|
36
47
|
};
|
|
37
48
|
|
|
38
49
|
class ResizeTransform : public Transform {
|
|
@@ -56,4 +67,18 @@ class ResizeTransform : public Transform {
|
|
|
56
67
|
InterpolationMode interpolationMode_;
|
|
57
68
|
};
|
|
58
69
|
|
|
70
|
+
class CropTransform : public Transform {
|
|
71
|
+
public:
|
|
72
|
+
CropTransform(const FrameDims& dims, int x, int y);
|
|
73
|
+
|
|
74
|
+
std::string getFilterGraphCpu() const override;
|
|
75
|
+
std::optional<FrameDims> getOutputFrameDims() const override;
|
|
76
|
+
void validate(const StreamMetadata& streamMetadata) const override;
|
|
77
|
+
|
|
78
|
+
private:
|
|
79
|
+
FrameDims outputDims_;
|
|
80
|
+
int x_;
|
|
81
|
+
int y_;
|
|
82
|
+
};
|
|
83
|
+
|
|
59
84
|
} // namespace facebook::torchcodec
|
torchcodec/_core/__init__.py
CHANGED
|
@@ -14,6 +14,7 @@ from ._metadata import (
|
|
|
14
14
|
)
|
|
15
15
|
from .ops import (
|
|
16
16
|
_add_video_stream,
|
|
17
|
+
_get_backend_details,
|
|
17
18
|
_get_key_frame_indices,
|
|
18
19
|
_test_frame_pts_equality,
|
|
19
20
|
add_audio_stream,
|
|
@@ -26,6 +27,8 @@ from .ops import (
|
|
|
26
27
|
encode_audio_to_file_like,
|
|
27
28
|
encode_audio_to_tensor,
|
|
28
29
|
encode_video_to_file,
|
|
30
|
+
encode_video_to_file_like,
|
|
31
|
+
encode_video_to_tensor,
|
|
29
32
|
get_ffmpeg_library_versions,
|
|
30
33
|
get_frame_at_index,
|
|
31
34
|
get_frame_at_pts,
|