torchcodec 0.3.0__cp311-cp311-manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of torchcodec might be problematic. Click here for more details.
- torchcodec/__init__.py +16 -0
- torchcodec/_core/AVIOBytesContext.cpp +70 -0
- torchcodec/_core/AVIOBytesContext.h +32 -0
- torchcodec/_core/AVIOContextHolder.cpp +50 -0
- torchcodec/_core/AVIOContextHolder.h +65 -0
- torchcodec/_core/AVIOFileLikeContext.cpp +80 -0
- torchcodec/_core/AVIOFileLikeContext.h +54 -0
- torchcodec/_core/CMakeLists.txt +237 -0
- torchcodec/_core/CudaDeviceInterface.cpp +289 -0
- torchcodec/_core/CudaDeviceInterface.h +34 -0
- torchcodec/_core/DeviceInterface.cpp +88 -0
- torchcodec/_core/DeviceInterface.h +66 -0
- torchcodec/_core/Encoder.cpp +319 -0
- torchcodec/_core/Encoder.h +39 -0
- torchcodec/_core/FFMPEGCommon.cpp +264 -0
- torchcodec/_core/FFMPEGCommon.h +180 -0
- torchcodec/_core/Frame.h +47 -0
- torchcodec/_core/Metadata.h +70 -0
- torchcodec/_core/SingleStreamDecoder.cpp +1947 -0
- torchcodec/_core/SingleStreamDecoder.h +462 -0
- torchcodec/_core/StreamOptions.h +49 -0
- torchcodec/_core/__init__.py +39 -0
- torchcodec/_core/_metadata.py +277 -0
- torchcodec/_core/custom_ops.cpp +681 -0
- torchcodec/_core/fetch_and_expose_non_gpl_ffmpeg_libs.cmake +226 -0
- torchcodec/_core/ops.py +381 -0
- torchcodec/_core/pybind_ops.cpp +45 -0
- torchcodec/_frame.py +145 -0
- torchcodec/_internally_replaced_utils.py +53 -0
- torchcodec/_samplers/__init__.py +7 -0
- torchcodec/_samplers/video_clip_sampler.py +430 -0
- torchcodec/decoders/__init__.py +11 -0
- torchcodec/decoders/_audio_decoder.py +168 -0
- torchcodec/decoders/_decoder_utils.py +52 -0
- torchcodec/decoders/_video_decoder.py +399 -0
- torchcodec/libtorchcodec_custom_ops4.so +0 -0
- torchcodec/libtorchcodec_custom_ops5.so +0 -0
- torchcodec/libtorchcodec_custom_ops6.so +0 -0
- torchcodec/libtorchcodec_custom_ops7.so +0 -0
- torchcodec/libtorchcodec_decoder4.so +0 -0
- torchcodec/libtorchcodec_decoder5.so +0 -0
- torchcodec/libtorchcodec_decoder6.so +0 -0
- torchcodec/libtorchcodec_decoder7.so +0 -0
- torchcodec/libtorchcodec_pybind_ops4.so +0 -0
- torchcodec/libtorchcodec_pybind_ops5.so +0 -0
- torchcodec/libtorchcodec_pybind_ops6.so +0 -0
- torchcodec/libtorchcodec_pybind_ops7.so +0 -0
- torchcodec/samplers/__init__.py +2 -0
- torchcodec/samplers/_common.py +84 -0
- torchcodec/samplers/_index_based.py +285 -0
- torchcodec/samplers/_time_based.py +348 -0
- torchcodec/version.py +2 -0
- torchcodec-0.3.0.dist-info/LICENSE +28 -0
- torchcodec-0.3.0.dist-info/METADATA +280 -0
- torchcodec-0.3.0.dist-info/RECORD +57 -0
- torchcodec-0.3.0.dist-info/WHEEL +5 -0
- torchcodec-0.3.0.dist-info/top_level.txt +2 -0
torchcodec/__init__.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the BSD-style license found in the
|
|
5
|
+
# LICENSE file in the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
# Note: usort wants to put Frame and FrameBatch after decoders and samplers,
|
|
8
|
+
# but that results in circular import.
|
|
9
|
+
from ._frame import AudioSamples, Frame, FrameBatch # usort:skip # noqa
|
|
10
|
+
from . import decoders, samplers # noqa
|
|
11
|
+
|
|
12
|
+
try:
|
|
13
|
+
# Note that version.py is generated during install.
|
|
14
|
+
from .version import __version__ # noqa: F401
|
|
15
|
+
except Exception:
|
|
16
|
+
pass
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
// Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
// All rights reserved.
|
|
3
|
+
//
|
|
4
|
+
// This source code is licensed under the BSD-style license found in the
|
|
5
|
+
// LICENSE file in the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
#include "src/torchcodec/_core/AVIOBytesContext.h"
|
|
8
|
+
#include <torch/types.h>
|
|
9
|
+
|
|
10
|
+
namespace facebook::torchcodec {
|
|
11
|
+
|
|
12
|
+
AVIOBytesContext::AVIOBytesContext(const void* data, int64_t dataSize)
|
|
13
|
+
: dataContext_{static_cast<const uint8_t*>(data), dataSize, 0} {
|
|
14
|
+
TORCH_CHECK(data != nullptr, "Video data buffer cannot be nullptr!");
|
|
15
|
+
TORCH_CHECK(dataSize > 0, "Video data size must be positive");
|
|
16
|
+
createAVIOContext(&read, &seek, &dataContext_);
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
// The signature of this function is defined by FFMPEG.
|
|
20
|
+
int AVIOBytesContext::read(void* opaque, uint8_t* buf, int buf_size) {
|
|
21
|
+
auto dataContext = static_cast<DataContext*>(opaque);
|
|
22
|
+
TORCH_CHECK(
|
|
23
|
+
dataContext->current <= dataContext->size,
|
|
24
|
+
"Tried to read outside of the buffer: current=",
|
|
25
|
+
dataContext->current,
|
|
26
|
+
", size=",
|
|
27
|
+
dataContext->size);
|
|
28
|
+
|
|
29
|
+
int64_t numBytesRead = std::min(
|
|
30
|
+
static_cast<int64_t>(buf_size), dataContext->size - dataContext->current);
|
|
31
|
+
|
|
32
|
+
TORCH_CHECK(
|
|
33
|
+
numBytesRead >= 0,
|
|
34
|
+
"Tried to read negative bytes: numBytesRead=",
|
|
35
|
+
numBytesRead,
|
|
36
|
+
", size=",
|
|
37
|
+
dataContext->size,
|
|
38
|
+
", current=",
|
|
39
|
+
dataContext->current);
|
|
40
|
+
|
|
41
|
+
if (numBytesRead == 0) {
|
|
42
|
+
return AVERROR_EOF;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
std::memcpy(buf, dataContext->data + dataContext->current, numBytesRead);
|
|
46
|
+
dataContext->current += numBytesRead;
|
|
47
|
+
return numBytesRead;
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
// The signature of this function is defined by FFMPEG.
|
|
51
|
+
int64_t AVIOBytesContext::seek(void* opaque, int64_t offset, int whence) {
|
|
52
|
+
auto dataContext = static_cast<DataContext*>(opaque);
|
|
53
|
+
int64_t ret = -1;
|
|
54
|
+
|
|
55
|
+
switch (whence) {
|
|
56
|
+
case AVSEEK_SIZE:
|
|
57
|
+
ret = dataContext->size;
|
|
58
|
+
break;
|
|
59
|
+
case SEEK_SET:
|
|
60
|
+
dataContext->current = offset;
|
|
61
|
+
ret = offset;
|
|
62
|
+
break;
|
|
63
|
+
default:
|
|
64
|
+
break;
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
return ret;
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
} // namespace facebook::torchcodec
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
// Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
// All rights reserved.
|
|
3
|
+
//
|
|
4
|
+
// This source code is licensed under the BSD-style license found in the
|
|
5
|
+
// LICENSE file in the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
#pragma once
|
|
8
|
+
|
|
9
|
+
#include "src/torchcodec/_core/AVIOContextHolder.h"
|
|
10
|
+
|
|
11
|
+
namespace facebook::torchcodec {
|
|
12
|
+
|
|
13
|
+
// Enables users to pass in the entire video as bytes. Our read and seek
|
|
14
|
+
// functions then traverse the bytes in memory.
|
|
15
|
+
class AVIOBytesContext : public AVIOContextHolder {
|
|
16
|
+
public:
|
|
17
|
+
explicit AVIOBytesContext(const void* data, int64_t dataSize);
|
|
18
|
+
|
|
19
|
+
private:
|
|
20
|
+
struct DataContext {
|
|
21
|
+
const uint8_t* data;
|
|
22
|
+
int64_t size;
|
|
23
|
+
int64_t current;
|
|
24
|
+
};
|
|
25
|
+
|
|
26
|
+
static int read(void* opaque, uint8_t* buf, int buf_size);
|
|
27
|
+
static int64_t seek(void* opaque, int64_t offset, int whence);
|
|
28
|
+
|
|
29
|
+
DataContext dataContext_;
|
|
30
|
+
};
|
|
31
|
+
|
|
32
|
+
} // namespace facebook::torchcodec
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
// Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
// All rights reserved.
|
|
3
|
+
//
|
|
4
|
+
// This source code is licensed under the BSD-style license found in the
|
|
5
|
+
// LICENSE file in the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
#include "src/torchcodec/_core/AVIOContextHolder.h"
|
|
8
|
+
#include <torch/types.h>
|
|
9
|
+
|
|
10
|
+
namespace facebook::torchcodec {
|
|
11
|
+
|
|
12
|
+
// Allocates an I/O buffer of bufferSize bytes and an AVIOContext that uses
// it, wiring in the given read and seek callbacks. heldData is handed to
// avio_alloc_context() as the opaque pointer the callbacks receive. No write
// callback is installed.
//
// Raises (via TORCH_CHECK) when bufferSize is not positive or when either
// allocation fails. On failure any previously held context is left untouched.
void AVIOContextHolder::createAVIOContext(
    AVIOReadFunction read,
    AVIOSeekFunction seek,
    void* heldData,
    int bufferSize) {
  TORCH_CHECK(
      bufferSize > 0,
      "Buffer size must be greater than 0; is " + std::to_string(bufferSize));
  auto buffer = static_cast<uint8_t*>(av_malloc(bufferSize));
  TORCH_CHECK(
      buffer != nullptr,
      "Failed to allocate buffer of size " + std::to_string(bufferSize));

  AVIOContext* newContext = avio_alloc_context(
      buffer,
      bufferSize,
      0,
      heldData,
      read,
      nullptr, // write function; not supported yet
      seek);

  if (newContext == nullptr) {
    av_freep(&buffer);
    TORCH_CHECK(false, "Failed to allocate AVIOContext");
  }

  // Replacing the held context frees the AVIOContext itself but not the
  // buffer it points to (the destructor frees that buffer separately for the
  // same reason). Release the old buffer first so that calling this function
  // more than once does not leak it.
  if (avioContext_) {
    av_freep(&avioContext_->buffer);
  }
  avioContext_.reset(newContext);
}
|
|
39
|
+
|
|
40
|
+
AVIOContextHolder::~AVIOContextHolder() {
|
|
41
|
+
if (avioContext_) {
|
|
42
|
+
av_freep(&avioContext_->buffer);
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
AVIOContext* AVIOContextHolder::getAVIOContext() {
|
|
47
|
+
return avioContext_.get();
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
} // namespace facebook::torchcodec
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
// Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
// All rights reserved.
|
|
3
|
+
//
|
|
4
|
+
// This source code is licensed under the BSD-style license found in the
|
|
5
|
+
// LICENSE file in the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
#pragma once
|
|
8
|
+
|
|
9
|
+
#include "src/torchcodec/_core/FFMPEGCommon.h"
|
|
10
|
+
|
|
11
|
+
namespace facebook::torchcodec {
|
|
12
|
+
|
|
13
|
+
// The AVIOContextHolder serves several purposes:
|
|
14
|
+
//
|
|
15
|
+
// 1. It is a smart pointer for the AVIOContext. It has the logic to create
|
|
16
|
+
// a new AVIOContext and will appropriately free the AVIOContext when it
|
|
17
|
+
// goes out of scope. Note that this requires more than just having a
|
|
18
|
+
// UniqueAVIOContext, as the AVIOContext points to a buffer which must be
|
|
19
|
+
// freed.
|
|
20
|
+
// 2. It is a base class for AVIOContext specializations. When specializing a
|
|
21
|
+
// AVIOContext, we need to provide four things:
|
|
22
|
+
// 1. A read callback function.
|
|
23
|
+
// 2. A seek callback function.
|
|
24
|
+
// 3. A write callback function. (Not supported yet; it's for encoding.)
|
|
25
|
+
// 4. A pointer to some context object that has the same lifetime as the
|
|
26
|
+
// AVIOContext itself. This context object holds the custom state that
|
|
27
|
+
// tracks the custom behavior of reading, seeking and writing. It is
|
|
28
|
+
// provided upon AVIOContext creation and to the read, seek and
|
|
29
|
+
// write callback functions.
|
|
30
|
+
// While it's not required, it is natural for the derived classes to make
|
|
31
|
+
// all of the above members. Derived classes need to call
|
|
32
|
+
// createAVIOContext(), ideally in their constructor.
|
|
33
|
+
// 3. A generic handle for those that just need to manage having access to an
|
|
34
|
+
// AVIOContext, but aren't necessarily concerned with how it was customized:
|
|
35
|
+
// typically, the SingleStreamDecoder.
|
|
36
|
+
class AVIOContextHolder {
|
|
37
|
+
public:
|
|
38
|
+
virtual ~AVIOContextHolder();
|
|
39
|
+
AVIOContext* getAVIOContext();
|
|
40
|
+
|
|
41
|
+
protected:
|
|
42
|
+
// Make constructor protected to prevent anyone from constructing
|
|
43
|
+
// an AVIOContextHolder without deriving it. (Ordinarily this would be
|
|
44
|
+
// enforced by having pure virtual methods, but we don't have any.)
|
|
45
|
+
AVIOContextHolder() = default;
|
|
46
|
+
|
|
47
|
+
// These signatures are defined by FFmpeg.
|
|
48
|
+
using AVIOReadFunction = int (*)(void*, uint8_t*, int);
|
|
49
|
+
using AVIOSeekFunction = int64_t (*)(void*, int64_t, int);
|
|
50
|
+
|
|
51
|
+
// Deriving classes should call this function in their constructor.
|
|
52
|
+
void createAVIOContext(
|
|
53
|
+
AVIOReadFunction read,
|
|
54
|
+
AVIOSeekFunction seek,
|
|
55
|
+
void* heldData,
|
|
56
|
+
int bufferSize = defaultBufferSize);
|
|
57
|
+
|
|
58
|
+
private:
|
|
59
|
+
UniqueAVIOContext avioContext_;
|
|
60
|
+
|
|
61
|
+
// Defaults to 64 KB
|
|
62
|
+
static const int defaultBufferSize = 64 * 1024;
|
|
63
|
+
};
|
|
64
|
+
|
|
65
|
+
} // namespace facebook::torchcodec
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
// Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
// All rights reserved.
|
|
3
|
+
//
|
|
4
|
+
// This source code is licensed under the BSD-style license found in the
|
|
5
|
+
// LICENSE file in the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
#include "src/torchcodec/_core/AVIOFileLikeContext.h"
|
|
8
|
+
#include <torch/types.h>
|
|
9
|
+
|
|
10
|
+
namespace facebook::torchcodec {
|
|
11
|
+
|
|
12
|
+
AVIOFileLikeContext::AVIOFileLikeContext(py::object fileLike)
|
|
13
|
+
: fileLike_{UniquePyObject(new py::object(fileLike))} {
|
|
14
|
+
{
|
|
15
|
+
// TODO: Is it necessary to acquire the GIL here? Is it maybe even
|
|
16
|
+
// harmful? At the moment, this is only called from within a pybind
|
|
17
|
+
// function, and pybind guarantees we have the GIL.
|
|
18
|
+
py::gil_scoped_acquire gil;
|
|
19
|
+
TORCH_CHECK(
|
|
20
|
+
py::hasattr(fileLike, "read"),
|
|
21
|
+
"File like object must implement a read method.");
|
|
22
|
+
TORCH_CHECK(
|
|
23
|
+
py::hasattr(fileLike, "seek"),
|
|
24
|
+
"File like object must implement a seek method.");
|
|
25
|
+
}
|
|
26
|
+
createAVIOContext(&read, &seek, &fileLike_);
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
int AVIOFileLikeContext::read(void* opaque, uint8_t* buf, int buf_size) {
|
|
30
|
+
auto fileLike = static_cast<UniquePyObject*>(opaque);
|
|
31
|
+
|
|
32
|
+
// Note that we acquire the GIL outside of the loop. This is likely more
|
|
33
|
+
// efficient than releasing and acquiring it each loop iteration.
|
|
34
|
+
py::gil_scoped_acquire gil;
|
|
35
|
+
|
|
36
|
+
int totalNumRead = 0;
|
|
37
|
+
while (totalNumRead < buf_size) {
|
|
38
|
+
int request = buf_size - totalNumRead;
|
|
39
|
+
|
|
40
|
+
// The Python method returns the actual bytes, which we access through the
|
|
41
|
+
// py::bytes wrapper. That wrapper, however, does not provide us access to
|
|
42
|
+
// the underlying data pointer, which we need for the memcpy below. So we
|
|
43
|
+
// convert the bytes to a string_view to get access to the data pointer.
|
|
44
|
+
// Becauase it's a view and not a copy, it should be cheap.
|
|
45
|
+
auto bytesRead = static_cast<py::bytes>((*fileLike)->attr("read")(request));
|
|
46
|
+
auto bytesView = static_cast<std::string_view>(bytesRead);
|
|
47
|
+
|
|
48
|
+
int numBytesRead = static_cast<int>(bytesView.size());
|
|
49
|
+
if (numBytesRead == 0) {
|
|
50
|
+
break;
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
TORCH_CHECK(
|
|
54
|
+
numBytesRead <= request,
|
|
55
|
+
"Requested up to ",
|
|
56
|
+
request,
|
|
57
|
+
" bytes but, received ",
|
|
58
|
+
numBytesRead,
|
|
59
|
+
" bytes. The given object does not conform to read protocol of file object.");
|
|
60
|
+
|
|
61
|
+
std::memcpy(buf, bytesView.data(), numBytesRead);
|
|
62
|
+
buf += numBytesRead;
|
|
63
|
+
totalNumRead += numBytesRead;
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
return totalNumRead == 0 ? AVERROR_EOF : totalNumRead;
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
int64_t AVIOFileLikeContext::seek(void* opaque, int64_t offset, int whence) {
|
|
70
|
+
// We do not know the file size.
|
|
71
|
+
if (whence == AVSEEK_SIZE) {
|
|
72
|
+
return AVERROR(EIO);
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
auto fileLike = static_cast<UniquePyObject*>(opaque);
|
|
76
|
+
py::gil_scoped_acquire gil;
|
|
77
|
+
return py::cast<int64_t>((*fileLike)->attr("seek")(offset, whence));
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
} // namespace facebook::torchcodec
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
// Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
// All rights reserved.
|
|
3
|
+
//
|
|
4
|
+
// This source code is licensed under the BSD-style license found in the
|
|
5
|
+
// LICENSE file in the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
#pragma once
|
|
8
|
+
|
|
9
|
+
#include <pybind11/pybind11.h>
|
|
10
|
+
#include <pybind11/stl.h>
|
|
11
|
+
|
|
12
|
+
#include "src/torchcodec/_core/AVIOContextHolder.h"
|
|
13
|
+
|
|
14
|
+
namespace py = pybind11;
|
|
15
|
+
|
|
16
|
+
namespace facebook::torchcodec {
|
|
17
|
+
|
|
18
|
+
// Enables users to pass in a Python file-like object. We then forward all read
|
|
19
|
+
// and seek calls back up to the methods on the Python object.
|
|
20
|
+
class AVIOFileLikeContext : public AVIOContextHolder {
|
|
21
|
+
public:
|
|
22
|
+
explicit AVIOFileLikeContext(py::object fileLike);
|
|
23
|
+
|
|
24
|
+
private:
|
|
25
|
+
static int read(void* opaque, uint8_t* buf, int buf_size);
|
|
26
|
+
static int64_t seek(void* opaque, int64_t offset, int whence);
|
|
27
|
+
|
|
28
|
+
// Note that we dynamically allocate the Python object because we need to
|
|
29
|
+
// strictly control when its destructor is called. We must hold the GIL
|
|
30
|
+
// when its destructor gets called, as it needs to update the reference
|
|
31
|
+
// count. It's easiest to control that when it's dynamic memory. Otherwise,
|
|
32
|
+
// we'd have to ensure whatever enclosing scope holds the object has the GIL,
|
|
33
|
+
// and that's, at least, hard. For all of the common pitfalls, see:
|
|
34
|
+
//
|
|
35
|
+
// https://pybind11.readthedocs.io/en/stable/advanced/misc.html#common-sources-of-global-interpreter-lock-errors
|
|
36
|
+
//
|
|
37
|
+
// We maintain a reference to the file-like object because the file-like
|
|
38
|
+
// object that was created on the Python side must live as long as our
|
|
39
|
+
// potential use. That is, even if there are no more references to the object
|
|
40
|
+
// on the Python side, we require that the object is still live.
|
|
41
|
+
struct PyObjectDeleter {
|
|
42
|
+
inline void operator()(py::object* obj) const {
|
|
43
|
+
if (obj) {
|
|
44
|
+
py::gil_scoped_acquire gil;
|
|
45
|
+
delete obj;
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
};
|
|
49
|
+
|
|
50
|
+
using UniquePyObject = std::unique_ptr<py::object, PyObjectDeleter>;
|
|
51
|
+
UniquePyObject fileLike_;
|
|
52
|
+
};
|
|
53
|
+
|
|
54
|
+
} // namespace facebook::torchcodec
|
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
cmake_minimum_required(VERSION 3.18)
|
|
2
|
+
project(TorchCodec)
|
|
3
|
+
set(CMAKE_CXX_STANDARD 17)
|
|
4
|
+
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
|
5
|
+
|
|
6
|
+
set(PYBIND11_FINDPYTHON ON)
|
|
7
|
+
find_package(pybind11 REQUIRED)
|
|
8
|
+
find_package(Torch REQUIRED)
|
|
9
|
+
find_package(Python3 ${PYTHON_VERSION} EXACT COMPONENTS Development)
|
|
10
|
+
|
|
11
|
+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -pedantic -Werror ${TORCH_CXX_FLAGS}")
|
|
12
|
+
|
|
13
|
+
function(make_torchcodec_sublibrary
|
|
14
|
+
library_name
|
|
15
|
+
type
|
|
16
|
+
sources
|
|
17
|
+
library_dependencies)
|
|
18
|
+
|
|
19
|
+
add_library(${library_name} ${type} ${sources})
|
|
20
|
+
set_target_properties(${library_name} PROPERTIES CXX_STANDARD 17)
|
|
21
|
+
target_include_directories(${library_name}
|
|
22
|
+
PRIVATE
|
|
23
|
+
./../../../
|
|
24
|
+
"${TORCH_INSTALL_PREFIX}/include"
|
|
25
|
+
${Python3_INCLUDE_DIRS}
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
# Avoid adding the "lib" prefix which we already add explicitly.
|
|
29
|
+
set_target_properties(${library_name} PROPERTIES PREFIX "")
|
|
30
|
+
|
|
31
|
+
target_link_libraries(
|
|
32
|
+
${library_name}
|
|
33
|
+
PUBLIC
|
|
34
|
+
${library_dependencies}
|
|
35
|
+
)
|
|
36
|
+
endfunction()
|
|
37
|
+
|
|
38
|
+
function(make_torchcodec_libraries
|
|
39
|
+
ffmpeg_major_version
|
|
40
|
+
ffmpeg_target)
|
|
41
|
+
|
|
42
|
+
# We create three shared libraries per version of FFmpeg, where the version
|
|
43
|
+
# is denoted by N:
|
|
44
|
+
#
|
|
45
|
+
# 1. libtorchcodec_decoderN.{ext}: Base library which contains the
|
|
46
|
+
# implementation of VideoDecoder and everything VideoDecoder needs. On
|
|
47
|
+
# Linux, {ext} is so. On Mac, it is dylib.
|
|
48
|
+
#
|
|
49
|
+
# 2. libtorchcodec_custom_opsN.{ext}: Implementation of the PyTorch custom
|
|
50
|
+
# ops. Depends on libtorchcodec_decoderN.{ext}. On Linux, {ext} is so.
|
|
51
|
+
# On Mac, it is dylib.
|
|
52
|
+
#
|
|
53
|
+
# 3. libtorchcodec_pybind_opsN.{ext}: Implementation of the pybind11 ops. We
|
|
54
|
+
# keep these separate from the PyTorch custom ops because we have to
|
|
55
|
+
# load these libraries separately on the Python side. Depends on
|
|
56
|
+
# libtorchcodec_decoderN.{ext}. On BOTH Linux and Mac {ext} is so.
|
|
57
|
+
|
|
58
|
+
# 1. Create libtorchcodec_decoderN.{ext}.
|
|
59
|
+
set(decoder_library_name "libtorchcodec_decoder${ffmpeg_major_version}")
|
|
60
|
+
set(decoder_sources
|
|
61
|
+
AVIOContextHolder.cpp
|
|
62
|
+
FFMPEGCommon.cpp
|
|
63
|
+
DeviceInterface.cpp
|
|
64
|
+
SingleStreamDecoder.cpp
|
|
65
|
+
# TODO: lib name should probably not be "*_decoder*" now that it also
|
|
66
|
+
# contains an encoder
|
|
67
|
+
Encoder.cpp
|
|
68
|
+
)
|
|
69
|
+
|
|
70
|
+
if(ENABLE_CUDA)
|
|
71
|
+
list(APPEND decoder_sources CudaDeviceInterface.cpp)
|
|
72
|
+
endif()
|
|
73
|
+
|
|
74
|
+
set(decoder_library_dependencies
|
|
75
|
+
${ffmpeg_target}
|
|
76
|
+
${TORCH_LIBRARIES}
|
|
77
|
+
)
|
|
78
|
+
|
|
79
|
+
if(ENABLE_CUDA)
|
|
80
|
+
list(APPEND decoder_library_dependencies
|
|
81
|
+
${CUDA_nppi_LIBRARY}
|
|
82
|
+
${CUDA_nppicc_LIBRARY}
|
|
83
|
+
)
|
|
84
|
+
endif()
|
|
85
|
+
|
|
86
|
+
make_torchcodec_sublibrary(
|
|
87
|
+
"${decoder_library_name}"
|
|
88
|
+
SHARED
|
|
89
|
+
"${decoder_sources}"
|
|
90
|
+
"${decoder_library_dependencies}"
|
|
91
|
+
)
|
|
92
|
+
|
|
93
|
+
# 2. Create libtorchcodec_custom_opsN.{ext}.
|
|
94
|
+
set(custom_ops_library_name "libtorchcodec_custom_ops${ffmpeg_major_version}")
|
|
95
|
+
set(custom_ops_sources
|
|
96
|
+
AVIOBytesContext.cpp
|
|
97
|
+
custom_ops.cpp
|
|
98
|
+
)
|
|
99
|
+
set(custom_ops_dependencies
|
|
100
|
+
${decoder_library_name}
|
|
101
|
+
${Python3_LIBRARIES}
|
|
102
|
+
)
|
|
103
|
+
make_torchcodec_sublibrary(
|
|
104
|
+
"${custom_ops_library_name}"
|
|
105
|
+
SHARED
|
|
106
|
+
"${custom_ops_sources}"
|
|
107
|
+
"${custom_ops_dependencies}"
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
# 3. Create libtorchcodec_pybind_opsN.so.
|
|
111
|
+
set(pybind_ops_library_name "libtorchcodec_pybind_ops${ffmpeg_major_version}")
|
|
112
|
+
set(pybind_ops_sources
|
|
113
|
+
AVIOFileLikeContext.cpp
|
|
114
|
+
pybind_ops.cpp
|
|
115
|
+
)
|
|
116
|
+
set(pybind_ops_dependencies
|
|
117
|
+
${decoder_library_name}
|
|
118
|
+
pybind11::module # This library dependency makes sure we have the right
|
|
119
|
+
# Python libraries included as well as all of the right
|
|
120
|
+
# settings so that we can successfully load the shared
|
|
121
|
+
# library as a Python module on Mac. If we instead use
|
|
122
|
+
# ${Python3_LIBRARIES}, it works on Linux but not on
|
|
123
|
+
# Mac.
|
|
124
|
+
)
|
|
125
|
+
make_torchcodec_sublibrary(
|
|
126
|
+
"${pybind_ops_library_name}"
|
|
127
|
+
MODULE # Note that this not SHARED; otherwise we build the wrong kind
|
|
128
|
+
# of library on Mac. On Mac, SHARED becomes .dylib and MODULE becomes
|
|
129
|
+
# a .so. We want pybind11 libraries to become .so. If this is
|
|
130
|
+
# changed to SHARED, we will be able to successfully compile a
|
|
131
|
+
# .dylib, but we will not be able to successfully import that as
|
|
132
|
+
# a Python module on Mac.
|
|
133
|
+
"${pybind_ops_sources}"
|
|
134
|
+
"${pybind_ops_dependencies}"
|
|
135
|
+
)
|
|
136
|
+
# pybind11 limits the visibility of symbols in the shared library to prevent
|
|
137
|
+
# stray initialization of py::objects. The rest of the object code must
|
|
138
|
+
# match. See:
|
|
139
|
+
# https://pybind11.readthedocs.io/en/stable/faq.html#someclass-declared-with-greater-visibility-than-the-type-of-its-field-someclass-member-wattributes
|
|
140
|
+
target_compile_options(
|
|
141
|
+
${pybind_ops_library_name}
|
|
142
|
+
PUBLIC
|
|
143
|
+
"-fvisibility=hidden"
|
|
144
|
+
)
|
|
145
|
+
# If we don't make sure this flag is set, we run into segfaults at import
|
|
146
|
+
# time on Mac. See:
|
|
147
|
+
# https://github.com/pybind/pybind11/issues/3907#issuecomment-1170412764
|
|
148
|
+
target_link_options(
|
|
149
|
+
${pybind_ops_library_name}
|
|
150
|
+
PUBLIC
|
|
151
|
+
"-undefined dynamic_lookup"
|
|
152
|
+
)
|
|
153
|
+
|
|
154
|
+
# Install all libraries.
|
|
155
|
+
set(
|
|
156
|
+
all_libraries
|
|
157
|
+
${decoder_library_name}
|
|
158
|
+
${custom_ops_library_name}
|
|
159
|
+
${pybind_ops_library_name}
|
|
160
|
+
)
|
|
161
|
+
|
|
162
|
+
# The install step is invoked within CMakeBuild.build_library() in
|
|
163
|
+
# setup.py and just copies the built files from the temp
|
|
164
|
+
# cmake/setuptools build folder into the CMAKE_INSTALL_PREFIX folder. We
|
|
165
|
+
# still need to manually pass "DESTINATION ..." for cmake to copy those
|
|
166
|
+
# files in CMAKE_INSTALL_PREFIX instead of CMAKE_INSTALL_PREFIX/lib.
|
|
167
|
+
install(
|
|
168
|
+
TARGETS ${all_libraries}
|
|
169
|
+
LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}
|
|
170
|
+
)
|
|
171
|
+
endfunction()
|
|
172
|
+
|
|
173
|
+
# Select which FFmpeg to build against.
#
# - When the BUILD_AGAINST_ALL_FFMPEG_FROM_S3 environment variable is defined,
#   build one set of libraries per supported FFmpeg major version (4 to 7)
#   against the targets exposed by fetch_and_expose_non_gpl_ffmpeg_libs.cmake.
# - Otherwise, locate the installed FFmpeg through pkg-config, derive the
#   FFmpeg major version from libavcodec's major version, and build a single
#   set of libraries against it.
if(DEFINED ENV{BUILD_AGAINST_ALL_FFMPEG_FROM_S3})
    message(
        STATUS
        "Building and dynamically linking libtorchcodec against our pre-built
        non-GPL FFmpeg libraries. These libraries are only used at build time,
        you still need a different FFmpeg to be installed for run time!"
    )

    # This will expose the ffmpeg4, ffmpeg5, ffmpeg6, and ffmpeg7 targets
    include(
        ${CMAKE_CURRENT_SOURCE_DIR}/fetch_and_expose_non_gpl_ffmpeg_libs.cmake
    )

    make_torchcodec_libraries(7 ffmpeg7)
    make_torchcodec_libraries(6 ffmpeg6)
    make_torchcodec_libraries(4 ffmpeg4)
    make_torchcodec_libraries(5 ffmpeg5)
else()
    message(
        STATUS
        "Building and dynamically linking libtorchcodec against the installed
        FFmpeg libraries. This require pkg-config to be installed. If you have
        installed FFmpeg from conda, make sure pkg-config is installed from
        conda as well."
    )
    find_package(PkgConfig REQUIRED)
    pkg_check_modules(LIBAV REQUIRED IMPORTED_TARGET
        libavdevice
        libavfilter
        libavformat
        libavcodec
        libavutil
        libswresample
        libswscale
    )

    # Fail with a clear message instead of a confusing argument-count or
    # syntax error further down if no version string is available.
    if("${LIBAV_libavcodec_VERSION}" STREQUAL "")
        message(
            FATAL_ERROR
            "pkg-config did not report a version for libavcodec"
        )
    endif()

    # Split libavcodec's version string by '.' and convert it to a list.
    # The expansion is quoted so that it is always passed as one argument.
    string(REPLACE "." ";" libavcodec_version_list "${LIBAV_libavcodec_VERSION}")
    # Get the first element of the list, which is the major version
    list(GET libavcodec_version_list 0 libavcodec_major_version)

    # Map the libavcodec major version to the FFmpeg major version. The
    # left-hand sides are quoted so that the comparison is always between two
    # strings.
    if("${libavcodec_major_version}" STREQUAL "58")
        set(ffmpeg_major_version "4")
    elseif("${libavcodec_major_version}" STREQUAL "59")
        set(ffmpeg_major_version "5")
    elseif("${libavcodec_major_version}" STREQUAL "60")
        set(ffmpeg_major_version "6")
    elseif("${libavcodec_major_version}" STREQUAL "61")
        set(ffmpeg_major_version "7")
    else()
        message(
            FATAL_ERROR
            "Unsupported libavcodec version: ${libavcodec_major_version}"
        )
    endif()

    make_torchcodec_libraries(${ffmpeg_major_version} PkgConfig::LIBAV)

    # Expose these values upwards so that the test compilation does not need
    # to re-figure it out. FIXME: it's not great that we just copy-paste the
    # library names.
    set(libtorchcodec_library_name "libtorchcodec_decoder${ffmpeg_major_version}" PARENT_SCOPE)
    set(libtorchcodec_custom_ops_name "libtorchcodec_custom_ops${ffmpeg_major_version}" PARENT_SCOPE)
    set(libav_include_dirs ${LIBAV_INCLUDE_DIRS} PARENT_SCOPE)
endif()
|