torchcodec 0.3.0__cp311-cp311-macosx_11_0_arm64.whl → 0.5__cp311-cp311-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of torchcodec might be problematic. Click here for more details.

Files changed (61) hide show
  1. torchcodec/.dylibs/libc++.1.0.dylib +0 -0
  2. torchcodec/.dylibs/libpython3.11.dylib +0 -0
  3. torchcodec/_core/AVIOContextHolder.cpp +8 -3
  4. torchcodec/_core/AVIOContextHolder.h +7 -9
  5. torchcodec/_core/AVIOFileLikeContext.cpp +1 -1
  6. torchcodec/_core/AVIOTensorContext.cpp +121 -0
  7. torchcodec/_core/AVIOTensorContext.h +43 -0
  8. torchcodec/_core/CMakeLists.txt +38 -22
  9. torchcodec/_core/CpuDeviceInterface.cpp +360 -0
  10. torchcodec/_core/CpuDeviceInterface.h +80 -0
  11. torchcodec/_core/CudaDeviceInterface.cpp +5 -12
  12. torchcodec/_core/CudaDeviceInterface.h +1 -0
  13. torchcodec/_core/DeviceInterface.cpp +20 -29
  14. torchcodec/_core/DeviceInterface.h +1 -0
  15. torchcodec/_core/Encoder.cpp +297 -110
  16. torchcodec/_core/Encoder.h +96 -14
  17. torchcodec/_core/FFMPEGCommon.cpp +195 -46
  18. torchcodec/_core/FFMPEGCommon.h +44 -12
  19. torchcodec/_core/Frame.cpp +32 -0
  20. torchcodec/_core/Frame.h +71 -0
  21. torchcodec/_core/Metadata.h +12 -10
  22. torchcodec/_core/SingleStreamDecoder.cpp +335 -567
  23. torchcodec/_core/SingleStreamDecoder.h +30 -112
  24. torchcodec/_core/StreamOptions.h +4 -0
  25. torchcodec/_core/__init__.py +2 -2
  26. torchcodec/_core/_metadata.py +59 -19
  27. torchcodec/_core/custom_ops.cpp +137 -91
  28. torchcodec/_core/ops.py +38 -20
  29. torchcodec/_core/pybind_ops.cpp +5 -1
  30. torchcodec/_frame.py +2 -2
  31. torchcodec/_internally_replaced_utils.py +11 -0
  32. torchcodec/_samplers/video_clip_sampler.py +11 -11
  33. torchcodec/decoders/_audio_decoder.py +11 -4
  34. torchcodec/decoders/_video_decoder.py +7 -2
  35. torchcodec/encoders/__init__.py +1 -0
  36. torchcodec/encoders/_audio_encoder.py +110 -0
  37. torchcodec/libtorchcodec_core4.dylib +0 -0
  38. torchcodec/libtorchcodec_core5.dylib +0 -0
  39. torchcodec/libtorchcodec_core6.dylib +0 -0
  40. torchcodec/libtorchcodec_core7.dylib +0 -0
  41. torchcodec/libtorchcodec_custom_ops4.dylib +0 -0
  42. torchcodec/libtorchcodec_custom_ops5.dylib +0 -0
  43. torchcodec/libtorchcodec_custom_ops6.dylib +0 -0
  44. torchcodec/libtorchcodec_custom_ops7.dylib +0 -0
  45. torchcodec/libtorchcodec_pybind_ops4.so +0 -0
  46. torchcodec/libtorchcodec_pybind_ops5.so +0 -0
  47. torchcodec/libtorchcodec_pybind_ops6.so +0 -0
  48. torchcodec/libtorchcodec_pybind_ops7.so +0 -0
  49. torchcodec/version.py +1 -1
  50. {torchcodec-0.3.0.dist-info → torchcodec-0.5.dist-info}/METADATA +13 -24
  51. torchcodec-0.5.dist-info/RECORD +64 -0
  52. {torchcodec-0.3.0.dist-info → torchcodec-0.5.dist-info}/WHEEL +1 -1
  53. torchcodec/_core/AVIOBytesContext.cpp +0 -70
  54. torchcodec/_core/AVIOBytesContext.h +0 -32
  55. torchcodec/libtorchcodec_decoder4.dylib +0 -0
  56. torchcodec/libtorchcodec_decoder5.dylib +0 -0
  57. torchcodec/libtorchcodec_decoder6.dylib +0 -0
  58. torchcodec/libtorchcodec_decoder7.dylib +0 -0
  59. torchcodec-0.3.0.dist-info/RECORD +0 -59
  60. {torchcodec-0.3.0.dist-info → torchcodec-0.5.dist-info/licenses}/LICENSE +0 -0
  61. {torchcodec-0.3.0.dist-info → torchcodec-0.5.dist-info}/top_level.txt +0 -0
Binary file
Binary file
@@ -11,6 +11,7 @@ namespace facebook::torchcodec {
11
11
 
12
12
  void AVIOContextHolder::createAVIOContext(
13
13
  AVIOReadFunction read,
14
+ AVIOWriteFunction write,
14
15
  AVIOSeekFunction seek,
15
16
  void* heldData,
16
17
  int bufferSize) {
@@ -22,13 +23,17 @@ void AVIOContextHolder::createAVIOContext(
22
23
  buffer != nullptr,
23
24
  "Failed to allocate buffer of size " + std::to_string(bufferSize));
24
25
 
25
- avioContext_.reset(avio_alloc_context(
26
+ TORCH_CHECK(
27
+ (seek != nullptr) && ((write != nullptr) ^ (read != nullptr)),
28
+ "seek method must be defined, and either write or read must be defined. "
29
+ "But not both!")
30
+ avioContext_.reset(avioAllocContext(
26
31
  buffer,
27
32
  bufferSize,
28
- 0,
33
+ /*write_flag=*/write != nullptr,
29
34
  heldData,
30
35
  read,
31
- nullptr, // write function; not supported yet
36
+ write,
32
37
  seek));
33
38
 
34
39
  if (!avioContext_) {
@@ -19,16 +19,17 @@ namespace facebook::torchcodec {
19
19
  // freed.
20
20
  // 2. It is a base class for AVIOContext specializations. When specializing a
21
21
  // AVIOContext, we need to provide four things:
22
- // 1. A read callback function.
23
- // 2. A seek callback function.
24
- // 3. A write callback function. (Not supported yet; it's for encoding.)
22
+ // 1. A read callback function, for decoding.
23
+ // 2. A seek callback function, for decoding and encoding.
24
+ // 3. A write callback function, for encoding.
25
25
  // 4. A pointer to some context object that has the same lifetime as the
26
26
  // AVIOContext itself. This context object holds the custom state that
27
27
  // tracks the custom behavior of reading, seeking and writing. It is
28
28
  // provided upon AVIOContext creation and to the read, seek and
29
29
  // write callback functions.
30
- // While it's not required, it is natural for the derived classes to make
31
- // all of the above members. Base classes need to call
30
+ // The callback functions do not need to be members of the derived class,
31
+ // but the derived class must have access to them. The context object must
32
+ // be a member of the derived class. Derived classes need to call
32
33
  // createAVIOContext(), ideally in their constructor.
33
34
  // 3. A generic handle for those that just need to manage having access to an
34
35
  // AVIOContext, but aren't necessarily concerned with how it was customized:
@@ -44,13 +45,10 @@ class AVIOContextHolder {
44
45
  // enforced by having a pure virtual method, but we don't have any.)
45
46
  AVIOContextHolder() = default;
46
47
 
47
- // These signatures are defined by FFmpeg.
48
- using AVIOReadFunction = int (*)(void*, uint8_t*, int);
49
- using AVIOSeekFunction = int64_t (*)(void*, int64_t, int);
50
-
51
48
  // Deriving classes should call this function in their constructor.
52
49
  void createAVIOContext(
53
50
  AVIOReadFunction read,
51
+ AVIOWriteFunction write,
54
52
  AVIOSeekFunction seek,
55
53
  void* heldData,
56
54
  int bufferSize = defaultBufferSize);
@@ -23,7 +23,7 @@ AVIOFileLikeContext::AVIOFileLikeContext(py::object fileLike)
23
23
  py::hasattr(fileLike, "seek"),
24
24
  "File like object must implement a seek method.");
25
25
  }
26
- createAVIOContext(&read, &seek, &fileLike_);
26
+ createAVIOContext(&read, nullptr, &seek, &fileLike_);
27
27
  }
28
28
 
29
29
  int AVIOFileLikeContext::read(void* opaque, uint8_t* buf, int buf_size) {
@@ -0,0 +1,121 @@
1
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ // All rights reserved.
3
+ //
4
+ // This source code is licensed under the BSD-style license found in the
5
+ // LICENSE file in the root directory of this source tree.
6
+
7
+ #include "src/torchcodec/_core/AVIOTensorContext.h"
8
+ #include <torch/types.h>
9
+
10
+ namespace facebook::torchcodec {
11
+
12
+ namespace {
13
+
14
+ constexpr int64_t INITIAL_TENSOR_SIZE = 10'000'000; // 10 MB
15
+ constexpr int64_t MAX_TENSOR_SIZE = 320'000'000; // 320 MB
16
+
17
+ // The signature of this function is defined by FFMPEG.
18
+ int read(void* opaque, uint8_t* buf, int buf_size) {
19
+ auto tensorContext = static_cast<detail::TensorContext*>(opaque);
20
+ TORCH_CHECK(
21
+ tensorContext->current <= tensorContext->data.numel(),
22
+ "Tried to read outside of the buffer: current=",
23
+ tensorContext->current,
24
+ ", size=",
25
+ tensorContext->data.numel());
26
+
27
+ int64_t numBytesRead = std::min(
28
+ static_cast<int64_t>(buf_size),
29
+ tensorContext->data.numel() - tensorContext->current);
30
+
31
+ TORCH_CHECK(
32
+ numBytesRead >= 0,
33
+ "Tried to read negative bytes: numBytesRead=",
34
+ numBytesRead,
35
+ ", size=",
36
+ tensorContext->data.numel(),
37
+ ", current=",
38
+ tensorContext->current);
39
+
40
+ if (numBytesRead == 0) {
41
+ return AVERROR_EOF;
42
+ }
43
+
44
+ std::memcpy(
45
+ buf,
46
+ tensorContext->data.data_ptr<uint8_t>() + tensorContext->current,
47
+ numBytesRead);
48
+ tensorContext->current += numBytesRead;
49
+ return numBytesRead;
50
+ }
51
+
52
+ // The signature of this function is defined by FFMPEG.
53
+ int write(void* opaque, const uint8_t* buf, int buf_size) {
54
+ auto tensorContext = static_cast<detail::TensorContext*>(opaque);
55
+
56
+ int64_t bufSize = static_cast<int64_t>(buf_size);
57
+ if (tensorContext->current + bufSize > tensorContext->data.numel()) {
58
+ TORCH_CHECK(
59
+ tensorContext->data.numel() * 2 <= MAX_TENSOR_SIZE,
60
+ "We tried to allocate an output encoded tensor larger than ",
61
+ MAX_TENSOR_SIZE,
62
+ " bytes. If you think this should be supported, please report.");
63
+
64
+ // We double the size of the output tensor. Calling cat() may not be the
65
+ // most efficient, but it's simple.
66
+ tensorContext->data =
67
+ torch::cat({tensorContext->data, tensorContext->data});
68
+ }
69
+
70
+ TORCH_CHECK(
71
+ tensorContext->current + bufSize <= tensorContext->data.numel(),
72
+ "Re-allocation of the output tensor didn't work. ",
73
+ "This should not happen, please report on TorchCodec bug tracker");
74
+
75
+ uint8_t* outputTensorData = tensorContext->data.data_ptr<uint8_t>();
76
+ std::memcpy(outputTensorData + tensorContext->current, buf, bufSize);
77
+ tensorContext->current += bufSize;
78
+ return buf_size;
79
+ }
80
+
81
+ // The signature of this function is defined by FFMPEG.
82
+ int64_t seek(void* opaque, int64_t offset, int whence) {
83
+ auto tensorContext = static_cast<detail::TensorContext*>(opaque);
84
+ int64_t ret = -1;
85
+
86
+ switch (whence) {
87
+ case AVSEEK_SIZE:
88
+ ret = tensorContext->data.numel();
89
+ break;
90
+ case SEEK_SET:
91
+ tensorContext->current = offset;
92
+ ret = offset;
93
+ break;
94
+ default:
95
+ break;
96
+ }
97
+
98
+ return ret;
99
+ }
100
+
101
+ } // namespace
102
+
103
+ AVIOFromTensorContext::AVIOFromTensorContext(torch::Tensor data)
104
+ : tensorContext_{data, 0} {
105
+ TORCH_CHECK(data.numel() > 0, "data must not be empty");
106
+ TORCH_CHECK(data.is_contiguous(), "data must be contiguous");
107
+ TORCH_CHECK(data.scalar_type() == torch::kUInt8, "data must be kUInt8");
108
+ createAVIOContext(&read, nullptr, &seek, &tensorContext_);
109
+ }
110
+
111
+ AVIOToTensorContext::AVIOToTensorContext()
112
+ : tensorContext_{torch::empty({INITIAL_TENSOR_SIZE}, {torch::kUInt8}), 0} {
113
+ createAVIOContext(nullptr, &write, &seek, &tensorContext_);
114
+ }
115
+
116
+ torch::Tensor AVIOToTensorContext::getOutputTensor() {
117
+ return tensorContext_.data.narrow(
118
+ /*dim=*/0, /*start=*/0, /*length=*/tensorContext_.current);
119
+ }
120
+
121
+ } // namespace facebook::torchcodec
@@ -0,0 +1,43 @@
1
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ // All rights reserved.
3
+ //
4
+ // This source code is licensed under the BSD-style license found in the
5
+ // LICENSE file in the root directory of this source tree.
6
+
7
+ #pragma once
8
+
9
+ #include <torch/types.h>
10
+ #include "src/torchcodec/_core/AVIOContextHolder.h"
11
+
12
+ namespace facebook::torchcodec {
13
+
14
+ namespace detail {
15
+
16
+ struct TensorContext {
17
+ torch::Tensor data;
18
+ int64_t current;
19
+ };
20
+
21
+ } // namespace detail
22
+
23
+ // For Decoding: enables users to pass in the entire video or audio as bytes.
24
+ // Our read and seek functions then traverse the bytes in memory.
25
+ class AVIOFromTensorContext : public AVIOContextHolder {
26
+ public:
27
+ explicit AVIOFromTensorContext(torch::Tensor data);
28
+
29
+ private:
30
+ detail::TensorContext tensorContext_;
31
+ };
32
+
33
+ // For Encoding: used to encode into an output uint8 (bytes) tensor.
34
+ class AVIOToTensorContext : public AVIOContextHolder {
35
+ public:
36
+ explicit AVIOToTensorContext();
37
+ torch::Tensor getOutputTensor();
38
+
39
+ private:
40
+ detail::TensorContext tensorContext_;
41
+ };
42
+
43
+ } // namespace facebook::torchcodec
@@ -8,7 +8,13 @@ find_package(pybind11 REQUIRED)
8
8
  find_package(Torch REQUIRED)
9
9
  find_package(Python3 ${PYTHON_VERSION} EXACT COMPONENTS Development)
10
10
 
11
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -pedantic -Werror ${TORCH_CXX_FLAGS}")
11
+ if(DEFINED TORCHCODEC_DISABLE_COMPILE_WARNING_AS_ERROR AND TORCHCODEC_DISABLE_COMPILE_WARNING_AS_ERROR)
12
+ set(TORCHCODEC_WERROR_OPTION "")
13
+ else()
14
+ set(TORCHCODEC_WERROR_OPTION "-Werror")
15
+ endif()
16
+
17
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -pedantic ${TORCHCODEC_WERROR_OPTION} ${TORCH_CXX_FLAGS}")
12
18
 
13
19
  function(make_torchcodec_sublibrary
14
20
  library_name
@@ -42,62 +48,63 @@ function(make_torchcodec_libraries
42
48
  # We create three shared libraries per version of FFmpeg, where the version
43
49
  # is denoted by N:
44
50
  #
45
- # 1. libtorchcodec_decoderN.{ext}: Base library which contains the
51
+ # 1. libtorchcodec_coreN.{ext}: Base library which contains the
46
52
  # implementation of VideoDecoder and everything VideoDecoder needs. On
47
53
  # Linux, {ext} is so. On Mac, it is dylib.
48
54
  #
49
55
  # 2. libtorchcodec_custom_opsN.{ext}: Implementation of the PyTorch custom
50
- # ops. Depends on libtorchcodec_decoderN.{ext}. On Linux, {ext} is so.
56
+ # ops. Depends on libtorchcodec_coreN.{ext}. On Linux, {ext} is so.
51
57
  # On Mac, it is dylib.
52
58
  #
53
59
  # 3. libtorchcodec_pybind_opsN.{ext}: Implementation of the pybind11 ops. We
54
60
  # keep these separate from the PyTorch custom ops because we have to
55
61
  # load these libraries separately on the Python side. Depends on
56
- # libtorchcodec_decoderN.{ext}. On BOTH Linux and Mac {ext} is so.
62
+ # libtorchcodec_coreN.{ext}. On BOTH Linux and Mac {ext} is so.
57
63
 
58
- # 1. Create libtorchcodec_decoderN.{ext}.
59
- set(decoder_library_name "libtorchcodec_decoder${ffmpeg_major_version}")
60
- set(decoder_sources
64
+ # 1. Create libtorchcodec_coreN.{ext}.
65
+ set(core_library_name "libtorchcodec_core${ffmpeg_major_version}")
66
+ set(core_sources
61
67
  AVIOContextHolder.cpp
68
+ AVIOTensorContext.cpp
62
69
  FFMPEGCommon.cpp
63
- DeviceInterface.cpp
70
+ Frame.cpp
71
+ DeviceInterface.cpp
72
+ CpuDeviceInterface.cpp
64
73
  SingleStreamDecoder.cpp
65
- # TODO: lib name should probably not be "*_decoder*" now that it also
66
- # contains an encoder
67
74
  Encoder.cpp
68
75
  )
69
76
 
70
77
  if(ENABLE_CUDA)
71
- list(APPEND decoder_sources CudaDeviceInterface.cpp)
78
+ list(APPEND core_sources CudaDeviceInterface.cpp)
72
79
  endif()
73
80
 
74
- set(decoder_library_dependencies
81
+ set(core_library_dependencies
75
82
  ${ffmpeg_target}
76
83
  ${TORCH_LIBRARIES}
77
84
  )
78
85
 
79
86
  if(ENABLE_CUDA)
80
- list(APPEND decoder_library_dependencies
87
+ list(APPEND core_library_dependencies
81
88
  ${CUDA_nppi_LIBRARY}
82
89
  ${CUDA_nppicc_LIBRARY}
83
90
  )
84
91
  endif()
85
92
 
86
93
  make_torchcodec_sublibrary(
87
- "${decoder_library_name}"
94
+ "${core_library_name}"
88
95
  SHARED
89
- "${decoder_sources}"
90
- "${decoder_library_dependencies}"
96
+ "${core_sources}"
97
+ "${core_library_dependencies}"
91
98
  )
92
99
 
93
100
  # 2. Create libtorchcodec_custom_opsN.{ext}.
94
101
  set(custom_ops_library_name "libtorchcodec_custom_ops${ffmpeg_major_version}")
95
102
  set(custom_ops_sources
96
- AVIOBytesContext.cpp
103
+ AVIOTensorContext.cpp
97
104
  custom_ops.cpp
98
105
  )
99
106
  set(custom_ops_dependencies
100
- ${decoder_library_name}
107
+ ${core_library_name}
101
108
  ${Python3_LIBRARIES}
102
109
  )
103
110
  make_torchcodec_sublibrary(
@@ -114,7 +121,7 @@ function(make_torchcodec_libraries
114
121
  pybind_ops.cpp
115
122
  )
116
123
  set(pybind_ops_dependencies
117
- ${decoder_library_name}
124
+ ${core_library_name}
118
125
  pybind11::module # This library dependency makes sure we have the right
119
126
  # Python libraries included as well as all of the right
120
127
  # settings so that we can successfully load the shared
@@ -142,19 +149,28 @@ function(make_torchcodec_libraries
142
149
  PUBLIC
143
150
  "-fvisibility=hidden"
144
151
  )
152
+ # The value we use here must match the value we return from
153
+ # _get_pybind_ops_module_name() on the Python side. If the values do not
154
+ # match, then we will be unable to import the C++ shared library as a
155
+ # Python module at runtime.
156
+ target_compile_definitions(
157
+ ${pybind_ops_library_name}
158
+ PRIVATE
159
+ PYBIND_OPS_MODULE_NAME=core_pybind_ops
160
+ )
145
161
  # If we don't make sure this flag is set, we run into segfaults at import
146
162
  # time on Mac. See:
147
163
  # https://github.com/pybind/pybind11/issues/3907#issuecomment-1170412764
148
164
  target_link_options(
149
165
  ${pybind_ops_library_name}
150
166
  PUBLIC
151
- "-undefined dynamic_lookup"
167
+ "LINKER:-undefined,dynamic_lookup"
152
168
  )
153
169
 
154
170
  # Install all libraries.
155
171
  set(
156
172
  all_libraries
157
- ${decoder_library_name}
173
+ ${core_library_name}
158
174
  ${custom_ops_library_name}
159
175
  ${pybind_ops_library_name}
160
176
  )
@@ -231,7 +247,7 @@ else()
231
247
  # Expose these values upwards so that the test compilation does not need
232
248
  # to re-figure it out. FIXME: it's not great that we just copy-paste the
233
249
  # library names.
234
- set(libtorchcodec_library_name "libtorchcodec_decoder${ffmpeg_major_version}" PARENT_SCOPE)
250
+ set(libtorchcodec_library_name "libtorchcodec_core${ffmpeg_major_version}" PARENT_SCOPE)
235
251
  set(libtorchcodec_custom_ops_name "libtorchcodec_custom_ops${ffmpeg_major_version}" PARENT_SCOPE)
236
252
  set(libav_include_dirs ${LIBAV_INCLUDE_DIRS} PARENT_SCOPE)
237
253
  endif()