torchcodec 0.10.0__cp312-cp312-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. torchcodec/__init__.py +27 -0
  2. torchcodec/_core/AVIOContextHolder.cpp +60 -0
  3. torchcodec/_core/AVIOContextHolder.h +64 -0
  4. torchcodec/_core/AVIOFileLikeContext.cpp +98 -0
  5. torchcodec/_core/AVIOFileLikeContext.h +55 -0
  6. torchcodec/_core/AVIOTensorContext.cpp +130 -0
  7. torchcodec/_core/AVIOTensorContext.h +44 -0
  8. torchcodec/_core/BetaCudaDeviceInterface.cpp +849 -0
  9. torchcodec/_core/BetaCudaDeviceInterface.h +196 -0
  10. torchcodec/_core/CMakeLists.txt +295 -0
  11. torchcodec/_core/CUDACommon.cpp +330 -0
  12. torchcodec/_core/CUDACommon.h +51 -0
  13. torchcodec/_core/Cache.h +124 -0
  14. torchcodec/_core/CpuDeviceInterface.cpp +509 -0
  15. torchcodec/_core/CpuDeviceInterface.h +141 -0
  16. torchcodec/_core/CudaDeviceInterface.cpp +602 -0
  17. torchcodec/_core/CudaDeviceInterface.h +79 -0
  18. torchcodec/_core/DeviceInterface.cpp +117 -0
  19. torchcodec/_core/DeviceInterface.h +191 -0
  20. torchcodec/_core/Encoder.cpp +1054 -0
  21. torchcodec/_core/Encoder.h +192 -0
  22. torchcodec/_core/FFMPEGCommon.cpp +684 -0
  23. torchcodec/_core/FFMPEGCommon.h +314 -0
  24. torchcodec/_core/FilterGraph.cpp +159 -0
  25. torchcodec/_core/FilterGraph.h +59 -0
  26. torchcodec/_core/Frame.cpp +47 -0
  27. torchcodec/_core/Frame.h +72 -0
  28. torchcodec/_core/Metadata.cpp +124 -0
  29. torchcodec/_core/Metadata.h +92 -0
  30. torchcodec/_core/NVCUVIDRuntimeLoader.cpp +320 -0
  31. torchcodec/_core/NVCUVIDRuntimeLoader.h +14 -0
  32. torchcodec/_core/NVDECCache.cpp +60 -0
  33. torchcodec/_core/NVDECCache.h +102 -0
  34. torchcodec/_core/SingleStreamDecoder.cpp +1586 -0
  35. torchcodec/_core/SingleStreamDecoder.h +391 -0
  36. torchcodec/_core/StreamOptions.h +70 -0
  37. torchcodec/_core/Transform.cpp +128 -0
  38. torchcodec/_core/Transform.h +86 -0
  39. torchcodec/_core/ValidationUtils.cpp +35 -0
  40. torchcodec/_core/ValidationUtils.h +21 -0
  41. torchcodec/_core/__init__.py +46 -0
  42. torchcodec/_core/_metadata.py +262 -0
  43. torchcodec/_core/custom_ops.cpp +1090 -0
  44. torchcodec/_core/fetch_and_expose_non_gpl_ffmpeg_libs.cmake +169 -0
  45. torchcodec/_core/nvcuvid_include/cuviddec.h +1374 -0
  46. torchcodec/_core/nvcuvid_include/nvcuvid.h +610 -0
  47. torchcodec/_core/ops.py +605 -0
  48. torchcodec/_core/pybind_ops.cpp +50 -0
  49. torchcodec/_frame.py +146 -0
  50. torchcodec/_internally_replaced_utils.py +68 -0
  51. torchcodec/_samplers/__init__.py +7 -0
  52. torchcodec/_samplers/video_clip_sampler.py +419 -0
  53. torchcodec/decoders/__init__.py +12 -0
  54. torchcodec/decoders/_audio_decoder.py +185 -0
  55. torchcodec/decoders/_decoder_utils.py +113 -0
  56. torchcodec/decoders/_video_decoder.py +601 -0
  57. torchcodec/encoders/__init__.py +2 -0
  58. torchcodec/encoders/_audio_encoder.py +149 -0
  59. torchcodec/encoders/_video_encoder.py +196 -0
  60. torchcodec/libtorchcodec_core4.so +0 -0
  61. torchcodec/libtorchcodec_core5.so +0 -0
  62. torchcodec/libtorchcodec_core6.so +0 -0
  63. torchcodec/libtorchcodec_core7.so +0 -0
  64. torchcodec/libtorchcodec_core8.so +0 -0
  65. torchcodec/libtorchcodec_custom_ops4.so +0 -0
  66. torchcodec/libtorchcodec_custom_ops5.so +0 -0
  67. torchcodec/libtorchcodec_custom_ops6.so +0 -0
  68. torchcodec/libtorchcodec_custom_ops7.so +0 -0
  69. torchcodec/libtorchcodec_custom_ops8.so +0 -0
  70. torchcodec/libtorchcodec_pybind_ops4.so +0 -0
  71. torchcodec/libtorchcodec_pybind_ops5.so +0 -0
  72. torchcodec/libtorchcodec_pybind_ops6.so +0 -0
  73. torchcodec/libtorchcodec_pybind_ops7.so +0 -0
  74. torchcodec/libtorchcodec_pybind_ops8.so +0 -0
  75. torchcodec/samplers/__init__.py +2 -0
  76. torchcodec/samplers/_common.py +84 -0
  77. torchcodec/samplers/_index_based.py +287 -0
  78. torchcodec/samplers/_time_based.py +358 -0
  79. torchcodec/share/cmake/TorchCodec/TorchCodecConfig.cmake +76 -0
  80. torchcodec/share/cmake/TorchCodec/ffmpeg_versions.cmake +122 -0
  81. torchcodec/transforms/__init__.py +12 -0
  82. torchcodec/transforms/_decoder_transforms.py +375 -0
  83. torchcodec/version.py +2 -0
  84. torchcodec-0.10.0.dist-info/METADATA +286 -0
  85. torchcodec-0.10.0.dist-info/RECORD +88 -0
  86. torchcodec-0.10.0.dist-info/WHEEL +5 -0
  87. torchcodec-0.10.0.dist-info/licenses/LICENSE +28 -0
  88. torchcodec-0.10.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,192 @@
1
+ #pragma once
2
+ #include <torch/types.h>
3
+ #include <map>
4
+ #include <string>
5
+ #include "AVIOContextHolder.h"
6
+ #include "DeviceInterface.h"
7
+ #include "FFMPEGCommon.h"
8
+ #include "StreamOptions.h"
9
+
10
+ extern "C" {
11
+ #include <libavutil/dict.h>
12
+ }
13
+
14
+ namespace facebook::torchcodec {
15
+ class AudioEncoder {
16
+ public:
17
+ ~AudioEncoder();
18
+
19
+ AudioEncoder(
20
+ const torch::Tensor& samples,
21
+ int sampleRate,
22
+ std::string_view fileName,
23
+ const AudioStreamOptions& audioStreamOptions);
24
+
25
+ AudioEncoder(
26
+ const torch::Tensor& samples,
27
+ int sampleRate,
28
+ std::string_view formatName,
29
+ std::unique_ptr<AVIOContextHolder> avioContextHolder,
30
+ const AudioStreamOptions& audioStreamOptions);
31
+
32
+ void encode();
33
+
34
+ torch::Tensor encodeToTensor();
35
+
36
+ private:
37
+ void initializeEncoder(const AudioStreamOptions& audioStreamOptions);
38
+ UniqueAVFrame maybeConvertAVFrame(const UniqueAVFrame& avFrame);
39
+ void encodeFrameThroughFifo(
40
+ AutoAVPacket& autoAVPacket,
41
+ const UniqueAVFrame& avFrame,
42
+ bool flushFifo = false);
43
+ void encodeFrame(AutoAVPacket& autoAVPacket, const UniqueAVFrame& avFrame);
44
+ void maybeFlushSwrBuffers(AutoAVPacket& autoAVPacket);
45
+ void flushBuffers();
46
+
47
+ UniqueEncodingAVFormatContext avFormatContext_;
48
+ UniqueAVCodecContext avCodecContext_;
49
+ int streamIndex_;
50
+ UniqueSwrContext swrContext_;
51
+ AudioStreamOptions audioStreamOptions;
52
+
53
+ const torch::Tensor samples_;
54
+
55
+ int outNumChannels_ = -1;
56
+ int outSampleRate_ = -1;
57
+ int inSampleRate_ = -1;
58
+
59
+ UniqueAVAudioFifo avAudioFifo_;
60
+
61
+ std::unique_ptr<AVIOContextHolder> avioContextHolder_;
62
+
63
+ bool encodeWasCalled_ = false;
64
+ int64_t lastEncodedAVFramePts_ = 0;
65
+ };
66
+
67
+ /* clang-format off */
68
+ //
69
+ // Note: [Encoding loop, sample rate conversion and FIFO]
70
+ //
71
+ // The input samples are in a given format, sample rate, and number of channels.
72
+ // We may want to change these properties before encoding. The conversion is
73
+ // done in maybeConvertAVFrame() and we rely on libswresample. When sample rate
74
+ // conversion is needed, this means two things:
75
+ // - swr will be storing samples in its internal buffers, which we'll need to
76
+ // flush at the very end of the encoding process.
77
+ // - the converted AVFrame we get back from maybeConvertAVFrame() typically
78
+ // won't have the same number of samples as the original AVFrame. And that's
79
+ // a problem, because some encoders expect AVFrames with a specific and
80
+ // constant number of samples. If we were to send it as-is, we'd get an error
81
+ // in avcodec_send_frame(). In order to feed the encoder with AVFrames
82
+ // with the expected number of samples, we go through an intermediate FIFO
83
+ // from which we can pull the exact number of samples that we need. Note that
84
+ // this involves at least 2 additional copies.
85
+ //
86
+ // To be clear, the FIFO is only used if BOTH the following conditions are met:
87
+ // - sample rate conversion is needed (inSampleRate_ != outSampleRate_)
88
+ // - the encoder expects a specific number of samples per AVFrame (fixed frame size)
89
+ // This is not the case for all encoders, e.g. WAV doesn't care about frame size.
90
+ //
91
+ // Also, the FIFO is either:
92
+ // - used for every single frame during the encoding process, or
93
+ // - not used at all.
94
+ // There is no scenario where a given AudioEncoder instance would sometimes use
94
+ // a FIFO and sometimes not.
96
+ //
97
+ // Drawing made with https://asciiflow.com/, can be copy/pasted there if it
98
+ // needs editing:
99
+ //
100
+ // ┌─One─iteration─of─main─encoding─loop─(encode())───────────────────────────────────────────┐
101
+ // │ │
102
+ // │ Converts: │
103
+ // │ - num channels │
104
+ // │ - format │
105
+ // │ - sample rate │
106
+ // │ If sample rate is converted, stores data in swr buffers, │
107
+ // │ which will need to be flushed by maybeFlushSwrBuffers() │
108
+ // │ │
109
+ // │ ▲ │
110
+ // │ │ ┌─EncodeFrameThroughFifo()──────────────┐│
111
+ // │ │ │ ││
112
+ // │ AVFrame ──────► MaybeConvertAVFrame()───▲──│─┬──► NO FIFO ─►┬──▲────►encodeFrame() ││
113
+ // │ with │ │ │ │ │ ││
114
+ // │ input │ │ │ │ │ ││
115
+ // │ samples │ │ │ │ │ ││
116
+ // │ │ │ │ │ │ ││
117
+ // │ │ │ └────► FIFO ─►─┘ │ ││
118
+ // │ │ └───────────────────┼───────────────────┘│
119
+ // └──────────────────────────────────────────────┼──────────────────────┼────────────────────┘
120
+ // │ │
121
+ // AVFrame from maybeFlushSwrBuffers() ───┘ │
122
+ // Only if sample rate conversion was needed nullptr, to flush
123
+ // The call to maybeFlushSwrBuffers() will FFmpeg buffers
124
+ // also instruct to flush the FIFO, if it exists.
125
+ //
126
+ //
127
+ //
128
+ /* clang-format on */
129
+
130
+ class VideoEncoder {
131
+ public:
132
+ ~VideoEncoder();
133
+
134
+ // Rule of Five requires that we define copy and move
135
+ // constructors and assignment operators.
136
+ // Both are deleted because we have unique_ptr members
137
+ VideoEncoder(const VideoEncoder&) = delete;
138
+ VideoEncoder& operator=(const VideoEncoder&) = delete;
139
+
140
+ // Move operators deleted since UniqueAVDictionary member is not movable
141
+ VideoEncoder(VideoEncoder&&) = delete;
142
+ VideoEncoder& operator=(VideoEncoder&&) = delete;
143
+
144
+ VideoEncoder(
145
+ const torch::Tensor& frames,
146
+ double frameRate,
147
+ std::string_view fileName,
148
+ const VideoStreamOptions& videoStreamOptions);
149
+
150
+ VideoEncoder(
151
+ const torch::Tensor& frames,
152
+ double frameRate,
153
+ std::string_view formatName,
154
+ std::unique_ptr<AVIOContextHolder> avioContextHolder,
155
+ const VideoStreamOptions& videoStreamOptions);
156
+
157
+ void encode();
158
+
159
+ torch::Tensor encodeToTensor();
160
+
161
+ private:
162
+ void initializeEncoder(const VideoStreamOptions& videoStreamOptions);
163
+ UniqueAVFrame convertTensorToAVFrame(
164
+ const torch::Tensor& frame,
165
+ int frameIndex);
166
+ void encodeFrame(AutoAVPacket& autoAVPacket, const UniqueAVFrame& avFrame);
167
+ void flushBuffers();
168
+
169
+ UniqueEncodingAVFormatContext avFormatContext_;
170
+ UniqueAVCodecContext avCodecContext_;
171
+ AVStream* avStream_ = nullptr;
172
+ UniqueSwsContext swsContext_;
173
+
174
+ const torch::Tensor frames_;
175
+ double inFrameRate_;
176
+
177
+ int inWidth_ = -1;
178
+ int inHeight_ = -1;
179
+ AVPixelFormat inPixelFormat_ = AV_PIX_FMT_NONE;
180
+
181
+ int outWidth_ = -1;
182
+ int outHeight_ = -1;
183
+ AVPixelFormat outPixelFormat_ = AV_PIX_FMT_NONE;
184
+
185
+ std::unique_ptr<AVIOContextHolder> avioContextHolder_;
186
+ std::unique_ptr<DeviceInterface> deviceInterface_;
187
+
188
+ bool encodeWasCalled_ = false;
189
+ UniqueAVDictionary avFormatOptions_;
190
+ };
191
+
192
+ } // namespace facebook::torchcodec