torchcodec 0.8.0__cp310-cp310-win_amd64.whl → 0.8.1__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of torchcodec might be problematic. Click here for more details.

Files changed (55)
  1. torchcodec/_core/AVIOTensorContext.cpp +23 -16
  2. torchcodec/_core/AVIOTensorContext.h +2 -1
  3. torchcodec/_core/BetaCudaDeviceInterface.cpp +168 -86
  4. torchcodec/_core/BetaCudaDeviceInterface.h +7 -5
  5. torchcodec/_core/CMakeLists.txt +1 -19
  6. torchcodec/_core/CUDACommon.cpp +21 -6
  7. torchcodec/_core/CUDACommon.h +6 -1
  8. torchcodec/_core/Cache.h +6 -20
  9. torchcodec/_core/CpuDeviceInterface.cpp +7 -1
  10. torchcodec/_core/CpuDeviceInterface.h +4 -1
  11. torchcodec/_core/CudaDeviceInterface.cpp +19 -11
  12. torchcodec/_core/CudaDeviceInterface.h +6 -1
  13. torchcodec/_core/DeviceInterface.h +27 -27
  14. torchcodec/_core/Encoder.cpp +51 -7
  15. torchcodec/_core/Encoder.h +12 -1
  16. torchcodec/_core/FFMPEGCommon.cpp +1 -1
  17. torchcodec/_core/FFMPEGCommon.h +9 -1
  18. torchcodec/_core/FilterGraph.cpp +2 -1
  19. torchcodec/_core/Frame.cpp +5 -0
  20. torchcodec/_core/Frame.h +1 -1
  21. torchcodec/_core/NVCUVIDRuntimeLoader.cpp +320 -0
  22. torchcodec/_core/NVCUVIDRuntimeLoader.h +14 -0
  23. torchcodec/_core/NVDECCache.cpp +3 -13
  24. torchcodec/_core/NVDECCache.h +4 -6
  25. torchcodec/_core/SingleStreamDecoder.cpp +22 -31
  26. torchcodec/_core/SingleStreamDecoder.h +4 -2
  27. torchcodec/_core/StreamOptions.h +2 -2
  28. torchcodec/_core/Transform.cpp +27 -0
  29. torchcodec/_core/Transform.h +25 -0
  30. torchcodec/_core/__init__.py +3 -0
  31. torchcodec/_core/custom_ops.cpp +99 -22
  32. torchcodec/_core/ops.py +76 -16
  33. torchcodec/decoders/_video_decoder.py +0 -10
  34. torchcodec/libtorchcodec_core4.dll +0 -0
  35. torchcodec/libtorchcodec_core5.dll +0 -0
  36. torchcodec/libtorchcodec_core6.dll +0 -0
  37. torchcodec/libtorchcodec_core7.dll +0 -0
  38. torchcodec/libtorchcodec_core8.dll +0 -0
  39. torchcodec/libtorchcodec_custom_ops4.dll +0 -0
  40. torchcodec/libtorchcodec_custom_ops5.dll +0 -0
  41. torchcodec/libtorchcodec_custom_ops6.dll +0 -0
  42. torchcodec/libtorchcodec_custom_ops7.dll +0 -0
  43. torchcodec/libtorchcodec_custom_ops8.dll +0 -0
  44. torchcodec/libtorchcodec_pybind_ops4.pyd +0 -0
  45. torchcodec/libtorchcodec_pybind_ops5.pyd +0 -0
  46. torchcodec/libtorchcodec_pybind_ops6.pyd +0 -0
  47. torchcodec/libtorchcodec_pybind_ops7.pyd +0 -0
  48. torchcodec/libtorchcodec_pybind_ops8.pyd +0 -0
  49. torchcodec/version.py +1 -1
  50. {torchcodec-0.8.0.dist-info → torchcodec-0.8.1.dist-info}/METADATA +6 -4
  51. torchcodec-0.8.1.dist-info/RECORD +82 -0
  52. torchcodec-0.8.0.dist-info/RECORD +0 -80
  53. {torchcodec-0.8.0.dist-info → torchcodec-0.8.1.dist-info}/WHEEL +0 -0
  54. {torchcodec-0.8.0.dist-info → torchcodec-0.8.1.dist-info}/licenses/LICENSE +0 -0
  55. {torchcodec-0.8.0.dist-info → torchcodec-0.8.1.dist-info}/top_level.txt +0 -0
torchcodec/_core/Cache.h CHANGED
@@ -95,30 +95,16 @@ class PerGpuCache {
95
95
  std::vector<std::unique_ptr<Cache<T, D>>> cache_;
96
96
  };
97
97
 
98
- // Note: this function is inline for convenience, not performance. Because the
99
- // rest of this file is template functions, they must all be defined in this
100
- // header. This function is not a template function, and should, in principle,
101
- // be defined in a .cpp file to preserve the One Definition Rule. That's
102
- // annoying for such a small amount of code, so we just inline it. If this file
103
- // grows, and there are more such functions, we should break them out into a
104
- // .cpp file.
105
- inline torch::DeviceIndex getNonNegativeDeviceIndex(
106
- const torch::Device& device) {
107
- torch::DeviceIndex deviceIndex = device.index();
108
- // For single GPU machines libtorch returns -1 for the device index. So for
109
- // that case we set the device index to 0. That's used in per-gpu cache
110
- // implementation and during initialization of CUDA and FFmpeg contexts
111
- // which require non negative indices.
112
- deviceIndex = std::max<at::DeviceIndex>(deviceIndex, 0);
113
- TORCH_CHECK(deviceIndex >= 0, "Device index out of range");
114
- return deviceIndex;
115
- }
98
+ // Forward declaration of getDeviceIndex which exists in CUDACommon.h
99
+ // This avoids circular dependency between Cache.h and CUDACommon.cpp which also
100
+ // needs to include Cache.h
101
+ int getDeviceIndex(const torch::Device& device);
116
102
 
117
103
  template <typename T, typename D>
118
104
  bool PerGpuCache<T, D>::addIfCacheHasCapacity(
119
105
  const torch::Device& device,
120
106
  element_type&& obj) {
121
- torch::DeviceIndex deviceIndex = getNonNegativeDeviceIndex(device);
107
+ int deviceIndex = getDeviceIndex(device);
122
108
  TORCH_CHECK(
123
109
  static_cast<size_t>(deviceIndex) < cache_.size(),
124
110
  "Device index out of range");
@@ -128,7 +114,7 @@ bool PerGpuCache<T, D>::addIfCacheHasCapacity(
128
114
  template <typename T, typename D>
129
115
  typename PerGpuCache<T, D>::element_type PerGpuCache<T, D>::get(
130
116
  const torch::Device& device) {
131
- torch::DeviceIndex deviceIndex = getNonNegativeDeviceIndex(device);
117
+ int deviceIndex = getDeviceIndex(device);
132
118
  TORCH_CHECK(
133
119
  static_cast<size_t>(deviceIndex) < cache_.size(),
134
120
  "Device index out of range");
torchcodec/_core/CpuDeviceInterface.cpp CHANGED
@@ -48,8 +48,10 @@ CpuDeviceInterface::CpuDeviceInterface(const torch::Device& device)
48
48
 
49
49
  void CpuDeviceInterface::initialize(
50
50
  const AVStream* avStream,
51
- [[maybe_unused]] const UniqueDecodingAVFormatContext& avFormatCtx) {
51
+ [[maybe_unused]] const UniqueDecodingAVFormatContext& avFormatCtx,
52
+ const SharedAVCodecContext& codecContext) {
52
53
  TORCH_CHECK(avStream != nullptr, "avStream is null");
54
+ codecContext_ = codecContext;
53
55
  timeBase_ = avStream->time_base;
54
56
  }
55
57
 
@@ -344,4 +346,8 @@ torch::Tensor CpuDeviceInterface::convertAVFrameToTensorUsingFilterGraph(
344
346
  return rgbAVFrameToTensor(filterGraph_->convert(avFrame));
345
347
  }
346
348
 
349
+ std::string CpuDeviceInterface::getDetails() {
350
+ return std::string("CPU Device Interface.");
351
+ }
352
+
347
353
  } // namespace facebook::torchcodec
torchcodec/_core/CpuDeviceInterface.h CHANGED
@@ -25,7 +25,8 @@ class CpuDeviceInterface : public DeviceInterface {
25
25
 
26
26
  virtual void initialize(
27
27
  const AVStream* avStream,
28
- const UniqueDecodingAVFormatContext& avFormatCtx) override;
28
+ const UniqueDecodingAVFormatContext& avFormatCtx,
29
+ const SharedAVCodecContext& codecContext) override;
29
30
 
30
31
  virtual void initializeVideo(
31
32
  const VideoStreamOptions& videoStreamOptions,
@@ -38,6 +39,8 @@ class CpuDeviceInterface : public DeviceInterface {
38
39
  std::optional<torch::Tensor> preAllocatedOutputTensor =
39
40
  std::nullopt) override;
40
41
 
42
+ std::string getDetails() override;
43
+
41
44
  private:
42
45
  int convertAVFrameToTensorUsingSwScale(
43
46
  const UniqueAVFrame& avFrame,
torchcodec/_core/CudaDeviceInterface.cpp CHANGED
@@ -32,9 +32,6 @@ static bool g_cuda = registerDeviceInterface(
32
32
  // from
33
33
  // the cache. If the cache is empty we create a new cuda context.
34
34
 
35
- // Pytorch can only handle up to 128 GPUs.
36
- // https://github.com/pytorch/pytorch/blob/e30c55ee527b40d67555464b9e402b4b7ce03737/c10/cuda/CUDAMacros.h#L44
37
- const int MAX_CUDA_GPUS = 128;
38
35
  // Set to -1 to have an infinitely sized cache. Set it to 0 to disable caching.
39
36
  // Set to a positive number to have a cache of that size.
40
37
  const int MAX_CONTEXTS_PER_GPU_IN_CACHE = -1;
@@ -54,7 +51,7 @@ int getFlagsAVHardwareDeviceContextCreate() {
54
51
  UniqueAVBufferRef getHardwareDeviceContext(const torch::Device& device) {
55
52
  enum AVHWDeviceType type = av_hwdevice_find_type_by_name("cuda");
56
53
  TORCH_CHECK(type != AV_HWDEVICE_TYPE_NONE, "Failed to find cuda device");
57
- torch::DeviceIndex nonNegativeDeviceIndex = getNonNegativeDeviceIndex(device);
54
+ int deviceIndex = getDeviceIndex(device);
58
55
 
59
56
  UniqueAVBufferRef hardwareDeviceCtx = g_cached_hw_device_ctxs.get(device);
60
57
  if (hardwareDeviceCtx) {
@@ -63,14 +60,12 @@ UniqueAVBufferRef getHardwareDeviceContext(const torch::Device& device) {
63
60
 
64
61
  // Create hardware device context
65
62
  c10::cuda::CUDAGuard deviceGuard(device);
66
- // Valid values for the argument to cudaSetDevice are 0 to maxDevices - 1:
67
- // https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__DEVICE.html#group__CUDART__DEVICE_1g159587909ffa0791bbe4b40187a4c6bb
68
- // So we ensure the deviceIndex is not negative.
69
63
  // We set the device because we may be called from a different thread than
70
64
  // the one that initialized the cuda context.
71
- cudaSetDevice(nonNegativeDeviceIndex);
65
+ TORCH_CHECK(
66
+ cudaSetDevice(deviceIndex) == cudaSuccess, "Failed to set CUDA device");
72
67
  AVBufferRef* hardwareDeviceCtxRaw = nullptr;
73
- std::string deviceOrdinal = std::to_string(nonNegativeDeviceIndex);
68
+ std::string deviceOrdinal = std::to_string(deviceIndex);
74
69
 
75
70
  int err = av_hwdevice_ctx_create(
76
71
  &hardwareDeviceCtxRaw,
@@ -117,15 +112,17 @@ CudaDeviceInterface::~CudaDeviceInterface() {
117
112
 
118
113
  void CudaDeviceInterface::initialize(
119
114
  const AVStream* avStream,
120
- const UniqueDecodingAVFormatContext& avFormatCtx) {
115
+ const UniqueDecodingAVFormatContext& avFormatCtx,
116
+ const SharedAVCodecContext& codecContext) {
121
117
  TORCH_CHECK(avStream != nullptr, "avStream is null");
118
+ codecContext_ = codecContext;
122
119
  timeBase_ = avStream->time_base;
123
120
 
124
121
  // TODO: Ideally, we should keep all interface implementations independent.
125
122
  cpuInterface_ = createDeviceInterface(torch::kCPU);
126
123
  TORCH_CHECK(
127
124
  cpuInterface_ != nullptr, "Failed to create CPU device interface");
128
- cpuInterface_->initialize(avStream, avFormatCtx);
125
+ cpuInterface_->initialize(avStream, avFormatCtx, codecContext);
129
126
  cpuInterface_->initializeVideo(
130
127
  VideoStreamOptions(),
131
128
  {},
@@ -287,9 +284,12 @@ void CudaDeviceInterface::convertAVFrameToFrameOutput(
287
284
  frameOutput.data = cpuFrameOutput.data.to(device_);
288
285
  }
289
286
 
287
+ usingCPUFallback_ = true;
290
288
  return;
291
289
  }
292
290
 
291
+ usingCPUFallback_ = false;
292
+
293
293
  // Above we checked that the AVFrame was on GPU, but that's not enough, we
294
294
  // also need to check that the AVFrame is in AV_PIX_FMT_NV12 format (8 bits),
295
295
  // because this is what the NPP color conversion routines expect. This SHOULD
@@ -354,4 +354,12 @@ std::optional<const AVCodec*> CudaDeviceInterface::findCodec(
354
354
  return std::nullopt;
355
355
  }
356
356
 
357
+ std::string CudaDeviceInterface::getDetails() {
358
+ // Note: for this interface specifically the fallback is only known after a
359
+ // frame has been decoded, not before: that's when FFmpeg decides to fallback,
360
+ // so we can't know earlier.
361
+ return std::string("FFmpeg CUDA Device Interface. Using ") +
362
+ (usingCPUFallback_ ? "CPU fallback." : "NVDEC.");
363
+ }
364
+
357
365
  } // namespace facebook::torchcodec
torchcodec/_core/CudaDeviceInterface.h CHANGED
@@ -22,7 +22,8 @@ class CudaDeviceInterface : public DeviceInterface {
22
22
 
23
23
  void initialize(
24
24
  const AVStream* avStream,
25
- const UniqueDecodingAVFormatContext& avFormatCtx) override;
25
+ const UniqueDecodingAVFormatContext& avFormatCtx,
26
+ const SharedAVCodecContext& codecContext) override;
26
27
 
27
28
  void initializeVideo(
28
29
  const VideoStreamOptions& videoStreamOptions,
@@ -39,6 +40,8 @@ class CudaDeviceInterface : public DeviceInterface {
39
40
  std::optional<torch::Tensor> preAllocatedOutputTensor =
40
41
  std::nullopt) override;
41
42
 
43
+ std::string getDetails() override;
44
+
42
45
  private:
43
46
  // Our CUDA decoding code assumes NV12 format. In order to handle other
44
47
  // kinds of input, we need to convert them to NV12. Our current implementation
@@ -59,6 +62,8 @@ class CudaDeviceInterface : public DeviceInterface {
59
62
  // maybeConvertAVFrameToNV12().
60
63
  std::unique_ptr<FiltersContext> nv12ConversionContext_;
61
64
  std::unique_ptr<FilterGraph> nv12Conversion_;
65
+
66
+ bool usingCPUFallback_ = false;
62
67
  };
63
68
 
64
69
  } // namespace facebook::torchcodec
torchcodec/_core/DeviceInterface.h CHANGED
@@ -21,7 +21,7 @@ namespace facebook::torchcodec {
21
21
  // Key for device interface registration with device type + variant support
22
22
  struct DeviceInterfaceKey {
23
23
  torch::DeviceType deviceType;
24
- std::string_view variant = "default"; // e.g., "default", "beta", etc.
24
+ std::string_view variant = "ffmpeg"; // e.g., "ffmpeg", "beta", etc.
25
25
 
26
26
  bool operator<(const DeviceInterfaceKey& other) const {
27
27
  if (deviceType != other.deviceType) {
@@ -54,7 +54,8 @@ class DeviceInterface {
54
54
  // Initialize the device with parameters generic to all kinds of decoding.
55
55
  virtual void initialize(
56
56
  const AVStream* avStream,
57
- const UniqueDecodingAVFormatContext& avFormatCtx) = 0;
57
+ const UniqueDecodingAVFormatContext& avFormatCtx,
58
+ const SharedAVCodecContext& codecContext) = 0;
58
59
 
59
60
  // Initialize the device with parameters specific to video decoding. There is
60
61
  // a default empty implementation.
@@ -80,52 +81,51 @@ class DeviceInterface {
80
81
  // Extension points for custom decoding paths
81
82
  // ------------------------------------------
82
83
 
83
- // Override to return true if this device interface can decode packets
84
- // directly. This means that the following two member functions can both
85
- // be called:
86
- //
87
- // 1. sendPacket()
88
- // 2. receiveFrame()
89
- virtual bool canDecodePacketDirectly() const {
90
- return false;
91
- }
92
-
93
- // Moral equivalent of avcodec_send_packet()
94
84
  // Returns AVSUCCESS on success, AVERROR(EAGAIN) if decoder queue full, or
95
85
  // other AVERROR on failure
96
- virtual int sendPacket([[maybe_unused]] ReferenceAVPacket& avPacket) {
86
+ // Default implementation uses FFmpeg directly
87
+ virtual int sendPacket(ReferenceAVPacket& avPacket) {
97
88
  TORCH_CHECK(
98
- false,
99
- "Send/receive packet decoding not implemented for this device interface");
100
- return AVERROR(ENOSYS);
89
+ codecContext_ != nullptr,
90
+ "Codec context not available for default packet sending");
91
+ return avcodec_send_packet(codecContext_.get(), avPacket.get());
101
92
  }
102
93
 
103
94
  // Send an EOF packet to flush the decoder
104
95
  // Returns AVSUCCESS on success, or other AVERROR on failure
96
+ // Default implementation uses FFmpeg directly
105
97
  virtual int sendEOFPacket() {
106
98
  TORCH_CHECK(
107
- false, "Send EOF packet not implemented for this device interface");
108
- return AVERROR(ENOSYS);
99
+ codecContext_ != nullptr,
100
+ "Codec context not available for default EOF packet sending");
101
+ return avcodec_send_packet(codecContext_.get(), nullptr);
109
102
  }
110
103
 
111
- // Moral equivalent of avcodec_receive_frame()
112
104
  // Returns AVSUCCESS on success, AVERROR(EAGAIN) if no frame ready,
113
105
  // AVERROR_EOF if end of stream, or other AVERROR on failure
114
- virtual int receiveFrame([[maybe_unused]] UniqueAVFrame& avFrame) {
106
+ // Default implementation uses FFmpeg directly
107
+ virtual int receiveFrame(UniqueAVFrame& avFrame) {
115
108
  TORCH_CHECK(
116
- false,
117
- "Send/receive packet decoding not implemented for this device interface");
118
- return AVERROR(ENOSYS);
109
+ codecContext_ != nullptr,
110
+ "Codec context not available for default frame receiving");
111
+ return avcodec_receive_frame(codecContext_.get(), avFrame.get());
119
112
  }
120
113
 
121
114
  // Flush remaining frames from decoder
122
115
  virtual void flush() {
123
- // Default implementation is no-op for standard decoders
124
- // Custom decoders can override this method
116
+ TORCH_CHECK(
117
+ codecContext_ != nullptr,
118
+ "Codec context not available for default flushing");
119
+ avcodec_flush_buffers(codecContext_.get());
120
+ }
121
+
122
+ virtual std::string getDetails() {
123
+ return "";
125
124
  }
126
125
 
127
126
  protected:
128
127
  torch::Device device_;
128
+ SharedAVCodecContext codecContext_;
129
129
  };
130
130
 
131
131
  using CreateDeviceInterfaceFn =
@@ -141,7 +141,7 @@ void validateDeviceInterface(
141
141
 
142
142
  std::unique_ptr<DeviceInterface> createDeviceInterface(
143
143
  const torch::Device& device,
144
- const std::string_view variant = "default");
144
+ const std::string_view variant = "ffmpeg");
145
145
 
146
146
  torch::Tensor rgbAVFrameToTensor(const UniqueAVFrame& avFrame);
147
147
 
torchcodec/_core/Encoder.cpp CHANGED
@@ -4,10 +4,6 @@
4
4
  #include "src/torchcodec/_core/Encoder.h"
5
5
  #include "torch/types.h"
6
6
 
7
- extern "C" {
8
- #include <libavutil/pixdesc.h>
9
- }
10
-
11
7
  namespace facebook::torchcodec {
12
8
 
13
9
  namespace {
@@ -542,10 +538,17 @@ torch::Tensor validateFrames(const torch::Tensor& frames) {
542
538
  } // namespace
543
539
 
544
540
  VideoEncoder::~VideoEncoder() {
541
+ // TODO-VideoEncoder: Unify destructor with ~AudioEncoder()
545
542
  if (avFormatContext_ && avFormatContext_->pb) {
546
- avio_flush(avFormatContext_->pb);
547
- avio_close(avFormatContext_->pb);
548
- avFormatContext_->pb = nullptr;
543
+ if (avFormatContext_->pb->error == 0) {
544
+ avio_flush(avFormatContext_->pb);
545
+ }
546
+ if (!avioContextHolder_) {
547
+ if (avFormatContext_->pb->error == 0) {
548
+ avio_close(avFormatContext_->pb);
549
+ }
550
+ avFormatContext_->pb = nullptr;
551
+ }
549
552
  }
550
553
  }
551
554
 
@@ -581,6 +584,36 @@ VideoEncoder::VideoEncoder(
581
584
  initializeEncoder(videoStreamOptions);
582
585
  }
583
586
 
587
+ VideoEncoder::VideoEncoder(
588
+ const torch::Tensor& frames,
589
+ int frameRate,
590
+ std::string_view formatName,
591
+ std::unique_ptr<AVIOContextHolder> avioContextHolder,
592
+ const VideoStreamOptions& videoStreamOptions)
593
+ : frames_(validateFrames(frames)),
594
+ inFrameRate_(frameRate),
595
+ avioContextHolder_(std::move(avioContextHolder)) {
596
+ setFFmpegLogLevel();
597
+ // Map mkv -> matroska when used as format name
598
+ formatName = (formatName == "mkv") ? "matroska" : formatName;
599
+ AVFormatContext* avFormatContext = nullptr;
600
+ int status = avformat_alloc_output_context2(
601
+ &avFormatContext, nullptr, formatName.data(), nullptr);
602
+
603
+ TORCH_CHECK(
604
+ avFormatContext != nullptr,
605
+ "Couldn't allocate AVFormatContext. ",
606
+ "Check the desired format? Got format=",
607
+ formatName,
608
+ ". ",
609
+ getFFMPEGErrorStringFromErrorCode(status));
610
+ avFormatContext_.reset(avFormatContext);
611
+
612
+ avFormatContext_->pb = avioContextHolder_->getAVIOContext();
613
+
614
+ initializeEncoder(videoStreamOptions);
615
+ }
616
+
584
617
  void VideoEncoder::initializeEncoder(
585
618
  const VideoStreamOptions& videoStreamOptions) {
586
619
  const AVCodec* avCodec =
@@ -751,6 +784,17 @@ UniqueAVFrame VideoEncoder::convertTensorToAVFrame(
751
784
  return avFrame;
752
785
  }
753
786
 
787
+ torch::Tensor VideoEncoder::encodeToTensor() {
788
+ TORCH_CHECK(
789
+ avioContextHolder_ != nullptr,
790
+ "Cannot encode to tensor, avio tensor context doesn't exist.");
791
+ encode();
792
+ auto avioToTensorContext =
793
+ dynamic_cast<AVIOToTensorContext*>(avioContextHolder_.get());
794
+ TORCH_CHECK(avioToTensorContext != nullptr, "Invalid AVIO context holder.");
795
+ return avioToTensorContext->getOutputTensor();
796
+ }
797
+
754
798
  void VideoEncoder::encodeFrame(
755
799
  AutoAVPacket& autoAVPacket,
756
800
  const UniqueAVFrame& avFrame) {
torchcodec/_core/Encoder.h CHANGED
@@ -141,8 +141,17 @@ class VideoEncoder {
141
141
  std::string_view fileName,
142
142
  const VideoStreamOptions& videoStreamOptions);
143
143
 
144
+ VideoEncoder(
145
+ const torch::Tensor& frames,
146
+ int frameRate,
147
+ std::string_view formatName,
148
+ std::unique_ptr<AVIOContextHolder> avioContextHolder,
149
+ const VideoStreamOptions& videoStreamOptions);
150
+
144
151
  void encode();
145
152
 
153
+ torch::Tensor encodeToTensor();
154
+
146
155
  private:
147
156
  void initializeEncoder(const VideoStreamOptions& videoStreamOptions);
148
157
  UniqueAVFrame convertTensorToAVFrame(
@@ -153,7 +162,7 @@ class VideoEncoder {
153
162
 
154
163
  UniqueEncodingAVFormatContext avFormatContext_;
155
164
  UniqueAVCodecContext avCodecContext_;
156
- AVStream* avStream_;
165
+ AVStream* avStream_ = nullptr;
157
166
  UniqueSwsContext swsContext_;
158
167
 
159
168
  const torch::Tensor frames_;
@@ -167,6 +176,8 @@ class VideoEncoder {
167
176
  int outHeight_ = -1;
168
177
  AVPixelFormat outPixelFormat_ = AV_PIX_FMT_NONE;
169
178
 
179
+ std::unique_ptr<AVIOContextHolder> avioContextHolder_;
180
+
170
181
  bool encodeWasCalled_ = false;
171
182
  };
172
183
 
torchcodec/_core/FFMPEGCommon.cpp CHANGED
@@ -149,7 +149,7 @@ int getNumChannels(const UniqueAVFrame& avFrame) {
149
149
  #endif
150
150
  }
151
151
 
152
- int getNumChannels(const UniqueAVCodecContext& avCodecContext) {
152
+ int getNumChannels(const SharedAVCodecContext& avCodecContext) {
153
153
  #if LIBAVFILTER_VERSION_MAJOR > 8 || \
154
154
  (LIBAVFILTER_VERSION_MAJOR == 8 && LIBAVFILTER_VERSION_MINOR >= 44)
155
155
  return avCodecContext->ch_layout.nb_channels;
torchcodec/_core/FFMPEGCommon.h CHANGED
@@ -71,6 +71,14 @@ using UniqueEncodingAVFormatContext = std::unique_ptr<
71
71
  using UniqueAVCodecContext = std::unique_ptr<
72
72
  AVCodecContext,
73
73
  Deleterp<AVCodecContext, void, avcodec_free_context>>;
74
+ using SharedAVCodecContext = std::shared_ptr<AVCodecContext>;
75
+
76
+ // create SharedAVCodecContext with custom deleter
77
+ inline SharedAVCodecContext makeSharedAVCodecContext(AVCodecContext* ctx) {
78
+ return SharedAVCodecContext(
79
+ ctx, Deleterp<AVCodecContext, void, avcodec_free_context>{});
80
+ }
81
+
74
82
  using UniqueAVFrame =
75
83
  std::unique_ptr<AVFrame, Deleterp<AVFrame, void, av_frame_free>>;
76
84
  using UniqueAVFilterGraph = std::unique_ptr<
@@ -171,7 +179,7 @@ const AVSampleFormat* getSupportedOutputSampleFormats(const AVCodec& avCodec);
171
179
  const AVPixelFormat* getSupportedPixelFormats(const AVCodec& avCodec);
172
180
 
173
181
  int getNumChannels(const UniqueAVFrame& avFrame);
174
- int getNumChannels(const UniqueAVCodecContext& avCodecContext);
182
+ int getNumChannels(const SharedAVCodecContext& avCodecContext);
175
183
 
176
184
  void setDefaultChannelLayout(
177
185
  UniqueAVCodecContext& avCodecContext,
torchcodec/_core/FilterGraph.cpp CHANGED
@@ -130,7 +130,8 @@ FilterGraph::FilterGraph(
130
130
  TORCH_CHECK(
131
131
  status >= 0,
132
132
  "Failed to configure filter graph: ",
133
- getFFMPEGErrorStringFromErrorCode(status));
133
+ getFFMPEGErrorStringFromErrorCode(status),
134
+ ", provided filters: " + filtersContext.filtergraphStr);
134
135
  }
135
136
 
136
137
  UniqueAVFrame FilterGraph::convert(const UniqueAVFrame& avFrame) {
torchcodec/_core/Frame.cpp CHANGED
@@ -8,6 +8,11 @@
8
8
 
9
9
  namespace facebook::torchcodec {
10
10
 
11
+ FrameDims::FrameDims(int height, int width) : height(height), width(width) {
12
+ TORCH_CHECK(height > 0, "FrameDims.height must be > 0, got: ", height);
13
+ TORCH_CHECK(width > 0, "FrameDims.width must be > 0, got: ", width);
14
+ }
15
+
11
16
  FrameBatchOutput::FrameBatchOutput(
12
17
  int64_t numFrames,
13
18
  const FrameDims& outputDims,
torchcodec/_core/Frame.h CHANGED
@@ -19,7 +19,7 @@ struct FrameDims {
19
19
 
20
20
  FrameDims() = default;
21
21
 
22
- FrameDims(int h, int w) : height(h), width(w) {}
22
+ FrameDims(int h, int w);
23
23
  };
24
24
 
25
25
  // All public video decoding entry points return either a FrameOutput or a