torchcodec 0.7.0__cp313-cp313-win_amd64.whl → 0.8.1__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of torchcodec might be problematic.

Files changed (66)
  1. torchcodec/_core/AVIOTensorContext.cpp +23 -16
  2. torchcodec/_core/AVIOTensorContext.h +2 -1
  3. torchcodec/_core/BetaCudaDeviceInterface.cpp +718 -0
  4. torchcodec/_core/BetaCudaDeviceInterface.h +193 -0
  5. torchcodec/_core/CMakeLists.txt +18 -3
  6. torchcodec/_core/CUDACommon.cpp +330 -0
  7. torchcodec/_core/CUDACommon.h +51 -0
  8. torchcodec/_core/Cache.h +6 -20
  9. torchcodec/_core/CpuDeviceInterface.cpp +195 -108
  10. torchcodec/_core/CpuDeviceInterface.h +84 -19
  11. torchcodec/_core/CudaDeviceInterface.cpp +227 -376
  12. torchcodec/_core/CudaDeviceInterface.h +38 -6
  13. torchcodec/_core/DeviceInterface.cpp +57 -19
  14. torchcodec/_core/DeviceInterface.h +97 -16
  15. torchcodec/_core/Encoder.cpp +346 -9
  16. torchcodec/_core/Encoder.h +62 -1
  17. torchcodec/_core/FFMPEGCommon.cpp +190 -3
  18. torchcodec/_core/FFMPEGCommon.h +27 -1
  19. torchcodec/_core/FilterGraph.cpp +30 -22
  20. torchcodec/_core/FilterGraph.h +15 -1
  21. torchcodec/_core/Frame.cpp +22 -7
  22. torchcodec/_core/Frame.h +15 -61
  23. torchcodec/_core/Metadata.h +2 -2
  24. torchcodec/_core/NVCUVIDRuntimeLoader.cpp +320 -0
  25. torchcodec/_core/NVCUVIDRuntimeLoader.h +14 -0
  26. torchcodec/_core/NVDECCache.cpp +60 -0
  27. torchcodec/_core/NVDECCache.h +102 -0
  28. torchcodec/_core/SingleStreamDecoder.cpp +196 -201
  29. torchcodec/_core/SingleStreamDecoder.h +42 -15
  30. torchcodec/_core/StreamOptions.h +16 -6
  31. torchcodec/_core/Transform.cpp +87 -0
  32. torchcodec/_core/Transform.h +84 -0
  33. torchcodec/_core/__init__.py +4 -0
  34. torchcodec/_core/custom_ops.cpp +257 -32
  35. torchcodec/_core/fetch_and_expose_non_gpl_ffmpeg_libs.cmake +61 -1
  36. torchcodec/_core/nvcuvid_include/cuviddec.h +1374 -0
  37. torchcodec/_core/nvcuvid_include/nvcuvid.h +610 -0
  38. torchcodec/_core/ops.py +147 -44
  39. torchcodec/_core/pybind_ops.cpp +22 -59
  40. torchcodec/_samplers/video_clip_sampler.py +7 -19
  41. torchcodec/decoders/__init__.py +1 -0
  42. torchcodec/decoders/_decoder_utils.py +61 -1
  43. torchcodec/decoders/_video_decoder.py +46 -20
  44. torchcodec/libtorchcodec_core4.dll +0 -0
  45. torchcodec/libtorchcodec_core5.dll +0 -0
  46. torchcodec/libtorchcodec_core6.dll +0 -0
  47. torchcodec/libtorchcodec_core7.dll +0 -0
  48. torchcodec/libtorchcodec_core8.dll +0 -0
  49. torchcodec/libtorchcodec_custom_ops4.dll +0 -0
  50. torchcodec/libtorchcodec_custom_ops5.dll +0 -0
  51. torchcodec/libtorchcodec_custom_ops6.dll +0 -0
  52. torchcodec/libtorchcodec_custom_ops7.dll +0 -0
  53. torchcodec/libtorchcodec_custom_ops8.dll +0 -0
  54. torchcodec/libtorchcodec_pybind_ops4.pyd +0 -0
  55. torchcodec/libtorchcodec_pybind_ops5.pyd +0 -0
  56. torchcodec/libtorchcodec_pybind_ops6.pyd +0 -0
  57. torchcodec/libtorchcodec_pybind_ops7.pyd +0 -0
  58. torchcodec/libtorchcodec_pybind_ops8.pyd +0 -0
  59. torchcodec/samplers/_time_based.py +8 -0
  60. torchcodec/version.py +1 -1
  61. {torchcodec-0.7.0.dist-info → torchcodec-0.8.1.dist-info}/METADATA +29 -16
  62. torchcodec-0.8.1.dist-info/RECORD +82 -0
  63. {torchcodec-0.7.0.dist-info → torchcodec-0.8.1.dist-info}/WHEEL +1 -1
  64. torchcodec-0.7.0.dist-info/RECORD +0 -67
  65. {torchcodec-0.7.0.dist-info → torchcodec-0.8.1.dist-info}/licenses/LICENSE +0 -0
  66. {torchcodec-0.7.0.dist-info → torchcodec-0.8.1.dist-info}/top_level.txt +0 -0
torchcodec/_core/custom_ops.cpp

@@ -10,6 +10,7 @@
  #include <string>
  #include "c10/core/SymIntArrayRef.h"
  #include "c10/util/Exception.h"
+ #include "src/torchcodec/_core/AVIOFileLikeContext.h"
  #include "src/torchcodec/_core/AVIOTensorContext.h"
  #include "src/torchcodec/_core/Encoder.h"
  #include "src/torchcodec/_core/SingleStreamDecoder.h"
@@ -33,13 +34,22 @@ TORCH_LIBRARY(torchcodec_ns, m) {
  "encode_audio_to_file(Tensor samples, int sample_rate, str filename, int? bit_rate=None, int? num_channels=None, int? desired_sample_rate=None) -> ()");
  m.def(
  "encode_audio_to_tensor(Tensor samples, int sample_rate, str format, int? bit_rate=None, int? num_channels=None, int? desired_sample_rate=None) -> Tensor");
+ m.def(
+ "_encode_audio_to_file_like(Tensor samples, int sample_rate, str format, int file_like_context, int? bit_rate=None, int? num_channels=None, int? desired_sample_rate=None) -> ()");
+ m.def(
+ "encode_video_to_file(Tensor frames, int frame_rate, str filename, int? crf=None) -> ()");
+ m.def(
+ "encode_video_to_tensor(Tensor frames, int frame_rate, str format, int? crf=None) -> Tensor");
+ m.def(
+ "_encode_video_to_file_like(Tensor frames, int frame_rate, str format, int file_like_context, int? crf=None) -> ()");
  m.def(
  "create_from_tensor(Tensor video_tensor, str? seek_mode=None) -> Tensor");
- m.def("_convert_to_tensor(int decoder_ptr) -> Tensor");
  m.def(
- "_add_video_stream(Tensor(a!) decoder, *, int? width=None, int? height=None, int? num_threads=None, str? dimension_order=None, int? stream_index=None, str? device=None, (Tensor, Tensor, Tensor)? custom_frame_mappings=None, str? color_conversion_library=None) -> ()");
+ "_create_from_file_like(int file_like_context, str? seek_mode=None) -> Tensor");
+ m.def(
+ "_add_video_stream(Tensor(a!) decoder, *, int? num_threads=None, str? dimension_order=None, int? stream_index=None, str device=\"cpu\", str device_variant=\"ffmpeg\", str transform_specs=\"\", (Tensor, Tensor, Tensor)? custom_frame_mappings=None, str? color_conversion_library=None) -> ()");
  m.def(
- "add_video_stream(Tensor(a!) decoder, *, int? width=None, int? height=None, int? num_threads=None, str? dimension_order=None, int? stream_index=None, str? device=None, (Tensor, Tensor, Tensor)? custom_frame_mappings=None) -> ()");
+ "add_video_stream(Tensor(a!) decoder, *, int? num_threads=None, str? dimension_order=None, int? stream_index=None, str device=\"cpu\", str device_variant=\"ffmpeg\", str transform_specs=\"\", (Tensor, Tensor, Tensor)? custom_frame_mappings=None) -> ()");
  m.def(
  "add_audio_stream(Tensor(a!) decoder, *, int? stream_index=None, int? sample_rate=None, int? num_channels=None) -> ()");
  m.def("seek_to_pts(Tensor(a!) decoder, float seconds) -> ()");
@@ -49,7 +59,7 @@ TORCH_LIBRARY(torchcodec_ns, m) {
  m.def(
  "get_frame_at_index(Tensor(a!) decoder, *, int frame_index) -> (Tensor, Tensor, Tensor)");
  m.def(
- "get_frames_at_indices(Tensor(a!) decoder, *, int[] frame_indices) -> (Tensor, Tensor, Tensor)");
+ "get_frames_at_indices(Tensor(a!) decoder, *, Tensor frame_indices) -> (Tensor, Tensor, Tensor)");
  m.def(
  "get_frames_in_range(Tensor(a!) decoder, *, int start, int stop, int? step=None) -> (Tensor, Tensor, Tensor)");
  m.def(
@@ -57,13 +67,14 @@ TORCH_LIBRARY(torchcodec_ns, m) {
  m.def(
  "get_frames_by_pts_in_range_audio(Tensor(a!) decoder, *, float start_seconds, float? stop_seconds) -> (Tensor, Tensor)");
  m.def(
- "get_frames_by_pts(Tensor(a!) decoder, *, float[] timestamps) -> (Tensor, Tensor, Tensor)");
+ "get_frames_by_pts(Tensor(a!) decoder, *, Tensor timestamps) -> (Tensor, Tensor, Tensor)");
  m.def("_get_key_frame_indices(Tensor(a!) decoder) -> Tensor");
  m.def("get_json_metadata(Tensor(a!) decoder) -> str");
  m.def("get_container_json_metadata(Tensor(a!) decoder) -> str");
  m.def(
  "get_stream_json_metadata(Tensor(a!) decoder, int stream_index) -> str");
  m.def("_get_json_ffmpeg_library_versions() -> str");
+ m.def("_get_backend_details(Tensor(a!) decoder) -> str");
  m.def(
  "_test_frame_pts_equality(Tensor(a!) decoder, *, int frame_index, float pts_seconds_to_test) -> bool");
  m.def("scan_all_streams_to_update_metadata(Tensor(a!) decoder) -> ()");
@@ -165,6 +176,103 @@ std::string mapToJson(const std::map<std::string, std::string>& metadataMap) {
  return ss.str();
  }

+ SingleStreamDecoder::SeekMode seekModeFromString(std::string_view seekMode) {
+ if (seekMode == "exact") {
+ return SingleStreamDecoder::SeekMode::exact;
+ } else if (seekMode == "approximate") {
+ return SingleStreamDecoder::SeekMode::approximate;
+ } else if (seekMode == "custom_frame_mappings") {
+ return SingleStreamDecoder::SeekMode::custom_frame_mappings;
+ } else {
+ TORCH_CHECK(false, "Invalid seek mode: " + std::string(seekMode));
+ }
+ }
+
+ int checkedToPositiveInt(const std::string& str) {
+ int ret = 0;
+ try {
+ ret = std::stoi(str);
+ } catch (const std::invalid_argument&) {
+ TORCH_CHECK(false, "String cannot be converted to an int:" + str);
+ } catch (const std::out_of_range&) {
+ TORCH_CHECK(false, "String would become integer out of range:" + str);
+ }
+ TORCH_CHECK(ret > 0, "String must be a positive integer:" + str);
+ return ret;
+ }
+
+ // Resize transform specs take the form:
+ //
+ // "resize, <height>, <width>"
+ //
+ // Where "resize" is the string literal and <height> and <width> are positive
+ // integers.
+ Transform* makeResizeTransform(
+ const std::vector<std::string>& resizeTransformSpec) {
+ TORCH_CHECK(
+ resizeTransformSpec.size() == 3,
+ "resizeTransformSpec must have 3 elements including its name");
+ int height = checkedToPositiveInt(resizeTransformSpec[1]);
+ int width = checkedToPositiveInt(resizeTransformSpec[2]);
+ return new ResizeTransform(FrameDims(height, width));
+ }
+
+ // Crop transform specs take the form:
+ //
+ // "crop, <height>, <width>, <x>, <y>"
+ //
+ // Where "crop" is the string literal and <height>, <width>, <x> and <y> are
+ // positive integers. <x> and <y> are the x and y coordinates of the top left
+ // corner of the crop. Note that we follow the PyTorch convention of (height,
+ // width) for specifying image dimensions; FFmpeg uses (width, height).
+ Transform* makeCropTransform(
+ const std::vector<std::string>& cropTransformSpec) {
+ TORCH_CHECK(
+ cropTransformSpec.size() == 5,
+ "cropTransformSpec must have 5 elements including its name");
+ int height = checkedToPositiveInt(cropTransformSpec[1]);
+ int width = checkedToPositiveInt(cropTransformSpec[2]);
+ int x = checkedToPositiveInt(cropTransformSpec[3]);
+ int y = checkedToPositiveInt(cropTransformSpec[4]);
+ return new CropTransform(FrameDims(height, width), x, y);
+ }
+
+ std::vector<std::string> split(const std::string& str, char delimiter) {
+ std::vector<std::string> tokens;
+ std::string token;
+ std::istringstream tokenStream(str);
+ while (std::getline(tokenStream, token, delimiter)) {
+ tokens.push_back(token);
+ }
+ return tokens;
+ }
+
+ // The transformSpecsRaw string is always in the format:
+ //
+ // "name1, param1, param2, ...; name2, param1, param2, ...; ..."
+ //
+ // Where "nameX" is the name of the transform, and "paramX" are the parameters.
+ std::vector<Transform*> makeTransforms(const std::string& transformSpecsRaw) {
+ std::vector<Transform*> transforms;
+ std::vector<std::string> transformSpecs = split(transformSpecsRaw, ';');
+ for (const std::string& transformSpecRaw : transformSpecs) {
+ std::vector<std::string> transformSpec = split(transformSpecRaw, ',');
+ TORCH_CHECK(
+ transformSpec.size() >= 1,
+ "Invalid transform spec: " + transformSpecRaw);
+
+ auto name = transformSpec[0];
+ if (name == "resize") {
+ transforms.push_back(makeResizeTransform(transformSpec));
+ } else if (name == "crop") {
+ transforms.push_back(makeCropTransform(transformSpec));
+ } else {
+ TORCH_CHECK(false, "Invalid transform name: " + name);
+ }
+ }
+ return transforms;
+ }
+
  } // namespace

  // ==============================
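The helpers above define the wire format that add_video_stream's transform_specs string is parsed with: transforms are separated by ';', fields by ',', resize takes (height, width), and crop takes (height, width, x, y). A standalone illustration of how such a string decomposes under that grammar (plain Python, not torchcodec code; it assumes specs are joined with ';' and carry no stray whitespace around the separator):

    # Mirrors the split()/makeTransforms() parsing shown above, for illustration only.
    transform_specs = "resize, 256, 256;crop, 224, 224, 16, 16"

    for spec in transform_specs.split(";"):
        name, *params = [field.strip() for field in spec.split(",")]
        if name == "resize":
            height, width = map(int, params)        # "resize, <height>, <width>"
            print(f"resize to {height}x{width}")
        elif name == "crop":
            height, width, x, y = map(int, params)  # "crop, <height>, <width>, <x>, <y>"
            print(f"crop {height}x{width} with top-left corner at ({x}, {y})")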
@@ -203,33 +311,47 @@ at::Tensor create_from_tensor(
  realSeek = seekModeFromString(seek_mode.value());
  }

- auto contextHolder = std::make_unique<AVIOFromTensorContext>(video_tensor);
+ auto avioContextHolder =
+ std::make_unique<AVIOFromTensorContext>(video_tensor);

  std::unique_ptr<SingleStreamDecoder> uniqueDecoder =
- std::make_unique<SingleStreamDecoder>(std::move(contextHolder), realSeek);
+ std::make_unique<SingleStreamDecoder>(
+ std::move(avioContextHolder), realSeek);
  return wrapDecoderPointerToTensor(std::move(uniqueDecoder));
  }

- at::Tensor _convert_to_tensor(int64_t decoder_ptr) {
- auto decoder = reinterpret_cast<SingleStreamDecoder*>(decoder_ptr);
- std::unique_ptr<SingleStreamDecoder> uniqueDecoder(decoder);
+ at::Tensor _create_from_file_like(
+ int64_t file_like_context,
+ std::optional<std::string_view> seek_mode) {
+ auto fileLikeContext =
+ reinterpret_cast<AVIOFileLikeContext*>(file_like_context);
+ TORCH_CHECK(
+ fileLikeContext != nullptr, "file_like_context must be a valid pointer");
+ std::unique_ptr<AVIOFileLikeContext> avioContextHolder(fileLikeContext);
+
+ SingleStreamDecoder::SeekMode realSeek = SingleStreamDecoder::SeekMode::exact;
+ if (seek_mode.has_value()) {
+ realSeek = seekModeFromString(seek_mode.value());
+ }
+
+ std::unique_ptr<SingleStreamDecoder> uniqueDecoder =
+ std::make_unique<SingleStreamDecoder>(
+ std::move(avioContextHolder), realSeek);
  return wrapDecoderPointerToTensor(std::move(uniqueDecoder));
  }

  void _add_video_stream(
  at::Tensor& decoder,
- std::optional<int64_t> width = std::nullopt,
- std::optional<int64_t> height = std::nullopt,
  std::optional<int64_t> num_threads = std::nullopt,
  std::optional<std::string_view> dimension_order = std::nullopt,
  std::optional<int64_t> stream_index = std::nullopt,
- std::optional<std::string_view> device = std::nullopt,
+ std::string_view device = "cpu",
+ std::string_view device_variant = "ffmpeg",
+ std::string_view transform_specs = "",
  std::optional<std::tuple<at::Tensor, at::Tensor, at::Tensor>>
  custom_frame_mappings = std::nullopt,
  std::optional<std::string_view> color_conversion_library = std::nullopt) {
  VideoStreamOptions videoStreamOptions;
- videoStreamOptions.width = width;
- videoStreamOptions.height = height;
  videoStreamOptions.ffmpegThreadCount = num_threads;

  if (dimension_order.has_value()) {
@@ -253,37 +375,46 @@ void _add_video_stream(
  ". color_conversion_library must be either filtergraph or swscale.");
  }
  }
- if (device.has_value()) {
- videoStreamOptions.device = createTorchDevice(std::string(device.value()));
- }
+
+ validateDeviceInterface(std::string(device), std::string(device_variant));
+
+ videoStreamOptions.device = torch::Device(std::string(device));
+ videoStreamOptions.deviceVariant = device_variant;
+
+ std::vector<Transform*> transforms =
+ makeTransforms(std::string(transform_specs));
+
  std::optional<SingleStreamDecoder::FrameMappings> converted_mappings =
  custom_frame_mappings.has_value()
  ? std::make_optional(makeFrameMappings(custom_frame_mappings.value()))
  : std::nullopt;
  auto videoDecoder = unwrapTensorToGetDecoder(decoder);
  videoDecoder->addVideoStream(
- stream_index.value_or(-1), videoStreamOptions, converted_mappings);
+ stream_index.value_or(-1),
+ transforms,
+ videoStreamOptions,
+ converted_mappings);
  }

  // Add a new video stream at `stream_index` using the provided options.
  void add_video_stream(
  at::Tensor& decoder,
- std::optional<int64_t> width = std::nullopt,
- std::optional<int64_t> height = std::nullopt,
  std::optional<int64_t> num_threads = std::nullopt,
  std::optional<std::string_view> dimension_order = std::nullopt,
  std::optional<int64_t> stream_index = std::nullopt,
- std::optional<std::string_view> device = std::nullopt,
+ std::string_view device = "cpu",
+ std::string_view device_variant = "ffmpeg",
+ std::string_view transform_specs = "",
  const std::optional<std::tuple<at::Tensor, at::Tensor, at::Tensor>>&
  custom_frame_mappings = std::nullopt) {
  _add_video_stream(
  decoder,
- width,
- height,
  num_threads,
  dimension_order,
  stream_index,
  device,
+ device_variant,
+ transform_specs,
  custom_frame_mappings);
  }

@@ -344,11 +475,9 @@ OpsFrameOutput get_frame_at_index(at::Tensor& decoder, int64_t frame_index) {
  // Return the frames at given indices for a given stream
  OpsFrameBatchOutput get_frames_at_indices(
  at::Tensor& decoder,
- at::IntArrayRef frame_indices) {
+ const at::Tensor& frame_indices) {
  auto videoDecoder = unwrapTensorToGetDecoder(decoder);
- std::vector<int64_t> frameIndicesVec(
- frame_indices.begin(), frame_indices.end());
- auto result = videoDecoder->getFramesAtIndices(frameIndicesVec);
+ auto result = videoDecoder->getFramesAtIndices(frame_indices);
  return makeOpsFrameBatchOutput(result);
  }

@@ -367,10 +496,9 @@ OpsFrameBatchOutput get_frames_in_range(
  // Return the frames at given ptss for a given stream
  OpsFrameBatchOutput get_frames_by_pts(
  at::Tensor& decoder,
- at::ArrayRef<double> timestamps) {
+ const at::Tensor& timestamps) {
  auto videoDecoder = unwrapTensorToGetDecoder(decoder);
- std::vector<double> timestampsVec(timestamps.begin(), timestamps.end());
- auto result = videoDecoder->getFramesPlayedAt(timestampsVec);
+ auto result = videoDecoder->getFramesPlayedAt(timestamps);
  return makeOpsFrameBatchOutput(result);
  }

@@ -441,6 +569,92 @@ at::Tensor encode_audio_to_tensor(
  .encodeToTensor();
  }

+ void _encode_audio_to_file_like(
+ const at::Tensor& samples,
+ int64_t sample_rate,
+ std::string_view format,
+ int64_t file_like_context,
+ std::optional<int64_t> bit_rate = std::nullopt,
+ std::optional<int64_t> num_channels = std::nullopt,
+ std::optional<int64_t> desired_sample_rate = std::nullopt) {
+ auto fileLikeContext =
+ reinterpret_cast<AVIOFileLikeContext*>(file_like_context);
+ TORCH_CHECK(
+ fileLikeContext != nullptr, "file_like_context must be a valid pointer");
+ std::unique_ptr<AVIOFileLikeContext> avioContextHolder(fileLikeContext);
+
+ AudioStreamOptions audioStreamOptions;
+ audioStreamOptions.bitRate = validateOptionalInt64ToInt(bit_rate, "bit_rate");
+ audioStreamOptions.numChannels =
+ validateOptionalInt64ToInt(num_channels, "num_channels");
+ audioStreamOptions.sampleRate =
+ validateOptionalInt64ToInt(desired_sample_rate, "desired_sample_rate");
+
+ AudioEncoder encoder(
+ samples,
+ validateInt64ToInt(sample_rate, "sample_rate"),
+ format,
+ std::move(avioContextHolder),
+ audioStreamOptions);
+ encoder.encode();
+ }
+
+ void encode_video_to_file(
+ const at::Tensor& frames,
+ int64_t frame_rate,
+ std::string_view file_name,
+ std::optional<int64_t> crf = std::nullopt) {
+ VideoStreamOptions videoStreamOptions;
+ videoStreamOptions.crf = crf;
+ VideoEncoder(
+ frames,
+ validateInt64ToInt(frame_rate, "frame_rate"),
+ file_name,
+ videoStreamOptions)
+ .encode();
+ }
+
+ at::Tensor encode_video_to_tensor(
+ const at::Tensor& frames,
+ int64_t frame_rate,
+ std::string_view format,
+ std::optional<int64_t> crf = std::nullopt) {
+ auto avioContextHolder = std::make_unique<AVIOToTensorContext>();
+ VideoStreamOptions videoStreamOptions;
+ videoStreamOptions.crf = crf;
+ return VideoEncoder(
+ frames,
+ validateInt64ToInt(frame_rate, "frame_rate"),
+ format,
+ std::move(avioContextHolder),
+ videoStreamOptions)
+ .encodeToTensor();
+ }
+
+ void _encode_video_to_file_like(
+ const at::Tensor& frames,
+ int64_t frame_rate,
+ std::string_view format,
+ int64_t file_like_context,
+ std::optional<int64_t> crf = std::nullopt) {
+ auto fileLikeContext =
+ reinterpret_cast<AVIOFileLikeContext*>(file_like_context);
+ TORCH_CHECK(
+ fileLikeContext != nullptr, "file_like_context must be a valid pointer");
+ std::unique_ptr<AVIOFileLikeContext> avioContextHolder(fileLikeContext);
+
+ VideoStreamOptions videoStreamOptions;
+ videoStreamOptions.crf = crf;
+
+ VideoEncoder encoder(
+ frames,
+ validateInt64ToInt(frame_rate, "frame_rate"),
+ format,
+ std::move(avioContextHolder),
+ videoStreamOptions);
+ encoder.encode();
+ }
+
  // For testing only. We need to implement this operation as a core library
  // function because what we're testing is round-tripping pts values as
  // double-precision floating point numbers from C++ to Python and back to C++.
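The video encoder entry points mirror the audio ones: encode_video_to_file writes straight to a path, encode_video_to_tensor returns the encoded bytes as a tensor, and the _file_like variant adopts a pointer to an AVIOFileLikeContext created on the Python side. A hedged sketch of the two non-underscore ops, following the schemas registered earlier (the frame layout below is an assumption, not taken from this diff):

    import torch
    import torchcodec  # assumption: importing the package registers the torchcodec_ns ops

    frames = torch.zeros(30, 3, 240, 320, dtype=torch.uint8)  # assumed frames x C x H x W uint8

    # Write an encoded video file to disk; crf is optional per the schema.
    torch.ops.torchcodec_ns.encode_video_to_file(frames, 25, "out.mp4", crf=23)

    # Or keep the encoded bytes in memory as a tensor.
    encoded = torch.ops.torchcodec_ns.encode_video_to_tensor(frames, 25, "mp4", crf=23)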
@@ -682,6 +896,11 @@ std::string _get_json_ffmpeg_library_versions() {
  return ss.str();
  }

+ std::string get_backend_details(at::Tensor& decoder) {
+ auto videoDecoder = unwrapTensorToGetDecoder(decoder);
+ return videoDecoder->getDeviceInterfaceDetails();
+ }
+
  // Scans video packets to get more accurate metadata like frame count, exact
  // keyframe positions, etc. Exact keyframe positions are useful for efficient
  // accurate seeking. Note that this function reads the entire video but it does
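get_backend_details is exposed as the _get_backend_details op (see the schema above and the CPU registration below) and returns a string describing the device interface backing a decoder. A small hedged sketch, reusing a decoder created as in the earlier snippets:

    # Returns a description of the decoder's device interface / backend as a string.
    details = torch.ops.torchcodec_ns._get_backend_details(decoder)
    print(details)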
@@ -694,7 +913,7 @@ void scan_all_streams_to_update_metadata(at::Tensor& decoder) {
  TORCH_LIBRARY_IMPL(torchcodec_ns, BackendSelect, m) {
  m.impl("create_from_file", &create_from_file);
  m.impl("create_from_tensor", &create_from_tensor);
- m.impl("_convert_to_tensor", &_convert_to_tensor);
+ m.impl("_create_from_file_like", &_create_from_file_like);
  m.impl(
  "_get_json_ffmpeg_library_versions", &_get_json_ffmpeg_library_versions);
  }
@@ -702,6 +921,10 @@ TORCH_LIBRARY_IMPL(torchcodec_ns, BackendSelect, m) {
  TORCH_LIBRARY_IMPL(torchcodec_ns, CPU, m) {
  m.impl("encode_audio_to_file", &encode_audio_to_file);
  m.impl("encode_audio_to_tensor", &encode_audio_to_tensor);
+ m.impl("_encode_audio_to_file_like", &_encode_audio_to_file_like);
+ m.impl("encode_video_to_file", &encode_video_to_file);
+ m.impl("encode_video_to_tensor", &encode_video_to_tensor);
+ m.impl("_encode_video_to_file_like", &_encode_video_to_file_like);
  m.impl("seek_to_pts", &seek_to_pts);
  m.impl("add_video_stream", &add_video_stream);
  m.impl("_add_video_stream", &_add_video_stream);
@@ -722,6 +945,8 @@ TORCH_LIBRARY_IMPL(torchcodec_ns, CPU, m) {
  m.impl(
  "scan_all_streams_to_update_metadata",
  &scan_all_streams_to_update_metadata);
+
+ m.impl("_get_backend_details", &get_backend_details);
  }

  } // namespace facebook::torchcodec
torchcodec/_core/fetch_and_expose_non_gpl_ffmpeg_libs.cmake

@@ -44,6 +44,10 @@ if (LINUX)
  f7_sha256
  1cb946d8b7c6393c2c3ebe1f900b8de7a2885fe614c45d4ec32c9833084f2f26
  )
+ set(
+ f8_sha256
+ c55b3c1a4b5e4d5fdd7c632bea3ab6f45b4e37cc8e0999dda3f84a8ed8defad8
+ )
  set(
  f4_library_file_names
  libavutil.so.56
@@ -84,6 +88,16 @@ if (LINUX)
  libswscale.so.8
  libswresample.so.5
  )
+ set(
+ f8_library_file_names
+ libavutil.so.60
+ libavcodec.so.62
+ libavformat.so.62
+ libavdevice.so.62
+ libavfilter.so.11
+ libswscale.so.9
+ libswresample.so.6
+ )
  elseif (APPLE)
  set(lib_dir "lib")
  set(
@@ -106,6 +120,10 @@ elseif (APPLE)
  f7_sha256
  48a4fc8ce098305cfd4a58f40889249c523ca3c285f66ba704b5bad0e3ada53a
  )
+ set(
+ f8_sha256
+ beb936b76f25d2621228a12cdb67c9ae3d1eff7aa713ef8d1167ebf0c25bd5ec
+ )

  set(
  f4_library_file_names
@@ -147,6 +165,16 @@ elseif (APPLE)
  libswscale.8.dylib
  libswresample.5.dylib
  )
+ set(
+ f8_library_file_names
+ libavutil.60.dylib
+ libavcodec.62.dylib
+ libavformat.62.dylib
+ libavdevice.62.dylib
+ libavfilter.11.dylib
+ libswscale.9.dylib
+ libswresample.6.dylib
+ )

  elseif (WIN32)
  set(lib_dir "bin")
@@ -170,6 +198,10 @@ elseif (WIN32)
  f7_sha256
  ae391ace382330e912793b70b68529ee7c91026d2869b4df7e7c3e7d3656bdd5
  )
+ set(
+ f8_sha256
+ bac845ac79876b104959cb0e7b9dec772a261116344dd17d2f97e7ddfac4a73f
+ )

  set(
  f4_library_file_names
@@ -211,6 +243,16 @@ elseif (WIN32)
  swscale.lib
  swresample.lib
  )
+ set(
+ f8_library_file_names
+ avutil.lib
+ avcodec.lib
+ avformat.lib
+ avdevice.lib
+ avfilter.lib
+ swscale.lib
+ swresample.lib
+ )
  else()
  message(
  FATAL_ERROR
@@ -242,19 +284,27 @@ FetchContent_Declare(
  URL_HASH
  SHA256=${f7_sha256}
  )
+ FetchContent_Declare(
+ f8
+ URL ${platform_url}/8.0.tar.gz
+ URL_HASH
+ SHA256=${f8_sha256}
+ )

- FetchContent_MakeAvailable(f4 f5 f6 f7)
+ FetchContent_MakeAvailable(f4 f5 f6 f7 f8)

  add_library(ffmpeg4 INTERFACE)
  add_library(ffmpeg5 INTERFACE)
  add_library(ffmpeg6 INTERFACE)
  add_library(ffmpeg7 INTERFACE)
+ add_library(ffmpeg8 INTERFACE)

  # Note: the f?_SOURCE_DIR variables were set by FetchContent_MakeAvailable
  target_include_directories(ffmpeg4 INTERFACE ${f4_SOURCE_DIR}/include)
  target_include_directories(ffmpeg5 INTERFACE ${f5_SOURCE_DIR}/include)
  target_include_directories(ffmpeg6 INTERFACE ${f6_SOURCE_DIR}/include)
  target_include_directories(ffmpeg7 INTERFACE ${f7_SOURCE_DIR}/include)
+ target_include_directories(ffmpeg8 INTERFACE ${f8_SOURCE_DIR}/include)


  list(
@@ -277,6 +327,11 @@ list(
  PREPEND ${f7_SOURCE_DIR}/${lib_dir}/
  OUTPUT_VARIABLE f7_library_paths
  )
+ list(
+ TRANSFORM f8_library_file_names
+ PREPEND ${f8_SOURCE_DIR}/${lib_dir}/
+ OUTPUT_VARIABLE f8_library_paths
+ )

  target_link_libraries(
  ffmpeg4
@@ -298,3 +353,8 @@ target_link_libraries(
  INTERFACE
  ${f7_library_paths}
  )
+ target_link_libraries(
+ ffmpeg8
+ INTERFACE
+ ${f8_library_paths}
+ )
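The CMake changes fetch prebuilt FFmpeg 8 libraries and headers and define an ffmpeg8 interface target alongside the existing ffmpeg4 through ffmpeg7 targets, which is what the new libtorchcodec_*8 binaries in this wheel build against. At runtime, the _get_json_ffmpeg_library_versions op from the custom_ops diff above reports which FFmpeg the loaded core was built against; a hedged sketch (the exact JSON layout is not shown in this diff):

    import json
    import torch
    import torchcodec  # assumption: importing the package loads and registers the ops

    raw = torch.ops.torchcodec_ns._get_json_ffmpeg_library_versions()
    print(json.loads(raw))  # per-library FFmpeg version info reported by the loaded core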