PyPI - torchcodec - Versions diffs - 0.10.0__cp312-cp312-manylinux_2_28_x86_64.whl - Mend

torchcodec 0.10.0__cp312-cp312-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (88) hide show

torchcodec/__init__.py +27 -0
torchcodec/_core/AVIOContextHolder.cpp +60 -0
torchcodec/_core/AVIOContextHolder.h +64 -0
torchcodec/_core/AVIOFileLikeContext.cpp +98 -0
torchcodec/_core/AVIOFileLikeContext.h +55 -0
torchcodec/_core/AVIOTensorContext.cpp +130 -0
torchcodec/_core/AVIOTensorContext.h +44 -0
torchcodec/_core/BetaCudaDeviceInterface.cpp +849 -0
torchcodec/_core/BetaCudaDeviceInterface.h +196 -0
torchcodec/_core/CMakeLists.txt +295 -0
torchcodec/_core/CUDACommon.cpp +330 -0
torchcodec/_core/CUDACommon.h +51 -0
torchcodec/_core/Cache.h +124 -0
torchcodec/_core/CpuDeviceInterface.cpp +509 -0
torchcodec/_core/CpuDeviceInterface.h +141 -0
torchcodec/_core/CudaDeviceInterface.cpp +602 -0
torchcodec/_core/CudaDeviceInterface.h +79 -0
torchcodec/_core/DeviceInterface.cpp +117 -0
torchcodec/_core/DeviceInterface.h +191 -0
torchcodec/_core/Encoder.cpp +1054 -0
torchcodec/_core/Encoder.h +192 -0
torchcodec/_core/FFMPEGCommon.cpp +684 -0
torchcodec/_core/FFMPEGCommon.h +314 -0
torchcodec/_core/FilterGraph.cpp +159 -0
torchcodec/_core/FilterGraph.h +59 -0
torchcodec/_core/Frame.cpp +47 -0
torchcodec/_core/Frame.h +72 -0
torchcodec/_core/Metadata.cpp +124 -0
torchcodec/_core/Metadata.h +92 -0
torchcodec/_core/NVCUVIDRuntimeLoader.cpp +320 -0
torchcodec/_core/NVCUVIDRuntimeLoader.h +14 -0
torchcodec/_core/NVDECCache.cpp +60 -0
torchcodec/_core/NVDECCache.h +102 -0
torchcodec/_core/SingleStreamDecoder.cpp +1586 -0
torchcodec/_core/SingleStreamDecoder.h +391 -0
torchcodec/_core/StreamOptions.h +70 -0
torchcodec/_core/Transform.cpp +128 -0
torchcodec/_core/Transform.h +86 -0
torchcodec/_core/ValidationUtils.cpp +35 -0
torchcodec/_core/ValidationUtils.h +21 -0
torchcodec/_core/__init__.py +46 -0
torchcodec/_core/_metadata.py +262 -0
torchcodec/_core/custom_ops.cpp +1090 -0
torchcodec/_core/fetch_and_expose_non_gpl_ffmpeg_libs.cmake +169 -0
torchcodec/_core/nvcuvid_include/cuviddec.h +1374 -0
torchcodec/_core/nvcuvid_include/nvcuvid.h +610 -0
torchcodec/_core/ops.py +605 -0
torchcodec/_core/pybind_ops.cpp +50 -0
torchcodec/_frame.py +146 -0
torchcodec/_internally_replaced_utils.py +68 -0
torchcodec/_samplers/__init__.py +7 -0
torchcodec/_samplers/video_clip_sampler.py +419 -0
torchcodec/decoders/__init__.py +12 -0
torchcodec/decoders/_audio_decoder.py +185 -0
torchcodec/decoders/_decoder_utils.py +113 -0
torchcodec/decoders/_video_decoder.py +601 -0
torchcodec/encoders/__init__.py +2 -0
torchcodec/encoders/_audio_encoder.py +149 -0
torchcodec/encoders/_video_encoder.py +196 -0
torchcodec/libtorchcodec_core4.so +0 -0
torchcodec/libtorchcodec_core5.so +0 -0
torchcodec/libtorchcodec_core6.so +0 -0
torchcodec/libtorchcodec_core7.so +0 -0
torchcodec/libtorchcodec_core8.so +0 -0
torchcodec/libtorchcodec_custom_ops4.so +0 -0
torchcodec/libtorchcodec_custom_ops5.so +0 -0
torchcodec/libtorchcodec_custom_ops6.so +0 -0
torchcodec/libtorchcodec_custom_ops7.so +0 -0
torchcodec/libtorchcodec_custom_ops8.so +0 -0
torchcodec/libtorchcodec_pybind_ops4.so +0 -0
torchcodec/libtorchcodec_pybind_ops5.so +0 -0
torchcodec/libtorchcodec_pybind_ops6.so +0 -0
torchcodec/libtorchcodec_pybind_ops7.so +0 -0
torchcodec/libtorchcodec_pybind_ops8.so +0 -0
torchcodec/samplers/__init__.py +2 -0
torchcodec/samplers/_common.py +84 -0
torchcodec/samplers/_index_based.py +287 -0
torchcodec/samplers/_time_based.py +358 -0
torchcodec/share/cmake/TorchCodec/TorchCodecConfig.cmake +76 -0
torchcodec/share/cmake/TorchCodec/ffmpeg_versions.cmake +122 -0
torchcodec/transforms/__init__.py +12 -0
torchcodec/transforms/_decoder_transforms.py +375 -0
torchcodec/version.py +2 -0
torchcodec-0.10.0.dist-info/METADATA +286 -0
torchcodec-0.10.0.dist-info/RECORD +88 -0
torchcodec-0.10.0.dist-info/WHEEL +5 -0
torchcodec-0.10.0.dist-info/licenses/LICENSE +28 -0
torchcodec-0.10.0.dist-info/top_level.txt +2 -0

torchcodec/encoders/_audio_encoder.py ADDED Viewed

@@ -0,0 +1,149 @@
+from pathlib import Path
+import torch
+from torch import Tensor
+from torchcodec import _core
+class AudioEncoder:
+    """An audio encoder.
+    Args:
+        samples (``torch.Tensor``): The samples to encode. This must be a 2D
+            tensor of shape ``(num_channels, num_samples)``, or a 1D tensor in
+            which case ``num_channels = 1`` is assumed. Values must be float
+            values in ``[-1, 1]``.
+        sample_rate (int): The sample rate of the **input** ``samples``. The
+            sample rate of the encoded output can be specified using the
+            encoding methods (``to_file``, etc.).
+    """
+    def __init__(self, samples: Tensor, *, sample_rate: int):
+        torch._C._log_api_usage_once("torchcodec.encoders.AudioEncoder")
+        # Some of these checks are also done in C++: it's OK, they're cheap, and
+        # doing them here allows to surface them when the AudioEncoder is
+        # instantiated, rather than later when the encoding methods are called.
+        if not isinstance(samples, Tensor):
+            raise ValueError(
+                f"Expected samples to be a Tensor, got {type(samples) = }."
+            )
+        if samples.ndim == 1:
+            # make it 2D and assume 1 channel
+            samples = torch.unsqueeze(samples, 0)
+        if samples.ndim != 2:
+            raise ValueError(f"Expected 1D or 2D samples, got {samples.shape = }.")
+        if samples.dtype != torch.float32:
+            raise ValueError(f"Expected float32 samples, got {samples.dtype = }.")
+        if sample_rate <= 0:
+            raise ValueError(f"{sample_rate = } must be > 0.")
+        self._samples = samples
+        self._sample_rate = sample_rate
+    def to_file(
+        self,
+        dest: str | Path,
+        *,
+        bit_rate: int | None = None,
+        num_channels: int | None = None,
+        sample_rate: int | None = None,
+    ) -> None:
+        """Encode samples into a file.
+        Args:
+            dest (str or ``pathlib.Path``): The path to the output file, e.g.
+                ``audio.mp3``. The extension of the file determines the audio
+                format and container.
+            bit_rate (int, optional): The output bit rate. Encoders typically
+                support a finite set of bit rate values, so ``bit_rate`` will be
+                matched to one of those supported values. The default is chosen
+                by FFmpeg.
+            num_channels (int, optional): The number of channels of the encoded
+                output samples. By default, the number of channels of the input
+                ``samples`` is used.
+            sample_rate (int, optional): The sample rate of the encoded output.
+                By default, the sample rate of the input ``samples`` is used.
+        """
+        _core.encode_audio_to_file(
+            samples=self._samples,
+            sample_rate=self._sample_rate,
+            filename=str(dest),
+            bit_rate=bit_rate,
+            num_channels=num_channels,
+            desired_sample_rate=sample_rate,
+        )
+    def to_tensor(
+        self,
+        format: str,
+        *,
+        bit_rate: int | None = None,
+        num_channels: int | None = None,
+        sample_rate: int | None = None,
+    ) -> Tensor:
+        """Encode samples into raw bytes, as a 1D uint8 Tensor.
+        Args:
+            format (str): The format of the encoded samples, e.g. "mp3", "wav"
+                or "flac".
+            bit_rate (int, optional): The output bit rate. Encoders typically
+                support a finite set of bit rate values, so ``bit_rate`` will be
+                matched to one of those supported values. The default is chosen
+                by FFmpeg.
+            num_channels (int, optional): The number of channels of the encoded
+                output samples. By default, the number of channels of the input
+                ``samples`` is used.
+            sample_rate (int, optional): The sample rate of the encoded output.
+                By default, the sample rate of the input ``samples`` is used.
+        Returns:
+            Tensor: The raw encoded bytes as 1D uint8 Tensor.
+        """
+        return _core.encode_audio_to_tensor(
+            samples=self._samples,
+            sample_rate=self._sample_rate,
+            format=format,
+            bit_rate=bit_rate,
+            num_channels=num_channels,
+            desired_sample_rate=sample_rate,
+        )
+    def to_file_like(
+        self,
+        file_like,
+        format: str,
+        *,
+        bit_rate: int | None = None,
+        num_channels: int | None = None,
+        sample_rate: int | None = None,
+    ) -> None:
+        """Encode samples into a file-like object.
+        Args:
+            file_like: A file-like object that supports ``write()`` and
+                ``seek()`` methods, such as io.BytesIO(), an open file in binary
+                write mode, etc. Methods must have the following signature:
+                ``write(data: bytes) -> int`` and ``seek(offset: int, whence:
+                int = 0) -> int``.
+            format (str): The format of the encoded samples, e.g. "mp3", "wav"
+                or "flac".
+            bit_rate (int, optional): The output bit rate. Encoders typically
+                support a finite set of bit rate values, so ``bit_rate`` will be
+                matched to one of those supported values. The default is chosen
+                by FFmpeg.
+            num_channels (int, optional): The number of channels of the encoded
+                output samples. By default, the number of channels of the input
+                ``samples`` is used.
+            sample_rate (int, optional): The sample rate of the encoded output.
+                By default, the sample rate of the input ``samples`` is used.
+        """
+        _core.encode_audio_to_file_like(
+            samples=self._samples,
+            sample_rate=self._sample_rate,
+            format=format,
+            file_like=file_like,
+            bit_rate=bit_rate,
+            num_channels=num_channels,
+            desired_sample_rate=sample_rate,
+        )

torchcodec/encoders/_video_encoder.py ADDED Viewed

@@ -0,0 +1,196 @@
+from pathlib import Path
+from typing import Any
+import torch
+from torch import Tensor
+from torchcodec import _core
+class VideoEncoder:
+    """A video encoder on CPU or CUDA..
+    Args:
+        frames (``torch.Tensor``): The frames to encode. This must be a 4D
+            tensor of shape ``(N, C, H, W)`` where N is the number of frames,
+            C is 3 channels (RGB), H is height, and W is width.
+            Values must be uint8 in the range ``[0, 255]``.
+            The tensor can be on CPU or CUDA. The device of the tensor
+            determines which encoder is used (CPU or GPU).
+        frame_rate (float): The frame rate of the **input** ``frames``. Also defines the encoded **output** frame rate.
+    """
+    def __init__(self, frames: Tensor, *, frame_rate: float):
+        torch._C._log_api_usage_once("torchcodec.encoders.VideoEncoder")
+        if not isinstance(frames, Tensor):
+            raise ValueError(f"Expected frames to be a Tensor, got {type(frames) = }.")
+        if frames.ndim != 4:
+            raise ValueError(f"Expected 4D frames, got {frames.shape = }.")
+        if frames.dtype != torch.uint8:
+            raise ValueError(f"Expected uint8 frames, got {frames.dtype = }.")
+        if frame_rate <= 0:
+            raise ValueError(f"{frame_rate = } must be > 0.")
+        self._frames = frames
+        self._frame_rate = frame_rate
+    def to_file(
+        self,
+        dest: str | Path,
+        *,
+        codec: str | None = None,
+        pixel_format: str | None = None,
+        crf: int | float | None = None,
+        preset: str | int | None = None,
+        extra_options: dict[str, Any] | None = None,
+    ) -> None:
+        """Encode frames into a file.
+        Args:
+            dest (str or ``pathlib.Path``): The path to the output file, e.g.
+                ``video.mp4``. The extension of the file determines the video
+                container format.
+            codec (str, optional): The codec to use for encoding (e.g., "libx264",
+                "h264"). If not specified, the default codec
+                for the container format will be used.
+                See :ref:`codec_selection` for details.
+            pixel_format (str, optional): The pixel format for encoding (e.g.,
+                "yuv420p", "yuv444p"). If not specified, uses codec's default format.
+                Must be left as ``None`` when encoding CUDA tensors.
+                See :ref:`pixel_format` for details.
+            crf (int or float, optional): Constant Rate Factor for encoding quality. Lower values
+                mean better quality. Valid range depends on the encoder (e.g.  0-51 for libx264).
+                Defaults to None (which will use encoder's default).
+                See :ref:`crf` for details.
+            preset (str or int, optional): Encoder option that controls the tradeoff between
+                encoding encoding speed and compression (output size). Valid on the encoder (commonly
+                a string: "fast", "medium", "slow"). Defaults to None
+                (which will use encoder's default).
+                See :ref:`preset` for details.
+            extra_options (dict[str, Any], optional): A dictionary of additional
+                encoder options to pass, e.g. ``{"qp": 5, "tune": "film"}``.
+                See :ref:`extra_options` for details.
+        """
+        preset = str(preset) if isinstance(preset, int) else preset
+        _core.encode_video_to_file(
+            frames=self._frames,
+            frame_rate=self._frame_rate,
+            filename=str(dest),
+            codec=codec,
+            pixel_format=pixel_format,
+            crf=crf,
+            preset=preset,
+            extra_options=[
+                str(x) for k, v in (extra_options or {}).items() for x in (k, v)
+            ],
+        )
+    def to_tensor(
+        self,
+        format: str,
+        *,
+        codec: str | None = None,
+        pixel_format: str | None = None,
+        crf: int | float | None = None,
+        preset: str | int | None = None,
+        extra_options: dict[str, Any] | None = None,
+    ) -> Tensor:
+        """Encode frames into raw bytes, as a 1D uint8 Tensor.
+        Args:
+            format (str): The container format of the encoded frames, e.g. "mp4", "mov",
+                    "mkv", "avi", "webm", "flv", etc.
+            codec (str, optional): The codec to use for encoding (e.g., "libx264",
+                "h264"). If not specified, the default codec
+                for the container format will be used.
+                See :ref:`codec_selection` for details.
+            pixel_format (str, optional): The pixel format to encode frames into (e.g.,
+                "yuv420p", "yuv444p"). If not specified, uses codec's default format.
+                Must be left as ``None`` when encoding CUDA tensors.
+                See :ref:`pixel_format` for details.
+            crf (int or float, optional): Constant Rate Factor for encoding quality. Lower values
+                mean better quality. Valid range depends on the encoder (e.g.  0-51 for libx264).
+                Defaults to None (which will use encoder's default).
+                See :ref:`crf` for details.
+            preset (str or int, optional): Encoder option that controls the tradeoff between
+                encoding encoding speed and compression (output size). Valid on the encoder (commonly
+                a string: "fast", "medium", "slow"). Defaults to None
+                (which will use encoder's default).
+                See :ref:`preset` for details.
+            extra_options (dict[str, Any], optional): A dictionary of additional
+                encoder options to pass, e.g. ``{"qp": 5, "tune": "film"}``.
+                See :ref:`extra_options` for details.
+        Returns:
+            Tensor: The raw encoded bytes as 1D uint8 Tensor on CPU regardless of the device of the input frames.
+        """
+        preset_value = str(preset) if isinstance(preset, int) else preset
+        return _core.encode_video_to_tensor(
+            frames=self._frames,
+            frame_rate=self._frame_rate,
+            format=format,
+            codec=codec,
+            pixel_format=pixel_format,
+            crf=crf,
+            preset=preset_value,
+            extra_options=[
+                str(x) for k, v in (extra_options or {}).items() for x in (k, v)
+            ],
+        )
+    def to_file_like(
+        self,
+        file_like,
+        format: str,
+        *,
+        codec: str | None = None,
+        pixel_format: str | None = None,
+        crf: int | float | None = None,
+        preset: str | int | None = None,
+        extra_options: dict[str, Any] | None = None,
+    ) -> None:
+        """Encode frames into a file-like object.
+        Args:
+            file_like: A file-like object that supports ``write()`` and
+                ``seek()`` methods, such as io.BytesIO(), an open file in binary
+                write mode, etc. Methods must have the following signature:
+                ``write(data: bytes) -> int`` and ``seek(offset: int, whence:
+                int = 0) -> int``.
+            format (str): The container format of the encoded frames, e.g. "mp4", "mov",
+                "mkv", "avi", "webm", "flv", etc.
+            codec (str, optional): The codec to use for encoding (e.g., "libx264",
+                "h264"). If not specified, the default codec
+                for the container format will be used.
+                See :ref:`codec_selection` for details.
+            pixel_format (str, optional): The pixel format for encoding (e.g.,
+                "yuv420p", "yuv444p"). If not specified, uses codec's default format.
+                Must be left as ``None`` when encoding CUDA tensors.
+                See :ref:`pixel_format` for details.
+            crf (int or float, optional): Constant Rate Factor for encoding quality. Lower values
+                mean better quality. Valid range depends on the encoder (e.g.  0-51 for libx264).
+                Defaults to None (which will use encoder's default).
+                See :ref:`crf` for details.
+            preset (str or int, optional): Encoder option that controls the tradeoff between
+                encoding encoding speed and compression (output size). Valid on the encoder (commonly
+                a string: "fast", "medium", "slow"). Defaults to None
+                (which will use encoder's default).
+                See :ref:`preset` for details.
+            extra_options (dict[str, Any], optional): A dictionary of additional
+                encoder options to pass, e.g. ``{"qp": 5, "tune": "film"}``.
+                See :ref:`extra_options` for details.
+        """
+        preset = str(preset) if isinstance(preset, int) else preset
+        _core.encode_video_to_file_like(
+            frames=self._frames,
+            frame_rate=self._frame_rate,
+            format=format,
+            file_like=file_like,
+            codec=codec,
+            pixel_format=pixel_format,
+            crf=crf,
+            preset=preset,
+            extra_options=[
+                str(x) for k, v in (extra_options or {}).items() for x in (k, v)
+            ],
+        )

torchcodec/libtorchcodec_core4.so ADDED Viewed

Binary file

torchcodec/libtorchcodec_core5.so ADDED Viewed

Binary file

torchcodec/libtorchcodec_core6.so ADDED Viewed

Binary file

torchcodec/libtorchcodec_core7.so ADDED Viewed

Binary file

torchcodec/libtorchcodec_core8.so ADDED Viewed

Binary file

torchcodec/libtorchcodec_custom_ops4.so ADDED Viewed

Binary file

torchcodec/libtorchcodec_custom_ops5.so ADDED Viewed

Binary file

torchcodec/libtorchcodec_custom_ops6.so ADDED Viewed

Binary file

torchcodec/libtorchcodec_custom_ops7.so ADDED Viewed

Binary file

torchcodec/libtorchcodec_custom_ops8.so ADDED Viewed

Binary file

torchcodec/libtorchcodec_pybind_ops4.so ADDED Viewed

Binary file

torchcodec/libtorchcodec_pybind_ops5.so ADDED Viewed

Binary file

torchcodec/libtorchcodec_pybind_ops6.so ADDED Viewed

Binary file

torchcodec/libtorchcodec_pybind_ops7.so ADDED Viewed

Binary file

torchcodec/libtorchcodec_pybind_ops8.so ADDED Viewed

Binary file

torchcodec/samplers/__init__.py ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ from ._index_based import clips_at_random_indices, clips_at_regular_indices
2	+ from ._time_based import clips_at_random_timestamps, clips_at_regular_timestamps

torchcodec/samplers/_common.py ADDED Viewed

@@ -0,0 +1,84 @@
+from collections.abc import Callable
+from torchcodec import FrameBatch
+# Alias for the list type that the policy functions below accept and return:
+# a list of ints or a list of floats.
+_LIST_OF_INT_OR_FLOAT = list[int] | list[float]
+def _repeat_last_policy(
+    values: _LIST_OF_INT_OR_FLOAT, desired_len: int
+) -> _LIST_OF_INT_OR_FLOAT:
+    # values = [1, 2, 3], desired_len = 5
+    # output = [1, 2, 3, 3, 3]
+    values += [values[-1]] * (desired_len - len(values))
+    return values
+def _wrap_policy(
+    values: _LIST_OF_INT_OR_FLOAT, desired_len: int
+) -> _LIST_OF_INT_OR_FLOAT:
+    # values = [1, 2, 3], desired_len = 5
+    # output = [1, 2, 3, 1, 2]
+    return (values * (desired_len // len(values) + 1))[:desired_len]
+def _error_policy(
+    frames_indices: _LIST_OF_INT_OR_FLOAT, desired_len: int
+) -> _LIST_OF_INT_OR_FLOAT:
+    raise ValueError(
+        "You set the 'error' policy, and the sampler tried to decode a frame "
+        "that is beyond the number of frames in the video. "
+        "Try to leave sampling_range_end to its default value?"
+    )
+# Signature shared by every policy function: (values, desired_len) -> list.
+_POLICY_FUNCTION_TYPE = Callable[[_LIST_OF_INT_OR_FLOAT, int], _LIST_OF_INT_OR_FLOAT]
+# Registry mapping each supported policy name to its implementation.
+# _validate_common_params rejects any policy that is not a key of this dict.
+_POLICY_FUNCTIONS: dict[str, _POLICY_FUNCTION_TYPE] = {
+    "repeat_last": _repeat_last_policy,
+    "wrap": _wrap_policy,
+    "error": _error_policy,
+}
+def _validate_common_params(*, decoder, num_frames_per_clip, policy):
+    if len(decoder) < 1:
+        raise ValueError(
+            f"Decoder must have at least one frame, found {len(decoder)} frames."
+        )
+    if num_frames_per_clip <= 0:
+        raise ValueError(
+            f"num_frames_per_clip ({num_frames_per_clip}) must be strictly positive"
+        )
+    if policy not in _POLICY_FUNCTIONS.keys():
+        raise ValueError(
+            f"Invalid policy ({policy}). Supported values are {_POLICY_FUNCTIONS.keys()}."
+        )
+def _reshape_4d_framebatch_into_5d(
+    *,
+    frames: FrameBatch,
+    num_clips: int,
+    num_frames_per_clip: int,
+) -> FrameBatch:
+    last_3_dims = frames.data.shape[-3:]
+    return FrameBatch(
+        data=frames.data.view(num_clips, num_frames_per_clip, *last_3_dims),
+        pts_seconds=frames.pts_seconds.view(num_clips, num_frames_per_clip),
+        duration_seconds=frames.duration_seconds.view(num_clips, num_frames_per_clip),
+    )
+# "Returns:" docstring section describing a 5D FrameBatch of sampled clips.
+# NOTE(review): presumably spliced into the docstrings of the sampler
+# functions defined in other modules; confirm against its users.
+_FRAMEBATCH_RETURN_DOCS = """
+    Returns:
+        FrameBatch:
+            The sampled :term:`clips`, as a 5D :class:`~torchcodec.FrameBatch`.
+            The shape of the ``data`` field is (``num_clips``,
+            ``num_frames_per_clips``, ...) where ... is (H, W, C) or (C, H, W)
+            depending on the ``dimension_order`` parameter of
+            :class:`~torchcodec.decoders.VideoDecoder`. The shape of the
+            ``pts_seconds`` and ``duration_seconds`` fields is (``num_clips``,
+            ``num_frames_per_clips``).
+"""