torchcodec 0.10.0__cp312-cp312-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. torchcodec/__init__.py +27 -0
  2. torchcodec/_core/AVIOContextHolder.cpp +60 -0
  3. torchcodec/_core/AVIOContextHolder.h +64 -0
  4. torchcodec/_core/AVIOFileLikeContext.cpp +98 -0
  5. torchcodec/_core/AVIOFileLikeContext.h +55 -0
  6. torchcodec/_core/AVIOTensorContext.cpp +130 -0
  7. torchcodec/_core/AVIOTensorContext.h +44 -0
  8. torchcodec/_core/BetaCudaDeviceInterface.cpp +849 -0
  9. torchcodec/_core/BetaCudaDeviceInterface.h +196 -0
  10. torchcodec/_core/CMakeLists.txt +295 -0
  11. torchcodec/_core/CUDACommon.cpp +330 -0
  12. torchcodec/_core/CUDACommon.h +51 -0
  13. torchcodec/_core/Cache.h +124 -0
  14. torchcodec/_core/CpuDeviceInterface.cpp +509 -0
  15. torchcodec/_core/CpuDeviceInterface.h +141 -0
  16. torchcodec/_core/CudaDeviceInterface.cpp +602 -0
  17. torchcodec/_core/CudaDeviceInterface.h +79 -0
  18. torchcodec/_core/DeviceInterface.cpp +117 -0
  19. torchcodec/_core/DeviceInterface.h +191 -0
  20. torchcodec/_core/Encoder.cpp +1054 -0
  21. torchcodec/_core/Encoder.h +192 -0
  22. torchcodec/_core/FFMPEGCommon.cpp +684 -0
  23. torchcodec/_core/FFMPEGCommon.h +314 -0
  24. torchcodec/_core/FilterGraph.cpp +159 -0
  25. torchcodec/_core/FilterGraph.h +59 -0
  26. torchcodec/_core/Frame.cpp +47 -0
  27. torchcodec/_core/Frame.h +72 -0
  28. torchcodec/_core/Metadata.cpp +124 -0
  29. torchcodec/_core/Metadata.h +92 -0
  30. torchcodec/_core/NVCUVIDRuntimeLoader.cpp +320 -0
  31. torchcodec/_core/NVCUVIDRuntimeLoader.h +14 -0
  32. torchcodec/_core/NVDECCache.cpp +60 -0
  33. torchcodec/_core/NVDECCache.h +102 -0
  34. torchcodec/_core/SingleStreamDecoder.cpp +1586 -0
  35. torchcodec/_core/SingleStreamDecoder.h +391 -0
  36. torchcodec/_core/StreamOptions.h +70 -0
  37. torchcodec/_core/Transform.cpp +128 -0
  38. torchcodec/_core/Transform.h +86 -0
  39. torchcodec/_core/ValidationUtils.cpp +35 -0
  40. torchcodec/_core/ValidationUtils.h +21 -0
  41. torchcodec/_core/__init__.py +46 -0
  42. torchcodec/_core/_metadata.py +262 -0
  43. torchcodec/_core/custom_ops.cpp +1090 -0
  44. torchcodec/_core/fetch_and_expose_non_gpl_ffmpeg_libs.cmake +169 -0
  45. torchcodec/_core/nvcuvid_include/cuviddec.h +1374 -0
  46. torchcodec/_core/nvcuvid_include/nvcuvid.h +610 -0
  47. torchcodec/_core/ops.py +605 -0
  48. torchcodec/_core/pybind_ops.cpp +50 -0
  49. torchcodec/_frame.py +146 -0
  50. torchcodec/_internally_replaced_utils.py +68 -0
  51. torchcodec/_samplers/__init__.py +7 -0
  52. torchcodec/_samplers/video_clip_sampler.py +419 -0
  53. torchcodec/decoders/__init__.py +12 -0
  54. torchcodec/decoders/_audio_decoder.py +185 -0
  55. torchcodec/decoders/_decoder_utils.py +113 -0
  56. torchcodec/decoders/_video_decoder.py +601 -0
  57. torchcodec/encoders/__init__.py +2 -0
  58. torchcodec/encoders/_audio_encoder.py +149 -0
  59. torchcodec/encoders/_video_encoder.py +196 -0
  60. torchcodec/libtorchcodec_core4.so +0 -0
  61. torchcodec/libtorchcodec_core5.so +0 -0
  62. torchcodec/libtorchcodec_core6.so +0 -0
  63. torchcodec/libtorchcodec_core7.so +0 -0
  64. torchcodec/libtorchcodec_core8.so +0 -0
  65. torchcodec/libtorchcodec_custom_ops4.so +0 -0
  66. torchcodec/libtorchcodec_custom_ops5.so +0 -0
  67. torchcodec/libtorchcodec_custom_ops6.so +0 -0
  68. torchcodec/libtorchcodec_custom_ops7.so +0 -0
  69. torchcodec/libtorchcodec_custom_ops8.so +0 -0
  70. torchcodec/libtorchcodec_pybind_ops4.so +0 -0
  71. torchcodec/libtorchcodec_pybind_ops5.so +0 -0
  72. torchcodec/libtorchcodec_pybind_ops6.so +0 -0
  73. torchcodec/libtorchcodec_pybind_ops7.so +0 -0
  74. torchcodec/libtorchcodec_pybind_ops8.so +0 -0
  75. torchcodec/samplers/__init__.py +2 -0
  76. torchcodec/samplers/_common.py +84 -0
  77. torchcodec/samplers/_index_based.py +287 -0
  78. torchcodec/samplers/_time_based.py +358 -0
  79. torchcodec/share/cmake/TorchCodec/TorchCodecConfig.cmake +76 -0
  80. torchcodec/share/cmake/TorchCodec/ffmpeg_versions.cmake +122 -0
  81. torchcodec/transforms/__init__.py +12 -0
  82. torchcodec/transforms/_decoder_transforms.py +375 -0
  83. torchcodec/version.py +2 -0
  84. torchcodec-0.10.0.dist-info/METADATA +286 -0
  85. torchcodec-0.10.0.dist-info/RECORD +88 -0
  86. torchcodec-0.10.0.dist-info/WHEEL +5 -0
  87. torchcodec-0.10.0.dist-info/licenses/LICENSE +28 -0
  88. torchcodec-0.10.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,185 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+
8
+ import io
9
+ from pathlib import Path
10
+
11
+ import torch
12
+ from torch import Tensor
13
+
14
+ from torchcodec import _core as core, AudioSamples
15
+ from torchcodec.decoders._decoder_utils import (
16
+ create_decoder,
17
+ ERROR_REPORTING_INSTRUCTIONS,
18
+ )
19
+
20
+
21
class AudioDecoder:
    """A single-stream audio decoder.

    This can be used to decode audio from pure audio files (e.g. mp3, wav,
    etc.), or from videos that contain audio streams (e.g. mp4 videos).

    Returned samples are float samples normalized in [-1, 1]

    Args:
        source (str, ``pathlib.Path``, bytes, ``torch.Tensor`` or file-like
            object): The source of the video or audio:

            - If ``str``: a local path or a URL to a video or audio file.
            - If ``pathlib.Path``: a path to a local video or audio file.
            - If ``bytes`` object or ``torch.Tensor``: the raw encoded audio data.
            - If file-like object: we read video or audio data from the object
              on demand. The object must expose the methods
              `read(self, size: int) -> bytes` and
              `seek(self, offset: int, whence: int) -> int`. Read more in:
              :ref:`sphx_glr_generated_examples_decoding_file_like.py`.
        stream_index (int, optional): Specifies which stream in the file to decode samples from.
            Note that this index is absolute across all media types. If left unspecified, then
            the :term:`best stream` is used.
        sample_rate (int, optional): The desired output sample rate of the decoded samples.
            By default, the sample rate of the source is used.
        num_channels (int, optional): The desired number of channels of the decoded samples.
            By default, the number of channels of the source is used.

    Attributes:
        metadata (AudioStreamMetadata): Metadata of the audio stream.
        stream_index (int): The stream index that this decoder is retrieving samples from. If a
            stream index was provided at initialization, this is the same value. If it was left
            unspecified, this is the :term:`best stream`.

    Raises:
        ValueError: If no stream index was given and the best audio stream is
            unknown, if the stream index is outside the range of streams of
            the container, or if the selected stream is not an audio stream.
    """

    def __init__(
        self,
        source: str | Path | io.RawIOBase | io.BufferedReader | bytes | Tensor,
        *,
        stream_index: int | None = None,
        sample_rate: int | None = None,
        num_channels: int | None = None,
    ):
        torch._C._log_api_usage_once("torchcodec.decoders.AudioDecoder")
        self._decoder = create_decoder(source=source, seek_mode="approximate")

        container_metadata = core.get_container_metadata(self._decoder)
        # Fall back to the container's best audio stream when the caller did
        # not pick one explicitly.
        self.stream_index = (
            container_metadata.best_audio_stream_index
            if stream_index is None
            else stream_index
        )
        if self.stream_index is None:
            raise ValueError(
                "The best audio stream is unknown and there is no specified stream. "
                + ERROR_REPORTING_INSTRUCTIONS
            )
        # The index is absolute, so negative values are rejected as well:
        # otherwise list indexing below would silently wrap around and pick
        # the metadata of a stream counted from the end.
        if not 0 <= self.stream_index < len(container_metadata.streams):
            raise ValueError(
                f"The stream at index {self.stream_index} is not a valid stream."
            )

        self.metadata = container_metadata.streams[self.stream_index]
        if not isinstance(self.metadata, core._metadata.AudioStreamMetadata):
            # Report the resolved index: the raw argument may be None when the
            # best stream was selected automatically.
            raise ValueError(
                f"The stream at index {self.stream_index} is not an audio stream."
            )

        # Sample rate of the samples this decoder outputs; used to convert
        # between seconds and sample offsets when slicing decoded frames.
        self._desired_sample_rate = (
            sample_rate if sample_rate is not None else self.metadata.sample_rate
        )

        core.add_audio_stream(
            self._decoder,
            stream_index=stream_index,
            sample_rate=sample_rate,
            num_channels=num_channels,
        )

    def get_all_samples(self) -> AudioSamples:
        """Returns all the audio samples from the source.

        To decode samples in a specific range, use
        :meth:`~torchcodec.decoders.AudioDecoder.get_samples_played_in_range`.

        Returns:
            AudioSamples: The samples within the file.
        """
        return self.get_samples_played_in_range()

    def get_samples_played_in_range(
        self, start_seconds: float = 0.0, stop_seconds: float | None = None
    ) -> AudioSamples:
        """Returns audio samples in the given range.

        Samples are in the half open range [start_seconds, stop_seconds).

        To decode all the samples from beginning to end, you can call this
        method while leaving ``start_seconds`` and ``stop_seconds`` to their
        default values, or use
        :meth:`~torchcodec.decoders.AudioDecoder.get_all_samples` as a more
        convenient alias.

        Args:
            start_seconds (float): Time, in seconds, of the start of the
                range. Default: 0.
            stop_seconds (float or None): Time, in seconds, of the end of the
                range. As a half open range, the end is excluded. Default: None,
                which decodes samples until the end.

        Returns:
            AudioSamples: The samples within the specified range.

        Raises:
            ValueError: If ``stop_seconds`` is given and ``start_seconds`` is
                not less than or equal to it.
        """
        if stop_seconds is not None and not start_seconds <= stop_seconds:
            raise ValueError(
                f"Invalid start seconds: {start_seconds}. It must be less than or equal to stop seconds ({stop_seconds})."
            )
        frames, first_pts = core.get_frames_by_pts_in_range_audio(
            self._decoder,
            start_seconds=start_seconds,
            stop_seconds=stop_seconds,
        )
        first_pts = first_pts.item()

        # x = frame boundaries
        #
        #            first_pts                                    last_pts
        #                v                                            v
        # ....x..........x..........x...........x..........x..........x.....
        #                    ^                                 ^
        #               start_seconds                      stop_seconds
        #
        # We want to return the samples in [start_seconds, stop_seconds). But
        # because the core API is based on frames, the `frames` tensor contains
        # the samples in [first_pts, last_pts)
        # So we do some basic math to figure out the position of the view that
        # we'll return.

        sample_rate = self._desired_sample_rate
        # TODO: metadata's sample_rate should probably not be Optional
        assert sample_rate is not None  # mypy.

        if first_pts < start_seconds:
            # Drop the samples of the first frame that precede start_seconds.
            offset_beginning = round((start_seconds - first_pts) * sample_rate)
            output_pts_seconds = start_seconds
        else:
            # In normal cases we'll have first_pts <= start_seconds, but in
            # some edge cases it's possible to have first_pts > start_seconds,
            # typically if the stream's first frame's pts isn't exactly 0.
            offset_beginning = 0
            output_pts_seconds = first_pts

        num_samples = frames.shape[1]
        last_pts = first_pts + num_samples / sample_rate
        if stop_seconds is not None and stop_seconds < last_pts:
            # Drop the samples of the last frame at or after stop_seconds.
            offset_end = num_samples - round((last_pts - stop_seconds) * sample_rate)
        else:
            offset_end = num_samples

        data = frames[:, offset_beginning:offset_end]
        return AudioSamples(
            data=data,
            pts_seconds=output_pts_seconds,
            duration_seconds=data.shape[1] / sample_rate,
            sample_rate=sample_rate,
        )
@@ -0,0 +1,113 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+
8
+ import contextvars
9
+ import io
10
+
11
+ from collections.abc import Generator
12
+ from contextlib import contextmanager
13
+ from pathlib import Path
14
+
15
+ from torch import Tensor
16
+ from torchcodec import _core as core
17
+
18
# Text meant to be appended to error messages for situations that are not
# expected to occur; it points users to the project's bug-report form.
ERROR_REPORTING_INSTRUCTIONS = """
This should never happen. Please report an issue following the steps in
https://github.com/pytorch/torchcodec/issues/new?assignees=&labels=&projects=&template=bug-report.yml.
"""
22
+
23
+
24
+ def create_decoder(
25
+ *,
26
+ source: str | Path | io.RawIOBase | io.BufferedReader | bytes | Tensor,
27
+ seek_mode: str,
28
+ ) -> Tensor:
29
+ if isinstance(source, str):
30
+ return core.create_from_file(source, seek_mode)
31
+ elif isinstance(source, Path):
32
+ return core.create_from_file(str(source), seek_mode)
33
+ elif isinstance(source, io.RawIOBase) or isinstance(source, io.BufferedReader):
34
+ return core.create_from_file_like(source, seek_mode)
35
+ elif isinstance(source, bytes):
36
+ return core.create_from_bytes(source, seek_mode)
37
+ elif isinstance(source, Tensor):
38
+ return core.create_from_tensor(source, seek_mode)
39
+ elif isinstance(source, io.TextIOBase):
40
+ raise TypeError(
41
+ "source is for reading text, likely from open(..., 'r'). Try with 'rb' for binary reading?"
42
+ )
43
+ elif hasattr(source, "read") and hasattr(source, "seek"):
44
+ # This check must be after checking for text-based reading. Also placing
45
+ # it last in general to be defensive: hasattr is a blunt instrument. We
46
+ # could use the inspect module to check for methods with the right
47
+ # signature.
48
+ return core.create_from_file_like(source, seek_mode)
49
+
50
+ raise TypeError(
51
+ f"Unknown source type: {type(source)}. "
52
+ "Supported types are str, Path, bytes, Tensor and file-like objects with "
53
+ "read(self, size: int) -> bytes and "
54
+ "seek(self, offset: int, whence: int) -> int methods."
55
+ )
56
+
57
+
58
# Thread-local and async-safe storage for the current CUDA backend.
# Holds the backend name as a string; reads return "ffmpeg" until a value is
# set on the variable.
_CUDA_BACKEND: contextvars.ContextVar[str] = contextvars.ContextVar(
    "_CUDA_BACKEND", default="ffmpeg"
)
62
+
63
+
64
@contextmanager
def set_cuda_backend(backend: str) -> Generator[None, None, None]:
    """Context Manager to set the CUDA backend for :class:`~torchcodec.decoders.VideoDecoder`.

    Use this context manager to choose which CUDA backend implementation is
    used by :class:`~torchcodec.decoders.VideoDecoder` instances created with
    a CUDA device.

    .. note::
        **We recommend trying the "beta" backend instead of the default "ffmpeg"
        backend!** The beta backend is faster, and will eventually become the
        default in future versions. It may have rough edges that we'll polish
        over time, but it's already quite stable and ready for adoption. Let us
        know what you think!

    Only the creation of the decoder needs to happen inside the context
    manager; the decoding methods can be called outside of it. You still need
    to pass ``device="cuda"`` when creating the
    :class:`~torchcodec.decoders.VideoDecoder` instance. If a CUDA device isn't
    specified, this context manager will have no effect. See example below.

    This is thread-safe and async-safe. The previous backend is restored when
    the context exits, including when the body raises.

    Args:
        backend (str): The CUDA backend to use. Can be "ffmpeg" (default) or
            "beta". Matching is case-insensitive. We recommend trying "beta"
            as it's faster!

    Raises:
        ValueError: If ``backend`` is neither "ffmpeg" nor "beta".

    Example:
        >>> with set_cuda_backend("beta"):
        ...     decoder = VideoDecoder("video.mp4", device="cuda")
        ...
        ... # Only the decoder creation needs to be part of the context manager.
        ... # Decoder will now use the beta CUDA implementation:
        ... decoder.get_frame_at(0)
    """
    normalized = backend.lower()
    if normalized != "ffmpeg" and normalized != "beta":
        raise ValueError(
            f"Invalid CUDA backend ({normalized}). Supported values are 'ffmpeg' and 'beta'."
        )

    # The token remembers the value that was active before this call, so that
    # nested uses unwind correctly.
    restore_token = _CUDA_BACKEND.set(normalized)
    try:
        yield
    finally:
        _CUDA_BACKEND.reset(restore_token)
110
+
111
+
112
def _get_cuda_backend() -> str:
    """Return the CUDA backend name currently held by ``_CUDA_BACKEND``."""
    active_backend = _CUDA_BACKEND.get()
    return active_backend