torchcodec 0.3.0__cp39-cp39-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of torchcodec might be problematic. Click here for more details.

Files changed (59) hide show
  1. torchcodec/.dylibs/libc++.1.0.dylib +0 -0
  2. torchcodec/.dylibs/libpython3.9.dylib +0 -0
  3. torchcodec/__init__.py +16 -0
  4. torchcodec/_core/AVIOBytesContext.cpp +70 -0
  5. torchcodec/_core/AVIOBytesContext.h +32 -0
  6. torchcodec/_core/AVIOContextHolder.cpp +50 -0
  7. torchcodec/_core/AVIOContextHolder.h +65 -0
  8. torchcodec/_core/AVIOFileLikeContext.cpp +80 -0
  9. torchcodec/_core/AVIOFileLikeContext.h +54 -0
  10. torchcodec/_core/CMakeLists.txt +237 -0
  11. torchcodec/_core/CudaDeviceInterface.cpp +289 -0
  12. torchcodec/_core/CudaDeviceInterface.h +34 -0
  13. torchcodec/_core/DeviceInterface.cpp +88 -0
  14. torchcodec/_core/DeviceInterface.h +66 -0
  15. torchcodec/_core/Encoder.cpp +319 -0
  16. torchcodec/_core/Encoder.h +39 -0
  17. torchcodec/_core/FFMPEGCommon.cpp +264 -0
  18. torchcodec/_core/FFMPEGCommon.h +180 -0
  19. torchcodec/_core/Frame.h +47 -0
  20. torchcodec/_core/Metadata.h +70 -0
  21. torchcodec/_core/SingleStreamDecoder.cpp +1947 -0
  22. torchcodec/_core/SingleStreamDecoder.h +462 -0
  23. torchcodec/_core/StreamOptions.h +49 -0
  24. torchcodec/_core/__init__.py +39 -0
  25. torchcodec/_core/_metadata.py +277 -0
  26. torchcodec/_core/custom_ops.cpp +681 -0
  27. torchcodec/_core/fetch_and_expose_non_gpl_ffmpeg_libs.cmake +226 -0
  28. torchcodec/_core/ops.py +381 -0
  29. torchcodec/_core/pybind_ops.cpp +45 -0
  30. torchcodec/_frame.py +145 -0
  31. torchcodec/_internally_replaced_utils.py +53 -0
  32. torchcodec/_samplers/__init__.py +7 -0
  33. torchcodec/_samplers/video_clip_sampler.py +430 -0
  34. torchcodec/decoders/__init__.py +11 -0
  35. torchcodec/decoders/_audio_decoder.py +168 -0
  36. torchcodec/decoders/_decoder_utils.py +52 -0
  37. torchcodec/decoders/_video_decoder.py +399 -0
  38. torchcodec/libtorchcodec_custom_ops4.dylib +0 -0
  39. torchcodec/libtorchcodec_custom_ops5.dylib +0 -0
  40. torchcodec/libtorchcodec_custom_ops6.dylib +0 -0
  41. torchcodec/libtorchcodec_custom_ops7.dylib +0 -0
  42. torchcodec/libtorchcodec_decoder4.dylib +0 -0
  43. torchcodec/libtorchcodec_decoder5.dylib +0 -0
  44. torchcodec/libtorchcodec_decoder6.dylib +0 -0
  45. torchcodec/libtorchcodec_decoder7.dylib +0 -0
  46. torchcodec/libtorchcodec_pybind_ops4.so +0 -0
  47. torchcodec/libtorchcodec_pybind_ops5.so +0 -0
  48. torchcodec/libtorchcodec_pybind_ops6.so +0 -0
  49. torchcodec/libtorchcodec_pybind_ops7.so +0 -0
  50. torchcodec/samplers/__init__.py +2 -0
  51. torchcodec/samplers/_common.py +84 -0
  52. torchcodec/samplers/_index_based.py +285 -0
  53. torchcodec/samplers/_time_based.py +348 -0
  54. torchcodec/version.py +2 -0
  55. torchcodec-0.3.0.dist-info/LICENSE +28 -0
  56. torchcodec-0.3.0.dist-info/METADATA +280 -0
  57. torchcodec-0.3.0.dist-info/RECORD +59 -0
  58. torchcodec-0.3.0.dist-info/WHEEL +5 -0
  59. torchcodec-0.3.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,168 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ import io
8
+ from pathlib import Path
9
+ from typing import Optional, Union
10
+
11
+ from torch import Tensor
12
+
13
+ from torchcodec import _core as core, AudioSamples
14
+ from torchcodec.decoders._decoder_utils import (
15
+ create_decoder,
16
+ ERROR_REPORTING_INSTRUCTIONS,
17
+ )
18
+
19
+
20
class AudioDecoder:
    """A single-stream audio decoder.

    This can be used to decode audio from pure audio files (e.g. mp3, wav,
    etc.), or from videos that contain audio streams (e.g. mp4 videos).

    Returned samples are float samples normalized in [-1, 1]

    Args:
        source (str, ``pathlib.Path``, bytes, ``torch.Tensor`` or file-like object): The source of the audio:

            - If ``str``: a local path or a URL to a video or audio file.
            - If ``pathlib.Path``: a path to a local video or audio file.
            - If ``bytes`` object or ``torch.Tensor``: the raw encoded audio data.
            - If file-like object: we read data from the object on demand. The object must
              expose the methods `read(self, size: int) -> bytes` and
              `seek(self, offset: int, whence: int) -> bytes`. Read more in:
              :ref:`sphx_glr_generated_examples_file_like.py`.
        stream_index (int, optional): Specifies which stream in the file to decode samples from.
            Note that this index is absolute across all media types. If left unspecified, then
            the :term:`best stream` is used.
        sample_rate (int, optional): The desired output sample rate of the decoded samples.
            By default, the samples are returned in their original sample rate.

    Attributes:
        metadata (AudioStreamMetadata): Metadata of the audio stream.
        stream_index (int): The stream index that this decoder is retrieving samples from. If a
            stream index was provided at initialization, this is the same value. If it was left
            unspecified, this is the :term:`best stream`.
    """

    def __init__(
        self,
        source: Union[str, Path, io.RawIOBase, io.BufferedReader, bytes, Tensor],
        *,
        stream_index: Optional[int] = None,
        sample_rate: Optional[int] = None,
    ):
        # Audio access is sequential, so approximate seek mode is sufficient.
        self._decoder = create_decoder(source=source, seek_mode="approximate")
        core.add_audio_stream(
            self._decoder, stream_index=stream_index, sample_rate=sample_rate
        )

        container_metadata = core.get_container_metadata(self._decoder)
        if stream_index is None:
            self.stream_index = container_metadata.best_audio_stream_index
        else:
            self.stream_index = stream_index
        if self.stream_index is None:
            raise ValueError(
                "The best audio stream is unknown and there is no specified stream. "
                + ERROR_REPORTING_INSTRUCTIONS
            )
        self.metadata = container_metadata.streams[self.stream_index]
        assert isinstance(self.metadata, core.AudioStreamMetadata)  # mypy

        if sample_rate is None:
            self._desired_sample_rate = self.metadata.sample_rate
        else:
            self._desired_sample_rate = sample_rate

    def get_all_samples(self) -> AudioSamples:
        """Returns all the audio samples from the source.

        To decode samples in a specific range, use
        :meth:`~torchcodec.decoders.AudioDecoder.get_samples_played_in_range`.

        Returns:
            AudioSamples: The samples within the file.
        """
        # Defaults of get_samples_played_in_range cover the entire stream.
        return self.get_samples_played_in_range()

    def get_samples_played_in_range(
        self, start_seconds: float = 0.0, stop_seconds: Optional[float] = None
    ) -> AudioSamples:
        """Returns audio samples in the given range.

        Samples are in the half open range [start_seconds, stop_seconds).

        To decode all the samples from beginning to end, you can call this
        method while leaving ``start_seconds`` and ``stop_seconds`` to their
        default values, or use
        :meth:`~torchcodec.decoders.AudioDecoder.get_all_samples` as a more
        convenient alias.

        Args:
            start_seconds (float): Time, in seconds, of the start of the
                range. Default: 0.
            stop_seconds (float or None): Time, in seconds, of the end of the
                range. As a half open range, the end is excluded. Default: None,
                which decodes samples until the end.

        Returns:
            AudioSamples: The samples within the specified range.
        """
        if stop_seconds is not None and not start_seconds <= stop_seconds:
            raise ValueError(
                f"Invalid start seconds: {start_seconds}. It must be less than or equal to stop seconds ({stop_seconds})."
            )
        frames, first_pts = core.get_frames_by_pts_in_range_audio(
            self._decoder,
            start_seconds=start_seconds,
            stop_seconds=stop_seconds,
        )
        first_pts = first_pts.item()

        sample_rate = self._desired_sample_rate
        # TODO: metadata's sample_rate should probably not be Optional
        assert sample_rate is not None  # mypy.

        # The core API is frame-based, so `frames` holds every sample of the
        # decoded frames overlapping the request, i.e. samples covering
        # [first_pts, last_pts):
        #
        # x = frame boundaries
        #
        #            first_pts                                    last_pts
        #                 v                                          v
        # ....x..........x..........x...........x..........x..........x.....
        #                       ^                          ^
        #                  start_seconds               stop_seconds
        #
        # We trim that buffer down to a view covering exactly
        # [start_seconds, stop_seconds).
        if first_pts < start_seconds:
            begin_offset = round((start_seconds - first_pts) * sample_rate)
            output_pts_seconds = start_seconds
        else:
            # Normally first_pts <= start_seconds, but in some edge cases
            # first_pts can exceed start_seconds, typically if the stream's
            # first frame's pts isn't exactly 0. Nothing to trim then.
            begin_offset = 0
            output_pts_seconds = first_pts

        total_samples = frames.shape[1]
        last_pts = first_pts + total_samples / sample_rate
        end_offset = total_samples
        if stop_seconds is not None and stop_seconds < last_pts:
            end_offset = total_samples - round((last_pts - stop_seconds) * sample_rate)

        data = frames[:, begin_offset:end_offset]
        return AudioSamples(
            data=data,
            pts_seconds=output_pts_seconds,
            duration_seconds=data.shape[1] / sample_rate,
            sample_rate=sample_rate,
        )
@@ -0,0 +1,52 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ import io
8
+ from pathlib import Path
9
+
10
+ from typing import Union
11
+
12
+ from torch import Tensor
13
+ from torchcodec import _core as core
14
+
15
ERROR_REPORTING_INSTRUCTIONS = """
This should never happen. Please report an issue following the steps in
https://github.com/pytorch/torchcodec/issues/new?assignees=&labels=&projects=&template=bug-report.yml.
"""


def create_decoder(
    *,
    source: Union[str, Path, io.RawIOBase, io.BufferedReader, bytes, Tensor],
    seek_mode: str,
) -> Tensor:
    """Create a core decoder tensor from any supported source type.

    Args:
        source: A path (str or Path), raw encoded data (bytes or Tensor), or a
            binary file-like object exposing ``read`` and ``seek``.
        seek_mode: Forwarded to the core decoder ("exact" or "approximate").

    Returns:
        Tensor: The opaque core decoder handle.

    Raises:
        TypeError: If ``source`` is a text-mode file object or an unsupported type.
    """
    if isinstance(source, str):
        return core.create_from_file(source, seek_mode)
    elif isinstance(source, Path):
        return core.create_from_file(str(source), seek_mode)
    elif isinstance(source, (io.RawIOBase, io.BufferedReader)):
        return core.create_from_file_like(source, seek_mode)
    elif isinstance(source, bytes):
        return core.create_from_bytes(source, seek_mode)
    elif isinstance(source, Tensor):
        return core.create_from_tensor(source, seek_mode)
    elif isinstance(source, io.TextIOBase):
        # Text-mode objects also have read/seek, so this must come before the
        # generic duck-typing check below to produce a helpful error.
        raise TypeError(
            "source is for reading text, likely from open(..., 'r'). Try with 'rb' for binary reading?"
        )
    elif hasattr(source, "read") and hasattr(source, "seek"):
        # This check must be after checking for text-based reading. Also placing
        # it last in general to be defensive: hasattr is a blunt instrument. We
        # could use the inspect module to check for methods with the right
        # signature.
        return core.create_from_file_like(source, seek_mode)

    # Fix: seek() returns the new absolute position (int), not bytes.
    raise TypeError(
        f"Unknown source type: {type(source)}. "
        "Supported types are str, Path, bytes, Tensor and file-like objects with "
        "read(self, size: int) -> bytes and "
        "seek(self, offset: int, whence: int) -> int methods."
    )
@@ -0,0 +1,399 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ import io
8
+ import numbers
9
+ from pathlib import Path
10
+ from typing import Literal, Optional, Tuple, Union
11
+
12
+ from torch import device as torch_device, Tensor
13
+
14
+ from torchcodec import _core as core, Frame, FrameBatch
15
+ from torchcodec.decoders._decoder_utils import (
16
+ create_decoder,
17
+ ERROR_REPORTING_INSTRUCTIONS,
18
+ )
19
+
20
+
21
class VideoDecoder:
    """A single-stream video decoder.

    Args:
        source (str, ``pathlib.Path``, bytes, ``torch.Tensor`` or file-like object): The source of the video:

            - If ``str``: a local path or a URL to a video file.
            - If ``pathlib.Path``: a path to a local video file.
            - If ``bytes`` object or ``torch.Tensor``: the raw encoded video data.
            - If file-like object: we read video data from the object on demand. The object must
              expose the methods `read(self, size: int) -> bytes` and
              `seek(self, offset: int, whence: int) -> bytes`. Read more in:
              :ref:`sphx_glr_generated_examples_file_like.py`.
        stream_index (int, optional): Specifies which stream in the video to decode frames from.
            Note that this index is absolute across all media types. If left unspecified, then
            the :term:`best stream` is used.
        dimension_order(str, optional): The dimension order of the decoded frames.
            This can be either "NCHW" (default) or "NHWC", where N is the batch
            size, C is the number of channels, H is the height, and W is the
            width of the frames.

            .. note::

                Frames are natively decoded in NHWC format by the underlying
                FFmpeg implementation. Converting those into NCHW format is a
                cheap no-copy operation that allows these frames to be
                transformed using the `torchvision transforms
                <https://pytorch.org/vision/stable/transforms.html>`_.
        num_ffmpeg_threads (int, optional): The number of threads to use for decoding.
            Use 1 for single-threaded decoding which may be best if you are running multiple
            instances of ``VideoDecoder`` in parallel. Use a higher number for multi-threaded
            decoding which is best if you are running a single instance of ``VideoDecoder``.
            Passing 0 lets FFmpeg decide on the number of threads.
            Default: 1.
        device (str or torch.device, optional): The device to use for decoding. Default: "cpu".
        seek_mode (str, optional): Determines if frame access will be "exact" or
            "approximate". Exact guarantees that requesting frame i will always
            return frame i, but doing so requires an initial :term:`scan` of the
            file. Approximate is faster as it avoids scanning the file, but less
            accurate as it uses the file's metadata to calculate where i
            probably is. Default: "exact".
            Read more about this parameter in:
            :ref:`sphx_glr_generated_examples_approximate_mode.py`


    Attributes:
        metadata (VideoStreamMetadata): Metadata of the video stream.
        stream_index (int): The stream index that this decoder is retrieving frames from. If a
            stream index was provided at initialization, this is the same value. If it was left
            unspecified, this is the :term:`best stream`.
    """

    def __init__(
        self,
        source: Union[str, Path, io.RawIOBase, io.BufferedReader, bytes, Tensor],
        *,
        stream_index: Optional[int] = None,
        dimension_order: Literal["NCHW", "NHWC"] = "NCHW",
        num_ffmpeg_threads: int = 1,
        device: Optional[Union[str, torch_device]] = "cpu",
        seek_mode: Literal["exact", "approximate"] = "exact",
    ):
        allowed_seek_modes = ("exact", "approximate")
        if seek_mode not in allowed_seek_modes:
            raise ValueError(
                f"Invalid seek mode ({seek_mode}). "
                f"Supported values are {', '.join(allowed_seek_modes)}."
            )

        self._decoder = create_decoder(source=source, seek_mode=seek_mode)

        allowed_dimension_orders = ("NCHW", "NHWC")
        if dimension_order not in allowed_dimension_orders:
            raise ValueError(
                f"Invalid dimension order ({dimension_order}). "
                f"Supported values are {', '.join(allowed_dimension_orders)}."
            )

        if num_ffmpeg_threads is None:
            raise ValueError(f"{num_ffmpeg_threads = } should be an int.")

        # The core API expects a device string, not a torch.device object.
        if isinstance(device, torch_device):
            device = str(device)

        core.add_video_stream(
            self._decoder,
            stream_index=stream_index,
            dimension_order=dimension_order,
            num_threads=num_ffmpeg_threads,
            device=device,
        )

        (
            self.metadata,
            self.stream_index,
            self._begin_stream_seconds,
            self._end_stream_seconds,
            self._num_frames,
        ) = _get_and_validate_stream_metadata(
            decoder=self._decoder, stream_index=stream_index
        )

    def __len__(self) -> int:
        return self._num_frames

    def _getitem_int(self, key: int) -> Tensor:
        # Integer indexing path for __getitem__; supports negative indices.
        assert isinstance(key, int)

        if key < 0:
            key += self._num_frames
        if key >= self._num_frames or key < 0:
            raise IndexError(
                f"Index {key} is out of bounds; length is {self._num_frames}"
            )

        frame_data, *_ = core.get_frame_at_index(self._decoder, frame_index=key)
        return frame_data

    def _getitem_slice(self, key: slice) -> Tensor:
        # Slice indexing path for __getitem__; slice.indices clamps bounds.
        assert isinstance(key, slice)

        start, stop, step = key.indices(len(self))
        frame_data, *_ = core.get_frames_in_range(
            self._decoder,
            start=start,
            stop=stop,
            step=step,
        )
        return frame_data

    def __getitem__(self, key: Union[numbers.Integral, slice]) -> Tensor:
        """Return frame or frames as tensors, at the given index or range.

        .. note::

            If you need to decode multiple frames, we recommend using the batch
            methods instead, since they are faster:
            :meth:`~torchcodec.decoders.VideoDecoder.get_frames_at`,
            :meth:`~torchcodec.decoders.VideoDecoder.get_frames_in_range`,
            :meth:`~torchcodec.decoders.VideoDecoder.get_frames_played_at`, and
            :meth:`~torchcodec.decoders.VideoDecoder.get_frames_played_in_range`.

        Args:
            key(int or slice): The index or range of frame(s) to retrieve.

        Returns:
            torch.Tensor: The frame or frames at the given index or range.
        """
        if isinstance(key, numbers.Integral):
            return self._getitem_int(int(key))
        elif isinstance(key, slice):
            return self._getitem_slice(key)

        raise TypeError(
            f"Unsupported key type: {type(key)}. Supported types are int and slice."
        )

    def _get_key_frame_indices(self) -> list[int]:
        # Private: exposes the core's key-frame indices for this stream.
        return core._get_key_frame_indices(self._decoder)

    def get_frame_at(self, index: int) -> Frame:
        """Return a single frame at the given index.

        .. note::

            If you need to decode multiple frames, we recommend using the batch
            methods instead, since they are faster:
            :meth:`~torchcodec.decoders.VideoDecoder.get_frames_at`,
            :meth:`~torchcodec.decoders.VideoDecoder.get_frames_in_range`,
            :meth:`~torchcodec.decoders.VideoDecoder.get_frames_played_at`,
            :meth:`~torchcodec.decoders.VideoDecoder.get_frames_played_in_range`.

        Args:
            index (int): The index of the frame to retrieve.

        Returns:
            Frame: The frame at the given index.

        Raises:
            IndexError: If ``index`` is outside [0, number of frames).
        """

        if not 0 <= index < self._num_frames:
            raise IndexError(
                f"Index {index} is out of bounds; must be in the range [0, {self._num_frames})."
            )
        data, pts_seconds, duration_seconds = core.get_frame_at_index(
            self._decoder, frame_index=index
        )
        return Frame(
            data=data,
            pts_seconds=pts_seconds.item(),
            duration_seconds=duration_seconds.item(),
        )

    def get_frames_at(self, indices: list[int]) -> FrameBatch:
        """Return frames at the given indices.

        Args:
            indices (list of int): The indices of the frames to retrieve.

        Returns:
            FrameBatch: The frames at the given indices.
        """

        data, pts_seconds, duration_seconds = core.get_frames_at_indices(
            self._decoder, frame_indices=indices
        )
        return FrameBatch(
            data=data,
            pts_seconds=pts_seconds,
            duration_seconds=duration_seconds,
        )

    def get_frames_in_range(self, start: int, stop: int, step: int = 1) -> FrameBatch:
        """Return multiple frames at the given index range.

        Frames are in [start, stop).

        Args:
            start (int): Index of the first frame to retrieve.
            stop (int): End of indexing range (exclusive, as per Python
                conventions).
            step (int, optional): Step size between frames. Default: 1.

        Returns:
            FrameBatch: The frames within the specified range.

        Raises:
            IndexError: If the range or step is invalid.
        """
        if not 0 <= start < self._num_frames:
            raise IndexError(
                f"Start index {start} is out of bounds; must be in the range [0, {self._num_frames})."
            )
        if stop < start:
            raise IndexError(
                f"Stop index ({stop}) must not be less than the start index ({start})."
            )
        if not step > 0:
            raise IndexError(f"Step ({step}) must be greater than 0.")
        frames = core.get_frames_in_range(
            self._decoder,
            start=start,
            stop=stop,
            step=step,
        )
        return FrameBatch(*frames)

    def get_frame_played_at(self, seconds: float) -> Frame:
        """Return a single frame played at the given timestamp in seconds.

        .. note::

            If you need to decode multiple frames, we recommend using the batch
            methods instead, since they are faster:
            :meth:`~torchcodec.decoders.VideoDecoder.get_frames_at`,
            :meth:`~torchcodec.decoders.VideoDecoder.get_frames_in_range`,
            :meth:`~torchcodec.decoders.VideoDecoder.get_frames_played_at`,
            :meth:`~torchcodec.decoders.VideoDecoder.get_frames_played_in_range`.

        Args:
            seconds (float): The time stamp in seconds when the frame is played.

        Returns:
            Frame: The frame that is played at ``seconds``.

        Raises:
            IndexError: If ``seconds`` is outside the stream's pts range.
        """
        if not self._begin_stream_seconds <= seconds < self._end_stream_seconds:
            raise IndexError(
                f"Invalid pts in seconds: {seconds}. "
                f"It must be greater than or equal to {self._begin_stream_seconds} "
                f"and less than {self._end_stream_seconds}."
            )
        data, pts_seconds, duration_seconds = core.get_frame_at_pts(
            self._decoder, seconds
        )
        return Frame(
            data=data,
            pts_seconds=pts_seconds.item(),
            duration_seconds=duration_seconds.item(),
        )

    def get_frames_played_at(self, seconds: list[float]) -> FrameBatch:
        """Return frames played at the given timestamps in seconds.

        Args:
            seconds (list of float): The timestamps in seconds when the frames are played.

        Returns:
            FrameBatch: The frames that are played at ``seconds``.
        """
        data, pts_seconds, duration_seconds = core.get_frames_by_pts(
            self._decoder, timestamps=seconds
        )
        return FrameBatch(
            data=data,
            pts_seconds=pts_seconds,
            duration_seconds=duration_seconds,
        )

    def get_frames_played_in_range(
        self, start_seconds: float, stop_seconds: float
    ) -> FrameBatch:
        """Returns multiple frames in the given range.

        Frames are in the half open range [start_seconds, stop_seconds). Each
        returned frame's :term:`pts`, in seconds, is inside of the half open
        range.

        Args:
            start_seconds (float): Time, in seconds, of the start of the
                range.
            stop_seconds (float): Time, in seconds, of the end of the
                range. As a half open range, the end is excluded.

        Returns:
            FrameBatch: The frames within the specified range.

        Raises:
            ValueError: If the requested range is invalid or falls outside the
                stream's pts range.
        """
        if not start_seconds <= stop_seconds:
            raise ValueError(
                f"Invalid start seconds: {start_seconds}. It must be less than or equal to stop seconds ({stop_seconds})."
            )
        # Fix: the check is strict (`<`) on the upper bound, so the message
        # must say "less than", matching get_frame_played_at's wording.
        if not self._begin_stream_seconds <= start_seconds < self._end_stream_seconds:
            raise ValueError(
                f"Invalid start seconds: {start_seconds}. "
                f"It must be greater than or equal to {self._begin_stream_seconds} "
                f"and less than {self._end_stream_seconds}."
            )
        if not stop_seconds <= self._end_stream_seconds:
            raise ValueError(
                f"Invalid stop seconds: {stop_seconds}. "
                f"It must be less than or equal to {self._end_stream_seconds}."
            )
        frames = core.get_frames_by_pts_in_range(
            self._decoder,
            start_seconds=start_seconds,
            stop_seconds=stop_seconds,
        )
        return FrameBatch(*frames)
353
+
354
+
355
def _get_and_validate_stream_metadata(
    *,
    decoder: Tensor,
    stream_index: Optional[int] = None,
) -> Tuple[core._metadata.VideoStreamMetadata, int, float, float, int]:
    """Resolve the stream index and extract the required metadata fields.

    Returns a tuple of (metadata, stream_index, begin_stream_seconds,
    end_stream_seconds, num_frames), raising ValueError whenever a
    required field is missing from the container metadata.
    """
    container_metadata = core.get_container_metadata(decoder)

    if stream_index is None:
        stream_index = container_metadata.best_video_stream_index
        if stream_index is None:
            raise ValueError(
                "The best video stream is unknown and there is no specified stream. "
                + ERROR_REPORTING_INSTRUCTIONS
            )

    metadata = container_metadata.streams[stream_index]
    assert isinstance(metadata, core._metadata.VideoStreamMetadata)  # mypy

    begin_stream_seconds = metadata.begin_stream_seconds
    if begin_stream_seconds is None:
        raise ValueError(
            "The minimum pts value in seconds is unknown. "
            + ERROR_REPORTING_INSTRUCTIONS
        )

    end_stream_seconds = metadata.end_stream_seconds
    if end_stream_seconds is None:
        raise ValueError(
            "The maximum pts value in seconds is unknown. "
            + ERROR_REPORTING_INSTRUCTIONS
        )

    num_frames = metadata.num_frames
    if num_frames is None:
        raise ValueError(
            "The number of frames is unknown. " + ERROR_REPORTING_INSTRUCTIONS
        )

    return (
        metadata,
        stream_index,
        begin_stream_seconds,
        end_stream_seconds,
        num_frames,
    )
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
@@ -0,0 +1,2 @@
1
+ from ._index_based import clips_at_random_indices, clips_at_regular_indices
2
+ from ._time_based import clips_at_random_timestamps, clips_at_regular_timestamps