torchcodec 0.3.0__cp313-cp313-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of torchcodec might be problematic. Click here for more details.

Files changed (57) hide show
  1. torchcodec/__init__.py +16 -0
  2. torchcodec/_core/AVIOBytesContext.cpp +70 -0
  3. torchcodec/_core/AVIOBytesContext.h +32 -0
  4. torchcodec/_core/AVIOContextHolder.cpp +50 -0
  5. torchcodec/_core/AVIOContextHolder.h +65 -0
  6. torchcodec/_core/AVIOFileLikeContext.cpp +80 -0
  7. torchcodec/_core/AVIOFileLikeContext.h +54 -0
  8. torchcodec/_core/CMakeLists.txt +237 -0
  9. torchcodec/_core/CudaDeviceInterface.cpp +289 -0
  10. torchcodec/_core/CudaDeviceInterface.h +34 -0
  11. torchcodec/_core/DeviceInterface.cpp +88 -0
  12. torchcodec/_core/DeviceInterface.h +66 -0
  13. torchcodec/_core/Encoder.cpp +319 -0
  14. torchcodec/_core/Encoder.h +39 -0
  15. torchcodec/_core/FFMPEGCommon.cpp +264 -0
  16. torchcodec/_core/FFMPEGCommon.h +180 -0
  17. torchcodec/_core/Frame.h +47 -0
  18. torchcodec/_core/Metadata.h +70 -0
  19. torchcodec/_core/SingleStreamDecoder.cpp +1947 -0
  20. torchcodec/_core/SingleStreamDecoder.h +462 -0
  21. torchcodec/_core/StreamOptions.h +49 -0
  22. torchcodec/_core/__init__.py +39 -0
  23. torchcodec/_core/_metadata.py +277 -0
  24. torchcodec/_core/custom_ops.cpp +681 -0
  25. torchcodec/_core/fetch_and_expose_non_gpl_ffmpeg_libs.cmake +226 -0
  26. torchcodec/_core/ops.py +381 -0
  27. torchcodec/_core/pybind_ops.cpp +45 -0
  28. torchcodec/_frame.py +145 -0
  29. torchcodec/_internally_replaced_utils.py +53 -0
  30. torchcodec/_samplers/__init__.py +7 -0
  31. torchcodec/_samplers/video_clip_sampler.py +430 -0
  32. torchcodec/decoders/__init__.py +11 -0
  33. torchcodec/decoders/_audio_decoder.py +168 -0
  34. torchcodec/decoders/_decoder_utils.py +52 -0
  35. torchcodec/decoders/_video_decoder.py +399 -0
  36. torchcodec/libtorchcodec_custom_ops4.so +0 -0
  37. torchcodec/libtorchcodec_custom_ops5.so +0 -0
  38. torchcodec/libtorchcodec_custom_ops6.so +0 -0
  39. torchcodec/libtorchcodec_custom_ops7.so +0 -0
  40. torchcodec/libtorchcodec_decoder4.so +0 -0
  41. torchcodec/libtorchcodec_decoder5.so +0 -0
  42. torchcodec/libtorchcodec_decoder6.so +0 -0
  43. torchcodec/libtorchcodec_decoder7.so +0 -0
  44. torchcodec/libtorchcodec_pybind_ops4.so +0 -0
  45. torchcodec/libtorchcodec_pybind_ops5.so +0 -0
  46. torchcodec/libtorchcodec_pybind_ops6.so +0 -0
  47. torchcodec/libtorchcodec_pybind_ops7.so +0 -0
  48. torchcodec/samplers/__init__.py +2 -0
  49. torchcodec/samplers/_common.py +84 -0
  50. torchcodec/samplers/_index_based.py +285 -0
  51. torchcodec/samplers/_time_based.py +348 -0
  52. torchcodec/version.py +2 -0
  53. torchcodec-0.3.0.dist-info/LICENSE +28 -0
  54. torchcodec-0.3.0.dist-info/METADATA +280 -0
  55. torchcodec-0.3.0.dist-info/RECORD +57 -0
  56. torchcodec-0.3.0.dist-info/WHEEL +5 -0
  57. torchcodec-0.3.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,277 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ import dataclasses
8
+ import json
9
+ import pathlib
10
+ from dataclasses import dataclass
11
+ from typing import List, Optional, Union
12
+
13
+ import torch
14
+
15
+ from torchcodec._core.ops import (
16
+ _get_container_json_metadata,
17
+ _get_stream_json_metadata,
18
+ create_from_file,
19
+ )
20
+
21
+
22
+ SPACES = " "
23
+
24
+
25
@dataclass
class StreamMetadata:
    """Metadata fields shared by every stream, whatever its media type.

    This is the base class of the video and audio metadata classes of this
    module, and it is also instantiated directly for streams that are neither
    video nor audio.
    """

    duration_seconds_from_header: Optional[float]
    """Duration of the stream, in seconds, obtained from the header (float or
    None). This could be inaccurate."""
    begin_stream_seconds_from_header: Optional[float]
    """Beginning of the stream, in seconds, obtained from the header (float or
    None). Usually, this is equal to 0."""
    bit_rate: Optional[float]
    """Bit rate of the stream (float or None). Presumably expressed in bits
    per second, as FFmpeg reports it -- TODO confirm."""
    codec: Optional[str]
    """Codec (str or None)."""
    stream_index: int
    """Index of the stream that this metadata refers to (int)."""

    def __repr__(self):
        """Return the class name followed by one ``name: value`` line per field.

        Every field line is prefixed with the module-level ``SPACES`` string
        and terminated by a newline.
        """
        header = self.__class__.__name__ + ":\n"
        # Build all field lines first and join once instead of growing the
        # string with repeated ``+=``.
        field_lines = [
            f"{SPACES}{field.name}: {getattr(self, field.name)}\n"
            for field in dataclasses.fields(self)
        ]
        return header + "".join(field_lines)
45
+
46
+
47
@dataclass
class VideoStreamMetadata(StreamMetadata):
    """Metadata of a single video stream."""

    begin_stream_seconds_from_content: Optional[float]
    """Beginning of the stream, in seconds (float or None).
    Conceptually, this corresponds to the first frame's :term:`pts`. It is only
    computed when a :term:`scan` is done as min(frame.pts) across all frames in
    the stream. Usually, this is equal to 0."""
    end_stream_seconds_from_content: Optional[float]
    """End of the stream, in seconds (float or None).
    Conceptually, this corresponds to last_frame.pts + last_frame.duration. It
    is only computed when a :term:`scan` is done as max(frame.pts +
    frame.duration) across all frames in the stream. Note that no frame is
    played at this time value, so calling
    :meth:`~torchcodec.decoders.VideoDecoder.get_frame_played_at` with this
    value would result in an error. Retrieving the last frame is best done by
    simply indexing the :class:`~torchcodec.decoders.VideoDecoder` object with
    ``[-1]``.
    """
    width: Optional[int]
    """Width of the frames (int or None)."""
    height: Optional[int]
    """Height of the frames (int or None)."""
    num_frames_from_header: Optional[int]
    """Number of frames, from the stream's metadata. This is potentially
    inaccurate. We recommend using the ``num_frames`` attribute instead.
    (int or None)."""
    num_frames_from_content: Optional[int]
    """Number of frames computed by TorchCodec by scanning the stream's
    content (the scan doesn't involve decoding). This is more accurate
    than ``num_frames_from_header``. We recommend using the
    ``num_frames`` attribute instead. (int or None)."""
    average_fps_from_header: Optional[float]
    """Average fps of the stream, obtained from the header (float or None).
    We recommend using the ``average_fps`` attribute instead."""

    @property
    def duration_seconds(self) -> Optional[float]:
        """Duration of the stream in seconds. We try to calculate the duration
        from the actual frames if a :term:`scan` was performed. Otherwise we
        fall back to ``duration_seconds_from_header``.
        """
        if (
            self.end_stream_seconds_from_content is None
            or self.begin_stream_seconds_from_content is None
        ):
            return self.duration_seconds_from_header
        return (
            self.end_stream_seconds_from_content
            - self.begin_stream_seconds_from_content
        )

    @property
    def begin_stream_seconds(self) -> float:
        """Beginning of the stream, in seconds (float). Conceptually, this
        corresponds to the first frame's :term:`pts`. If
        ``begin_stream_seconds_from_content`` is not None, then it is returned.
        Otherwise, this value is 0.
        """
        if self.begin_stream_seconds_from_content is None:
            return 0
        else:
            return self.begin_stream_seconds_from_content

    @property
    def end_stream_seconds(self) -> Optional[float]:
        """End of the stream, in seconds (float or None).
        Conceptually, this corresponds to last_frame.pts + last_frame.duration.
        If ``end_stream_seconds_from_content`` is not None, then that value is
        returned. Otherwise, returns ``duration_seconds``.
        """
        if self.end_stream_seconds_from_content is None:
            return self.duration_seconds
        else:
            return self.end_stream_seconds_from_content

    @property
    def num_frames(self) -> Optional[int]:
        """Number of frames in the stream. This corresponds to
        ``num_frames_from_content`` if a :term:`scan` was made, otherwise it
        corresponds to ``num_frames_from_header``.
        """
        if self.num_frames_from_content is not None:
            return self.num_frames_from_content
        else:
            return self.num_frames_from_header

    @property
    def average_fps(self) -> Optional[float]:
        """Average fps of the stream. If a :term:`scan` was performed, this is
        computed from the number of frames and the duration of the stream.
        Otherwise we fall back to ``average_fps_from_header``. The header value
        is also returned when the scanned begin and end timestamps are equal,
        since the computed duration would then be zero.
        """
        if (
            self.end_stream_seconds_from_content is None
            or self.begin_stream_seconds_from_content is None
            or self.num_frames is None
            # A zero-length span would make the division below raise
            # ZeroDivisionError, so use the header value instead.
            or self.end_stream_seconds_from_content
            == self.begin_stream_seconds_from_content
        ):
            return self.average_fps_from_header
        return self.num_frames / (
            self.end_stream_seconds_from_content
            - self.begin_stream_seconds_from_content
        )

    def __repr__(self):
        """Return the base-class field listing followed by one line for each
        of the derived properties of this class."""
        s = super().__repr__()
        s += f"{SPACES}duration_seconds: {self.duration_seconds}\n"
        s += f"{SPACES}begin_stream_seconds: {self.begin_stream_seconds}\n"
        s += f"{SPACES}end_stream_seconds: {self.end_stream_seconds}\n"
        s += f"{SPACES}num_frames: {self.num_frames}\n"
        s += f"{SPACES}average_fps: {self.average_fps}\n"
        return s
160
+
161
+
162
@dataclass
class AudioStreamMetadata(StreamMetadata):
    """Metadata of a single audio stream."""

    sample_rate: Optional[int]
    """The original sample rate."""
    num_channels: Optional[int]
    """The number of channels (1 for mono, 2 for stereo, etc.)"""
    sample_format: Optional[str]
    """The original sample format, as described by FFmpeg. E.g. 'fltp', 's32', etc."""

    def __repr__(self):
        """Return the field listing produced by the parent class."""
        # Defining ``__repr__`` in this class body keeps ``@dataclass`` from
        # generating its own ``__repr__`` for the subclass, so the parent's
        # format is the one that gets used.
        listing = super().__repr__()
        return listing
175
+
176
+
177
@dataclass
class ContainerMetadata:
    """Header-level metadata of a container plus the metadata of its streams.

    ``streams`` holds one entry per stream. The ``best_*_stream_index`` fields,
    when not None, are used as indices into that list.
    """

    duration_seconds_from_header: Optional[float]
    bit_rate_from_header: Optional[float]
    best_video_stream_index: Optional[int]
    best_audio_stream_index: Optional[int]

    streams: List[StreamMetadata]

    @property
    def duration_seconds(self) -> Optional[float]:
        """Not implemented yet: accessing it always raises NotImplementedError."""
        raise NotImplementedError("Decide on logic and implement this!")

    @property
    def bit_rate(self) -> Optional[float]:
        """Not implemented yet: accessing it always raises NotImplementedError."""
        raise NotImplementedError("Decide on logic and implement this!")

    @property
    def best_video_stream(self) -> VideoStreamMetadata:
        """Return the entry of ``streams`` at ``best_video_stream_index``.

        Raises:
            ValueError: if ``best_video_stream_index`` is None.
        """
        index = self.best_video_stream_index
        if index is None:
            raise ValueError("The best video stream is unknown.")
        stream = self.streams[index]
        assert isinstance(stream, VideoStreamMetadata)  # mypy <3
        return stream

    @property
    def best_audio_stream(self) -> AudioStreamMetadata:
        """Return the entry of ``streams`` at ``best_audio_stream_index``.

        Raises:
            ValueError: if ``best_audio_stream_index`` is None.
        """
        index = self.best_audio_stream_index
        if index is None:
            raise ValueError("The best audio stream is unknown.")
        stream = self.streams[index]
        assert isinstance(stream, AudioStreamMetadata)  # mypy <3
        return stream
209
+
210
+
211
+ # TODO-AUDIO: This is user-facing. Should this just be `get_metadata`, without
212
+ # the "container" name in it? Same below.
213
def get_container_metadata(decoder: torch.Tensor) -> ContainerMetadata:
    """Return container metadata from a decoder.

    The accuracy of the metadata and the availability of some returned fields
    depends on whether a full scan was performed by the decoder.

    The container and each stream are described by JSON strings obtained from
    ``_get_container_json_metadata`` and ``_get_stream_json_metadata``. Keys
    read with ``.get`` become ``None`` when they are absent.
    """
    container_info = json.loads(_get_container_json_metadata(decoder))

    streams_metadata: List[StreamMetadata] = []
    for stream_index in range(container_info["numStreams"]):
        stream_info = json.loads(_get_stream_json_metadata(decoder, stream_index))

        # Fields that every stream kind carries.
        shared_fields = {
            "duration_seconds_from_header": stream_info.get("durationSeconds"),
            "bit_rate": stream_info.get("bitRate"),
            "begin_stream_seconds_from_header": stream_info.get(
                "beginStreamFromHeader"
            ),
            "codec": stream_info.get("codec"),
            "stream_index": stream_index,
        }

        media_type = stream_info["mediaType"]
        entry: StreamMetadata
        if media_type == "video":
            entry = VideoStreamMetadata(
                begin_stream_seconds_from_content=stream_info.get(
                    "minPtsSecondsFromScan"
                ),
                end_stream_seconds_from_content=stream_info.get(
                    "maxPtsSecondsFromScan"
                ),
                width=stream_info.get("width"),
                height=stream_info.get("height"),
                num_frames_from_header=stream_info.get("numFrames"),
                num_frames_from_content=stream_info.get("numFramesFromScan"),
                average_fps_from_header=stream_info.get("averageFps"),
                **shared_fields,
            )
        elif media_type == "audio":
            entry = AudioStreamMetadata(
                sample_rate=stream_info.get("sampleRate"),
                num_channels=stream_info.get("numChannels"),
                sample_format=stream_info.get("sampleFormat"),
                **shared_fields,
            )
        else:
            # This is neither a video nor audio stream. Could be e.g. subtitles.
            # We still need to add a dummy entry so that len(streams_metadata)
            # is consistent with the number of streams.
            entry = StreamMetadata(**shared_fields)
        streams_metadata.append(entry)

    return ContainerMetadata(
        duration_seconds_from_header=container_info.get("durationSeconds"),
        bit_rate_from_header=container_info.get("bitRate"),
        best_video_stream_index=container_info.get("bestVideoStreamIndex"),
        best_audio_stream_index=container_info.get("bestAudioStreamIndex"),
        streams=streams_metadata,
    )
270
+
271
+
272
def get_container_metadata_from_header(
    filename: Union[str, pathlib.Path]
) -> ContainerMetadata:
    """Return the container metadata of the file at ``filename``.

    A decoder is created from the file with ``seek_mode="approximate"`` and is
    then handed to ``get_container_metadata``. As the function name suggests,
    the values presumably come from the header only, without a scan -- confirm
    against ``create_from_file``.
    """
    path_as_str = str(filename)
    decoder = create_from_file(path_as_str, seek_mode="approximate")
    return get_container_metadata(decoder)