torchcodec 0.10.0__cp312-cp312-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. torchcodec/__init__.py +27 -0
  2. torchcodec/_core/AVIOContextHolder.cpp +60 -0
  3. torchcodec/_core/AVIOContextHolder.h +64 -0
  4. torchcodec/_core/AVIOFileLikeContext.cpp +98 -0
  5. torchcodec/_core/AVIOFileLikeContext.h +55 -0
  6. torchcodec/_core/AVIOTensorContext.cpp +130 -0
  7. torchcodec/_core/AVIOTensorContext.h +44 -0
  8. torchcodec/_core/BetaCudaDeviceInterface.cpp +849 -0
  9. torchcodec/_core/BetaCudaDeviceInterface.h +196 -0
  10. torchcodec/_core/CMakeLists.txt +295 -0
  11. torchcodec/_core/CUDACommon.cpp +330 -0
  12. torchcodec/_core/CUDACommon.h +51 -0
  13. torchcodec/_core/Cache.h +124 -0
  14. torchcodec/_core/CpuDeviceInterface.cpp +509 -0
  15. torchcodec/_core/CpuDeviceInterface.h +141 -0
  16. torchcodec/_core/CudaDeviceInterface.cpp +602 -0
  17. torchcodec/_core/CudaDeviceInterface.h +79 -0
  18. torchcodec/_core/DeviceInterface.cpp +117 -0
  19. torchcodec/_core/DeviceInterface.h +191 -0
  20. torchcodec/_core/Encoder.cpp +1054 -0
  21. torchcodec/_core/Encoder.h +192 -0
  22. torchcodec/_core/FFMPEGCommon.cpp +684 -0
  23. torchcodec/_core/FFMPEGCommon.h +314 -0
  24. torchcodec/_core/FilterGraph.cpp +159 -0
  25. torchcodec/_core/FilterGraph.h +59 -0
  26. torchcodec/_core/Frame.cpp +47 -0
  27. torchcodec/_core/Frame.h +72 -0
  28. torchcodec/_core/Metadata.cpp +124 -0
  29. torchcodec/_core/Metadata.h +92 -0
  30. torchcodec/_core/NVCUVIDRuntimeLoader.cpp +320 -0
  31. torchcodec/_core/NVCUVIDRuntimeLoader.h +14 -0
  32. torchcodec/_core/NVDECCache.cpp +60 -0
  33. torchcodec/_core/NVDECCache.h +102 -0
  34. torchcodec/_core/SingleStreamDecoder.cpp +1586 -0
  35. torchcodec/_core/SingleStreamDecoder.h +391 -0
  36. torchcodec/_core/StreamOptions.h +70 -0
  37. torchcodec/_core/Transform.cpp +128 -0
  38. torchcodec/_core/Transform.h +86 -0
  39. torchcodec/_core/ValidationUtils.cpp +35 -0
  40. torchcodec/_core/ValidationUtils.h +21 -0
  41. torchcodec/_core/__init__.py +46 -0
  42. torchcodec/_core/_metadata.py +262 -0
  43. torchcodec/_core/custom_ops.cpp +1090 -0
  44. torchcodec/_core/fetch_and_expose_non_gpl_ffmpeg_libs.cmake +169 -0
  45. torchcodec/_core/nvcuvid_include/cuviddec.h +1374 -0
  46. torchcodec/_core/nvcuvid_include/nvcuvid.h +610 -0
  47. torchcodec/_core/ops.py +605 -0
  48. torchcodec/_core/pybind_ops.cpp +50 -0
  49. torchcodec/_frame.py +146 -0
  50. torchcodec/_internally_replaced_utils.py +68 -0
  51. torchcodec/_samplers/__init__.py +7 -0
  52. torchcodec/_samplers/video_clip_sampler.py +419 -0
  53. torchcodec/decoders/__init__.py +12 -0
  54. torchcodec/decoders/_audio_decoder.py +185 -0
  55. torchcodec/decoders/_decoder_utils.py +113 -0
  56. torchcodec/decoders/_video_decoder.py +601 -0
  57. torchcodec/encoders/__init__.py +2 -0
  58. torchcodec/encoders/_audio_encoder.py +149 -0
  59. torchcodec/encoders/_video_encoder.py +196 -0
  60. torchcodec/libtorchcodec_core4.so +0 -0
  61. torchcodec/libtorchcodec_core5.so +0 -0
  62. torchcodec/libtorchcodec_core6.so +0 -0
  63. torchcodec/libtorchcodec_core7.so +0 -0
  64. torchcodec/libtorchcodec_core8.so +0 -0
  65. torchcodec/libtorchcodec_custom_ops4.so +0 -0
  66. torchcodec/libtorchcodec_custom_ops5.so +0 -0
  67. torchcodec/libtorchcodec_custom_ops6.so +0 -0
  68. torchcodec/libtorchcodec_custom_ops7.so +0 -0
  69. torchcodec/libtorchcodec_custom_ops8.so +0 -0
  70. torchcodec/libtorchcodec_pybind_ops4.so +0 -0
  71. torchcodec/libtorchcodec_pybind_ops5.so +0 -0
  72. torchcodec/libtorchcodec_pybind_ops6.so +0 -0
  73. torchcodec/libtorchcodec_pybind_ops7.so +0 -0
  74. torchcodec/libtorchcodec_pybind_ops8.so +0 -0
  75. torchcodec/samplers/__init__.py +2 -0
  76. torchcodec/samplers/_common.py +84 -0
  77. torchcodec/samplers/_index_based.py +287 -0
  78. torchcodec/samplers/_time_based.py +358 -0
  79. torchcodec/share/cmake/TorchCodec/TorchCodecConfig.cmake +76 -0
  80. torchcodec/share/cmake/TorchCodec/ffmpeg_versions.cmake +122 -0
  81. torchcodec/transforms/__init__.py +12 -0
  82. torchcodec/transforms/_decoder_transforms.py +375 -0
  83. torchcodec/version.py +2 -0
  84. torchcodec-0.10.0.dist-info/METADATA +286 -0
  85. torchcodec-0.10.0.dist-info/RECORD +88 -0
  86. torchcodec-0.10.0.dist-info/WHEEL +5 -0
  87. torchcodec-0.10.0.dist-info/licenses/LICENSE +28 -0
  88. torchcodec-0.10.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,35 @@
1
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ // All rights reserved.
3
+ //
4
+ // This source code is licensed under the BSD-style license found in the
5
+ // LICENSE file in the root directory of this source tree.
6
+
7
+ #include "ValidationUtils.h"
8
+ #include <limits>
9
+ #include "c10/util/Exception.h"
10
+
11
+ namespace facebook::torchcodec {
12
+
13
+ int validateInt64ToInt(int64_t value, const std::string& parameterName) {
14
+ TORCH_CHECK(
15
+ value >= std::numeric_limits<int>::min() &&
16
+ value <= std::numeric_limits<int>::max(),
17
+ parameterName,
18
+ "=",
19
+ value,
20
+ " is out of range for int type.");
21
+
22
+ return static_cast<int>(value);
23
+ }
24
+
25
+ std::optional<int> validateOptionalInt64ToInt(
26
+ const std::optional<int64_t>& value,
27
+ const std::string& parameterName) {
28
+ if (value.has_value()) {
29
+ return validateInt64ToInt(value.value(), parameterName);
30
+ } else {
31
+ return std::nullopt;
32
+ }
33
+ }
34
+
35
+ } // namespace facebook::torchcodec
@@ -0,0 +1,21 @@
1
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ // All rights reserved.
3
+ //
4
+ // This source code is licensed under the BSD-style license found in the
5
+ // LICENSE file in the root directory of this source tree.
6
+
7
+ #pragma once
8
+
9
+ #include <cstdint>
10
+ #include <optional>
11
+ #include <string>
12
+
13
+ namespace facebook::torchcodec {
14
+
15
+ int validateInt64ToInt(int64_t value, const std::string& parameterName);
16
+
17
+ std::optional<int> validateOptionalInt64ToInt(
18
+ const std::optional<int64_t>& value,
19
+ const std::string& parameterName);
20
+
21
+ } // namespace facebook::torchcodec
@@ -0,0 +1,46 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+
8
+ from ._metadata import (
9
+ AudioStreamMetadata,
10
+ ContainerMetadata,
11
+ get_container_metadata,
12
+ get_container_metadata_from_header,
13
+ VideoStreamMetadata,
14
+ )
15
+ from .ops import (
16
+ _add_video_stream,
17
+ _get_backend_details,
18
+ _get_key_frame_indices,
19
+ _test_frame_pts_equality,
20
+ add_audio_stream,
21
+ add_video_stream,
22
+ core_library_path,
23
+ create_from_bytes,
24
+ create_from_file,
25
+ create_from_file_like,
26
+ create_from_tensor,
27
+ encode_audio_to_file,
28
+ encode_audio_to_file_like,
29
+ encode_audio_to_tensor,
30
+ encode_video_to_file,
31
+ encode_video_to_file_like,
32
+ encode_video_to_tensor,
33
+ ffmpeg_major_version,
34
+ get_ffmpeg_library_versions,
35
+ get_frame_at_index,
36
+ get_frame_at_pts,
37
+ get_frames_at_indices,
38
+ get_frames_by_pts,
39
+ get_frames_by_pts_in_range,
40
+ get_frames_by_pts_in_range_audio,
41
+ get_frames_in_range,
42
+ get_json_metadata,
43
+ get_next_frame,
44
+ scan_all_streams_to_update_metadata,
45
+ seek_to_pts,
46
+ )
@@ -0,0 +1,262 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+
8
+ import dataclasses
9
+ import json
10
+ import pathlib
11
+ from dataclasses import dataclass
12
+ from fractions import Fraction
13
+
14
+ import torch
15
+
16
+ from torchcodec._core.ops import (
17
+ _get_container_json_metadata,
18
+ _get_stream_json_metadata,
19
+ create_from_file,
20
+ )
21
+
22
+
23
+ SPACES = " "
24
+
25
+
26
+ @dataclass
27
+ class StreamMetadata:
28
+ duration_seconds_from_header: float | None
29
+ """Duration of the stream, in seconds, obtained from the header (float or
30
+ None). This could be inaccurate."""
31
+ begin_stream_seconds_from_header: float | None
32
+ """Beginning of the stream, in seconds, obtained from the header (float or
33
+ None). Usually, this is equal to 0."""
34
+ bit_rate: float | None
35
+ """Bit rate of the stream, in bits per second (float or None)."""
36
+ codec: str | None
37
+ """Codec (str or None)."""
38
+ stream_index: int
39
+ """Index of the stream that this metadata refers to (int)."""
40
+
41
+ # Computed fields (computed in C++ with fallback logic)
42
+ duration_seconds: float | None
43
+ """Duration of the stream in seconds. We try to calculate the duration
44
+ from the actual frames if a :term:`scan` was performed. Otherwise we
45
+ fall back to ``duration_seconds_from_header``. If that value is also None,
46
+ we instead calculate the duration from ``num_frames_from_header`` and
47
+ ``average_fps_from_header``. If all of those are unavailable, we fall back
48
+ to the container-level ``duration_seconds_from_header``.
49
+ """
50
+ begin_stream_seconds: float | None
51
+ """Beginning of the stream, in seconds (float). Conceptually, this
52
+ corresponds to the first frame's :term:`pts`. If a :term:`scan` was performed
53
+ and ``begin_stream_seconds_from_content`` is not None, then it is returned.
54
+ Otherwise, this value is 0.
55
+ """
56
+
57
+ def __repr__(self):
58
+ s = self.__class__.__name__ + ":\n"
59
+ for field in dataclasses.fields(self):
60
+ s += f"{SPACES}{field.name}: {getattr(self, field.name)}\n"
61
+ return s
62
+
63
+
64
+ @dataclass
65
+ class VideoStreamMetadata(StreamMetadata):
66
+ """Metadata of a single video stream."""
67
+
68
+ begin_stream_seconds_from_content: float | None
69
+ """Beginning of the stream, in seconds (float or None).
70
+ Conceptually, this corresponds to the first frame's :term:`pts`. It is only
71
+ computed when a :term:`scan` is done as min(frame.pts) across all frames in
72
+ the stream. Usually, this is equal to 0."""
73
+ end_stream_seconds_from_content: float | None
74
+ """End of the stream, in seconds (float or None).
75
+ Conceptually, this corresponds to last_frame.pts + last_frame.duration. It
76
+ is only computed when a :term:`scan` is done as max(frame.pts +
77
+ frame.duration) across all frames in the stream. Note that no frame is
78
+ played at this time value, so calling
79
+ :meth:`~torchcodec.decoders.VideoDecoder.get_frame_played_at` with this
80
+ value would result in an error. Retrieving the last frame is best done by
81
+ simply indexing the :class:`~torchcodec.decoders.VideoDecoder` object with
82
+ ``[-1]``.
83
+ """
84
+ width: int | None
85
+ """Width of the frames (int or None)."""
86
+ height: int | None
87
+ """Height of the frames (int or None)."""
88
+ num_frames_from_header: int | None
89
+ """Number of frames, from the stream's metadata. This is potentially
90
+ inaccurate. We recommend using the ``num_frames`` attribute instead.
91
+ (int or None)."""
92
+ num_frames_from_content: int | None
93
+ """Number of frames computed by TorchCodec by scanning the stream's
94
+ content (the scan doesn't involve decoding). This is more accurate
95
+ than ``num_frames_from_header``. We recommend using the
96
+ ``num_frames`` attribute instead. (int or None)."""
97
+ average_fps_from_header: float | None
98
+ """Average fps of the stream, obtained from the header (float or None).
99
+ We recommend using the ``average_fps`` attribute instead."""
100
+ pixel_aspect_ratio: Fraction | None
101
+ """Pixel Aspect Ratio (PAR), also known as Sample Aspect Ratio
102
+ (SAR --- not to be confused with Storage Aspect Ratio, also SAR),
103
+ is the ratio between the width and height of each pixel
104
+ (``fractions.Fraction`` or None)."""
105
+
106
+ # Computed fields (computed in C++ with fallback logic)
107
+ end_stream_seconds: float | None
108
+ """End of the stream, in seconds (float or None).
109
+ Conceptually, this corresponds to last_frame.pts + last_frame.duration.
110
+ If a :term:`scan` was performed and ``end_stream_seconds_from_content`` is not None, then that value is
111
+ returned. Otherwise, returns ``duration_seconds``.
112
+ """
113
+ num_frames: int | None
114
+ """Number of frames in the stream (int or None).
115
+ This corresponds to ``num_frames_from_content`` if a :term:`scan` was made,
116
+ otherwise it corresponds to ``num_frames_from_header``. If that value is also
117
+ None, the number of frames is calculated from the duration and the average fps.
118
+ """
119
+ average_fps: float | None
120
+ """Average fps of the stream. If a :term:`scan` was performed, this is
121
+ computed from the number of frames and the duration of the stream.
122
+ Otherwise we fall back to ``average_fps_from_header``.
123
+ """
124
+
125
+ def __repr__(self):
126
+ return super().__repr__()
127
+
128
+
129
+ @dataclass
130
+ class AudioStreamMetadata(StreamMetadata):
131
+ """Metadata of a single audio stream."""
132
+
133
+ sample_rate: int | None
134
+ """The original sample rate."""
135
+ num_channels: int | None
136
+ """The number of channels (1 for mono, 2 for stereo, etc.)"""
137
+ sample_format: str | None
138
+ """The original sample format, as described by FFmpeg. E.g. 'fltp', 's32', etc."""
139
+
140
+ def __repr__(self):
141
+ return super().__repr__()
142
+
143
+
144
+ @dataclass
145
+ class ContainerMetadata:
146
+ duration_seconds_from_header: float | None
147
+ bit_rate_from_header: float | None
148
+ best_video_stream_index: int | None
149
+ best_audio_stream_index: int | None
150
+
151
+ streams: list[StreamMetadata]
152
+
153
+ @property
154
+ def duration_seconds(self) -> float | None:
155
+ raise NotImplementedError("Decide on logic and implement this!")
156
+
157
+ @property
158
+ def bit_rate(self) -> float | None:
159
+ raise NotImplementedError("Decide on logic and implement this!")
160
+
161
+ @property
162
+ def best_video_stream(self) -> VideoStreamMetadata:
163
+ if self.best_video_stream_index is None:
164
+ raise ValueError("The best video stream is unknown.")
165
+ metadata = self.streams[self.best_video_stream_index]
166
+ assert isinstance(metadata, VideoStreamMetadata) # mypy <3
167
+ return metadata
168
+
169
+ @property
170
+ def best_audio_stream(self) -> AudioStreamMetadata:
171
+ if self.best_audio_stream_index is None:
172
+ raise ValueError("The best audio stream is unknown.")
173
+ metadata = self.streams[self.best_audio_stream_index]
174
+ assert isinstance(metadata, AudioStreamMetadata) # mypy <3
175
+ return metadata
176
+
177
+
178
+ def _get_optional_par_fraction(stream_dict):
179
+ try:
180
+ return Fraction(
181
+ stream_dict["sampleAspectRatioNum"],
182
+ stream_dict["sampleAspectRatioDen"],
183
+ )
184
+ except KeyError:
185
+ return None
186
+
187
+
188
+ # TODO-AUDIO: This is user-facing. Should this just be `get_metadata`, without
189
+ # the "container" name in it? Same below.
190
+ def get_container_metadata(decoder: torch.Tensor) -> ContainerMetadata:
191
+ """Return container metadata from a decoder.
192
+
193
+ The accuracy of the metadata and the availability of some returned fields
194
+ depends on whether a full scan was performed by the decoder.
195
+ """
196
+
197
+ container_dict = json.loads(_get_container_json_metadata(decoder))
198
+ streams_metadata: list[StreamMetadata] = []
199
+ for stream_index in range(container_dict["numStreams"]):
200
+ stream_dict = json.loads(_get_stream_json_metadata(decoder, stream_index))
201
+ common_meta = dict(
202
+ duration_seconds_from_header=stream_dict.get("durationSecondsFromHeader"),
203
+ duration_seconds=stream_dict.get("durationSeconds"),
204
+ bit_rate=stream_dict.get("bitRate"),
205
+ begin_stream_seconds_from_header=stream_dict.get(
206
+ "beginStreamSecondsFromHeader"
207
+ ),
208
+ begin_stream_seconds=stream_dict.get("beginStreamSeconds"),
209
+ codec=stream_dict.get("codec"),
210
+ stream_index=stream_index,
211
+ )
212
+ if stream_dict["mediaType"] == "video":
213
+ streams_metadata.append(
214
+ VideoStreamMetadata(
215
+ begin_stream_seconds_from_content=stream_dict.get(
216
+ "beginStreamSecondsFromContent"
217
+ ),
218
+ end_stream_seconds_from_content=stream_dict.get(
219
+ "endStreamSecondsFromContent"
220
+ ),
221
+ end_stream_seconds=stream_dict.get("endStreamSeconds"),
222
+ num_frames=stream_dict.get("numFrames"),
223
+ average_fps=stream_dict.get("averageFps"),
224
+ width=stream_dict.get("width"),
225
+ height=stream_dict.get("height"),
226
+ num_frames_from_header=stream_dict.get("numFramesFromHeader"),
227
+ num_frames_from_content=stream_dict.get("numFramesFromContent"),
228
+ average_fps_from_header=stream_dict.get("averageFpsFromHeader"),
229
+ pixel_aspect_ratio=_get_optional_par_fraction(stream_dict),
230
+ **common_meta,
231
+ )
232
+ )
233
+ elif stream_dict["mediaType"] == "audio":
234
+ streams_metadata.append(
235
+ AudioStreamMetadata(
236
+ sample_rate=stream_dict.get("sampleRate"),
237
+ num_channels=stream_dict.get("numChannels"),
238
+ sample_format=stream_dict.get("sampleFormat"),
239
+ **common_meta,
240
+ )
241
+ )
242
+ else:
243
+ # This is neither a video nor audio stream. Could be e.g. subtitles.
244
+ # We still need to add a dummy entry so that len(streams_metadata)
245
+ # is consistent with the number of streams.
246
+ streams_metadata.append(StreamMetadata(**common_meta))
247
+
248
+ return ContainerMetadata(
249
+ duration_seconds_from_header=container_dict.get("durationSecondsFromHeader"),
250
+ bit_rate_from_header=container_dict.get("bitRate"),
251
+ best_video_stream_index=container_dict.get("bestVideoStreamIndex"),
252
+ best_audio_stream_index=container_dict.get("bestAudioStreamIndex"),
253
+ streams=streams_metadata,
254
+ )
255
+
256
+
257
+ def get_container_metadata_from_header(
258
+ filename: str | pathlib.Path,
259
+ ) -> ContainerMetadata:
260
+ return get_container_metadata(
261
+ create_from_file(str(filename), seek_mode="approximate")
262
+ )