torchcodec 0.7.0__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of torchcodec might be problematic. Click here for more details.

Files changed (67) hide show
  1. torchcodec/__init__.py +16 -0
  2. torchcodec/_core/AVIOContextHolder.cpp +60 -0
  3. torchcodec/_core/AVIOContextHolder.h +64 -0
  4. torchcodec/_core/AVIOFileLikeContext.cpp +98 -0
  5. torchcodec/_core/AVIOFileLikeContext.h +55 -0
  6. torchcodec/_core/AVIOTensorContext.cpp +123 -0
  7. torchcodec/_core/AVIOTensorContext.h +43 -0
  8. torchcodec/_core/CMakeLists.txt +292 -0
  9. torchcodec/_core/Cache.h +138 -0
  10. torchcodec/_core/CpuDeviceInterface.cpp +266 -0
  11. torchcodec/_core/CpuDeviceInterface.h +70 -0
  12. torchcodec/_core/CudaDeviceInterface.cpp +514 -0
  13. torchcodec/_core/CudaDeviceInterface.h +37 -0
  14. torchcodec/_core/DeviceInterface.cpp +79 -0
  15. torchcodec/_core/DeviceInterface.h +67 -0
  16. torchcodec/_core/Encoder.cpp +514 -0
  17. torchcodec/_core/Encoder.h +123 -0
  18. torchcodec/_core/FFMPEGCommon.cpp +421 -0
  19. torchcodec/_core/FFMPEGCommon.h +227 -0
  20. torchcodec/_core/FilterGraph.cpp +142 -0
  21. torchcodec/_core/FilterGraph.h +45 -0
  22. torchcodec/_core/Frame.cpp +32 -0
  23. torchcodec/_core/Frame.h +118 -0
  24. torchcodec/_core/Metadata.h +72 -0
  25. torchcodec/_core/SingleStreamDecoder.cpp +1715 -0
  26. torchcodec/_core/SingleStreamDecoder.h +380 -0
  27. torchcodec/_core/StreamOptions.h +53 -0
  28. torchcodec/_core/ValidationUtils.cpp +35 -0
  29. torchcodec/_core/ValidationUtils.h +21 -0
  30. torchcodec/_core/__init__.py +40 -0
  31. torchcodec/_core/_metadata.py +317 -0
  32. torchcodec/_core/custom_ops.cpp +727 -0
  33. torchcodec/_core/fetch_and_expose_non_gpl_ffmpeg_libs.cmake +300 -0
  34. torchcodec/_core/ops.py +455 -0
  35. torchcodec/_core/pybind_ops.cpp +87 -0
  36. torchcodec/_frame.py +145 -0
  37. torchcodec/_internally_replaced_utils.py +67 -0
  38. torchcodec/_samplers/__init__.py +7 -0
  39. torchcodec/_samplers/video_clip_sampler.py +430 -0
  40. torchcodec/decoders/__init__.py +11 -0
  41. torchcodec/decoders/_audio_decoder.py +177 -0
  42. torchcodec/decoders/_decoder_utils.py +52 -0
  43. torchcodec/decoders/_video_decoder.py +464 -0
  44. torchcodec/encoders/__init__.py +1 -0
  45. torchcodec/encoders/_audio_encoder.py +150 -0
  46. torchcodec/libtorchcodec_core4.dll +0 -0
  47. torchcodec/libtorchcodec_core5.dll +0 -0
  48. torchcodec/libtorchcodec_core6.dll +0 -0
  49. torchcodec/libtorchcodec_core7.dll +0 -0
  50. torchcodec/libtorchcodec_custom_ops4.dll +0 -0
  51. torchcodec/libtorchcodec_custom_ops5.dll +0 -0
  52. torchcodec/libtorchcodec_custom_ops6.dll +0 -0
  53. torchcodec/libtorchcodec_custom_ops7.dll +0 -0
  54. torchcodec/libtorchcodec_pybind_ops4.pyd +0 -0
  55. torchcodec/libtorchcodec_pybind_ops5.pyd +0 -0
  56. torchcodec/libtorchcodec_pybind_ops6.pyd +0 -0
  57. torchcodec/libtorchcodec_pybind_ops7.pyd +0 -0
  58. torchcodec/samplers/__init__.py +2 -0
  59. torchcodec/samplers/_common.py +84 -0
  60. torchcodec/samplers/_index_based.py +287 -0
  61. torchcodec/samplers/_time_based.py +350 -0
  62. torchcodec/version.py +2 -0
  63. torchcodec-0.7.0.dist-info/METADATA +242 -0
  64. torchcodec-0.7.0.dist-info/RECORD +67 -0
  65. torchcodec-0.7.0.dist-info/WHEEL +5 -0
  66. torchcodec-0.7.0.dist-info/licenses/LICENSE +28 -0
  67. torchcodec-0.7.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,350 @@
1
+ from typing import Literal, Optional
2
+
3
+ import torch
4
+
5
+ from torchcodec import FrameBatch
6
+ from torchcodec.samplers._common import (
7
+ _FRAMEBATCH_RETURN_DOCS,
8
+ _POLICY_FUNCTION_TYPE,
9
+ _POLICY_FUNCTIONS,
10
+ _reshape_4d_framebatch_into_5d,
11
+ _validate_common_params,
12
+ )
13
+
14
+
15
+ def _validate_params_time_based(
16
+ *,
17
+ decoder,
18
+ num_clips,
19
+ seconds_between_clip_starts,
20
+ seconds_between_frames,
21
+ ):
22
+
23
+ if (num_clips is None and seconds_between_clip_starts is None) or (
24
+ num_clips is not None and seconds_between_clip_starts is not None
25
+ ):
26
+ raise ValueError("This is internal only and should never happen.")
27
+
28
+ if seconds_between_clip_starts is not None and seconds_between_clip_starts <= 0:
29
+ raise ValueError(
30
+ f"seconds_between_clip_starts ({seconds_between_clip_starts}) must be > 0"
31
+ )
32
+
33
+ if num_clips is not None and num_clips <= 0:
34
+ raise ValueError(f"num_clips ({num_clips}) must be > 0")
35
+
36
+ if decoder.metadata.average_fps is None:
37
+ raise ValueError(
38
+ "Could not infer average fps from video metadata. "
39
+ "Try using an index-based sampler instead."
40
+ )
41
+
42
+ # Note that metadata.begin_stream_seconds is a property that will always yield a valid
43
+ # value; if it is not present in the actual metadata, the metadata object will return 0.
44
+ # Hence, we do not test for it here and only test metadata.end_stream_seconds.
45
+ if decoder.metadata.end_stream_seconds is None:
46
+ raise ValueError(
47
+ "Could not infer stream end from video metadata. "
48
+ "Try using an index-based sampler instead."
49
+ )
50
+
51
+ average_frame_duration_seconds = 1 / decoder.metadata.average_fps
52
+ if seconds_between_frames is None:
53
+ seconds_between_frames = average_frame_duration_seconds
54
+ elif seconds_between_frames <= 0:
55
+ raise ValueError(
56
+ f"seconds_between_clip_starts ({seconds_between_clip_starts}) must be > 0, got"
57
+ )
58
+
59
+ return seconds_between_frames
60
+
61
+
62
+ def _validate_sampling_range_time_based(
63
+ *,
64
+ num_frames_per_clip,
65
+ seconds_between_frames,
66
+ sampling_range_start,
67
+ sampling_range_end,
68
+ begin_stream_seconds,
69
+ end_stream_seconds,
70
+ ):
71
+
72
+ if sampling_range_start is None:
73
+ sampling_range_start = begin_stream_seconds
74
+ else:
75
+ if sampling_range_start < begin_stream_seconds:
76
+ raise ValueError(
77
+ f"sampling_range_start ({sampling_range_start}) must be at least {begin_stream_seconds}"
78
+ )
79
+ if sampling_range_start >= end_stream_seconds:
80
+ raise ValueError(
81
+ f"sampling_range_start ({sampling_range_start}) must be smaller than {end_stream_seconds}"
82
+ )
83
+
84
+ if sampling_range_end is None:
85
+ # We allow a clip to start anywhere within
86
+ # [sampling_range_start, sampling_range_end)
87
+ # When sampling_range_end is None, we want to automatically set it to
88
+ # the largest possible value such that the sampled frames in any clip
89
+ # are within the bounds of the video duration (in other words, we don't
90
+ # want to have to resort to the `policy`).
91
+ # I.e. we want to guarantee that for all frames in any clip we have
92
+ # pts < end_stream_seconds.
93
+ #
94
+ # The frames of a clip will be sampled at the following pts:
95
+ # clip_timestamps = [
96
+ # clip_start + 0 * seconds_between_frames,
97
+ # clip_start + 1 * seconds_between_frames,
98
+ # clip_start + 2 * seconds_between_frames,
99
+ # ...
100
+ # clip_start + (num_frames_per_clip - 1) * seconds_between_frames,
101
+ # ]
102
+ # To guarantee that any such value is < end_stream_seconds, we only need
103
+ # to guarantee that
104
+ # clip_start < end_stream_seconds - (num_frames_per_clip - 1) * seconds_between_frames
105
+ #
106
+ # So that's the value of sampling_range_end we want to use.
107
+ sampling_range_end = (
108
+ end_stream_seconds - (num_frames_per_clip - 1) * seconds_between_frames
109
+ )
110
+ elif sampling_range_end <= begin_stream_seconds:
111
+ raise ValueError(
112
+ f"sampling_range_end ({sampling_range_end}) must be at least {begin_stream_seconds}"
113
+ )
114
+
115
+ if sampling_range_start >= sampling_range_end:
116
+ raise ValueError(
117
+ f"sampling_range_start ({sampling_range_start}) must be smaller than sampling_range_end ({sampling_range_end})"
118
+ )
119
+
120
+ sampling_range_end = min(sampling_range_end, end_stream_seconds)
121
+
122
+ return sampling_range_start, sampling_range_end
123
+
124
+
125
+ def _build_all_clips_timestamps(
126
+ *,
127
+ clip_start_seconds: torch.Tensor, # 1D float tensor
128
+ num_frames_per_clip: int,
129
+ seconds_between_frames: float,
130
+ end_stream_seconds: float,
131
+ policy_fun: _POLICY_FUNCTION_TYPE,
132
+ ) -> list[float]:
133
+
134
+ all_clips_timestamps: list[float] = []
135
+ for start_seconds in clip_start_seconds:
136
+ clip_timestamps = [
137
+ timestamp
138
+ for i in range(num_frames_per_clip)
139
+ if (timestamp := start_seconds + i * seconds_between_frames)
140
+ < end_stream_seconds
141
+ ]
142
+
143
+ if len(clip_timestamps) < num_frames_per_clip:
144
+ clip_timestamps = policy_fun(clip_timestamps, num_frames_per_clip)
145
+ all_clips_timestamps += clip_timestamps
146
+
147
+ return all_clips_timestamps
148
+
149
+
150
def _generic_time_based_sampler(
    kind: Literal["random", "regular"],
    decoder,
    *,
    num_clips: Optional[int],  # mutually exclusive with seconds_between_clip_starts
    seconds_between_clip_starts: Optional[float],
    num_frames_per_clip: int,
    seconds_between_frames: Optional[float],
    # None means "beginning", which may not always be 0
    sampling_range_start: Optional[float],
    sampling_range_end: Optional[float],  # interval is [start, end).
    policy: Literal["repeat_last", "wrap", "error"] = "repeat_last",
) -> FrameBatch:
    """Shared implementation of the random and regular time-based samplers.

    Validates the parameters, picks the clip start timestamps (uniformly at
    random for ``kind="random"``, regularly spaced otherwise), expands them
    into per-frame sampling timestamps, fetches the frames through
    ``decoder.get_frames_played_at`` and hands the result to
    ``_reshape_4d_framebatch_into_5d``.
    """
    # Convention used throughout: sampling_range_end is the upper bound of
    # where a clip may *start*, and that bound is exclusive. No clip ever
    # starts exactly at (or above) sampling_range_end.

    _validate_common_params(
        decoder=decoder,
        num_frames_per_clip=num_frames_per_clip,
        policy=policy,
    )

    seconds_between_frames = _validate_params_time_based(
        decoder=decoder,
        num_clips=num_clips,
        seconds_between_clip_starts=seconds_between_clip_starts,
        seconds_between_frames=seconds_between_frames,
    )

    range_start, range_end = _validate_sampling_range_time_based(
        num_frames_per_clip=num_frames_per_clip,
        seconds_between_frames=seconds_between_frames,
        sampling_range_start=sampling_range_start,
        sampling_range_end=sampling_range_end,
        begin_stream_seconds=decoder.metadata.begin_stream_seconds,
        end_stream_seconds=decoder.metadata.end_stream_seconds,
    )

    if kind != "random":
        assert seconds_between_clip_starts is not None  # appease type-checker
        # The `end` argument of arange is excluded, matching the open bound.
        clip_start_seconds = torch.arange(
            range_start, range_end, seconds_between_clip_starts
        )
        num_clips = len(clip_start_seconds)
    else:
        assert num_clips is not None  # appease type-checker
        range_width = range_end - range_start
        # torch.rand() returns values in [0, 1), which keeps every clip start
        # strictly below range_end.
        clip_start_seconds = torch.rand(num_clips) * range_width + range_start

    timestamps = _build_all_clips_timestamps(
        clip_start_seconds=clip_start_seconds,
        num_frames_per_clip=num_frames_per_clip,
        seconds_between_frames=seconds_between_frames,
        end_stream_seconds=decoder.metadata.end_stream_seconds,
        policy_fun=_POLICY_FUNCTIONS[policy],
    )

    decoded = decoder.get_frames_played_at(seconds=timestamps)
    return _reshape_4d_framebatch_into_5d(
        frames=decoded,
        num_clips=num_clips,
        num_frames_per_clip=num_frames_per_clip,
    )
220
+
221
+
222
def clips_at_random_timestamps(
    decoder,
    *,
    num_clips: int = 1,
    num_frames_per_clip: int = 1,
    seconds_between_frames: Optional[float] = None,
    # None means "beginning", which may not always be 0
    sampling_range_start: Optional[float] = None,
    sampling_range_end: Optional[float] = None,  # interval is [start, end).
    policy: Literal["repeat_last", "wrap", "error"] = "repeat_last",
) -> FrameBatch:
    # The docstring is attached to ``__doc__`` further down in this module.
    torch._C._log_api_usage_once("torchcodec.samplers.clips_at_random_timestamps")
    # Random sampling is driven by `num_clips`; the regular-spacing parameter
    # is explicitly disabled.
    return _generic_time_based_sampler(
        "random",
        decoder,
        num_clips=num_clips,
        seconds_between_clip_starts=None,
        num_frames_per_clip=num_frames_per_clip,
        seconds_between_frames=seconds_between_frames,
        sampling_range_start=sampling_range_start,
        sampling_range_end=sampling_range_end,
        policy=policy,
    )
246
+
247
+
248
def clips_at_regular_timestamps(
    decoder,
    *,
    seconds_between_clip_starts: float,
    num_frames_per_clip: int = 1,
    seconds_between_frames: Optional[float] = None,
    # None means "beginning", which may not always be 0
    sampling_range_start: Optional[float] = None,
    sampling_range_end: Optional[float] = None,  # interval is [start, end).
    policy: Literal["repeat_last", "wrap", "error"] = "repeat_last",
) -> FrameBatch:
    # The docstring is attached to ``__doc__`` further down in this module.
    torch._C._log_api_usage_once("torchcodec.samplers.clips_at_regular_timestamps")
    # Regular sampling is driven by `seconds_between_clip_starts`; the number
    # of clips is derived from the sampling range, so `num_clips` is None.
    return _generic_time_based_sampler(
        "regular",
        decoder,
        num_clips=None,
        seconds_between_clip_starts=seconds_between_clip_starts,
        num_frames_per_clip=num_frames_per_clip,
        seconds_between_frames=seconds_between_frames,
        sampling_range_start=sampling_range_start,
        sampling_range_end=sampling_range_end,
        policy=policy,
    )
272
+
273
+
274
+ _COMMON_DOCS = """
275
+ {maybe_note}
276
+
277
+ Args:
278
+ decoder (VideoDecoder): The :class:`~torchcodec.decoders.VideoDecoder`
279
+ instance to sample clips from.
280
+ {num_clips_or_seconds_between_clip_starts}
281
+ num_frames_per_clip (int, optional): The number of frames per clips. Default: 1.
282
+ seconds_between_frames (float or None, optional): The time (in seconds)
283
+ between each frame within a clip. More accurately, this defines the
284
+ time between the *frame sampling point*, i.e. the timestamps at
285
+ which we sample the frames. Because frames span intervals in time ,
286
+ the resulting start of frames within a clip may not be exactly
287
+ spaced by ``seconds_between_frames`` - but on average, they will be.
288
+ Default is None, which is set to the average frame duration
289
+ (``1/average_fps``).
290
+ sampling_range_start (float or None, optional): The start of the
291
+ sampling range, which defines the first timestamp (in seconds) that
292
+ a clip may *start* at. Default: None, which corresponds to the start
293
+ of the video. (Note: some videos start at negative values, which is
294
+ why the default is not 0).
295
+ sampling_range_end (float or None, optional): The end of the sampling
296
+ range, which defines the last timestamp (in seconds) that a clip may
297
+ *start* at. This value is exclusive, i.e. a clip may only start within
298
+ [``sampling_range_start``, ``sampling_range_end``). If None
299
+ (default), the value is set automatically such that the clips never
300
+ span beyond the end of the video, i.e. it is set to
301
+ ``end_video_seconds - (num_frames_per_clip - 1) *
302
+ seconds_between_frames``. When a clip spans beyond the end of the
303
+ video, the ``policy`` parameter defines how to construct such clip.
304
+ policy (str, optional): Defines how to construct clips that span beyond
305
+ the end of the video. This is best described with an example:
306
+ assuming the last valid (seekable) timestamp in a video is 10.9, and
307
+ a clip was sampled to start at timestamp 10.5, with
308
+ ``num_frames_per_clip=5`` and ``seconds_between_frames=0.2``, the
309
+ sampling timestamps of the frames in the clip are supposed to be
310
+ [10.5, 10.7, 10.9, 11.1, 11.2]. But 11.1 and 11.2 are invalid
311
+ timestamps, so the ``policy`` parameter defines how to replace those
312
+ frames, with valid sampling timestamps:
313
+
314
+ - "repeat_last": repeats the last valid frame of the clip. We would
315
+ get frames sampled at timestamps [10.5, 10.7, 10.9, 10.9, 10.9].
316
+ - "wrap": wraps around to the beginning of the clip. We would get
317
+ frames sampled at timestamps [10.5, 10.7, 10.9, 10.5, 10.7].
318
+ - "error": raises an error.
319
+
320
+ Default is "repeat_last". Note that when ``sampling_range_end=None``
321
+ (default), this policy parameter is unlikely to be relevant.
322
+
323
+ {return_docs}
324
+ """
325
+
326
+
327
# Parameter entry that only applies to ``clips_at_random_timestamps``.
_NUM_CLIPS_DOCS = """
        num_clips (int, optional): The number of clips to return. Default: 1.
"""
clips_at_random_timestamps.__doc__ = (
    "Sample :term:`clips` at random timestamps.\n"
    + _COMMON_DOCS.format(
        maybe_note="",
        num_clips_or_seconds_between_clip_starts=_NUM_CLIPS_DOCS,
        return_docs=_FRAMEBATCH_RETURN_DOCS,
    )
    + "\n"
)


# Parameter entry that only applies to ``clips_at_regular_timestamps``.
_SECONDS_BETWEEN_CLIP_STARTS = """
        seconds_between_clip_starts (float): The space (in seconds) between each
            clip start.
"""

# Extra note rendered at the top of the regular sampler's docstring.
_NOTE_DOCS = """
    .. note::
        For consistency with existing sampling APIs (such as torchvision), this
        sampler takes a ``seconds_between_clip_starts`` parameter instead of
        ``num_clips``. If you find that supporting ``num_clips`` would be
        useful, please let us know by `opening a feature request
        <https://github.com/pytorch/torchcodec/issues?q=is:open+is:issue>`_.
"""
clips_at_regular_timestamps.__doc__ = (
    "Sample :term:`clips` at regular (equally-spaced) timestamps.\n"
    + _COMMON_DOCS.format(
        maybe_note=_NOTE_DOCS,
        num_clips_or_seconds_between_clip_starts=_SECONDS_BETWEEN_CLIP_STARTS,
        return_docs=_FRAMEBATCH_RETURN_DOCS,
    )
    + "\n"
)
torchcodec/version.py ADDED
@@ -0,0 +1,2 @@
1
+ # Note that this file is generated during install.
2
+ __version__ = '0.7.0'
@@ -0,0 +1,242 @@
1
+ Metadata-Version: 2.4
2
+ Name: torchcodec
3
+ Version: 0.7.0
4
+ Summary: A video decoder for PyTorch
5
+ Author-email: PyTorch Team <packages@pytorch.org>
6
+ Project-URL: GitHub, https://github.com/pytorch/torchcodec
7
+ Project-URL: Documentation, https://pytorch.org/torchcodec/stable/index.html
8
+ Requires-Python: >=3.8
9
+ Description-Content-Type: text/markdown
10
+ License-File: LICENSE
11
+ Provides-Extra: dev
12
+ Requires-Dist: numpy; extra == "dev"
13
+ Requires-Dist: pytest; extra == "dev"
14
+ Requires-Dist: pillow; extra == "dev"
15
+ Dynamic: license-file
16
+
17
+ [**Installation**](#installing-torchcodec) | [**Simple Example**](#using-torchcodec) | [**Detailed Example**](https://pytorch.org/torchcodec/stable/generated_examples/) | [**Documentation**](https://pytorch.org/torchcodec) | [**Contributing**](CONTRIBUTING.md) | [**License**](#license)
18
+
19
+ # TorchCodec
20
+
21
+ TorchCodec is a Python library for decoding video and audio data into PyTorch
22
+ tensors, on CPU and CUDA GPU. It also supports audio encoding, and video
23
+ encoding will come soon! It aims to be fast, easy to use, and well integrated
24
+ into the PyTorch ecosystem. If you want to use PyTorch to train ML models on
25
+ videos and audio, TorchCodec is how you turn these into data.
26
+
27
+ We achieve these capabilities through:
28
+
29
+ * Pythonic APIs that mirror Python and PyTorch conventions.
30
+ * Relying on [FFmpeg](https://www.ffmpeg.org/) to do the decoding and encoding.
31
+ TorchCodec uses the version of FFmpeg you already have installed. FFmpeg is a
32
+ mature library with broad coverage available on most systems. It is, however,
33
+ not easy to use. TorchCodec abstracts FFmpeg's complexity to ensure it is used
34
+ correctly and efficiently.
35
+ * Returning data as PyTorch tensors, ready to be fed into PyTorch transforms
36
+ or used directly to train models.
37
+
38
+ ## Using TorchCodec
39
+
40
+ Here's a condensed summary of what you can do with TorchCodec. For more detailed
41
+ examples, [check out our
42
+ documentation](https://pytorch.org/torchcodec/stable/generated_examples/)!
43
+
44
+ #### Decoding
45
+
46
+ ```python
47
+ from torchcodec.decoders import VideoDecoder
48
+
49
+ device = "cpu" # or e.g. "cuda" !
50
+ decoder = VideoDecoder("path/to/video.mp4", device=device)
51
+
52
+ decoder.metadata
53
+ # VideoStreamMetadata:
54
+ # num_frames: 250
55
+ # duration_seconds: 10.0
56
+ # bit_rate: 31315.0
57
+ # codec: h264
58
+ # average_fps: 25.0
59
+ # ... (truncated output)
60
+
61
+ # Simple Indexing API
62
+ decoder[0] # uint8 tensor of shape [C, H, W]
63
+ decoder[0 : -1 : 20] # uint8 stacked tensor of shape [N, C, H, W]
64
+
65
+ # Indexing, with PTS and duration info:
66
+ decoder.get_frames_at(indices=[2, 100])
67
+ # FrameBatch:
68
+ # data (shape): torch.Size([2, 3, 270, 480])
69
+ # pts_seconds: tensor([0.0667, 3.3367], dtype=torch.float64)
70
+ # duration_seconds: tensor([0.0334, 0.0334], dtype=torch.float64)
71
+
72
+ # Time-based indexing with PTS and duration info
73
+ decoder.get_frames_played_at(seconds=[0.5, 10.4])
74
+ # FrameBatch:
75
+ # data (shape): torch.Size([2, 3, 270, 480])
76
+ # pts_seconds: tensor([ 0.4671, 10.3770], dtype=torch.float64)
77
+ # duration_seconds: tensor([0.0334, 0.0334], dtype=torch.float64)
78
+ ```
79
+
80
+ #### Clip sampling
81
+
82
+ ```python
83
+
84
+ from torchcodec.samplers import clips_at_regular_timestamps
85
+
86
+ clips_at_regular_timestamps(
87
+ decoder,
88
+ seconds_between_clip_starts=1.5,
89
+ num_frames_per_clip=4,
90
+ seconds_between_frames=0.1
91
+ )
92
+ # FrameBatch:
93
+ # data (shape): torch.Size([9, 4, 3, 270, 480])
94
+ # pts_seconds: tensor([[ 0.0000, 0.0667, 0.1668, 0.2669],
95
+ # [ 1.4681, 1.5682, 1.6683, 1.7684],
96
+ # [ 2.9696, 3.0697, 3.1698, 3.2699],
97
+ # ... (truncated), dtype=torch.float64)
98
+ # duration_seconds: tensor([[0.0334, 0.0334, 0.0334, 0.0334],
99
+ # [0.0334, 0.0334, 0.0334, 0.0334],
100
+ # [0.0334, 0.0334, 0.0334, 0.0334],
101
+ # ... (truncated), dtype=torch.float64)
102
+ ```
103
+
104
+ You can use the following snippet to generate a video with FFmpeg and try out
105
+ TorchCodec:
106
+
107
+ ```bash
108
+ fontfile=/usr/share/fonts/dejavu-sans-mono-fonts/DejaVuSansMono-Bold.ttf
109
+ output_video_file=/tmp/output_video.mp4
110
+
111
+ ffmpeg -f lavfi -i \
112
+ color=size=640x400:duration=10:rate=25:color=blue \
113
+ -vf "drawtext=fontfile=${fontfile}:fontsize=30:fontcolor=white:x=(w-text_w)/2:y=(h-text_h)/2:text='Frame %{frame_num}'" \
114
+ ${output_video_file}
115
+ ```
116
+
117
+ ## Installing TorchCodec
118
+ ### Installing CPU-only TorchCodec
119
+
120
+ 1. Install the latest stable version of PyTorch following the
121
+ [official instructions](https://pytorch.org/get-started/locally/). For other
122
+ versions, refer to the table below for compatibility between versions of
123
+ `torch` and `torchcodec`.
124
+
125
+ 2. Install FFmpeg, if it's not already installed. Linux distributions usually
126
+ come with FFmpeg pre-installed. TorchCodec supports all major FFmpeg versions
127
+ in [4, 7].
128
+
129
+ If FFmpeg is not already installed, or you need a more recent version, an
130
+ easy way to install it is to use `conda`:
131
+
132
+ ```bash
133
+ conda install "ffmpeg<8"
134
+ # or
135
+ conda install "ffmpeg<8" -c conda-forge
136
+ ```
137
+
138
+ 3. Install TorchCodec:
139
+
140
+ ```bash
141
+ pip install torchcodec
142
+ ```
143
+
144
+ The following table indicates the compatibility between versions of
145
+ `torchcodec`, `torch` and Python.
146
+
147
+ | `torchcodec` | `torch` | Python |
148
+ | ------------------ | ------------------ | ------------------- |
149
+ | `main` / `nightly` | `main` / `nightly` | `>=3.10`, `<=3.13` |
150
+ | `0.6` | `2.8` | `>=3.9`, `<=3.13` |
151
+ | `0.5` | `2.7` | `>=3.9`, `<=3.13` |
152
+ | `0.4` | `2.7` | `>=3.9`, `<=3.13` |
153
+ | `0.3` | `2.7` | `>=3.9`, `<=3.13` |
154
+ | `0.2` | `2.6` | `>=3.9`, `<=3.13` |
155
+ | `0.1` | `2.5` | `>=3.9`, `<=3.12` |
156
+ | `0.0.3` | `2.4` | `>=3.8`, `<=3.12` |
157
+
158
+ ### Installing CUDA-enabled TorchCodec
159
+
160
+ First, make sure you have a GPU that has NVDEC hardware that can decode the
161
+ format you want. Refer to Nvidia's GPU support matrix for more details
162
+ [here](https://developer.nvidia.com/video-encode-and-decode-gpu-support-matrix-new).
163
+
164
+ 1. Install PyTorch corresponding to your CUDA Toolkit using the
165
+ [official instructions](https://pytorch.org/get-started/locally/). You'll
166
+ need the `libnpp` and `libnvrtc` CUDA libraries, which are usually part of
167
+ the CUDA Toolkit.
168
+
169
+ 2. Install or compile FFmpeg with NVDEC support.
170
+ TorchCodec with CUDA should work with FFmpeg versions in [4, 7].
171
+
172
+ If FFmpeg is not already installed, or you need a more recent version, an
173
+ easy way to install it is to use `conda`:
174
+
175
+ ```bash
176
+ conda install "ffmpeg<8"
177
+ # or
178
+ conda install "ffmpeg<8" -c conda-forge
179
+ ```
180
+
181
+ If you are building FFmpeg from source you can follow Nvidia's guide to
182
+ configuring and installing FFmpeg with NVDEC support
183
+ [here](https://docs.nvidia.com/video-technologies/video-codec-sdk/12.0/ffmpeg-with-nvidia-gpu/index.html).
184
+
185
+ After installing FFmpeg make sure it has NVDEC support when you list the supported
186
+ decoders:
187
+
188
+ ```bash
189
+ ffmpeg -decoders | grep -i nvidia
190
+ # This should show a line like this:
191
+ # V..... h264_cuvid Nvidia CUVID H264 decoder (codec h264)
192
+ ```
193
+
194
+ To check that FFmpeg libraries work with NVDEC correctly you can decode a sample video:
195
+
196
+ ```bash
197
+ ffmpeg -hwaccel cuda -hwaccel_output_format cuda -i test/resources/nasa_13013.mp4 -f null -
198
+ ```
199
+
200
+ 3. Install TorchCodec by passing in an `--index-url` parameter that corresponds
201
+ to your CUDA Toolkit version, example:
202
+
203
+ ```bash
204
+ # This corresponds to CUDA Toolkit version 12.6. It should be the same one
205
+ # you used when you installed PyTorch (if you installed PyTorch with pip).
206
+ pip install torchcodec --index-url=https://download.pytorch.org/whl/cu126
207
+ ```
208
+
209
+ Note that without passing in the `--index-url` parameter, `pip` installs
210
+ the CPU-only version of TorchCodec.
211
+
212
+ ## Benchmark Results
213
+
214
+ The following was generated by running [our benchmark script](./benchmarks/decoders/generate_readme_data.py) on a lightly loaded 22-core machine with an Nvidia A100 with
215
+ 5 [NVDEC decoders](https://docs.nvidia.com/video-technologies/video-codec-sdk/12.1/nvdec-application-note/index.html#).
216
+
217
+ ![benchmark_results](./benchmarks/decoders/benchmark_readme_chart.png)
218
+
219
+ The top row is a [Mandelbrot](https://ffmpeg.org/ffmpeg-filters.html#mandelbrot) video
220
+ generated from FFmpeg that has a resolution of 1280x720 at 60 fps and is 120 seconds long.
221
+ The bottom row is a [promotional video from NASA](https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/NASAs_Most_Scientifically_Complex_Space_Observatory_Requires_Precision-MP4_small.mp4)
222
+ that has a resolution of 960x540 at 29.7 fps and is 206 seconds long. Both videos were
223
+ encoded with libx264 and yuv420p pixel format. All decoders, except for TorchVision, used FFmpeg 6.1.2. TorchVision used FFmpeg 4.2.2.
224
+
225
+ For TorchCodec, the "approx" label means that it was using [approximate mode](https://pytorch.org/torchcodec/stable/generated_examples/approximate_mode.html)
226
+ for seeking.
227
+
228
+ ## Contributing
229
+
230
+ We welcome contributions to TorchCodec! Please see our [contributing
231
+ guide](CONTRIBUTING.md) for more details.
232
+
233
+ ## License
234
+
235
+ TorchCodec is released under the [BSD 3 license](./LICENSE).
236
+
237
+ However, TorchCodec may be used with code not written by Meta which may be
238
+ distributed under different licenses.
239
+
240
+ For example, if you build TorchCodec with ENABLE_CUDA=1 or use the CUDA-enabled
241
+ release of torchcodec, please review CUDA's license here:
242
+ [Nvidia licenses](https://docs.nvidia.com/cuda/eula/index.html).
@@ -0,0 +1,67 @@
1
+ torchcodec/__init__.py,sha256=F9OpGp7psa4OKvSqaIsCL1FPGtBgqNyAd5Jn0_qUn9s,595
2
+ torchcodec/_frame.py,sha256=_LIIorsNn0nbcInGC9Mwh3Dwc3AuPDhxK4YIBFsfOww,5350
3
+ torchcodec/_internally_replaced_utils.py,sha256=5uYbY1l23FcfWSmLuwZb7j8mWXtMTTp--6sl9LQH28E,2422
4
+ torchcodec/libtorchcodec_core4.dll,sha256=7MSt1zHzCicjD3V0FwOOCl2FKfnMsx3BEvbYXDeMnj0,310784
5
+ torchcodec/libtorchcodec_core5.dll,sha256=_Yd4Nwu-uzLD9Bv-PxJCjsC1c1HG3shVCIPKyBL5vqc,310784
6
+ torchcodec/libtorchcodec_core6.dll,sha256=iwno1JkW5Nd-kQG9nuDNvkSj8Mb9E3mhFz_AKNwe9Sc,310784
7
+ torchcodec/libtorchcodec_core7.dll,sha256=NUUyMUsIHySzv5eLtzN4wYb5LAs7vK1q7QiJKcGy_Bs,310784
8
+ torchcodec/libtorchcodec_custom_ops4.dll,sha256=ZbnPajBWs_gCMbTVUvhfZKIcuCKcem4pgIvabGbjYsk,564224
9
+ torchcodec/libtorchcodec_custom_ops5.dll,sha256=m3HPCpWHOtCCt5XUGgwXkXVyvXmIaZyPgpGJ_0T98_k,564224
10
+ torchcodec/libtorchcodec_custom_ops6.dll,sha256=RlBxQsu0Wr5Ti7r180R6HOcjcnqg3XsR6hv4giUumUI,564224
11
+ torchcodec/libtorchcodec_custom_ops7.dll,sha256=SwtccchW6AECZEKZ87l-yY7MWmcrXlSxxHu46YZccXQ,564224
12
+ torchcodec/libtorchcodec_pybind_ops4.pyd,sha256=VQ9zcpbMyk9t1IoKwA_f3fyDAJd_v-Lg0DoNbEmR77s,203264
13
+ torchcodec/libtorchcodec_pybind_ops5.pyd,sha256=ZN4qH_rDgQ1ymyx1DYsB2L3TRj05mz5ehYTZUFoBaRc,203264
14
+ torchcodec/libtorchcodec_pybind_ops6.pyd,sha256=DekXV_7gAvElb3lZeo1ylKjBHfjcgyEEXIlMm6q8Vyo,203264
15
+ torchcodec/libtorchcodec_pybind_ops7.pyd,sha256=OT3Dcw8in1WXZe_AaIrJ-4DZ9A3SXuZTHMfBOI4ArV0,203264
16
+ torchcodec/version.py,sha256=vUzeNMn0u4ZIrdugClyruOvja4DvMzg12beg1CQBNpI,75
17
+ torchcodec/_core/AVIOContextHolder.cpp,sha256=ucOD1bGORgSzZfL5TxJtUVH2GAtXGVDylzWpvbJ2qJY,1628
18
+ torchcodec/_core/AVIOContextHolder.h,sha256=iYRap1vDdvK_4wylu3Ds9Dkn1k_vAdkbVJDlVni9uNY,2697
19
+ torchcodec/_core/AVIOFileLikeContext.cpp,sha256=bCMpEXhQPLWULWJRIPpNqzlio2eLE_EI7UoPuUgw70g,3450
20
+ torchcodec/_core/AVIOFileLikeContext.h,sha256=oJM9q9X3i1leplaBsU1vMW0gMt1a-xANHhgk5FID3zA,2130
21
+ torchcodec/_core/AVIOTensorContext.cpp,sha256=bNE33CeYuTsXbaZRo0Izs1pLJOQlYPtlX_ofgeBH9hM,4018
22
+ torchcodec/_core/AVIOTensorContext.h,sha256=kYO93vn1hVINRHRoP2BWzEKUoPl1WeI62cgz5y0T6OA,1106
23
+ torchcodec/_core/CMakeLists.txt,sha256=BOXfMdQxmo0WXIgFCXFYCnmYvrN7kYg2unGpAQvT7z0,10522
24
+ torchcodec/_core/Cache.h,sha256=TG3kE1i86edBR9LXsEl4nToMOOi8wPJXFJYkTbBiJdk,5210
25
+ torchcodec/_core/CpuDeviceInterface.cpp,sha256=I6aOFFaO_4hQc7EhjLAGQRWVyESUwc-J1armWlP11AU,10404
26
+ torchcodec/_core/CpuDeviceInterface.h,sha256=9GVKS5D7Zz7ijJKzzHFwBJAi4h-VZFeHBkVa1miPlGM,2149
27
+ torchcodec/_core/CudaDeviceInterface.cpp,sha256=mQvEeMI4EEzh0jDhMrEcKhUE2WLCpWGSVqZVH9awl4E,21782
28
+ torchcodec/_core/CudaDeviceInterface.h,sha256=jtUC8la5pbBPnCoy6koOOVR9BDdWWQiPfb00bFbxQW0,1064
29
+ torchcodec/_core/DeviceInterface.cpp,sha256=9tk2i_gwyxMWloQRB79qFaqbmntifU1en1Q-w9ltX3E,2310
30
+ torchcodec/_core/DeviceInterface.h,sha256=6IEa_lO3f5VW8i4gAZWn4Jm3_TggqqzNQA3ha3sxrDU,2078
31
+ torchcodec/_core/Encoder.cpp,sha256=VeWPOzzaD5Zlso1G7hc8AlqlS3khRsRoATgWPBElSQA,18555
32
+ torchcodec/_core/Encoder.h,sha256=Sd0Oz48THSzJfS3xetglB4fKjuqdOXWqvzI3h1U0HKI,6494
33
+ torchcodec/_core/FFMPEGCommon.cpp,sha256=W0ZLP-JjWs25fmAY1lTW64nfcOZvMqZ9LkUvGOXBv84,13526
34
+ torchcodec/_core/FFMPEGCommon.h,sha256=6jyWfUmz9jA9ysqehLdKLp5p5sXAtWOuNlafI9EFXqI,7525
35
+ torchcodec/_core/FilterGraph.cpp,sha256=khohAN1w4oWFlYsQTAqgBDtiwf6ygrI36jtdKACjue8,4857
36
+ torchcodec/_core/FilterGraph.h,sha256=BIqHSpjyM99MtnULHh79HRuFgc8cHXouLWcioyknaI0,1223
37
+ torchcodec/_core/Frame.cpp,sha256=xuPRBVL6yTAQqls-3xZ-YzjQXR0Z67Vnq3EQ-9okGF4,1130
38
+ torchcodec/_core/Frame.h,sha256=TKlYFdObFBO0YK5sWs1KziovDwgnCaQixm71qB0MpEg,5022
39
+ torchcodec/_core/Metadata.h,sha256=SFNyD4ey2rHmxbrjebHa6PUA-BoiytYrgi5nafNWlyI,2588
40
+ torchcodec/_core/SingleStreamDecoder.cpp,sha256=BqSXEy0_LECdtegKFa-W8Qz_CnavSUOt9en-pl8C1eQ,66958
41
+ torchcodec/_core/SingleStreamDecoder.h,sha256=Jn0i8mKppGd0y99e6Am2k5OeN2iqaWJD6-BvvEb-ub0,15594
42
+ torchcodec/_core/StreamOptions.h,sha256=Xo0TTIQZy5bk5UBZ3NLQeCxoOiDLJGm-BM2NiU_CMeY,1638
43
+ torchcodec/_core/ValidationUtils.cpp,sha256=PkBTLv_38eYG9ZUVbGijHSug3YewtD2FNiftfZ3q8S8,994
44
+ torchcodec/_core/ValidationUtils.h,sha256=XWSUL441nPNYRGCnnpul0iuWtUNgZXC_fcsxeb-oB6E,579
45
+ torchcodec/_core/__init__.py,sha256=pMKoJaSVKfOcUBM6UezF_GN0q2gUTaxQkDtFElDM_Qk,1070
46
+ torchcodec/_core/_metadata.py,sha256=LWlCQmJwJ66TjK_4bwtMARzSDVlid62dpsOVpDHk8NQ,13117
47
+ torchcodec/_core/custom_ops.cpp,sha256=4TpEaaVChJdbsyQN-hzCvsz8wr3o09VgYkwv2ZKO2ww,30231
48
+ torchcodec/_core/fetch_and_expose_non_gpl_ffmpeg_libs.cmake,sha256=rrXg44VfrlOJBnHBxx5odozGE9W70EFC-JM-X4iTYQ4,6900
49
+ torchcodec/_core/ops.py,sha256=9b56ImvKCwOjMKQRh6wrf7MgvsFhbnKDCkAoKFCqHTE,16623
50
+ torchcodec/_core/pybind_ops.cpp,sha256=1D8siVAiQUqWyl-I785BBeZMQs3YGSbf6R07ZrMrpPg,3092
51
+ torchcodec/_samplers/__init__.py,sha256=qWg69NiVSnMcYi68RTVFGvvp9bh1jkNha5u7q3VMRLw,258
52
+ torchcodec/_samplers/video_clip_sampler.py,sha256=ChIbqJ461_8aZ37_hzkXDrweibjnQHpvYunv5GWdfFo,18975
53
+ torchcodec/decoders/__init__.py,sha256=GeONirXzwswy5iJK61Yuj6oyfZ7GsEJp8EP1tACSWuE,414
54
+ torchcodec/decoders/_audio_decoder.py,sha256=dWbtH7OLIgcbiFQIvP9qpATvIC7XeS67P9uGM4dmsRE,7580
55
+ torchcodec/decoders/_decoder_utils.py,sha256=yM7izTYP72iY6mr3P30sAvd2wKZlWo-eopy3KVaZy8A,2089
56
+ torchcodec/decoders/_video_decoder.py,sha256=jH4KzwkYNIBk0G0k-GlgdYBz_laVojf8VgC8TOlGZxU,19012
57
+ torchcodec/encoders/__init__.py,sha256=Jw7dTzlTbzTaD_ZMrgSxUWEMs7Pn0cyxKm-_beuBIGo,50
58
+ torchcodec/encoders/_audio_encoder.py,sha256=VMZs6VTQDIMcsBxWtQEUpwq_ruILrrvNVktkswajaiM,6312
59
+ torchcodec/samplers/__init__.py,sha256=pijGrxa71UjRYHh1sRqLzwpsVXYAmHe24kMbC-rZW_M,159
60
+ torchcodec/samplers/_common.py,sha256=okgK1nVKA6mE9Li1Jex1GbzN7m26S3LtaREeF1AX-dA,2823
61
+ torchcodec/samplers/_index_based.py,sha256=8iwlYZrqTdP7CnnBbwd3PsJhQNkF-YHH802AHAEoG-g,11695
62
+ torchcodec/samplers/_time_based.py,sha256=xMltTGyaSxyba44Q5CJvFD2nyYIZKul-1sV6D2p-p1w,14651
63
+ torchcodec-0.7.0.dist-info/licenses/LICENSE,sha256=ry9-8rOEo-Fe1s7VhuGoMWcBx72CjMauQWTw26jlrI8,1502
64
+ torchcodec-0.7.0.dist-info/METADATA,sha256=Mlio9SootCV0TIuRvRKcfKkXWUEc6px3pUlhO1T0nrQ,9677
65
+ torchcodec-0.7.0.dist-info/WHEEL,sha256=pkI-s5KKCTCXRcuamRCpmUHK9lBRiVf1mC9_VUZSXgc,101
66
+ torchcodec-0.7.0.dist-info/top_level.txt,sha256=S1IZq2_jNQE_RDGwxNunVF8S1RCMXmWdAAQjLXBdu2g,21
67
+ torchcodec-0.7.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (78.1.1)
3
+ Root-Is-Purelib: false
4
+ Tag: cp311-cp311-win_amd64
5
+