torchcodec-0.8.0-cp313-cp313-macosx_12_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of torchcodec might be problematic.

Files changed (82)
  1. torchcodec/.dylibs/libc++.1.0.dylib +0 -0
  2. torchcodec/.dylibs/libpython3.13.dylib +0 -0
  3. torchcodec/__init__.py +16 -0
  4. torchcodec/_core/AVIOContextHolder.cpp +60 -0
  5. torchcodec/_core/AVIOContextHolder.h +64 -0
  6. torchcodec/_core/AVIOFileLikeContext.cpp +98 -0
  7. torchcodec/_core/AVIOFileLikeContext.h +55 -0
  8. torchcodec/_core/AVIOTensorContext.cpp +123 -0
  9. torchcodec/_core/AVIOTensorContext.h +43 -0
  10. torchcodec/_core/BetaCudaDeviceInterface.cpp +636 -0
  11. torchcodec/_core/BetaCudaDeviceInterface.h +191 -0
  12. torchcodec/_core/CMakeLists.txt +325 -0
  13. torchcodec/_core/CUDACommon.cpp +315 -0
  14. torchcodec/_core/CUDACommon.h +46 -0
  15. torchcodec/_core/Cache.h +138 -0
  16. torchcodec/_core/CpuDeviceInterface.cpp +347 -0
  17. torchcodec/_core/CpuDeviceInterface.h +132 -0
  18. torchcodec/_core/CudaDeviceInterface.cpp +357 -0
  19. torchcodec/_core/CudaDeviceInterface.h +64 -0
  20. torchcodec/_core/DeviceInterface.cpp +117 -0
  21. torchcodec/_core/DeviceInterface.h +148 -0
  22. torchcodec/_core/Encoder.cpp +807 -0
  23. torchcodec/_core/Encoder.h +173 -0
  24. torchcodec/_core/FFMPEGCommon.cpp +608 -0
  25. torchcodec/_core/FFMPEGCommon.h +245 -0
  26. torchcodec/_core/FilterGraph.cpp +149 -0
  27. torchcodec/_core/FilterGraph.h +59 -0
  28. torchcodec/_core/Frame.cpp +42 -0
  29. torchcodec/_core/Frame.h +72 -0
  30. torchcodec/_core/Metadata.h +72 -0
  31. torchcodec/_core/NVDECCache.cpp +70 -0
  32. torchcodec/_core/NVDECCache.h +104 -0
  33. torchcodec/_core/SingleStreamDecoder.cpp +1719 -0
  34. torchcodec/_core/SingleStreamDecoder.h +405 -0
  35. torchcodec/_core/StreamOptions.h +63 -0
  36. torchcodec/_core/Transform.cpp +60 -0
  37. torchcodec/_core/Transform.h +59 -0
  38. torchcodec/_core/ValidationUtils.cpp +35 -0
  39. torchcodec/_core/ValidationUtils.h +21 -0
  40. torchcodec/_core/__init__.py +41 -0
  41. torchcodec/_core/_metadata.py +317 -0
  42. torchcodec/_core/custom_ops.cpp +875 -0
  43. torchcodec/_core/fetch_and_expose_non_gpl_ffmpeg_libs.cmake +360 -0
  44. torchcodec/_core/nvcuvid_include/cuviddec.h +1374 -0
  45. torchcodec/_core/nvcuvid_include/nvcuvid.h +610 -0
  46. torchcodec/_core/ops.py +498 -0
  47. torchcodec/_core/pybind_ops.cpp +50 -0
  48. torchcodec/_frame.py +145 -0
  49. torchcodec/_internally_replaced_utils.py +67 -0
  50. torchcodec/_samplers/__init__.py +7 -0
  51. torchcodec/_samplers/video_clip_sampler.py +418 -0
  52. torchcodec/decoders/__init__.py +12 -0
  53. torchcodec/decoders/_audio_decoder.py +177 -0
  54. torchcodec/decoders/_decoder_utils.py +112 -0
  55. torchcodec/decoders/_video_decoder.py +500 -0
  56. torchcodec/encoders/__init__.py +1 -0
  57. torchcodec/encoders/_audio_encoder.py +150 -0
  58. torchcodec/libtorchcodec_core4.dylib +0 -0
  59. torchcodec/libtorchcodec_core5.dylib +0 -0
  60. torchcodec/libtorchcodec_core6.dylib +0 -0
  61. torchcodec/libtorchcodec_core7.dylib +0 -0
  62. torchcodec/libtorchcodec_core8.dylib +0 -0
  63. torchcodec/libtorchcodec_custom_ops4.dylib +0 -0
  64. torchcodec/libtorchcodec_custom_ops5.dylib +0 -0
  65. torchcodec/libtorchcodec_custom_ops6.dylib +0 -0
  66. torchcodec/libtorchcodec_custom_ops7.dylib +0 -0
  67. torchcodec/libtorchcodec_custom_ops8.dylib +0 -0
  68. torchcodec/libtorchcodec_pybind_ops4.so +0 -0
  69. torchcodec/libtorchcodec_pybind_ops5.so +0 -0
  70. torchcodec/libtorchcodec_pybind_ops6.so +0 -0
  71. torchcodec/libtorchcodec_pybind_ops7.so +0 -0
  72. torchcodec/libtorchcodec_pybind_ops8.so +0 -0
  73. torchcodec/samplers/__init__.py +2 -0
  74. torchcodec/samplers/_common.py +84 -0
  75. torchcodec/samplers/_index_based.py +287 -0
  76. torchcodec/samplers/_time_based.py +358 -0
  77. torchcodec/version.py +2 -0
  78. torchcodec-0.8.0.dist-info/METADATA +253 -0
  79. torchcodec-0.8.0.dist-info/RECORD +82 -0
  80. torchcodec-0.8.0.dist-info/WHEEL +5 -0
  81. torchcodec-0.8.0.dist-info/licenses/LICENSE +28 -0
  82. torchcodec-0.8.0.dist-info/top_level.txt +2 -0
torchcodec/_core/ops.py ADDED
@@ -0,0 +1,498 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the BSD-style license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ import io
+ import json
+ import warnings
+ from types import ModuleType
+ from typing import List, Optional, Tuple, Union
+
+ import torch
+ from torch.library import get_ctx, register_fake
+
+ from torchcodec._internally_replaced_utils import (  # @manual=//pytorch/torchcodec/src:internally_replaced_utils
+     _get_extension_path,
+     _get_pybind_ops_module_name,
+     _load_pybind11_module,
+ )
+
+ _pybind_ops: Optional[ModuleType] = None
+
+
+ def load_torchcodec_shared_libraries():
+     # Successively try to load the shared libraries for each version of FFmpeg
+     # that we support. We always start with the highest version, working our way
+     # down to the lowest version. Once we can load ALL shared libraries for a
+     # version of FFmpeg, we have succeeded and we stop.
+     #
+     # Note that we use two different methods for loading shared libraries:
+     #
+     # 1. torch.ops.load_library(): For PyTorch custom ops and the C++ only
+     #    libraries the custom ops depend on. Loading libraries through PyTorch
+     #    registers the custom ops with PyTorch's runtime and the ops can be
+     #    accessed through torch.ops after loading.
+     #
+     # 2. importlib: For pybind11 modules. We load them dynamically, rather
+     #    than using a plain import statement. A plain import statement only
+     #    works when the module name and file name match exactly. Our shared
+     #    libraries do not meet those conditions.
+
+     exceptions = []
+     for ffmpeg_major_version in (8, 7, 6, 5, 4):
+         pybind_ops_module_name = _get_pybind_ops_module_name(ffmpeg_major_version)
+         decoder_library_name = f"libtorchcodec_core{ffmpeg_major_version}"
+         custom_ops_library_name = f"libtorchcodec_custom_ops{ffmpeg_major_version}"
+         pybind_ops_library_name = f"libtorchcodec_pybind_ops{ffmpeg_major_version}"
+         try:
+             torch.ops.load_library(_get_extension_path(decoder_library_name))
+             torch.ops.load_library(_get_extension_path(custom_ops_library_name))
+
+             pybind_ops_library_path = _get_extension_path(pybind_ops_library_name)
+             global _pybind_ops
+             _pybind_ops = _load_pybind11_module(
+                 pybind_ops_module_name, pybind_ops_library_path
+             )
+             return
+         except Exception as e:
+             # TODO: recording and reporting exceptions this way is OK for now as
+             # it's just for debugging, but we should probably handle that via a
+             # proper logging mechanism.
+             exceptions.append((ffmpeg_major_version, e))
+
+     traceback = (
+         "\n[start of libtorchcodec loading traceback]\n"
+         + "\n".join(f"FFmpeg version {v}: {str(e)}" for v, e in exceptions)
+         + "\n[end of libtorchcodec loading traceback]."
+     )
+     raise RuntimeError(
+         f"""Could not load libtorchcodec. Likely causes:
+           1. FFmpeg is not properly installed in your environment. We support
+              versions 4, 5, 6, 7 and 8.
+           2. The PyTorch version ({torch.__version__}) is not compatible with
+              this version of TorchCodec. Refer to the version compatibility
+              table:
+              https://github.com/pytorch/torchcodec?tab=readme-ov-file#installing-torchcodec.
+           3. Another runtime dependency is missing; see the exceptions below.
+         The following exceptions were raised as we tried to load libtorchcodec:
+         """
+         f"{traceback}"
+     )
+
+
+ load_torchcodec_shared_libraries()
+
+
+ # Note: We use disallow_in_graph because PyTorch does constant propagation of
+ # factory functions.
+ create_from_file = torch._dynamo.disallow_in_graph(
+     torch.ops.torchcodec_ns.create_from_file.default
+ )
+ encode_audio_to_file = torch._dynamo.disallow_in_graph(
+     torch.ops.torchcodec_ns.encode_audio_to_file.default
+ )
+ encode_video_to_file = torch._dynamo.disallow_in_graph(
+     torch.ops.torchcodec_ns.encode_video_to_file.default
+ )
+ encode_audio_to_tensor = torch._dynamo.disallow_in_graph(
+     torch.ops.torchcodec_ns.encode_audio_to_tensor.default
+ )
+ _encode_audio_to_file_like = torch._dynamo.disallow_in_graph(
+     torch.ops.torchcodec_ns._encode_audio_to_file_like.default
+ )
+ create_from_tensor = torch._dynamo.disallow_in_graph(
+     torch.ops.torchcodec_ns.create_from_tensor.default
+ )
+ _create_from_file_like = torch._dynamo.disallow_in_graph(
+     torch.ops.torchcodec_ns._create_from_file_like.default
+ )
+ add_video_stream = torch.ops.torchcodec_ns.add_video_stream.default
+ _add_video_stream = torch.ops.torchcodec_ns._add_video_stream.default
+ add_audio_stream = torch.ops.torchcodec_ns.add_audio_stream.default
+ seek_to_pts = torch.ops.torchcodec_ns.seek_to_pts.default
+ get_next_frame = torch.ops.torchcodec_ns.get_next_frame.default
+ get_frame_at_pts = torch.ops.torchcodec_ns.get_frame_at_pts.default
+ get_frame_at_index = torch.ops.torchcodec_ns.get_frame_at_index.default
+ _get_frames_at_indices_tensor_input = (
+     torch.ops.torchcodec_ns.get_frames_at_indices.default
+ )
+ _get_frames_by_pts_tensor_input = torch.ops.torchcodec_ns.get_frames_by_pts.default
+ get_frames_in_range = torch.ops.torchcodec_ns.get_frames_in_range.default
+ get_frames_by_pts_in_range = torch.ops.torchcodec_ns.get_frames_by_pts_in_range.default
+ get_frames_by_pts_in_range_audio = (
+     torch.ops.torchcodec_ns.get_frames_by_pts_in_range_audio.default
+ )
+ get_json_metadata = torch.ops.torchcodec_ns.get_json_metadata.default
+ _test_frame_pts_equality = torch.ops.torchcodec_ns._test_frame_pts_equality.default
+ _get_container_json_metadata = (
+     torch.ops.torchcodec_ns.get_container_json_metadata.default
+ )
+ _get_key_frame_indices = torch.ops.torchcodec_ns._get_key_frame_indices.default
+ scan_all_streams_to_update_metadata = (
+     torch.ops.torchcodec_ns.scan_all_streams_to_update_metadata.default
+ )
+ _get_stream_json_metadata = torch.ops.torchcodec_ns.get_stream_json_metadata.default
+ _get_json_ffmpeg_library_versions = (
+     torch.ops.torchcodec_ns._get_json_ffmpeg_library_versions.default
+ )
+
+
+ # =============================
+ # Functions not related to custom ops, but implemented similarly to the C++ ops
+ # =============================
+ def create_from_bytes(
+     video_bytes: bytes, seek_mode: Optional[str] = None
+ ) -> torch.Tensor:
+     with warnings.catch_warnings():
+         # Ignore warning stating that the underlying video_bytes buffer is
+         # non-writable.
+         warnings.filterwarnings("ignore", category=UserWarning)
+         buffer = torch.frombuffer(video_bytes, dtype=torch.uint8)
+     return create_from_tensor(buffer, seek_mode)
+
+
+ def create_from_file_like(
+     file_like: Union[io.RawIOBase, io.BufferedReader], seek_mode: Optional[str] = None
+ ) -> torch.Tensor:
+     assert _pybind_ops is not None
+     return _create_from_file_like(
+         _pybind_ops.create_file_like_context(
+             file_like, False  # False means not for writing
+         ),
+         seek_mode,
+     )
+
+
+ def encode_audio_to_file_like(
+     samples: torch.Tensor,
+     sample_rate: int,
+     format: str,
+     file_like: Union[io.RawIOBase, io.BufferedIOBase],
+     bit_rate: Optional[int] = None,
+     num_channels: Optional[int] = None,
+     desired_sample_rate: Optional[int] = None,
+ ) -> None:
+     """Encode audio samples to a file-like object.
+
+     Args:
+         samples: Audio samples tensor.
+         sample_rate: Sample rate in Hz.
+         format: Audio format (e.g., "wav", "mp3", "flac").
+         file_like: File-like object that supports write() and seek() methods.
+         bit_rate: Optional bit rate for encoding.
+         num_channels: Optional number of output channels.
+         desired_sample_rate: Optional desired sample rate for the output.
+     """
+     assert _pybind_ops is not None
+
+     if samples.dtype != torch.float32:
+         raise ValueError(f"samples must have dtype torch.float32, got {samples.dtype}")
+
+     _encode_audio_to_file_like(
+         samples,
+         sample_rate,
+         format,
+         _pybind_ops.create_file_like_context(file_like, True),  # True means for writing
+         bit_rate,
+         num_channels,
+         desired_sample_rate,
+     )
+
+
+ def get_frames_at_indices(
+     decoder: torch.Tensor, *, frame_indices: Union[torch.Tensor, list[int]]
+ ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+     if isinstance(frame_indices, torch.Tensor):
+         # Ensure frame_indices is the correct dtype (int64)
+         frame_indices = frame_indices.to(torch.int64)
+     else:
+         # Convert list to tensor for dispatch
+         frame_indices = torch.tensor(frame_indices)
+     return _get_frames_at_indices_tensor_input(decoder, frame_indices=frame_indices)
+
+
+ def get_frames_by_pts(
+     decoder: torch.Tensor, *, timestamps: Union[torch.Tensor, list[float]]
+ ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+     if isinstance(timestamps, torch.Tensor):
+         # Ensure timestamps is the correct dtype (float64)
+         timestamps = timestamps.to(torch.float64)
+     else:
+         # Convert list to tensor for dispatch
+         try:
+             timestamps = torch.tensor(timestamps, dtype=torch.float64)
+         except Exception as e:
+             raise ValueError("Couldn't convert timestamps input to a tensor") from e
+     return _get_frames_by_pts_tensor_input(decoder, timestamps=timestamps)
+
+
+ # ==============================
+ # Abstract impl for the operators. Needed by torch.compile.
+ # ==============================
+ @register_fake("torchcodec_ns::create_from_file")
+ def create_from_file_abstract(filename: str, seek_mode: Optional[str]) -> torch.Tensor:
+     return torch.empty([], dtype=torch.long)
+
+
+ @register_fake("torchcodec_ns::_create_from_file_like")
+ def _create_from_file_like_abstract(
+     file_like: int, seek_mode: Optional[str]
+ ) -> torch.Tensor:
+     return torch.empty([], dtype=torch.long)
+
+
+ @register_fake("torchcodec_ns::encode_audio_to_file")
+ def encode_audio_to_file_abstract(
+     samples: torch.Tensor,
+     sample_rate: int,
+     filename: str,
+     bit_rate: Optional[int] = None,
+     num_channels: Optional[int] = None,
+     desired_sample_rate: Optional[int] = None,
+ ) -> None:
+     return
+
+
+ @register_fake("torchcodec_ns::encode_video_to_file")
+ def encode_video_to_file_abstract(
+     frames: torch.Tensor,
+     frame_rate: int,
+     filename: str,
+     crf: Optional[int] = None,
+ ) -> None:
+     return
+
+
+ @register_fake("torchcodec_ns::encode_audio_to_tensor")
+ def encode_audio_to_tensor_abstract(
+     samples: torch.Tensor,
+     sample_rate: int,
+     format: str,
+     bit_rate: Optional[int] = None,
+     num_channels: Optional[int] = None,
+     desired_sample_rate: Optional[int] = None,
+ ) -> torch.Tensor:
+     return torch.empty([], dtype=torch.long)
+
+
+ @register_fake("torchcodec_ns::_encode_audio_to_file_like")
+ def _encode_audio_to_file_like_abstract(
+     samples: torch.Tensor,
+     sample_rate: int,
+     format: str,
+     file_like_context: int,
+     bit_rate: Optional[int] = None,
+     num_channels: Optional[int] = None,
+     desired_sample_rate: Optional[int] = None,
+ ) -> None:
+     return
+
+
+ @register_fake("torchcodec_ns::create_from_tensor")
+ def create_from_tensor_abstract(
+     video_tensor: torch.Tensor, seek_mode: Optional[str]
+ ) -> torch.Tensor:
+     return torch.empty([], dtype=torch.long)
+
+
+ @register_fake("torchcodec_ns::_add_video_stream")
+ def _add_video_stream_abstract(
+     decoder: torch.Tensor,
+     *,
+     num_threads: Optional[int] = None,
+     dimension_order: Optional[str] = None,
+     stream_index: Optional[int] = None,
+     device: str = "cpu",
+     device_variant: str = "default",
+     transform_specs: str = "",
+     custom_frame_mappings: Optional[
+         tuple[torch.Tensor, torch.Tensor, torch.Tensor]
+     ] = None,
+     color_conversion_library: Optional[str] = None,
+ ) -> None:
+     return
+
+
+ @register_fake("torchcodec_ns::add_video_stream")
+ def add_video_stream_abstract(
+     decoder: torch.Tensor,
+     *,
+     num_threads: Optional[int] = None,
+     dimension_order: Optional[str] = None,
+     stream_index: Optional[int] = None,
+     device: str = "cpu",
+     device_variant: str = "default",
+     transform_specs: str = "",
+     custom_frame_mappings: Optional[
+         tuple[torch.Tensor, torch.Tensor, torch.Tensor]
+     ] = None,
+ ) -> None:
+     return
+
+
+ @register_fake("torchcodec_ns::add_audio_stream")
+ def add_audio_stream_abstract(
+     decoder: torch.Tensor,
+     *,
+     stream_index: Optional[int] = None,
+     sample_rate: Optional[int] = None,
+     num_channels: Optional[int] = None,
+ ) -> None:
+     return
+
+
+ @register_fake("torchcodec_ns::seek_to_pts")
+ def seek_abstract(decoder: torch.Tensor, seconds: float) -> None:
+     return
+
+
+ @register_fake("torchcodec_ns::get_next_frame")
+ def get_next_frame_abstract(
+     decoder: torch.Tensor,
+ ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+     # Images are 3 dimensions: height, width, channels.
+     # The exact permutation depends on the constructor options passed in.
+     image_size = [get_ctx().new_dynamic_size() for _ in range(3)]
+     return (
+         torch.empty(image_size),
+         torch.empty([], dtype=torch.float),
+         torch.empty([], dtype=torch.float),
+     )
+
+
+ @register_fake("torchcodec_ns::get_frame_at_pts")
+ def get_frame_at_pts_abstract(
+     decoder: torch.Tensor, seconds: float
+ ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+     image_size = [get_ctx().new_dynamic_size() for _ in range(3)]
+     return (
+         torch.empty(image_size),
+         torch.empty([], dtype=torch.float),
+         torch.empty([], dtype=torch.float),
+     )
+
+
+ @register_fake("torchcodec_ns::get_frames_by_pts")
+ def get_frames_by_pts_abstract(
+     decoder: torch.Tensor,
+     *,
+     timestamps: Union[torch.Tensor, List[float]],
+ ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+     image_size = [get_ctx().new_dynamic_size() for _ in range(4)]
+     return (
+         torch.empty(image_size),
+         torch.empty([], dtype=torch.float),
+         torch.empty([], dtype=torch.float),
+     )
+
+
+ @register_fake("torchcodec_ns::get_frame_at_index")
+ def get_frame_at_index_abstract(
+     decoder: torch.Tensor, *, frame_index: int
+ ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+     image_size = [get_ctx().new_dynamic_size() for _ in range(3)]
+     return (
+         torch.empty(image_size),
+         torch.empty([], dtype=torch.float),
+         torch.empty([], dtype=torch.float),
+     )
+
+
+ @register_fake("torchcodec_ns::get_frames_at_indices")
+ def get_frames_at_indices_abstract(
+     decoder: torch.Tensor, *, frame_indices: Union[torch.Tensor, List[int]]
+ ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+     image_size = [get_ctx().new_dynamic_size() for _ in range(4)]
+     return (
+         torch.empty(image_size),
+         torch.empty([], dtype=torch.float),
+         torch.empty([], dtype=torch.float),
+     )
+
+
+ @register_fake("torchcodec_ns::get_frames_in_range")
+ def get_frames_in_range_abstract(
+     decoder: torch.Tensor,
+     *,
+     start: int,
+     stop: int,
+     step: Optional[int] = None,
+ ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+     image_size = [get_ctx().new_dynamic_size() for _ in range(4)]
+     return (
+         torch.empty(image_size),
+         torch.empty([], dtype=torch.float),
+         torch.empty([], dtype=torch.float),
+     )
+
+
+ @register_fake("torchcodec_ns::get_frames_by_pts_in_range")
+ def get_frames_by_pts_in_range_abstract(
+     decoder: torch.Tensor,
+     *,
+     start_seconds: float,
+     stop_seconds: float,
+ ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+     image_size = [get_ctx().new_dynamic_size() for _ in range(4)]
+     return (
+         torch.empty(image_size),
+         torch.empty([], dtype=torch.float),
+         torch.empty([], dtype=torch.float),
+     )
+
+
+ @register_fake("torchcodec_ns::get_frames_by_pts_in_range_audio")
+ def get_frames_by_pts_in_range_audio_abstract(
+     decoder: torch.Tensor,
+     *,
+     start_seconds: float,
+     stop_seconds: Optional[float] = None,
+ ) -> Tuple[torch.Tensor, torch.Tensor]:
+     image_size = [get_ctx().new_dynamic_size() for _ in range(4)]
+     return (torch.empty(image_size), torch.empty([], dtype=torch.float))
+
+
+ @register_fake("torchcodec_ns::_get_key_frame_indices")
+ def get_key_frame_indices_abstract(decoder: torch.Tensor) -> torch.Tensor:
+     return torch.empty([], dtype=torch.int)
+
+
+ @register_fake("torchcodec_ns::get_json_metadata")
+ def get_json_metadata_abstract(decoder: torch.Tensor) -> str:
+     return ""
+
+
+ @register_fake("torchcodec_ns::get_container_json_metadata")
+ def get_container_json_metadata_abstract(decoder: torch.Tensor) -> str:
+     return ""
+
+
+ @register_fake("torchcodec_ns::get_stream_json_metadata")
+ def get_stream_json_metadata_abstract(decoder: torch.Tensor, stream_idx: int) -> str:
+     return ""
+
+
+ @register_fake("torchcodec_ns::_test_frame_pts_equality")
+ def _test_frame_pts_equality_abstract(
+     decoder: torch.Tensor,
+     *,
+     frame_index: int,
+     pts_seconds_to_test: float,
+ ) -> bool:
+     return False
+
+
+ @register_fake("torchcodec_ns::_get_json_ffmpeg_library_versions")
+ def _get_json_ffmpeg_library_versions_abstract() -> str:
+     return ""
+
+
+ @register_fake("torchcodec_ns::scan_all_streams_to_update_metadata")
+ def scan_all_streams_to_update_metadata_abstract(decoder: torch.Tensor) -> None:
+     return
+
+
+ def get_ffmpeg_library_versions():
+     versions_json = _get_json_ffmpeg_library_versions()
+     return json.loads(versions_json)
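
For orientation, here is a minimal sketch of driving these low-level ops directly, mirroring the wrappers above. The "video.mp4" path is a placeholder; the public classes in torchcodec.decoders are the supported entry point and wrap these same ops.

    import json
    from torchcodec._core import ops

    decoder = ops.create_from_file("video.mp4", None)  # opaque decoder handle (stored in a tensor)
    ops.add_video_stream(decoder)  # decodes the best video stream by default
    metadata = json.loads(ops.get_json_metadata(decoder))
    ops.seek_to_pts(decoder, 0.0)
    frame, pts_seconds, duration_seconds = ops.get_next_frame(decoder)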
torchcodec/_core/pybind_ops.cpp ADDED
@@ -0,0 +1,50 @@
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
+ // All rights reserved.
+ //
+ // This source code is licensed under the BSD-style license found in the
+ // LICENSE file in the root directory of this source tree.
+
+ #include <pybind11/pybind11.h>
+ #include <pybind11/stl.h>
+ #include <cstdint>
+
+ #include "src/torchcodec/_core/AVIOFileLikeContext.h"
+
+ namespace py = pybind11;
+
+ namespace facebook::torchcodec {
+
+ // Note: It's not immediately obvious why we need both custom_ops.cpp and
+ // pybind_ops.cpp. We do all other Python-to-C++ bridging in custom_ops.cpp,
+ // and that even depends on pybind11, so why have an explicit pybind-only
+ // file?
+ //
+ // The reason is that we want to accept OWNERSHIP of a file-like object from
+ // the Python side. In order to do that, we need a proper py::object. For raw
+ // bytes, we can launder them through a tensor on the custom_ops.cpp side, but
+ // we can't launder a proper Python object through a tensor. Custom ops can't
+ // accept a proper Python object through py::object, so we have to use
+ // pybind11 directly here.
+ //
+ // TODO: Investigate if we can do something better here. See:
+ //   https://github.com/pytorch/torchcodec/issues/896
+ // The short version is that we're laundering a pointer through an int: the
+ // Python side forwards that int to the decoder creation functions in
+ // custom_ops.cpp, and we cast it back into a pointer on that side. We want to
+ // investigate whether we can do something cleaner by defining proper pybind
+ // objects.
+ int64_t create_file_like_context(py::object file_like, bool is_for_writing) {
+   AVIOFileLikeContext* context =
+       new AVIOFileLikeContext(file_like, is_for_writing);
+   return reinterpret_cast<int64_t>(context);
+ }
+
+ #ifndef PYBIND_OPS_MODULE_NAME
+ #error PYBIND_OPS_MODULE_NAME must be defined!
+ #endif
+
+ PYBIND11_MODULE(PYBIND_OPS_MODULE_NAME, m) {
+   m.def("create_file_like_context", &create_file_like_context);
+ }
+
+ } // namespace facebook::torchcodec
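
To make the pointer laundering concrete, this is roughly how the Python side consumes this module, via the create_from_file_like wrapper from ops.py above (a sketch; the file path is a placeholder). create_file_like_context returns the new AVIOFileLikeContext pointer as a plain int, which the _create_from_file_like custom op casts back to a pointer in custom_ops.cpp.

    import io
    from torchcodec._core import ops

    with open("video.mp4", "rb") as f:  # placeholder path
        file_like = io.BytesIO(f.read())  # any object with read() and seek() works

    # Internally: _pybind_ops.create_file_like_context(file_like, False) -> int,
    # and that int is forwarded to the _create_from_file_like custom op.
    decoder = ops.create_from_file_like(file_like, seek_mode=None)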
torchcodec/_frame.py ADDED
@@ -0,0 +1,145 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the BSD-style license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ import dataclasses
+ from dataclasses import dataclass
+ from typing import Iterable, Iterator, Union
+
+ from torch import Tensor
+
+
+ def _frame_repr(self):
+     # Utility to replace __repr__ method of dataclasses below. This prints the
+     # shape of the .data tensor rather than printing the (potentially very long)
+     # data tensor itself.
+     s = self.__class__.__name__ + ":\n"
+     spaces = "  "
+     for field in dataclasses.fields(self):
+         field_name = field.name
+         field_val = getattr(self, field_name)
+         if field_name == "data":
+             field_name = "data (shape)"
+             field_val = field_val.shape
+         s += f"{spaces}{field_name}: {field_val}\n"
+     return s
+
+
+ @dataclass
+ class Frame(Iterable):
+     """A single video frame with associated metadata."""
+
+     data: Tensor
+     """The frame data (3-D ``torch.Tensor``)."""
+     pts_seconds: float
+     """The :term:`pts` of the frame, in seconds (float)."""
+     duration_seconds: float
+     """The duration of the frame, in seconds (float)."""
+
+     def __post_init__(self):
+         # This is called after __init__() when a Frame is created. We can run
+         # input validation checks here.
+         if not self.data.ndim == 3:
+             raise ValueError(f"data must be 3-dimensional, got {self.data.shape = }")
+         self.pts_seconds = float(self.pts_seconds)
+         self.duration_seconds = float(self.duration_seconds)
+
+     def __iter__(self) -> Iterator[Union[Tensor, float]]:
+         for field in dataclasses.fields(self):
+             yield getattr(self, field.name)
+
+     def __repr__(self):
+         return _frame_repr(self)
+
+
+ @dataclass
+ class FrameBatch(Iterable):
+     """Multiple video frames with associated metadata.
+
+     The ``data`` tensor is typically 4D for sequences of frames (NHWC or NCHW),
+     or 5D for sequences of clips, as returned by the :ref:`samplers
+     <sphx_glr_generated_examples_decoding_sampling.py>`. When ``data`` is 4D (resp. 5D)
+     the ``pts_seconds`` and ``duration_seconds`` tensors are 1D (resp. 2D).
+
+     .. note::
+         The ``pts_seconds`` and ``duration_seconds`` Tensors are always returned
+         on CPU, even if ``data`` is on GPU.
+     """
+
+     data: Tensor
+     """The frames data (``torch.Tensor`` of uint8)."""
+     pts_seconds: Tensor
+     """The :term:`pts` of the frame, in seconds (``torch.Tensor`` of floats)."""
+     duration_seconds: Tensor
+     """The duration of the frame, in seconds (``torch.Tensor`` of floats)."""
+
+     def __post_init__(self):
+         # This is called after __init__() when a FrameBatch is created. We can
+         # run input validation checks here.
+         if self.data.ndim < 3:
+             raise ValueError(
+                 f"data must be at least 3-dimensional, got {self.data.shape = }"
+             )
+
+         leading_dims = self.data.shape[:-3]
+         if not (leading_dims == self.pts_seconds.shape == self.duration_seconds.shape):
+             raise ValueError(
+                 "Tried to create a FrameBatch but the leading dimensions of the inputs do not match. "
+                 f"Got {self.data.shape = } so we expected the shape of pts_seconds and "
+                 f"duration_seconds to be {leading_dims = }, but got "
+                 f"{self.pts_seconds.shape = } and {self.duration_seconds.shape = }."
+             )
+
+     def __iter__(self) -> Iterator["FrameBatch"]:
+         for data, pts_seconds, duration_seconds in zip(
+             self.data, self.pts_seconds, self.duration_seconds
+         ):
+             yield FrameBatch(
+                 data=data,
+                 pts_seconds=pts_seconds,
+                 duration_seconds=duration_seconds,
+             )
+
+     def __getitem__(self, key) -> "FrameBatch":
+         return FrameBatch(
+             data=self.data[key],
+             pts_seconds=self.pts_seconds[key],
+             duration_seconds=self.duration_seconds[key],
+         )
+
+     def __len__(self):
+         return len(self.data)
+
+     def __repr__(self):
+         return _frame_repr(self)
+
+
+ @dataclass
+ class AudioSamples(Iterable):
+     """Audio samples with associated metadata."""
+
+     data: Tensor
+     """The sample data (``torch.Tensor`` of float in [-1, 1], shape is ``(num_channels, num_samples)``)."""
+     pts_seconds: float
+     """The :term:`pts` of the first sample, in seconds."""
+     duration_seconds: float
+     """The duration of the samples, in seconds."""
+     sample_rate: int
+     """The sample rate of the samples, in Hz."""
+
+     def __post_init__(self):
+         # This is called after __init__() when an AudioSamples instance is
+         # created. We can run input validation checks here.
+         if not self.data.ndim == 2:
+             raise ValueError(f"data must be 2-dimensional, got {self.data.shape = }")
+         self.pts_seconds = float(self.pts_seconds)
+         self.sample_rate = int(self.sample_rate)
+
+     def __iter__(self) -> Iterator[Union[Tensor, float]]:
+         for field in dataclasses.fields(self):
+             yield getattr(self, field.name)
+
+     def __repr__(self):
+         return _frame_repr(self)
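
A quick illustration of how these dataclasses behave, assuming the top-level re-exports of Frame and FrameBatch from the torchcodec package: a Frame unpacks like a tuple through __iter__, and indexing a FrameBatch returns a smaller FrameBatch.

    import torch
    from torchcodec import Frame, FrameBatch

    frame = Frame(
        data=torch.zeros(3, 270, 480, dtype=torch.uint8),  # 3-D: C, H, W
        pts_seconds=0.0,
        duration_seconds=1 / 30,
    )
    data, pts, duration = frame  # unpacking goes through __iter__

    batch = FrameBatch(
        data=torch.zeros(5, 3, 270, 480, dtype=torch.uint8),  # 4-D: N, C, H, W
        pts_seconds=torch.arange(5) / 30,  # leading dims must match data's
        duration_seconds=torch.full((5,), 1 / 30),
    )
    assert len(batch) == 5
    first = batch[0]  # still a FrameBatch; its data is now 3-D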