torchcodec 0.10.0__cp312-cp312-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. torchcodec/__init__.py +27 -0
  2. torchcodec/_core/AVIOContextHolder.cpp +60 -0
  3. torchcodec/_core/AVIOContextHolder.h +64 -0
  4. torchcodec/_core/AVIOFileLikeContext.cpp +98 -0
  5. torchcodec/_core/AVIOFileLikeContext.h +55 -0
  6. torchcodec/_core/AVIOTensorContext.cpp +130 -0
  7. torchcodec/_core/AVIOTensorContext.h +44 -0
  8. torchcodec/_core/BetaCudaDeviceInterface.cpp +849 -0
  9. torchcodec/_core/BetaCudaDeviceInterface.h +196 -0
  10. torchcodec/_core/CMakeLists.txt +295 -0
  11. torchcodec/_core/CUDACommon.cpp +330 -0
  12. torchcodec/_core/CUDACommon.h +51 -0
  13. torchcodec/_core/Cache.h +124 -0
  14. torchcodec/_core/CpuDeviceInterface.cpp +509 -0
  15. torchcodec/_core/CpuDeviceInterface.h +141 -0
  16. torchcodec/_core/CudaDeviceInterface.cpp +602 -0
  17. torchcodec/_core/CudaDeviceInterface.h +79 -0
  18. torchcodec/_core/DeviceInterface.cpp +117 -0
  19. torchcodec/_core/DeviceInterface.h +191 -0
  20. torchcodec/_core/Encoder.cpp +1054 -0
  21. torchcodec/_core/Encoder.h +192 -0
  22. torchcodec/_core/FFMPEGCommon.cpp +684 -0
  23. torchcodec/_core/FFMPEGCommon.h +314 -0
  24. torchcodec/_core/FilterGraph.cpp +159 -0
  25. torchcodec/_core/FilterGraph.h +59 -0
  26. torchcodec/_core/Frame.cpp +47 -0
  27. torchcodec/_core/Frame.h +72 -0
  28. torchcodec/_core/Metadata.cpp +124 -0
  29. torchcodec/_core/Metadata.h +92 -0
  30. torchcodec/_core/NVCUVIDRuntimeLoader.cpp +320 -0
  31. torchcodec/_core/NVCUVIDRuntimeLoader.h +14 -0
  32. torchcodec/_core/NVDECCache.cpp +60 -0
  33. torchcodec/_core/NVDECCache.h +102 -0
  34. torchcodec/_core/SingleStreamDecoder.cpp +1586 -0
  35. torchcodec/_core/SingleStreamDecoder.h +391 -0
  36. torchcodec/_core/StreamOptions.h +70 -0
  37. torchcodec/_core/Transform.cpp +128 -0
  38. torchcodec/_core/Transform.h +86 -0
  39. torchcodec/_core/ValidationUtils.cpp +35 -0
  40. torchcodec/_core/ValidationUtils.h +21 -0
  41. torchcodec/_core/__init__.py +46 -0
  42. torchcodec/_core/_metadata.py +262 -0
  43. torchcodec/_core/custom_ops.cpp +1090 -0
  44. torchcodec/_core/fetch_and_expose_non_gpl_ffmpeg_libs.cmake +169 -0
  45. torchcodec/_core/nvcuvid_include/cuviddec.h +1374 -0
  46. torchcodec/_core/nvcuvid_include/nvcuvid.h +610 -0
  47. torchcodec/_core/ops.py +605 -0
  48. torchcodec/_core/pybind_ops.cpp +50 -0
  49. torchcodec/_frame.py +146 -0
  50. torchcodec/_internally_replaced_utils.py +68 -0
  51. torchcodec/_samplers/__init__.py +7 -0
  52. torchcodec/_samplers/video_clip_sampler.py +419 -0
  53. torchcodec/decoders/__init__.py +12 -0
  54. torchcodec/decoders/_audio_decoder.py +185 -0
  55. torchcodec/decoders/_decoder_utils.py +113 -0
  56. torchcodec/decoders/_video_decoder.py +601 -0
  57. torchcodec/encoders/__init__.py +2 -0
  58. torchcodec/encoders/_audio_encoder.py +149 -0
  59. torchcodec/encoders/_video_encoder.py +196 -0
  60. torchcodec/libtorchcodec_core4.so +0 -0
  61. torchcodec/libtorchcodec_core5.so +0 -0
  62. torchcodec/libtorchcodec_core6.so +0 -0
  63. torchcodec/libtorchcodec_core7.so +0 -0
  64. torchcodec/libtorchcodec_core8.so +0 -0
  65. torchcodec/libtorchcodec_custom_ops4.so +0 -0
  66. torchcodec/libtorchcodec_custom_ops5.so +0 -0
  67. torchcodec/libtorchcodec_custom_ops6.so +0 -0
  68. torchcodec/libtorchcodec_custom_ops7.so +0 -0
  69. torchcodec/libtorchcodec_custom_ops8.so +0 -0
  70. torchcodec/libtorchcodec_pybind_ops4.so +0 -0
  71. torchcodec/libtorchcodec_pybind_ops5.so +0 -0
  72. torchcodec/libtorchcodec_pybind_ops6.so +0 -0
  73. torchcodec/libtorchcodec_pybind_ops7.so +0 -0
  74. torchcodec/libtorchcodec_pybind_ops8.so +0 -0
  75. torchcodec/samplers/__init__.py +2 -0
  76. torchcodec/samplers/_common.py +84 -0
  77. torchcodec/samplers/_index_based.py +287 -0
  78. torchcodec/samplers/_time_based.py +358 -0
  79. torchcodec/share/cmake/TorchCodec/TorchCodecConfig.cmake +76 -0
  80. torchcodec/share/cmake/TorchCodec/ffmpeg_versions.cmake +122 -0
  81. torchcodec/transforms/__init__.py +12 -0
  82. torchcodec/transforms/_decoder_transforms.py +375 -0
  83. torchcodec/version.py +2 -0
  84. torchcodec-0.10.0.dist-info/METADATA +286 -0
  85. torchcodec-0.10.0.dist-info/RECORD +88 -0
  86. torchcodec-0.10.0.dist-info/WHEEL +5 -0
  87. torchcodec-0.10.0.dist-info/licenses/LICENSE +28 -0
  88. torchcodec-0.10.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,605 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+
8
+ import io
9
+ import json
10
+ import os
11
+ import shutil
12
+ import sys
13
+ import traceback
14
+ import warnings
15
+ from contextlib import nullcontext
16
+ from pathlib import Path
17
+ from types import ModuleType
18
+
19
+ import torch
20
+ from torch.library import get_ctx, register_fake
21
+
22
+ from torchcodec._internally_replaced_utils import ( # @manual=//pytorch/torchcodec/src:internally_replaced_utils
23
+ _get_extension_path,
24
+ _get_pybind_ops_module_name,
25
+ _load_pybind11_module,
26
+ )
27
+
28
+ # Handle to the dynamically loaded pybind11 ops module. It stays None until
+ # load_torchcodec_shared_libraries() successfully assigns it.
+ _pybind_ops: ModuleType | None = None
29
+
30
+
31
def load_torchcodec_shared_libraries() -> tuple[int, str]:
    """Load the TorchCodec shared libraries for the newest loadable FFmpeg.

    FFmpeg major versions are tried from the highest supported (8) down to
    the lowest (4). The first version for which ALL shared libraries load
    wins and the search stops.

    Two loading mechanisms are used:

    1. ``torch.ops.load_library()`` for the core C++ library and the custom
       ops library. Loading through PyTorch registers the custom ops with
       PyTorch's runtime so they are reachable through ``torch.ops``.
    2. ``_load_pybind11_module()`` for the pybind11 module. It is loaded
       dynamically rather than with a plain ``import`` because a plain
       import only works when module name and file name match exactly,
       which is not the case for these libraries.

    On success the module-level ``_pybind_ops`` is set to the loaded
    pybind11 module.

    Returns:
        A ``(ffmpeg_major_version, core_library_path)`` tuple for the
        version that loaded.

    Raises:
        RuntimeError: If no supported FFmpeg version could be loaded. The
            message contains the traceback collected for every attempt.
    """
    global _pybind_ops

    # (major version, formatted traceback) for every failed attempt.
    failures = []
    for major in (8, 7, 6, 5, 4):
        module_name = _get_pybind_ops_module_name(major)
        core_name = f"libtorchcodec_core{major}"
        custom_ops_name = f"libtorchcodec_custom_ops{major}"
        pybind_name = f"libtorchcodec_pybind_ops{major}"
        try:
            core_path = _get_extension_path(core_name)
            torch.ops.load_library(core_path)
            torch.ops.load_library(_get_extension_path(custom_ops_name))

            pybind_path = _get_extension_path(pybind_name)
            _pybind_ops = _load_pybind11_module(module_name, pybind_path)
            return major, core_path
        except Exception:
            # Best effort by design: remember why this version failed and
            # fall through to the next (older) FFmpeg version.
            failures.append((major, traceback.format_exc()))

    per_version = [f"FFmpeg version {major}:\n{tb}" for major, tb in failures]
    traceback_info = "".join(
        (
            "\n[start of libtorchcodec loading traceback]\n",
            "\n".join(per_version),
            "[end of libtorchcodec loading traceback].",
        )
    )

    # NOTE(review): the leading whitespace inside this literal could not be
    # recovered from the diff view this file was taken from; the lines are
    # reproduced exactly as visible. Confirm against the upstream source.
    explanation = f"""Could not load libtorchcodec. Likely causes:
1. FFmpeg is not properly installed in your environment. We support
versions 4, 5, 6, 7, and 8, and we attempt to load libtorchcodec
for each of those versions. Errors for versions not installed on
your system are expected; only the error for your installed FFmpeg
version is relevant. On Windows, ensure you've installed the
"full-shared" version which ships DLLs.
2. The PyTorch version ({torch.__version__}) is not compatible with
this version of TorchCodec. Refer to the version compatibility
table:
https://github.com/pytorch/torchcodec?tab=readme-ov-file#installing-torchcodec.
3. Another runtime dependency; see exceptions below.

The following exceptions were raised as we tried to load libtorchcodec:
"""
    raise RuntimeError(explanation + traceback_info)
94
+
95
+
96
# Default: nothing to expose, so entering the context manager is a no-op.
expose_ffmpeg_dlls = nullcontext

# On Windows we try to locate the FFmpeg DLLs and temporarily add their
# directory to the DLL search path. This seems to be needed on some users'
# machines, but not on our CI. We don't know why.
if (
    sys.platform == "win32"
    and hasattr(os, "add_dll_directory")
    and (ffmpeg_path := shutil.which("ffmpeg"))
):

    def expose_ffmpeg_dlls():  # noqa: F811
        """Add the directory holding the ffmpeg executable to the DLL path."""
        # The object returned by os.add_dll_directory() is the actual
        # context manager used by the `with` statement below.
        return os.add_dll_directory(str(Path(ffmpeg_path).parent))


# Load the shared libraries at import time, with the FFmpeg DLL directory
# exposed (when applicable) for the duration of the load.
with expose_ffmpeg_dlls():
    ffmpeg_major_version, core_library_path = load_torchcodec_shared_libraries()
110
+
111
+
112
# Module-level aliases for the custom ops registered under
# torch.ops.torchcodec_ns by the shared libraries loaded above.

# --- Ops wrapped with disallow_in_graph -------------------------------------
# Note: We use disallow_in_graph because PyTorch does constant propagation of
# factory functions.

# Decoder creation.
create_from_file = torch._dynamo.disallow_in_graph(
    torch.ops.torchcodec_ns.create_from_file.default
)
create_from_tensor = torch._dynamo.disallow_in_graph(
    torch.ops.torchcodec_ns.create_from_tensor.default
)
_create_from_file_like = torch._dynamo.disallow_in_graph(
    torch.ops.torchcodec_ns._create_from_file_like.default
)

# Audio encoding.
encode_audio_to_file = torch._dynamo.disallow_in_graph(
    torch.ops.torchcodec_ns.encode_audio_to_file.default
)
encode_audio_to_tensor = torch._dynamo.disallow_in_graph(
    torch.ops.torchcodec_ns.encode_audio_to_tensor.default
)
_encode_audio_to_file_like = torch._dynamo.disallow_in_graph(
    torch.ops.torchcodec_ns._encode_audio_to_file_like.default
)

# Video encoding.
encode_video_to_file = torch._dynamo.disallow_in_graph(
    torch.ops.torchcodec_ns.encode_video_to_file.default
)
encode_video_to_tensor = torch._dynamo.disallow_in_graph(
    torch.ops.torchcodec_ns.encode_video_to_tensor.default
)
_encode_video_to_file_like = torch._dynamo.disallow_in_graph(
    torch.ops.torchcodec_ns._encode_video_to_file_like.default
)

# --- Ops bound directly -----------------------------------------------------

# Stream setup and seeking.
add_video_stream = torch.ops.torchcodec_ns.add_video_stream.default
_add_video_stream = torch.ops.torchcodec_ns._add_video_stream.default
add_audio_stream = torch.ops.torchcodec_ns.add_audio_stream.default
seek_to_pts = torch.ops.torchcodec_ns.seek_to_pts.default

# Frame retrieval. The two *_tensor_input aliases are wrapped further down by
# Python functions that also accept plain lists.
get_next_frame = torch.ops.torchcodec_ns.get_next_frame.default
get_frame_at_pts = torch.ops.torchcodec_ns.get_frame_at_pts.default
get_frame_at_index = torch.ops.torchcodec_ns.get_frame_at_index.default
_get_frames_at_indices_tensor_input = (
    torch.ops.torchcodec_ns.get_frames_at_indices.default
)
_get_frames_by_pts_tensor_input = torch.ops.torchcodec_ns.get_frames_by_pts.default
get_frames_in_range = torch.ops.torchcodec_ns.get_frames_in_range.default
get_frames_by_pts_in_range = torch.ops.torchcodec_ns.get_frames_by_pts_in_range.default
get_frames_by_pts_in_range_audio = (
    torch.ops.torchcodec_ns.get_frames_by_pts_in_range_audio.default
)

# Metadata and introspection.
get_json_metadata = torch.ops.torchcodec_ns.get_json_metadata.default
_get_container_json_metadata = (
    torch.ops.torchcodec_ns.get_container_json_metadata.default
)
_get_stream_json_metadata = torch.ops.torchcodec_ns.get_stream_json_metadata.default
_get_key_frame_indices = torch.ops.torchcodec_ns._get_key_frame_indices.default
scan_all_streams_to_update_metadata = (
    torch.ops.torchcodec_ns.scan_all_streams_to_update_metadata.default
)
_get_json_ffmpeg_library_versions = (
    torch.ops.torchcodec_ns._get_json_ffmpeg_library_versions.default
)
_get_backend_details = torch.ops.torchcodec_ns._get_backend_details.default
_test_frame_pts_equality = torch.ops.torchcodec_ns._test_frame_pts_equality.default
171
+
172
+
173
+ # =============================
174
+ # Functions not related to custom ops, but with implementations similar to the C++ ops
175
+ # =============================
176
def create_from_bytes(video_bytes: bytes, seek_mode: str | None = None) -> torch.Tensor:
    """Create a decoder from encoded media held in a ``bytes`` object.

    The bytes are exposed as a uint8 tensor through ``torch.frombuffer`` and
    forwarded to ``create_from_tensor``.

    Args:
        video_bytes: The encoded media bytes.
        seek_mode: Optional seek mode, passed through unchanged.

    Returns:
        Whatever ``create_from_tensor`` returns for the wrapped buffer.
    """
    with warnings.catch_warnings():
        # Silence the warning stating that the underlying video_bytes buffer
        # is non-writable.
        warnings.filterwarnings("ignore", category=UserWarning)
        return create_from_tensor(
            torch.frombuffer(video_bytes, dtype=torch.uint8), seek_mode
        )
183
+
184
+
185
def create_from_file_like(
    file_like: io.RawIOBase | io.BufferedReader, seek_mode: str | None = None
) -> torch.Tensor:
    """Create a decoder that reads from a Python file-like object.

    The file-like object is first wrapped by the pybind11 helper
    ``create_file_like_context``; the value it returns is then handed to the
    ``_create_from_file_like`` op.

    Args:
        file_like: The file-like object to read from.
        seek_mode: Optional seek mode, passed through unchanged.

    Returns:
        Whatever the ``_create_from_file_like`` op returns.
    """
    assert _pybind_ops is not None
    is_for_writing = False  # this context is only read from
    read_context = _pybind_ops.create_file_like_context(file_like, is_for_writing)
    return _create_from_file_like(read_context, seek_mode)
195
+
196
+
197
def encode_audio_to_file_like(
    samples: torch.Tensor,
    sample_rate: int,
    format: str,
    file_like: io.RawIOBase | io.BufferedIOBase,
    bit_rate: int | None = None,
    num_channels: int | None = None,
    desired_sample_rate: int | None = None,
) -> None:
    """Encode audio samples and write the result to a file-like object.

    Args:
        samples: Audio samples tensor; must have dtype ``torch.float32``.
        sample_rate: Sample rate in Hz.
        format: Audio format (e.g., "wav", "mp3", "flac").
        file_like: File-like object that supports write() and seek() methods.
        bit_rate: Optional bit rate for encoding.
        num_channels: Optional number of output channels.
        desired_sample_rate: Optional desired sample rate for the output.

    Raises:
        ValueError: If ``samples`` is not a float32 tensor.
    """
    assert _pybind_ops is not None

    if samples.dtype != torch.float32:
        raise ValueError(f"samples must have dtype torch.float32, got {samples.dtype}")

    # The second argument (True) marks the context as opened for writing.
    write_context = _pybind_ops.create_file_like_context(file_like, True)
    _encode_audio_to_file_like(
        samples,
        sample_rate,
        format,
        write_context,
        bit_rate,
        num_channels,
        desired_sample_rate,
    )
231
+
232
+
233
def encode_video_to_file_like(
    frames: torch.Tensor,
    frame_rate: float,
    format: str,
    file_like: io.RawIOBase | io.BufferedIOBase,
    codec: str | None = None,
    pixel_format: str | None = None,
    crf: int | float | None = None,
    preset: str | None = None,
    extra_options: list[str] | None = None,
) -> None:
    """Encode video frames and write the result to a file-like object.

    Args:
        frames: Video frames tensor. The device of the frames tensor will be
            used for encoding.
        frame_rate: Frame rate in frames per second.
        format: Video format (e.g., "mp4", "mov", "mkv").
        file_like: File-like object that supports write() and seek() methods.
        codec: Optional codec name (e.g., "libx264", "h264").
        pixel_format: Optional pixel format (e.g., "yuv420p", "yuv444p").
        crf: Optional constant rate factor for encoding quality.
        preset: Optional encoder preset as string (e.g., "ultrafast", "medium").
        extra_options: Optional list of extra options as flattened key-value
            pairs.
    """
    assert _pybind_ops is not None

    # The second argument (True) marks the context as opened for writing.
    write_context = _pybind_ops.create_file_like_context(file_like, True)

    # NOTE(review): crf is passed before preset here, whereas the fake
    # implementations in this file declare preset before crf. Confirm the
    # positional order against the op schema.
    _encode_video_to_file_like(
        frames,
        frame_rate,
        format,
        write_context,
        codec,
        pixel_format,
        crf,
        preset,
        extra_options,
    )
270
+
271
+
272
def get_frames_at_indices(
    decoder: torch.Tensor, *, frame_indices: torch.Tensor | list[int]
) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """Fetch the frames at the given indices.

    Accepts either a tensor or a plain list of indices and normalizes both
    to an int64 tensor before dispatching to the underlying op.

    Args:
        decoder: The decoder tensor the op operates on.
        frame_indices: Indices of the frames to retrieve.

    Returns:
        The 3-tuple of tensors returned by the ``get_frames_at_indices`` op.
    """
    if isinstance(frame_indices, torch.Tensor):
        # Ensure indices is the correct dtype (int64)
        frame_indices = frame_indices.to(torch.int64)
    else:
        # Convert list to tensor for dispatch. The dtype is given explicitly
        # so the list path matches the tensor path above: without it, an
        # empty list would be inferred as float32 instead of int64.
        frame_indices = torch.tensor(frame_indices, dtype=torch.int64)
    return _get_frames_at_indices_tensor_input(decoder, frame_indices=frame_indices)
282
+
283
+
284
def get_frames_by_pts(
    decoder: torch.Tensor, *, timestamps: torch.Tensor | list[float]
) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """Fetch the frames for the given timestamps.

    Accepts either a tensor or a plain list of timestamps and normalizes
    both to a float64 tensor before dispatching to the underlying op.

    Args:
        decoder: The decoder tensor the op operates on.
        timestamps: Timestamps of the frames to retrieve.

    Returns:
        The 3-tuple of tensors returned by the ``get_frames_by_pts`` op.

    Raises:
        ValueError: If a non-tensor ``timestamps`` cannot be converted to a
            float64 tensor.
    """
    if isinstance(timestamps, torch.Tensor):
        # Ensure timestamps have the correct dtype (float64).
        return _get_frames_by_pts_tensor_input(
            decoder, timestamps=timestamps.to(torch.float64)
        )

    # Convert list to tensor for dispatch.
    try:
        timestamps_tensor = torch.tensor(timestamps, dtype=torch.float64)
    except Exception as e:
        raise ValueError("Couldn't convert timestamps input to a tensor") from e
    return _get_frames_by_pts_tensor_input(decoder, timestamps=timestamps_tensor)
297
+
298
+
299
+ # ==============================
300
+ # Abstract impl for the operators. Needed by torch.compile.
301
+ # ==============================
302
@register_fake("torchcodec_ns::create_from_file")
def create_from_file_abstract(filename: str, seek_mode: str | None) -> torch.Tensor:
    """Fake impl of ``create_from_file``: returns a 0-dim int64 placeholder."""
    return torch.empty((), dtype=torch.int64)
305
+
306
+
307
@register_fake("torchcodec_ns::_create_from_file_like")
def _create_from_file_like_abstract(
    file_like: int, seek_mode: str | None
) -> torch.Tensor:
    """Fake impl of ``_create_from_file_like``: returns a 0-dim int64 placeholder."""
    return torch.empty((), dtype=torch.int64)
312
+
313
+
314
@register_fake("torchcodec_ns::encode_audio_to_file")
def encode_audio_to_file_abstract(
    samples: torch.Tensor,
    sample_rate: int,
    filename: str,
    bit_rate: int | None = None,
    num_channels: int | None = None,
    desired_sample_rate: int | None = None,
) -> None:
    """Fake impl of ``encode_audio_to_file``: does nothing and returns None."""
    return None
324
+
325
+
326
@register_fake("torchcodec_ns::encode_audio_to_tensor")
def encode_audio_to_tensor_abstract(
    samples: torch.Tensor,
    sample_rate: int,
    format: str,
    bit_rate: int | None = None,
    num_channels: int | None = None,
    desired_sample_rate: int | None = None,
) -> torch.Tensor:
    """Fake impl of ``encode_audio_to_tensor``: returns a 0-dim int64 placeholder."""
    return torch.empty((), dtype=torch.int64)
336
+
337
+
338
@register_fake("torchcodec_ns::_encode_audio_to_file_like")
def _encode_audio_to_file_like_abstract(
    samples: torch.Tensor,
    sample_rate: int,
    format: str,
    file_like_context: int,
    bit_rate: int | None = None,
    num_channels: int | None = None,
    desired_sample_rate: int | None = None,
) -> None:
    """Fake impl of ``_encode_audio_to_file_like``: does nothing and returns None."""
    return None
349
+
350
+
351
@register_fake("torchcodec_ns::encode_video_to_file")
def encode_video_to_file_abstract(
    frames: torch.Tensor,
    frame_rate: float,
    filename: str,
    codec: str | None = None,
    pixel_format: str | None = None,
    preset: str | None = None,
    crf: int | float | None = None,
    extra_options: list[str] | None = None,
) -> None:
    """Fake impl of ``encode_video_to_file``: does nothing and returns None."""
    return None
363
+
364
+
365
@register_fake("torchcodec_ns::encode_video_to_tensor")
def encode_video_to_tensor_abstract(
    frames: torch.Tensor,
    frame_rate: float,
    format: str,
    codec: str | None = None,
    pixel_format: str | None = None,
    preset: str | None = None,
    crf: int | float | None = None,
    extra_options: list[str] | None = None,
) -> torch.Tensor:
    """Fake impl of ``encode_video_to_tensor``: returns a 0-dim int64 placeholder."""
    return torch.empty((), dtype=torch.int64)
377
+
378
+
379
@register_fake("torchcodec_ns::_encode_video_to_file_like")
def _encode_video_to_file_like_abstract(
    frames: torch.Tensor,
    frame_rate: float,
    format: str,
    file_like_context: int,
    codec: str | None = None,
    pixel_format: str | None = None,
    preset: str | None = None,
    crf: int | float | None = None,
    extra_options: list[str] | None = None,
) -> None:
    """Fake impl of ``_encode_video_to_file_like``: does nothing and returns None."""
    return None
392
+
393
+
394
@register_fake("torchcodec_ns::create_from_tensor")
def create_from_tensor_abstract(
    video_tensor: torch.Tensor, seek_mode: str | None
) -> torch.Tensor:
    """Fake impl of ``create_from_tensor``: returns a 0-dim int64 placeholder."""
    return torch.empty((), dtype=torch.int64)
399
+
400
+
401
@register_fake("torchcodec_ns::_add_video_stream")
def _add_video_stream_abstract(
    decoder: torch.Tensor,
    *,
    num_threads: int | None = None,
    dimension_order: str | None = None,
    stream_index: int | None = None,
    device: str = "cpu",
    device_variant: str = "ffmpeg",
    transform_specs: str = "",
    custom_frame_mappings: (
        tuple[torch.Tensor, torch.Tensor, torch.Tensor] | None
    ) = None,
    color_conversion_library: str | None = None,
) -> None:
    """Fake impl of ``_add_video_stream``: does nothing and returns None."""
    return None
417
+
418
+
419
@register_fake("torchcodec_ns::add_video_stream")
def add_video_stream_abstract(
    decoder: torch.Tensor,
    *,
    num_threads: int | None = None,
    dimension_order: str | None = None,
    stream_index: int | None = None,
    device: str = "cpu",
    device_variant: str = "ffmpeg",
    transform_specs: str = "",
    custom_frame_mappings: (
        tuple[torch.Tensor, torch.Tensor, torch.Tensor] | None
    ) = None,
) -> None:
    """Fake impl of ``add_video_stream``: does nothing and returns None."""
    return None
434
+
435
+
436
@register_fake("torchcodec_ns::add_audio_stream")
def add_audio_stream_abstract(
    decoder: torch.Tensor,
    *,
    stream_index: int | None = None,
    sample_rate: int | None = None,
    num_channels: int | None = None,
) -> None:
    """Fake impl of ``add_audio_stream``: does nothing and returns None."""
    return None
445
+
446
+
447
@register_fake("torchcodec_ns::seek_to_pts")
def seek_abstract(decoder: torch.Tensor, seconds: float) -> None:
    """Fake impl of ``seek_to_pts``: does nothing and returns None."""
    return None
450
+
451
+
452
@register_fake("torchcodec_ns::get_next_frame")
def get_next_frame_abstract(
    decoder: torch.Tensor,
) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """Fake impl of ``get_next_frame``.

    Returns a 3-D tensor with dynamic sizes plus two 0-dim float32 tensors.
    """
    # Images are 3 dimensions: height, width, channels.
    # The exact permutation depends on the constructor options passed in.
    ctx = get_ctx()
    frame_shape = [ctx.new_dynamic_size() for _ in range(3)]
    frame = torch.empty(frame_shape)
    first_scalar = torch.empty((), dtype=torch.float32)
    second_scalar = torch.empty((), dtype=torch.float32)
    return (frame, first_scalar, second_scalar)
464
+
465
+
466
@register_fake("torchcodec_ns::get_frame_at_pts")
def get_frame_at_pts_abstract(
    decoder: torch.Tensor, seconds: float
) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """Fake impl of ``get_frame_at_pts``.

    Returns a 3-D tensor with dynamic sizes plus two 0-dim float32 tensors.
    """
    ctx = get_ctx()
    frame_shape = [ctx.new_dynamic_size() for _ in range(3)]
    frame = torch.empty(frame_shape)
    first_scalar = torch.empty((), dtype=torch.float32)
    second_scalar = torch.empty((), dtype=torch.float32)
    return (frame, first_scalar, second_scalar)
476
+
477
+
478
@register_fake("torchcodec_ns::get_frames_by_pts")
def get_frames_by_pts_abstract(
    decoder: torch.Tensor,
    *,
    timestamps: torch.Tensor | list[float],
) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """Fake impl of ``get_frames_by_pts``.

    Returns a 4-D tensor with dynamic sizes plus two 0-dim float32 tensors.
    """
    ctx = get_ctx()
    batch_shape = [ctx.new_dynamic_size() for _ in range(4)]
    frames = torch.empty(batch_shape)
    first_scalar = torch.empty((), dtype=torch.float32)
    second_scalar = torch.empty((), dtype=torch.float32)
    return (frames, first_scalar, second_scalar)
490
+
491
+
492
@register_fake("torchcodec_ns::get_frame_at_index")
def get_frame_at_index_abstract(
    decoder: torch.Tensor, *, frame_index: int
) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """Fake impl of ``get_frame_at_index``.

    Returns a 3-D tensor with dynamic sizes plus two 0-dim float32 tensors.
    """
    ctx = get_ctx()
    frame_shape = [ctx.new_dynamic_size() for _ in range(3)]
    frame = torch.empty(frame_shape)
    first_scalar = torch.empty((), dtype=torch.float32)
    second_scalar = torch.empty((), dtype=torch.float32)
    return (frame, first_scalar, second_scalar)
502
+
503
+
504
@register_fake("torchcodec_ns::get_frames_at_indices")
def get_frames_at_indices_abstract(
    decoder: torch.Tensor, *, frame_indices: torch.Tensor | list[int]
) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """Fake impl of ``get_frames_at_indices``.

    Returns a 4-D tensor with dynamic sizes plus two 0-dim float32 tensors.
    """
    ctx = get_ctx()
    batch_shape = [ctx.new_dynamic_size() for _ in range(4)]
    frames = torch.empty(batch_shape)
    first_scalar = torch.empty((), dtype=torch.float32)
    second_scalar = torch.empty((), dtype=torch.float32)
    return (frames, first_scalar, second_scalar)
514
+
515
+
516
@register_fake("torchcodec_ns::get_frames_in_range")
def get_frames_in_range_abstract(
    decoder: torch.Tensor,
    *,
    start: int,
    stop: int,
    step: int | None = None,
) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """Fake impl of ``get_frames_in_range``.

    Returns a 4-D tensor with dynamic sizes plus two 0-dim float32 tensors.
    """
    ctx = get_ctx()
    batch_shape = [ctx.new_dynamic_size() for _ in range(4)]
    frames = torch.empty(batch_shape)
    first_scalar = torch.empty((), dtype=torch.float32)
    second_scalar = torch.empty((), dtype=torch.float32)
    return (frames, first_scalar, second_scalar)
530
+
531
+
532
@register_fake("torchcodec_ns::get_frames_by_pts_in_range")
def get_frames_by_pts_in_range_abstract(
    decoder: torch.Tensor,
    *,
    start_seconds: float,
    stop_seconds: float,
) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """Fake impl of ``get_frames_by_pts_in_range``.

    Returns a 4-D tensor with dynamic sizes plus two 0-dim float32 tensors.
    """
    ctx = get_ctx()
    batch_shape = [ctx.new_dynamic_size() for _ in range(4)]
    frames = torch.empty(batch_shape)
    first_scalar = torch.empty((), dtype=torch.float32)
    second_scalar = torch.empty((), dtype=torch.float32)
    return (frames, first_scalar, second_scalar)
545
+
546
+
547
@register_fake("torchcodec_ns::get_frames_by_pts_in_range_audio")
def get_frames_by_pts_in_range_audio_abstract(
    decoder: torch.Tensor,
    *,
    start_seconds: float,
    stop_seconds: float | None = None,
) -> tuple[torch.Tensor, torch.Tensor]:
    """Fake impl of ``get_frames_by_pts_in_range_audio``.

    Returns a 4-D tensor with dynamic sizes plus one 0-dim float32 tensor.
    """
    # NOTE(review): a 4-D shape mirrors the video fakes; confirm that it is
    # the intended rank for audio output.
    ctx = get_ctx()
    dynamic_shape = [ctx.new_dynamic_size() for _ in range(4)]
    samples = torch.empty(dynamic_shape)
    scalar = torch.empty((), dtype=torch.float32)
    return (samples, scalar)
556
+
557
+
558
@register_fake("torchcodec_ns::_get_key_frame_indices")
def get_key_frame_indices_abstract(decoder: torch.Tensor) -> torch.Tensor:
    """Fake impl of ``_get_key_frame_indices``: returns a 0-dim int32 placeholder."""
    return torch.empty((), dtype=torch.int32)
561
+
562
+
563
@register_fake("torchcodec_ns::get_json_metadata")
def get_json_metadata_abstract(decoder: torch.Tensor) -> str:
    """Fake impl of ``get_json_metadata``: returns an empty string."""
    return str()
566
+
567
+
568
@register_fake("torchcodec_ns::get_container_json_metadata")
def get_container_json_metadata_abstract(decoder: torch.Tensor) -> str:
    """Fake impl of ``get_container_json_metadata``: returns an empty string."""
    return str()
571
+
572
+
573
@register_fake("torchcodec_ns::get_stream_json_metadata")
def get_stream_json_metadata_abstract(decoder: torch.Tensor, stream_idx: int) -> str:
    """Fake impl of ``get_stream_json_metadata``: returns an empty string."""
    return str()
576
+
577
+
578
@register_fake("torchcodec_ns::_test_frame_pts_equality")
def _test_frame_pts_equality_abstract(
    decoder: torch.Tensor,
    *,
    frame_index: int,
    pts_seconds_to_test: float,
) -> bool:
    """Fake impl of ``_test_frame_pts_equality``: always returns False."""
    placeholder = False
    return placeholder
586
+
587
+
588
@register_fake("torchcodec_ns::_get_json_ffmpeg_library_versions")
def _get_json_ffmpeg_library_versions_abstract() -> str:
    """Fake impl of ``_get_json_ffmpeg_library_versions``: returns an empty string."""
    return str()
591
+
592
+
593
@register_fake("torchcodec_ns::scan_all_streams_to_update_metadata")
def scan_all_streams_to_update_metadata_abstract(decoder: torch.Tensor) -> None:
    """Fake impl of ``scan_all_streams_to_update_metadata``: does nothing."""
    return None
596
+
597
+
598
def get_ffmpeg_library_versions():
    """Return the FFmpeg library versions, parsed from the op's JSON output.

    Calls the ``_get_json_ffmpeg_library_versions`` op and decodes the JSON
    string it returns.
    """
    return json.loads(_get_json_ffmpeg_library_versions())
601
+
602
+
603
@register_fake("torchcodec_ns::_get_backend_details")
def _get_backend_details_abstract(decoder: torch.Tensor) -> str:
    """Fake impl of ``_get_backend_details``: returns an empty string."""
    return str()
@@ -0,0 +1,50 @@
1
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ // All rights reserved.
3
+ //
4
+ // This source code is licensed under the BSD-style license found in the
5
+ // LICENSE file in the root directory of this source tree.
6
+
7
+ #include <pybind11/pybind11.h>
8
+ #include <pybind11/stl.h>
9
+ #include <cstdint>
10
+
11
+ #include "AVIOFileLikeContext.h"
12
+
13
+ namespace py = pybind11;
14
+
15
+ namespace facebook::torchcodec {
16
+
17
+ // Note: It's not immediately obvous why we need both custom_ops.cpp and
18
+ // pybind_ops.cpp. We do all other Python to C++ bridging in
19
+ // custom_ops.cpp, and that even depends on pybind11, so why have an
20
+ // explicit pybind-only file?
21
+ //
22
+ // The reason is that we want to accept OWNERSHIP of a file-like object
23
+ // from the Python side. In order to do that, we need a proper
24
+ // py::object. For raw bytes, we can launder that through a tensor on the
25
+ // custom_ops.cpp side, but we can't launder a proper Python object
26
+ // through a tensor. Custom ops can't accept a proper Python object
27
+ // through py::object, so we have to do direct pybind11 here.
28
+ //
29
+ // TODO: Investigate if we can do something better here. See:
30
+ // https://github.com/pytorch/torchcodec/issues/896
31
+ // Short version is that we're laundering a pointer through an int, the
32
+ // Python side forwards that to decoder creation functions in
33
+ // custom_ops.cpp and we do another cast on that side to get a pointer
34
+ // again. We want to investigate if we can do something cleaner by
35
+ // defining proper pybind objects.
36
+ int64_t create_file_like_context(py::object file_like, bool is_for_writing) {
37
+ AVIOFileLikeContext* context =
38
+ new AVIOFileLikeContext(file_like, is_for_writing);
39
+ return reinterpret_cast<int64_t>(context);
40
+ }
41
+
42
+ #ifndef PYBIND_OPS_MODULE_NAME
43
+ #error PYBIND_OPS_MODULE_NAME must be defined!
44
+ #endif
45
+
46
+ PYBIND11_MODULE(PYBIND_OPS_MODULE_NAME, m) {
47
+ m.def("create_file_like_context", &create_file_like_context);
48
+ }
49
+
50
+ } // namespace facebook::torchcodec