torchcodec 0.7.0__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of torchcodec might be problematic. Click here for more details.

Files changed (67) hide show
  1. torchcodec/__init__.py +16 -0
  2. torchcodec/_core/AVIOContextHolder.cpp +60 -0
  3. torchcodec/_core/AVIOContextHolder.h +64 -0
  4. torchcodec/_core/AVIOFileLikeContext.cpp +98 -0
  5. torchcodec/_core/AVIOFileLikeContext.h +55 -0
  6. torchcodec/_core/AVIOTensorContext.cpp +123 -0
  7. torchcodec/_core/AVIOTensorContext.h +43 -0
  8. torchcodec/_core/CMakeLists.txt +292 -0
  9. torchcodec/_core/Cache.h +138 -0
  10. torchcodec/_core/CpuDeviceInterface.cpp +266 -0
  11. torchcodec/_core/CpuDeviceInterface.h +70 -0
  12. torchcodec/_core/CudaDeviceInterface.cpp +514 -0
  13. torchcodec/_core/CudaDeviceInterface.h +37 -0
  14. torchcodec/_core/DeviceInterface.cpp +79 -0
  15. torchcodec/_core/DeviceInterface.h +67 -0
  16. torchcodec/_core/Encoder.cpp +514 -0
  17. torchcodec/_core/Encoder.h +123 -0
  18. torchcodec/_core/FFMPEGCommon.cpp +421 -0
  19. torchcodec/_core/FFMPEGCommon.h +227 -0
  20. torchcodec/_core/FilterGraph.cpp +142 -0
  21. torchcodec/_core/FilterGraph.h +45 -0
  22. torchcodec/_core/Frame.cpp +32 -0
  23. torchcodec/_core/Frame.h +118 -0
  24. torchcodec/_core/Metadata.h +72 -0
  25. torchcodec/_core/SingleStreamDecoder.cpp +1715 -0
  26. torchcodec/_core/SingleStreamDecoder.h +380 -0
  27. torchcodec/_core/StreamOptions.h +53 -0
  28. torchcodec/_core/ValidationUtils.cpp +35 -0
  29. torchcodec/_core/ValidationUtils.h +21 -0
  30. torchcodec/_core/__init__.py +40 -0
  31. torchcodec/_core/_metadata.py +317 -0
  32. torchcodec/_core/custom_ops.cpp +727 -0
  33. torchcodec/_core/fetch_and_expose_non_gpl_ffmpeg_libs.cmake +300 -0
  34. torchcodec/_core/ops.py +455 -0
  35. torchcodec/_core/pybind_ops.cpp +87 -0
  36. torchcodec/_frame.py +145 -0
  37. torchcodec/_internally_replaced_utils.py +67 -0
  38. torchcodec/_samplers/__init__.py +7 -0
  39. torchcodec/_samplers/video_clip_sampler.py +430 -0
  40. torchcodec/decoders/__init__.py +11 -0
  41. torchcodec/decoders/_audio_decoder.py +177 -0
  42. torchcodec/decoders/_decoder_utils.py +52 -0
  43. torchcodec/decoders/_video_decoder.py +464 -0
  44. torchcodec/encoders/__init__.py +1 -0
  45. torchcodec/encoders/_audio_encoder.py +150 -0
  46. torchcodec/libtorchcodec_core4.dll +0 -0
  47. torchcodec/libtorchcodec_core5.dll +0 -0
  48. torchcodec/libtorchcodec_core6.dll +0 -0
  49. torchcodec/libtorchcodec_core7.dll +0 -0
  50. torchcodec/libtorchcodec_custom_ops4.dll +0 -0
  51. torchcodec/libtorchcodec_custom_ops5.dll +0 -0
  52. torchcodec/libtorchcodec_custom_ops6.dll +0 -0
  53. torchcodec/libtorchcodec_custom_ops7.dll +0 -0
  54. torchcodec/libtorchcodec_pybind_ops4.pyd +0 -0
  55. torchcodec/libtorchcodec_pybind_ops5.pyd +0 -0
  56. torchcodec/libtorchcodec_pybind_ops6.pyd +0 -0
  57. torchcodec/libtorchcodec_pybind_ops7.pyd +0 -0
  58. torchcodec/samplers/__init__.py +2 -0
  59. torchcodec/samplers/_common.py +84 -0
  60. torchcodec/samplers/_index_based.py +287 -0
  61. torchcodec/samplers/_time_based.py +350 -0
  62. torchcodec/version.py +2 -0
  63. torchcodec-0.7.0.dist-info/METADATA +242 -0
  64. torchcodec-0.7.0.dist-info/RECORD +67 -0
  65. torchcodec-0.7.0.dist-info/WHEEL +5 -0
  66. torchcodec-0.7.0.dist-info/licenses/LICENSE +28 -0
  67. torchcodec-0.7.0.dist-info/top_level.txt +2 -0
Binary file
Binary file
Binary file
@@ -0,0 +1,2 @@
1
+ from ._index_based import clips_at_random_indices, clips_at_regular_indices
2
+ from ._time_based import clips_at_random_timestamps, clips_at_regular_timestamps
@@ -0,0 +1,84 @@
1
+ from typing import Callable, Union
2
+
3
+ from torchcodec import FrameBatch
4
+
5
# Alias for the list type that the policy functions in this module accept and
# return: a list of ints or a list of floats.
_LIST_OF_INT_OR_FLOAT = Union[list[int], list[float]]
6
+
7
+
8
def _repeat_last_policy(
    values: _LIST_OF_INT_OR_FLOAT, desired_len: int
) -> _LIST_OF_INT_OR_FLOAT:
    """Pad ``values`` up to ``desired_len`` by repeating its last element.

    Example: ``values = [1, 2, 3]`` and ``desired_len = 5`` gives
    ``[1, 2, 3, 3, 3]``.

    A new list is returned and ``values`` itself is never modified, so callers
    can safely keep using the list they passed in. When ``desired_len`` is not
    larger than ``len(values)``, the result is an unpadded copy of ``values``.

    Raises:
        IndexError: If ``values`` is empty (there is no last element to repeat).
    """
    num_missing = desired_len - len(values)
    # Multiplying a list by a non-positive count yields [], i.e. no padding.
    return values + [values[-1]] * num_missing
15
+
16
+
17
def _wrap_policy(
    values: _LIST_OF_INT_OR_FLOAT, desired_len: int
) -> _LIST_OF_INT_OR_FLOAT:
    """Extend ``values`` to ``desired_len`` by cycling back to its beginning.

    Example: ``values = [1, 2, 3]`` and ``desired_len = 5`` gives
    ``[1, 2, 3, 1, 2]``.
    """
    # Tile the list one more time than strictly needed, then trim the excess.
    num_repetitions = desired_len // len(values) + 1
    tiled = values * num_repetitions
    return tiled[:desired_len]
23
+
24
+
25
def _error_policy(
    frames_indices: _LIST_OF_INT_OR_FLOAT, desired_len: int
) -> _LIST_OF_INT_OR_FLOAT:
    """Policy that refuses to fill in missing frames.

    Both arguments are ignored; the function only exists so that the "error"
    policy has the same call signature as the other policies.

    Raises:
        ValueError: Always.
    """
    message = (
        "You set the 'error' policy, and the sampler tried to decode a frame "
        "that is beyond the number of frames in the video. "
        "Try to leave sampling_range_end to its default value?"
    )
    raise ValueError(message)
33
+
34
+
35
# Call signature shared by all policy functions: (values, desired_len) -> values.
_POLICY_FUNCTION_TYPE = Callable[[_LIST_OF_INT_OR_FLOAT, int], _LIST_OF_INT_OR_FLOAT]

# Maps each supported ``policy`` name to the function implementing it.
_POLICY_FUNCTIONS: dict[str, _POLICY_FUNCTION_TYPE] = dict(
    repeat_last=_repeat_last_policy,
    wrap=_wrap_policy,
    error=_error_policy,
)
42
+
43
+
44
def _validate_common_params(*, decoder, num_frames_per_clip, policy):
    """Validate the decoder, clip length and policy name.

    Raises:
        ValueError: If ``decoder`` has no frame, if ``num_frames_per_clip`` is
            not strictly positive, or if ``policy`` is not a key of
            ``_POLICY_FUNCTIONS``.
    """
    num_frames = len(decoder)
    if num_frames < 1:
        raise ValueError(
            f"Decoder must have at least one frame, found {num_frames} frames."
        )

    if num_frames_per_clip <= 0:
        raise ValueError(
            f"num_frames_per_clip ({num_frames_per_clip}) must be strictly positive"
        )

    if policy in _POLICY_FUNCTIONS:
        return
    raise ValueError(
        f"Invalid policy ({policy}). Supported values are {_POLICY_FUNCTIONS.keys()}."
    )
58
+
59
+
60
def _reshape_4d_framebatch_into_5d(
    *,
    frames: FrameBatch,
    num_clips: int,
    num_frames_per_clip: int,
) -> FrameBatch:
    """Regroup a flat batch of frames into ``num_clips`` clips.

    The leading dimension of every field is split into
    (``num_clips``, ``num_frames_per_clip``); the last three dimensions of
    ``frames.data`` are kept as they are. The fields are reshaped with
    ``view``.
    """
    clip_dims = (num_clips, num_frames_per_clip)
    per_frame_dims = frames.data.shape[-3:]
    data_5d = frames.data.view(*clip_dims, *per_frame_dims)
    pts_2d = frames.pts_seconds.view(*clip_dims)
    duration_2d = frames.duration_seconds.view(*clip_dims)
    return FrameBatch(
        data=data_5d,
        pts_seconds=pts_2d,
        duration_seconds=duration_2d,
    )
72
+
73
+
74
# "Returns" section of the sampler docstrings, kept as a constant so it can be
# interpolated into other docstring templates.
_FRAMEBATCH_RETURN_DOCS = """
    Returns:
        FrameBatch:
            The sampled :term:`clips`, as a 5D :class:`~torchcodec.FrameBatch`.
            The shape of the ``data`` field is (``num_clips``,
            ``num_frames_per_clip``, ...) where ... is (H, W, C) or (C, H, W)
            depending on the ``dimension_order`` parameter of
            :class:`~torchcodec.decoders.VideoDecoder`. The shape of the
            ``pts_seconds`` and ``duration_seconds`` fields is (``num_clips``,
            ``num_frames_per_clip``).
"""
@@ -0,0 +1,287 @@
1
+ from typing import Literal, Optional
2
+
3
+ import torch
4
+
5
+ from torchcodec import FrameBatch
6
+ from torchcodec.decoders import VideoDecoder
7
+ from torchcodec.samplers._common import (
8
+ _FRAMEBATCH_RETURN_DOCS,
9
+ _POLICY_FUNCTION_TYPE,
10
+ _POLICY_FUNCTIONS,
11
+ _reshape_4d_framebatch_into_5d,
12
+ _validate_common_params,
13
+ )
14
+
15
+
16
def _validate_params_index_based(*, num_clips, num_indices_between_frames):
    """Validate the parameters that are specific to index-based sampling.

    Raises:
        ValueError: If ``num_clips`` or ``num_indices_between_frames`` is not
            strictly positive. ``num_clips`` is checked first.
    """
    error_message = None
    if num_clips <= 0:
        error_message = f"num_clips ({num_clips}) must be > 0"
    elif num_indices_between_frames <= 0:
        error_message = (
            f"num_indices_between_frames ({num_indices_between_frames}) must be strictly positive"
        )

    if error_message is not None:
        raise ValueError(error_message)
24
+
25
+
26
def _validate_sampling_range_index_based(
    *,
    num_indices_between_frames,
    num_frames_per_clip,
    sampling_range_start,
    sampling_range_end,
    num_frames_in_video,
):
    """Normalize and validate the [start, end) range of clip start indices.

    Negative bounds are interpreted relative to the end of the video, i.e.
    they are replaced by ``num_frames_in_video + value``. When
    ``sampling_range_end`` is None, it is chosen so that a clip starting at
    the last allowed index still fits within the video (but never below 1).
    An explicit ``sampling_range_end`` is clamped to ``num_frames_in_video``.

    Returns:
        The normalized ``(sampling_range_start, sampling_range_end)`` tuple,
        with ``0 <= sampling_range_start < sampling_range_end``.

    Raises:
        ValueError: If ``sampling_range_start`` falls outside of the video
            (on either side), or if the resulting range is empty.
    """
    if sampling_range_start < 0:
        requested_start = sampling_range_start
        sampling_range_start = num_frames_in_video + sampling_range_start
        # A start that is still negative after normalization points before the
        # first frame. Reject it here instead of letting negative indices leak
        # into the sampled clip indices.
        if sampling_range_start < 0:
            raise ValueError(
                f"sampling_range_start ({requested_start}) is out of range: "
                f"negative values must be >= -{num_frames_in_video}, "
                "the number of frames in the video."
            )

    if sampling_range_start >= num_frames_in_video:
        raise ValueError(
            f"sampling_range_start ({sampling_range_start}) must be smaller than "
            f"the number of frames ({num_frames_in_video})."
        )

    clip_span = _get_clip_span(
        num_indices_between_frames=num_indices_between_frames,
        num_frames_per_clip=num_frames_per_clip,
    )

    if sampling_range_end is None:
        # Last start index for which a full clip fits is
        # num_frames_in_video - clip_span; the bound is exclusive, hence +1.
        sampling_range_end = max(num_frames_in_video - clip_span + 1, 1)
        if sampling_range_start >= sampling_range_end:
            raise ValueError(
                f"We determined that sampling_range_end should be {sampling_range_end}, "
                "but it is smaller than or equal to sampling_range_start "
                f"({sampling_range_start})."
            )
    else:
        if sampling_range_end < 0:
            # Support negative values so that -1 means last frame.
            sampling_range_end = num_frames_in_video + sampling_range_end
        sampling_range_end = min(sampling_range_end, num_frames_in_video)
        if sampling_range_start >= sampling_range_end:
            raise ValueError(
                f"sampling_range_start ({sampling_range_start}) must be smaller than "
                f"sampling_range_end ({sampling_range_end})."
            )

    return sampling_range_start, sampling_range_end
68
+
69
+
70
def _get_clip_span(*, num_indices_between_frames, num_frames_per_clip):
    """Return how many indices a clip covers, first and last frame included.

    The span differs from the number of frames in the clip as soon as frames
    are not consecutive. With ``num_frames_per_clip = 4`` (f is a frame that
    belongs to the clip, x is a skipped frame):

        num_indices_between_frames = 1, clip = ffff      , span = 4
        num_indices_between_frames = 2, clip = fxfxfxf   , span = 7
        num_indices_between_frames = 3, clip = fxxfxxfxxf, span = 10
    """
    # One step per gap between two successive frames of the clip, plus the
    # first frame itself.
    num_gaps = num_frames_per_clip - 1
    return num_indices_between_frames * num_gaps + 1
82
+
83
+
84
def _build_all_clips_indices(
    *,
    clip_start_indices: torch.Tensor,  # 1D int tensor
    num_frames_per_clip: int,
    num_indices_between_frames: int,
    num_frames_in_video: int,
    policy_fun: _POLICY_FUNCTION_TYPE,
) -> list[int]:
    """Expand clip start indices into the flat list of all frame indices.

    Given the start indices [f_00, f_10, f_20, ...], the result is
    [f_00, f_01, f_02, f_03, f_10, f_11, f_12, f_13, ...] where f_01 is the
    index of frame 1 in clip 0 (shown here for ``num_frames_per_clip=4``).

    Indices are never generated at or past ``num_frames_in_video``. A clip
    that ends up with fewer than ``num_frames_per_clip`` indices is handed to
    ``policy_fun`` (wrap, repeat, etc.), whose result is used for that clip.
    """
    clip_span = _get_clip_span(
        num_indices_between_frames=num_indices_between_frames,
        num_frames_per_clip=num_frames_per_clip,
    )

    all_clips_indices: list[int] = []
    for clip_start in clip_start_indices:
        # Stop at the end of the video, even if the clip should go further.
        clip_stop = min(clip_start + clip_span, num_frames_in_video)
        clip_indices = list(range(clip_start, clip_stop, num_indices_between_frames))
        if len(clip_indices) < num_frames_per_clip:
            # The clip was truncated: let the policy fill in the missing frames.
            clip_indices = policy_fun(clip_indices, num_frames_per_clip)  # type: ignore[assignment]
        all_clips_indices.extend(clip_indices)
    return all_clips_indices
118
+
119
+
120
def _generic_index_based_sampler(
    kind: Literal["random", "regular"],
    decoder: VideoDecoder,
    *,
    num_clips: int,
    num_frames_per_clip: int,
    num_indices_between_frames: int,
    sampling_range_start: int,
    sampling_range_end: Optional[int],  # interval is [start, end).
    # Important note: sampling_range_end defines the upper bound of where a clip
    # can *start*, not where a clip can end.
    policy: Literal["repeat_last", "wrap", "error"],
) -> FrameBatch:
    """Shared implementation of the index-based samplers.

    Validates the parameters, picks ``num_clips`` clip start indices within
    [``sampling_range_start``, ``sampling_range_end``) -- uniformly at random
    when ``kind == "random"``, equally spaced otherwise -- then fetches all
    the frames with ``decoder.get_frames_at`` and returns them grouped by clip.
    """
    _validate_common_params(
        decoder=decoder,
        num_frames_per_clip=num_frames_per_clip,
        policy=policy,
    )
    _validate_params_index_based(
        num_clips=num_clips,
        num_indices_between_frames=num_indices_between_frames,
    )

    num_frames_in_video = len(decoder)
    sampling_range_start, sampling_range_end = _validate_sampling_range_index_based(
        num_frames_per_clip=num_frames_per_clip,
        num_indices_between_frames=num_indices_between_frames,
        sampling_range_start=sampling_range_start,
        sampling_range_end=sampling_range_end,
        num_frames_in_video=num_frames_in_video,
    )

    if kind == "random":
        clip_start_indices = torch.randint(
            low=sampling_range_start, high=sampling_range_end, size=(num_clips,)
        )
    else:
        # Note [num clips larger than sampling range]
        # When more clips are requested than there are frames in the sampling
        # range or in the video, torch.linspace simply returns duplicated
        # indices, e.g. torch.linspace(0, 10, steps=20, dtype=torch.int) gives
        # 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 10
        # Wrapping around would be an alternative, but duplicates are closer to
        # the expected "equally spaced indices" sampling.
        last_start_index = sampling_range_end - 1  # the range end is exclusive
        clip_start_indices = torch.linspace(
            sampling_range_start,
            last_start_index,
            steps=num_clips,
            dtype=torch.int,
        )

    all_clips_indices = _build_all_clips_indices(
        clip_start_indices=clip_start_indices,
        num_frames_per_clip=num_frames_per_clip,
        num_indices_between_frames=num_indices_between_frames,
        num_frames_in_video=num_frames_in_video,
        policy_fun=_POLICY_FUNCTIONS[policy],
    )

    flat_frames = decoder.get_frames_at(indices=all_clips_indices)
    return _reshape_4d_framebatch_into_5d(
        frames=flat_frames,
        num_clips=num_clips,
        num_frames_per_clip=num_frames_per_clip,
    )
186
+
187
+
188
def clips_at_random_indices(
    decoder: VideoDecoder,
    *,
    num_clips: int = 1,
    num_frames_per_clip: int = 1,
    num_indices_between_frames: int = 1,
    sampling_range_start: int = 0,
    sampling_range_end: Optional[int] = None,  # interval is [start, end).
    policy: Literal["repeat_last", "wrap", "error"] = "repeat_last",
) -> FrameBatch:
    # The docstring is attached to this function at the bottom of the module.
    torch._C._log_api_usage_once("torchcodec.samplers.clips_at_random_indices")
    # Everything except the sampling kind is forwarded unchanged.
    sampler_kwargs = dict(
        num_clips=num_clips,
        num_frames_per_clip=num_frames_per_clip,
        num_indices_between_frames=num_indices_between_frames,
        sampling_range_start=sampling_range_start,
        sampling_range_end=sampling_range_end,
        policy=policy,
    )
    return _generic_index_based_sampler("random", decoder, **sampler_kwargs)
210
+
211
+
212
def clips_at_regular_indices(
    decoder: VideoDecoder,
    *,
    num_clips: int = 1,
    num_frames_per_clip: int = 1,
    num_indices_between_frames: int = 1,
    sampling_range_start: int = 0,
    sampling_range_end: Optional[int] = None,  # interval is [start, end).
    policy: Literal["repeat_last", "wrap", "error"] = "repeat_last",
) -> FrameBatch:
    # The docstring is attached to this function at the bottom of the module.
    torch._C._log_api_usage_once("torchcodec.samplers.clips_at_regular_indices")
    # Everything except the sampling kind is forwarded unchanged.
    sampler_kwargs = dict(
        num_clips=num_clips,
        num_frames_per_clip=num_frames_per_clip,
        num_indices_between_frames=num_indices_between_frames,
        sampling_range_start=sampling_range_start,
        sampling_range_end=sampling_range_end,
        policy=policy,
    )
    return _generic_index_based_sampler("regular", decoder, **sampler_kwargs)
234
+
235
+
236
# Docstring body shared by clips_at_random_indices and
# clips_at_regular_indices; only their one-line summaries differ.
_COMMON_DOCS = f"""
    Args:
        decoder (VideoDecoder): The :class:`~torchcodec.decoders.VideoDecoder`
            instance to sample clips from.
        num_clips (int, optional): The number of clips to return. Default: 1.
        num_frames_per_clip (int, optional): The number of frames per clip. Default: 1.
        num_indices_between_frames(int, optional): The number of indices between
            the frames *within* a clip. Default: 1, which means frames are
            consecutive. This is sometimes referred to as "dilation".
        sampling_range_start (int, optional): The start of the sampling range,
            which defines the first index that a clip may *start* at. Default:
            0, i.e. the start of the video.
        sampling_range_end (int or None, optional): The end of the sampling
            range, which defines the last index that a clip may *start* at. This
            value is exclusive, i.e. a clip may only start within
            [``sampling_range_start``, ``sampling_range_end``). If None
            (default), the value is set automatically such that the clips never
            span beyond the end of the video. For example if the last valid
            index in a video is 99 and the clips span 10 frames, the last index
            a clip may start at is 99 - 10 + 1 = 90, so this value is set to
            91. Negative values are accepted and are
            equivalent to ``len(video) + val``. When a clip spans beyond the end
            of the video, the ``policy`` parameter defines how to construct such
            clip.
        policy (str, optional): Defines how to construct clips that span beyond
            the end of the video. This is best described with an example:
            assuming the last valid index in a video is 99, and a clip was
            sampled to start at index 95, with ``num_frames_per_clip=5`` and
            ``num_indices_between_frames=2``, the indices of the frames in the
            clip are supposed to be [95, 97, 99, 101, 103]. But 101 and 103 are
            invalid indices, so the ``policy`` parameter defines how to replace
            those frames, with valid indices:

            - "repeat_last": repeats the last valid frame of the clip. We would
              get [95, 97, 99, 99, 99].
            - "wrap": wraps around to the beginning of the clip. We would get
              [95, 97, 99, 95, 97].
            - "error": raises an error.

            Default is "repeat_last". Note that when ``sampling_range_end=None``
            (default), this policy parameter is unlikely to be relevant.

    {_FRAMEBATCH_RETURN_DOCS}
"""
279
+
280
# Attach the public docstrings: a one-line summary followed by the shared
# argument/return documentation.
clips_at_random_indices.__doc__ = (
    f"Sample :term:`clips` at random indices.\n{_COMMON_DOCS}\n"
)


clips_at_regular_indices.__doc__ = (
    f"Sample :term:`clips` at regular (equally-spaced) indices.\n{_COMMON_DOCS}\n"
)