torchcodec 0.10.0__cp312-cp312-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. torchcodec/__init__.py +27 -0
  2. torchcodec/_core/AVIOContextHolder.cpp +60 -0
  3. torchcodec/_core/AVIOContextHolder.h +64 -0
  4. torchcodec/_core/AVIOFileLikeContext.cpp +98 -0
  5. torchcodec/_core/AVIOFileLikeContext.h +55 -0
  6. torchcodec/_core/AVIOTensorContext.cpp +130 -0
  7. torchcodec/_core/AVIOTensorContext.h +44 -0
  8. torchcodec/_core/BetaCudaDeviceInterface.cpp +849 -0
  9. torchcodec/_core/BetaCudaDeviceInterface.h +196 -0
  10. torchcodec/_core/CMakeLists.txt +295 -0
  11. torchcodec/_core/CUDACommon.cpp +330 -0
  12. torchcodec/_core/CUDACommon.h +51 -0
  13. torchcodec/_core/Cache.h +124 -0
  14. torchcodec/_core/CpuDeviceInterface.cpp +509 -0
  15. torchcodec/_core/CpuDeviceInterface.h +141 -0
  16. torchcodec/_core/CudaDeviceInterface.cpp +602 -0
  17. torchcodec/_core/CudaDeviceInterface.h +79 -0
  18. torchcodec/_core/DeviceInterface.cpp +117 -0
  19. torchcodec/_core/DeviceInterface.h +191 -0
  20. torchcodec/_core/Encoder.cpp +1054 -0
  21. torchcodec/_core/Encoder.h +192 -0
  22. torchcodec/_core/FFMPEGCommon.cpp +684 -0
  23. torchcodec/_core/FFMPEGCommon.h +314 -0
  24. torchcodec/_core/FilterGraph.cpp +159 -0
  25. torchcodec/_core/FilterGraph.h +59 -0
  26. torchcodec/_core/Frame.cpp +47 -0
  27. torchcodec/_core/Frame.h +72 -0
  28. torchcodec/_core/Metadata.cpp +124 -0
  29. torchcodec/_core/Metadata.h +92 -0
  30. torchcodec/_core/NVCUVIDRuntimeLoader.cpp +320 -0
  31. torchcodec/_core/NVCUVIDRuntimeLoader.h +14 -0
  32. torchcodec/_core/NVDECCache.cpp +60 -0
  33. torchcodec/_core/NVDECCache.h +102 -0
  34. torchcodec/_core/SingleStreamDecoder.cpp +1586 -0
  35. torchcodec/_core/SingleStreamDecoder.h +391 -0
  36. torchcodec/_core/StreamOptions.h +70 -0
  37. torchcodec/_core/Transform.cpp +128 -0
  38. torchcodec/_core/Transform.h +86 -0
  39. torchcodec/_core/ValidationUtils.cpp +35 -0
  40. torchcodec/_core/ValidationUtils.h +21 -0
  41. torchcodec/_core/__init__.py +46 -0
  42. torchcodec/_core/_metadata.py +262 -0
  43. torchcodec/_core/custom_ops.cpp +1090 -0
  44. torchcodec/_core/fetch_and_expose_non_gpl_ffmpeg_libs.cmake +169 -0
  45. torchcodec/_core/nvcuvid_include/cuviddec.h +1374 -0
  46. torchcodec/_core/nvcuvid_include/nvcuvid.h +610 -0
  47. torchcodec/_core/ops.py +605 -0
  48. torchcodec/_core/pybind_ops.cpp +50 -0
  49. torchcodec/_frame.py +146 -0
  50. torchcodec/_internally_replaced_utils.py +68 -0
  51. torchcodec/_samplers/__init__.py +7 -0
  52. torchcodec/_samplers/video_clip_sampler.py +419 -0
  53. torchcodec/decoders/__init__.py +12 -0
  54. torchcodec/decoders/_audio_decoder.py +185 -0
  55. torchcodec/decoders/_decoder_utils.py +113 -0
  56. torchcodec/decoders/_video_decoder.py +601 -0
  57. torchcodec/encoders/__init__.py +2 -0
  58. torchcodec/encoders/_audio_encoder.py +149 -0
  59. torchcodec/encoders/_video_encoder.py +196 -0
  60. torchcodec/libtorchcodec_core4.so +0 -0
  61. torchcodec/libtorchcodec_core5.so +0 -0
  62. torchcodec/libtorchcodec_core6.so +0 -0
  63. torchcodec/libtorchcodec_core7.so +0 -0
  64. torchcodec/libtorchcodec_core8.so +0 -0
  65. torchcodec/libtorchcodec_custom_ops4.so +0 -0
  66. torchcodec/libtorchcodec_custom_ops5.so +0 -0
  67. torchcodec/libtorchcodec_custom_ops6.so +0 -0
  68. torchcodec/libtorchcodec_custom_ops7.so +0 -0
  69. torchcodec/libtorchcodec_custom_ops8.so +0 -0
  70. torchcodec/libtorchcodec_pybind_ops4.so +0 -0
  71. torchcodec/libtorchcodec_pybind_ops5.so +0 -0
  72. torchcodec/libtorchcodec_pybind_ops6.so +0 -0
  73. torchcodec/libtorchcodec_pybind_ops7.so +0 -0
  74. torchcodec/libtorchcodec_pybind_ops8.so +0 -0
  75. torchcodec/samplers/__init__.py +2 -0
  76. torchcodec/samplers/_common.py +84 -0
  77. torchcodec/samplers/_index_based.py +287 -0
  78. torchcodec/samplers/_time_based.py +358 -0
  79. torchcodec/share/cmake/TorchCodec/TorchCodecConfig.cmake +76 -0
  80. torchcodec/share/cmake/TorchCodec/ffmpeg_versions.cmake +122 -0
  81. torchcodec/transforms/__init__.py +12 -0
  82. torchcodec/transforms/_decoder_transforms.py +375 -0
  83. torchcodec/version.py +2 -0
  84. torchcodec-0.10.0.dist-info/METADATA +286 -0
  85. torchcodec-0.10.0.dist-info/RECORD +88 -0
  86. torchcodec-0.10.0.dist-info/WHEEL +5 -0
  87. torchcodec-0.10.0.dist-info/licenses/LICENSE +28 -0
  88. torchcodec-0.10.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,76 @@
1
+ # FindTorchCodec
2
+ # --------------
3
+ #
4
+ # Finds the TorchCodec library
5
+ #
6
+ # This will define the following variables:
7
+ #
8
+ # TORCHCODEC_FOUND: True if the system has the TorchCodec library
9
+ # TORCHCODEC_VARIANTS: list of TorchCodec variants. A variant is a supported
10
+ # FFmpeg major version.
11
+ #
12
+ # and the following imported targets:
13
+ #
14
+ # torchcodec::ffmpeg${N}
15
+ # torchcodec::core${N}
16
+ #
17
+ # where N is a TorchCodec variant (FFmpeg major version) from
18
+ # TORCHCODEC_VARIANTS list.
19
+
20
include(FindPackageHandleStandardArgs)
include("${CMAKE_CURRENT_LIST_DIR}/ffmpeg_versions.cmake")

# This file is assumed to live at
# <install-prefix>/share/cmake/TorchCodec/TorchCodecConfig.cmake, so the install
# prefix is three directories above it. CMAKE_CURRENT_LIST_DIR already names the
# directory of the file being processed, so it is used as-is instead of being
# reassigned (it is a variable that CMake itself maintains).
get_filename_component(TORCHCODEC_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_DIR}/../../../" ABSOLUTE)

# Include directories. Quoted so that the path stays a single value.
set(TORCHCODEC_INCLUDE_DIRS "${TORCHCODEC_INSTALL_PREFIX}/_core")
# List of available variants; starts empty and is extended as targets are added.
set(TORCHCODEC_VARIANTS "")
30
+
31
# Create the imported torchcodec::core${ffmpeg_major_version} target for the
# TorchCodec core shared library of one variant.
#
# Arguments:
#   ffmpeg_major_version - FFmpeg major version identifying the variant.
#
# Preconditions (each is a fatal error when violated):
#   - torchcodec::ffmpeg${ffmpeg_major_version} is already defined.
#   - a torchcodec_core${ffmpeg_major_version} library is found directly in
#     TORCHCODEC_INSTALL_PREFIX (no default search paths are consulted).
#
# Side effect: appends the version to TORCHCODEC_VARIANTS in the caller's scope.
function(add_torchcodec_target ffmpeg_major_version)
  set(target "torchcodec::core${ffmpeg_major_version}")
  set(ffmpeg_target "torchcodec::ffmpeg${ffmpeg_major_version}")

  if (NOT TARGET "${ffmpeg_target}")
    message(FATAL_ERROR "torchcodec::ffmpeg${ffmpeg_major_version} target is not defined")
  endif()

  # find_library() does not search when its result variable is already set, and
  # a function sees the variables of the scope that called it. Use a
  # project-specific name and clear it first, so that a value left behind by the
  # consuming project can never be mistaken for the library location.
  unset(torchcodec_core_lib_path)
  find_library(torchcodec_core_lib_path "torchcodec_core${ffmpeg_major_version}"
    PATHS "${TORCHCODEC_INSTALL_PREFIX}" NO_CACHE NO_DEFAULT_PATH)
  if (NOT torchcodec_core_lib_path)
    message(FATAL_ERROR "torchcodec_core${ffmpeg_major_version} shared library is missing")
  endif()

  message("Adding ${target} target")
  add_library(${target} SHARED IMPORTED)
  # NOTE(review): add_dependencies() only records a dependency edge; it does not
  # propagate the FFmpeg include directories or libraries to consumers of
  # ${target}. Confirm that consumers are expected to link the FFmpeg target
  # themselves.
  add_dependencies(${target} ${ffmpeg_target})
  set_target_properties(${target} PROPERTIES
    INTERFACE_INCLUDE_DIRECTORIES "${TORCHCODEC_INCLUDE_DIRS}"
    IMPORTED_LOCATION "${torchcodec_core_lib_path}"
  )

  list(APPEND TORCHCODEC_VARIANTS "${ffmpeg_major_version}")
  set(TORCHCODEC_VARIANTS "${TORCHCODEC_VARIANTS}" PARENT_SCOPE)
endfunction()
55
+
56
# FFmpeg discovery.
#
# Every supported FFmpeg major version N is probed for a
# TORCHCODEC_FFMPEG${N}_INSTALL_PREFIX environment variable. Each one that is
# defined yields a torchcodec::ffmpeg${N} / torchcodec::core${N} pair located
# through that prefix. pkg-config is only consulted when none of those
# environment variables is defined.
set(use_pkg_config TRUE)
foreach(ffmpeg_major_version IN LISTS TORCHCODEC_SUPPORTED_FFMPEG_VERSIONS)
  if (NOT DEFINED ENV{TORCHCODEC_FFMPEG${ffmpeg_major_version}_INSTALL_PREFIX})
    # No explicit prefix for this version: nothing to add.
    continue()
  endif()

  add_ffmpeg_target("${ffmpeg_major_version}" "$ENV{TORCHCODEC_FFMPEG${ffmpeg_major_version}_INSTALL_PREFIX}")
  add_torchcodec_target("${ffmpeg_major_version}")
  set(use_pkg_config FALSE)
endforeach()

if (use_pkg_config)
  # The helper stores the detected FFmpeg major version in the variable whose
  # name it is given.
  add_ffmpeg_target_with_pkg_config(ffmpeg_major_version)
  add_torchcodec_target("${ffmpeg_major_version}")
endif()

# The package counts as found when at least one variant was added.
find_package_handle_standard_args(TorchCodec DEFAULT_MSG TORCHCODEC_VARIANTS)
@@ -0,0 +1,122 @@
1
+ # This file exposes helpers to create and expose FFmpeg targets as torchcodec::ffmpeg${N}
2
+ # where N is the FFmpeg major version.
3
+
4
# FFmpeg major versions that TorchCodec is able to support. This is NOT the
# list of FFmpeg versions installed on the current system.
set(TORCHCODEC_SUPPORTED_FFMPEG_VERSIONS 4 5 6 7 8)
7
+
8
# Create and expose the torchcodec::ffmpeg${ffmpeg_major_version} INTERFACE
# IMPORTED target, which can then be used as a dependency of other targets.
#
# Arguments:
#   ffmpeg_major_version - FFmpeg major version; must be one of
#                          TORCHCODEC_SUPPORTED_FFMPEG_VERSIONS.
#   prefix               - path to the FFmpeg installation. Headers are taken
#                          from `${prefix}/include`; libraries from
#                          `${prefix}/lib` on Linux and macOS, and from
#                          `${prefix}/bin` on Windows.
#
# Fatal errors: unsupported version, empty prefix, unsupported operating
# system, or a missing include directory / library file.
function(add_ffmpeg_target ffmpeg_major_version prefix)
  # Check that given ffmpeg major version is something we support and error out
  # if it's not.
  list(FIND TORCHCODEC_SUPPORTED_FFMPEG_VERSIONS "${ffmpeg_major_version}" _index)
  if (_index LESS 0)
    message(FATAL_ERROR "FFmpeg version ${ffmpeg_major_version} is not supported")
  endif()
  # `prefix` is a named parameter and therefore always DEFINED; the case that
  # has to be rejected is an empty value (e.g. an environment variable that is
  # set but empty), which would otherwise resolve to `/include` and `/lib`.
  if ("${prefix}" STREQUAL "")
    message(FATAL_ERROR "No prefix defined calling add_ffmpeg_target()")
  endif()

  # The LINUX variable is only provided by CMake 3.25 and newer, so also derive
  # the answer from CMAKE_SYSTEM_NAME to keep older CMake releases working.
  if (LINUX OR CMAKE_SYSTEM_NAME STREQUAL "Linux")
    set(is_linux TRUE)
  else()
    set(is_linux FALSE)
  endif()

  # Define the library directory and library file names based on platform and
  # FFmpeg version.
  if (is_linux)
    set(lib_dir "${prefix}/lib")
    if (ffmpeg_major_version EQUAL 4)
      set(library_file_names libavutil.so.56 libavcodec.so.58 libavformat.so.58 libavdevice.so.58 libavfilter.so.7 libswscale.so.5 libswresample.so.3)
    elseif (ffmpeg_major_version EQUAL 5)
      set(library_file_names libavutil.so.57 libavcodec.so.59 libavformat.so.59 libavdevice.so.59 libavfilter.so.8 libswscale.so.6 libswresample.so.4)
    elseif (ffmpeg_major_version EQUAL 6)
      set(library_file_names libavutil.so.58 libavcodec.so.60 libavformat.so.60 libavdevice.so.60 libavfilter.so.9 libswscale.so.7 libswresample.so.4)
    elseif (ffmpeg_major_version EQUAL 7)
      set(library_file_names libavutil.so.59 libavcodec.so.61 libavformat.so.61 libavdevice.so.61 libavfilter.so.10 libswscale.so.8 libswresample.so.5)
    elseif (ffmpeg_major_version EQUAL 8)
      set(library_file_names libavutil.so.60 libavcodec.so.62 libavformat.so.62 libavdevice.so.62 libavfilter.so.11 libswscale.so.9 libswresample.so.6)
    endif()
  elseif (APPLE)
    set(lib_dir "${prefix}/lib")
    if (ffmpeg_major_version EQUAL 4)
      set(library_file_names libavutil.56.dylib libavcodec.58.dylib libavformat.58.dylib libavdevice.58.dylib libavfilter.7.dylib libswscale.5.dylib libswresample.3.dylib)
    elseif (ffmpeg_major_version EQUAL 5)
      set(library_file_names libavutil.57.dylib libavcodec.59.dylib libavformat.59.dylib libavdevice.59.dylib libavfilter.8.dylib libswscale.6.dylib libswresample.4.dylib)
    elseif (ffmpeg_major_version EQUAL 6)
      set(library_file_names libavutil.58.dylib libavcodec.60.dylib libavformat.60.dylib libavdevice.60.dylib libavfilter.9.dylib libswscale.7.dylib libswresample.4.dylib)
    elseif (ffmpeg_major_version EQUAL 7)
      set(library_file_names libavutil.59.dylib libavcodec.61.dylib libavformat.61.dylib libavdevice.61.dylib libavfilter.10.dylib libswscale.8.dylib libswresample.5.dylib)
    elseif (ffmpeg_major_version EQUAL 8)
      set(library_file_names libavutil.60.dylib libavcodec.62.dylib libavformat.62.dylib libavdevice.62.dylib libavfilter.11.dylib libswscale.9.dylib libswresample.6.dylib)
    endif()
  elseif (WIN32)
    set(lib_dir "${prefix}/bin")
    set(library_file_names avutil.lib avcodec.lib avformat.lib avdevice.lib avfilter.lib swscale.lib swresample.lib)
  else()
    message(FATAL_ERROR "Unsupported operating system: ${CMAKE_SYSTEM_NAME}")
  endif()

  set(target "torchcodec::ffmpeg${ffmpeg_major_version}")
  set(include_dir "${prefix}/include")

  list(
    TRANSFORM library_file_names
    PREPEND "${lib_dir}/"
    OUTPUT_VARIABLE lib_paths
  )

  message("Adding ${target} target")
  # Verify that ffmpeg includes and libraries actually exist.
  foreach (path IN LISTS include_dir lib_paths)
    if (NOT EXISTS "${path}")
      message(FATAL_ERROR "${path} does not exist")
    endif()
  endforeach()

  # Actually define the target
  add_library(${target} INTERFACE IMPORTED)
  target_include_directories(${target} INTERFACE "${include_dir}")
  target_link_libraries(${target} INTERFACE ${lib_paths})
endfunction()
83
+
84
# Create and expose the torchcodec::ffmpeg${ffmpeg_major_version} target which
# can then be used as a dependency in other targets. The FFmpeg installation is
# found by pkg-config, and the FFmpeg major version is derived from the major
# version of libavcodec.
#
# Arguments:
#   ret_ffmpeg_major_version_var - name of the variable, in the caller's scope,
#                                  that receives the detected FFmpeg major
#                                  version.
#
# Fatal errors: pkg-config or any of the FFmpeg modules is missing, or the
# libavcodec major version does not map to a supported FFmpeg version.
function(add_ffmpeg_target_with_pkg_config ret_ffmpeg_major_version_var)
  find_package(PkgConfig REQUIRED)
  pkg_check_modules(TORCHCODEC_LIBAV REQUIRED IMPORTED_TARGET
    libavdevice
    libavfilter
    libavformat
    libavcodec
    libavutil
    libswresample
    libswscale
  )

  # TORCHCODEC_LIBAV_libavcodec_VERSION is made available by pkg-config. Take
  # everything before the first '.' as the major version. The expansion is
  # quoted and no list indexing is involved, so an empty or odd version string
  # reaches the "Unsupported libavcodec version" error below instead of
  # aborting with a CMake syntax error.
  string(REGEX MATCH "^[^.]*" libavcodec_major_version "${TORCHCODEC_LIBAV_libavcodec_VERSION}")

  if ("${libavcodec_major_version}" STREQUAL "58")
    set(ffmpeg_major_version "4")
  elseif ("${libavcodec_major_version}" STREQUAL "59")
    set(ffmpeg_major_version "5")
  elseif ("${libavcodec_major_version}" STREQUAL "60")
    set(ffmpeg_major_version "6")
  elseif ("${libavcodec_major_version}" STREQUAL "61")
    set(ffmpeg_major_version "7")
  elseif ("${libavcodec_major_version}" STREQUAL "62")
    set(ffmpeg_major_version "8")
  else()
    message(FATAL_ERROR "Unsupported libavcodec version: ${libavcodec_major_version}")
  endif()

  message("Adding torchcodec::ffmpeg${ffmpeg_major_version} target")
  add_library(torchcodec::ffmpeg${ffmpeg_major_version} ALIAS PkgConfig::TORCHCODEC_LIBAV)
  set(${ret_ffmpeg_major_version_var} "${ffmpeg_major_version}" PARENT_SCOPE)
endfunction()
@@ -0,0 +1,12 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ from ._decoder_transforms import ( # noqa
8
+ CenterCrop,
9
+ DecoderTransform,
10
+ RandomCrop,
11
+ Resize,
12
+ )
@@ -0,0 +1,375 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+
8
+ from abc import ABC, abstractmethod
9
+ from collections.abc import Sequence
10
+ from types import ModuleType
11
+
12
+ import torch
13
+ from torch import nn
14
+
15
+
16
+ class DecoderTransform(ABC):
17
+ """Base class for all decoder transforms.
18
+
19
+ A *decoder transform* is a transform that is applied by the decoder before
20
+ returning the decoded frame. Applying decoder transforms to frames
21
+ should be both faster and more memory efficient than receiving normally
22
+ decoded frames and applying the same kind of transform.
23
+
24
+ Most ``DecoderTransform`` objects have a complementary transform in TorchVision,
25
+ specificially in `torchvision.transforms.v2 <https://docs.pytorch.org/vision/stable/transforms.html>`_.
26
+ For such transforms, we ensure that:
27
+
28
+ 1. The names are the same.
29
+ 2. Default behaviors are the same.
30
+ 3. The parameters for the ``DecoderTransform`` object are a subset of the
31
+ TorchVision :class:`~torchvision.transforms.v2.Transform` object.
32
+ 4. Parameters with the same name control the same behavior and accept a
33
+ subset of the same types.
34
+ 5. The difference between the frames returned by a decoder transform and
35
+ the complementary TorchVision transform are such that a model should
36
+ not be able to tell the difference.
37
+ """
38
+
39
+ @abstractmethod
40
+ def _make_transform_spec(self, input_dims: tuple[int | None, int | None]) -> str:
41
+ """Makes the transform spec that is used by the `VideoDecoder`.
42
+
43
+ Args:
44
+ input_dims (tuple[int | None, int | None]): The dimensions of
45
+ the input frame in the form (height, width). We cannot know the
46
+ dimensions at object construction time because it's dependent on
47
+ the video being decoded and upstream transforms in the same
48
+ transform pipeline. Not all transforms need to know this; those
49
+ that don't will ignore it. The individual values in the tuple are
50
+ optional because the original values come from file metadata which
51
+ may be missing. We maintain the optionality throughout the APIs so
52
+ that we can decide as late as possible that it's necessary for the
53
+ values to exist. That is, if the values are missing from the
54
+ metadata and we have transforms which ignore the input dimensions,
55
+ we want that to still work.
56
+
57
+ Note: This method is the moral equivalent of TorchVision's
58
+ `Transform.make_params()`.
59
+
60
+ Returns:
61
+ str: A string which contains the spec for the transform that the
62
+ `VideoDecoder` knows what to do with.
63
+ """
64
+ pass
65
+
66
+ def _get_output_dims(self) -> tuple[int | None, int | None] | None:
67
+ """Get the dimensions of the output frame.
68
+
69
+ Transforms that change the frame dimensions need to override this
70
+ method. Transforms that don't change the frame dimensions can rely on
71
+ this default implementation.
72
+
73
+ Returns:
74
+ tuple[int | None, int | None] | None: The output dimensions.
75
+ - None: The output dimensions are the same as the input dimensions.
76
+ - (int, int): The (height, width) of the output frame.
77
+ """
78
+ return None
79
+
80
+
81
def import_torchvision_transforms_v2() -> ModuleType:
    """Import ``torchvision.transforms.v2`` on demand and return the module.

    Raises:
        RuntimeError: If the import fails; the original ``ImportError`` is
            chained as the cause.
    """
    try:
        from torchvision.transforms import v2 as tv_v2
    except ImportError as e:
        raise RuntimeError(
            "Cannot import TorchVision; this should never happen, please report a bug."
        ) from e
    else:
        return tv_v2
89
+
90
+
91
class Resize(DecoderTransform):
    """Resize the decoded frame to a given size.

    Complementary TorchVision transform: :class:`~torchvision.transforms.v2.Resize`.
    Interpolation is always bilinear. Anti-aliasing is always on.

    Args:
        size (Sequence[int]): Desired output size. Must be a sequence of
            the form (height, width).
    """

    def __init__(self, size: Sequence[int]):
        if len(size) != 2:
            raise ValueError(
                "Resize transform must have a (height, width) "
                f"pair for the size, got {size}."
            )
        self.size = size

    def _make_transform_spec(self, input_dims: tuple[int | None, int | None]) -> str:
        # The target size is fixed at construction time, so the input
        # dimensions are not needed.
        return f"resize, {self.size[0]}, {self.size[1]}"

    def _get_output_dims(self) -> tuple[int | None, int | None] | None:
        return (self.size[0], self.size[1])

    @classmethod
    def _from_torchvision(cls, tv_resize: nn.Module):
        """Build a ``Resize`` from a TorchVision v2 ``Resize`` object.

        Args:
            tv_resize (nn.Module): The TorchVision transform to convert.

        Returns:
            Resize: A decoder transform with the same size.

        Raises:
            ValueError: If ``tv_resize`` is not a TorchVision ``Resize``, does
                not use bilinear interpolation, has antialias disabled, has no
                size, or has a size that is not a (height, width) pair.
        """
        v2 = import_torchvision_transforms_v2()

        # Raise instead of using `assert`: asserts are removed when Python runs
        # with -O, and the sibling transforms report this condition the same way.
        if not isinstance(tv_resize, v2.Resize):
            raise ValueError(
                "Transform must be TorchVision's Resize, "
                f"it is instead {type(tv_resize).__name__}. "
                "This should never happen, please report a bug."
            )

        if tv_resize.interpolation is not v2.InterpolationMode.BILINEAR:
            raise ValueError(
                "TorchVision Resize transform must use bilinear interpolation."
            )
        if tv_resize.antialias is False:
            raise ValueError(
                "TorchVision Resize transform must have antialias enabled."
            )
        if tv_resize.size is None:
            raise ValueError("TorchVision Resize transform must have a size specified.")
        if len(tv_resize.size) != 2:
            raise ValueError(
                "TorchVision Resize transform must have a (height, width) "
                f"pair for the size, got {tv_resize.size}."
            )
        return cls(size=tv_resize.size)
138
+
139
+
140
class CenterCrop(DecoderTransform):
    """Crop the decoded frame to a given size in the center of the frame.

    Complementary TorchVision transform: :class:`~torchvision.transforms.v2.CenterCrop`.

    Args:
        size (Sequence[int]): Desired output size. Must be a sequence of
            the form (height, width).
    """

    def __init__(self, size: Sequence[int]):
        if len(size) == 2:
            self.size = size
            return
        raise ValueError(
            "CenterCrop transform must have a (height, width) "
            f"pair for the size, got {size}."
        )

    def _make_transform_spec(self, input_dims: tuple[int | None, int | None]) -> str:
        # Only the requested size goes into the spec; the input dimensions are
        # not used here.
        crop_height = self.size[0]
        crop_width = self.size[1]
        return f"center_crop, {crop_height}, {crop_width}"

    def _get_output_dims(self) -> tuple[int | None, int | None] | None:
        crop_height = self.size[0]
        crop_width = self.size[1]
        return (crop_height, crop_width)

    @classmethod
    def _from_torchvision(
        cls,
        tv_center_crop: nn.Module,
    ):
        """Build a ``CenterCrop`` from a TorchVision v2 ``CenterCrop`` object.

        Raises:
            ValueError: If ``tv_center_crop`` is not a TorchVision
                ``CenterCrop`` or its size is not a (height, width) pair.
        """
        tv_transforms = import_torchvision_transforms_v2()

        if not isinstance(tv_center_crop, tv_transforms.CenterCrop):
            raise ValueError(
                "Transform must be TorchVision's CenterCrop, "
                f"it is instead {type(tv_center_crop).__name__}. "
                "This should never happen, please report a bug."
            )

        requested_size = tv_center_crop.size
        if len(requested_size) != 2:
            raise ValueError(
                "TorchVision CenterCrop transform must have a (height, width) "
                f"pair for the size, got {tv_center_crop.size}."
            )

        return cls(size=requested_size)
185
+
186
+
187
class RandomCrop(DecoderTransform):
    """Crop the decoded frame to a given size at a random location in the frame.

    Complementary TorchVision transform: :class:`~torchvision.transforms.v2.RandomCrop`.
    Padding of all kinds is disabled. The random location within the frame is
    determined during the initialization of the
    :class:`~torchcodec.decoders.VideoDecoder` object that owns this transform.
    As a consequence, each decoded frame in the video will be cropped at the
    same location. Videos with variable resolution may result in undefined
    behavior.

    Args:
        size (Sequence[int]): Desired output size. Must be a sequence of
            the form (height, width).
    """

    def __init__(self, size: Sequence[int]):
        if len(size) != 2:
            raise ValueError(
                "RandomCrop transform must have a (height, width) "
                f"pair for the size, got {size}."
            )
        self.size = size

    def _make_transform_spec(self, input_dims: tuple[int | None, int | None]) -> str:
        """Pick the crop location and build the crop spec.

        Args:
            input_dims (tuple[int | None, int | None]): The (height, width) of
                the input frame. Both values are required by this transform.

        Returns:
            str: ``"crop, <height>, <width>, <left>, <top>"``.

        Raises:
            ValueError: If the height or width is missing, or if the input
                frame is smaller than the crop size.
        """
        height, width = input_dims
        if height is None:
            raise ValueError(
                "Video metadata has no height. "
                "RandomCrop can only be used when input frame dimensions are known."
            )
        if width is None:
            raise ValueError(
                "Video metadata has no width. "
                "RandomCrop can only be used when input frame dimensions are known."
            )

        # Note: This logic below must match the logic in
        # torchvision.transforms.v2.RandomCrop.make_params(). Given
        # the same seed, they should get the same result. This is an
        # API guarantee with our users.
        if height < self.size[0] or width < self.size[1]:
            raise ValueError(
                f"Input dimensions {input_dims} are smaller than the crop size {self.size}."
            )

        # `top` is drawn before `left`; the order of the two draws determines
        # which random numbers each one receives, so it must not be swapped.
        top = int(torch.randint(0, height - self.size[0] + 1, size=()).item())
        left = int(torch.randint(0, width - self.size[1] + 1, size=()).item())

        return f"crop, {self.size[0]}, {self.size[1]}, {left}, {top}"

    def _get_output_dims(self) -> tuple[int | None, int | None] | None:
        return (self.size[0], self.size[1])

    @classmethod
    def _from_torchvision(
        cls,
        tv_random_crop: nn.Module,
    ):
        """Build a ``RandomCrop`` from a TorchVision v2 ``RandomCrop`` object.

        Raises:
            ValueError: If ``tv_random_crop`` is not a TorchVision
                ``RandomCrop``, configures any kind of padding (``padding``,
                ``pad_if_needed``, a non-zero ``fill``, or a ``padding_mode``
                other than ``"constant"``), or has a size that is not a
                (height, width) pair.
        """
        v2 = import_torchvision_transforms_v2()

        if not isinstance(tv_random_crop, v2.RandomCrop):
            raise ValueError(
                "Transform must be TorchVision's RandomCrop, "
                f"it is instead {type(tv_random_crop).__name__}. "
                "This should never happen, please report a bug."
            )

        if tv_random_crop.padding is not None:
            raise ValueError(
                "TorchVision RandomCrop transform must not specify padding."
            )

        if tv_random_crop.pad_if_needed is True:
            raise ValueError(
                "TorchVision RandomCrop transform must not specify pad_if_needed."
            )

        if tv_random_crop.fill != 0:
            raise ValueError("TorchVision RandomCrop fill must be 0.")

        if tv_random_crop.padding_mode != "constant":
            raise ValueError("TorchVision RandomCrop padding_mode must be constant.")

        if len(tv_random_crop.size) != 2:
            raise ValueError(
                "TorchVision RandomCrop transform must have a (height, width) "
                f"pair for the size, got {tv_random_crop.size}."
            )

        return cls(size=tv_random_crop.size)
278
+
279
+
280
def _make_transform_specs(
    transforms: Sequence[DecoderTransform | nn.Module] | None,
    input_dims: tuple[int | None, int | None],
) -> str:
    """Given a sequence of transforms, turn those into the specification string
       the core API expects.

    Args:
        transforms: Optional sequence of transform objects. The objects can be
            one of two types:
                1. torchcodec.transforms.DecoderTransform
                2. torchvision.transforms.v2.Transform, but our type annotation
                   only mentions its base, nn.Module. We don't want to take a
                   hard dependency on TorchVision.
        input_dims: Optional (height, width) pair. Note that only some
            transforms need to know the dimensions. If the user provides
            transforms that don't need to know the dimensions, and that metadata
            is missing, everything should still work. That means we assert their
            existence as late as possible.

    Returns:
        String of transforms in the format the core API expects: transform
        specifications separated by semicolons. Empty when ``transforms`` is
        None.

    Raises:
        ValueError: If a transform is not a DecoderTransform and either
            TorchVision is not installed or the transform is not one of the
            supported TorchVision v2 transforms (Resize, CenterCrop,
            RandomCrop).
    """
    if transforms is None:
        return ""

    try:
        from torchvision.transforms import v2

        tv_available = True
    except ImportError:
        tv_available = False

    # The following loop accomplishes two tasks:
    #
    #   1. Converts the transform to a DecoderTransform, if necessary. We
    #      accept TorchVision transform objects and they must be converted
    #      to their matching DecoderTransform.
    #   2. Calculates what the input dimensions are to each transform.
    #
    # The order in our transforms list is semantically meaningful, as we
    # actually have a pipeline where the output of one transform is the input to
    # the next. For example, if we have the transforms list [A, B, C, D], then
    # we should understand that as:
    #
    #     A -> B -> C -> D
    #
    # Where the frame produced by A is the input to B, the frame produced by B
    # is the input to C, etc. This particularly matters for frame dimensions.
    # Transforms can both:
    #
    #   1. Produce frames with arbitrary dimensions.
    #   2. Rely on their input frame's dimensions to calculate ahead-of-time
    #      what their runtime behavior will be.
    #
    # The consequence of the above facts is that we need to statically track
    # frame dimensions in the pipeline while we pre-process it. The input
    # frame's dimensions to A, our first transform, is always what we know from
    # our metadata. For each transform, we always calculate its output
    # dimensions from its input dimensions. We store these with the converted
    # transform, to be all used together when we generate the specs.
    converted_transforms: list[
        tuple[
            DecoderTransform,
            # A (height, width) pair where the values may be missing.
            tuple[int | None, int | None],
        ]
    ] = []
    curr_input_dims = input_dims
    for transform in transforms:
        if not isinstance(transform, DecoderTransform):
            if not tv_available:
                raise ValueError(
                    f"The supplied transform, {transform}, is not a TorchCodec "
                    "DecoderTransform. TorchCodec also accepts TorchVision "
                    "v2 transforms, but TorchVision is not installed."
                )
            elif isinstance(transform, v2.Resize):
                transform = Resize._from_torchvision(transform)
            elif isinstance(transform, v2.CenterCrop):
                transform = CenterCrop._from_torchvision(transform)
            elif isinstance(transform, v2.RandomCrop):
                transform = RandomCrop._from_torchvision(transform)
            else:
                raise ValueError(
                    f"Unsupported transform: {transform}. Transforms must be "
                    "either a TorchCodec DecoderTransform or a TorchVision "
                    "v2 transform."
                )

        converted_transforms.append((transform, curr_input_dims))
        output_dims = transform._get_output_dims()
        curr_input_dims = output_dims if output_dims is not None else curr_input_dims

    # The specs are generated only after every transform has been converted,
    # so an unsupported transform is reported before any spec is built.
    return ";".join([t._make_transform_spec(dims) for t, dims in converted_transforms])
torchcodec/version.py ADDED
@@ -0,0 +1,2 @@
1
+ # Note that this file is generated during install.
2
+ __version__ = '0.10.0'