transformers-5.0.0rc1-py3-none-any.whl → transformers-5.0.0rc2-py3-none-any.whl

This diff covers publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
Files changed (671)
  1. transformers/__init__.py +20 -1
  2. transformers/activations.py +1 -1
  3. transformers/audio_utils.py +0 -1
  4. transformers/cache_utils.py +17 -15
  5. transformers/configuration_utils.py +114 -70
  6. transformers/conversion_mapping.py +68 -5
  7. transformers/core_model_loading.py +201 -35
  8. transformers/dependency_versions_table.py +1 -1
  9. transformers/feature_extraction_utils.py +54 -22
  10. transformers/generation/candidate_generator.py +79 -31
  11. transformers/generation/configuration_utils.py +162 -122
  12. transformers/generation/continuous_batching/cache.py +47 -18
  13. transformers/generation/continuous_batching/cache_manager.py +131 -34
  14. transformers/generation/continuous_batching/continuous_api.py +101 -64
  15. transformers/generation/continuous_batching/requests.py +28 -1
  16. transformers/generation/continuous_batching/scheduler.py +11 -4
  17. transformers/generation/stopping_criteria.py +1 -1
  18. transformers/generation/utils.py +108 -110
  19. transformers/generation/watermarking.py +8 -5
  20. transformers/image_processing_base.py +2 -12
  21. transformers/image_processing_utils_fast.py +15 -4
  22. transformers/initialization.py +37 -0
  23. transformers/integrations/__init__.py +12 -0
  24. transformers/integrations/accelerate.py +44 -111
  25. transformers/integrations/aqlm.py +3 -5
  26. transformers/integrations/awq.py +2 -5
  27. transformers/integrations/bitnet.py +5 -8
  28. transformers/integrations/bitsandbytes.py +16 -15
  29. transformers/integrations/deepspeed.py +18 -3
  30. transformers/integrations/eetq.py +3 -5
  31. transformers/integrations/fbgemm_fp8.py +1 -1
  32. transformers/integrations/finegrained_fp8.py +6 -16
  33. transformers/integrations/flash_attention.py +2 -2
  34. transformers/integrations/higgs.py +2 -5
  35. transformers/integrations/hub_kernels.py +23 -5
  36. transformers/integrations/integration_utils.py +35 -0
  37. transformers/integrations/mistral.py +12 -0
  38. transformers/integrations/moe.py +240 -0
  39. transformers/integrations/mxfp4.py +4 -10
  40. transformers/integrations/peft.py +5 -0
  41. transformers/integrations/quanto.py +5 -2
  42. transformers/integrations/spqr.py +3 -5
  43. transformers/integrations/tensor_parallel.py +167 -221
  44. transformers/integrations/vptq.py +3 -5
  45. transformers/modeling_gguf_pytorch_utils.py +66 -19
  46. transformers/modeling_rope_utils.py +78 -81
  47. transformers/modeling_utils.py +583 -503
  48. transformers/models/__init__.py +19 -0
  49. transformers/models/afmoe/modeling_afmoe.py +7 -16
  50. transformers/models/afmoe/modular_afmoe.py +5 -13
  51. transformers/models/aimv2/modeling_aimv2.py +4 -0
  52. transformers/models/aimv2/modular_aimv2.py +4 -0
  53. transformers/models/albert/modeling_albert.py +3 -0
  54. transformers/models/align/modeling_align.py +12 -6
  55. transformers/models/altclip/modeling_altclip.py +7 -3
  56. transformers/models/apertus/modeling_apertus.py +4 -2
  57. transformers/models/apertus/modular_apertus.py +4 -1
  58. transformers/models/arcee/modeling_arcee.py +1 -1
  59. transformers/models/aria/modeling_aria.py +8 -4
  60. transformers/models/aria/modular_aria.py +7 -3
  61. transformers/models/audioflamingo3/processing_audioflamingo3.py +27 -22
  62. transformers/models/auto/auto_factory.py +1 -1
  63. transformers/models/auto/configuration_auto.py +27 -0
  64. transformers/models/auto/feature_extraction_auto.py +7 -3
  65. transformers/models/auto/image_processing_auto.py +4 -2
  66. transformers/models/auto/modeling_auto.py +31 -0
  67. transformers/models/auto/processing_auto.py +4 -0
  68. transformers/models/auto/tokenization_auto.py +132 -153
  69. transformers/models/auto/video_processing_auto.py +5 -2
  70. transformers/models/aya_vision/modeling_aya_vision.py +7 -3
  71. transformers/models/bamba/modeling_bamba.py +18 -19
  72. transformers/models/bamba/modular_bamba.py +17 -16
  73. transformers/models/bark/modeling_bark.py +9 -0
  74. transformers/models/bart/configuration_bart.py +0 -1
  75. transformers/models/bart/modeling_bart.py +7 -0
  76. transformers/models/beit/image_processing_beit_fast.py +0 -1
  77. transformers/models/bert/modeling_bert.py +3 -0
  78. transformers/models/bert_generation/modeling_bert_generation.py +2 -0
  79. transformers/models/big_bird/modeling_big_bird.py +3 -0
  80. transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +7 -0
  81. transformers/models/bit/modeling_bit.py +5 -1
  82. transformers/models/bitnet/modeling_bitnet.py +1 -1
  83. transformers/models/blenderbot/modeling_blenderbot.py +7 -0
  84. transformers/models/blenderbot/tokenization_blenderbot.py +6 -7
  85. transformers/models/blenderbot_small/modeling_blenderbot_small.py +7 -0
  86. transformers/models/blip/modeling_blip.py +2 -0
  87. transformers/models/blip/modeling_blip_text.py +8 -0
  88. transformers/models/blip_2/modeling_blip_2.py +2 -0
  89. transformers/models/bloom/modeling_bloom.py +13 -44
  90. transformers/models/blt/modeling_blt.py +162 -2
  91. transformers/models/blt/modular_blt.py +168 -3
  92. transformers/models/bridgetower/image_processing_bridgetower_fast.py +0 -2
  93. transformers/models/bridgetower/modeling_bridgetower.py +6 -0
  94. transformers/models/bros/modeling_bros.py +8 -0
  95. transformers/models/camembert/modeling_camembert.py +109 -106
  96. transformers/models/canine/modeling_canine.py +6 -0
  97. transformers/models/canine/tokenization_canine.py +2 -0
  98. transformers/models/chameleon/modeling_chameleon.py +9 -4
  99. transformers/models/chinese_clip/modeling_chinese_clip.py +6 -3
  100. transformers/models/clap/feature_extraction_clap.py +2 -2
  101. transformers/models/clap/modeling_clap.py +25 -15
  102. transformers/models/clip/modeling_clip.py +2 -0
  103. transformers/models/clipseg/modeling_clipseg.py +4 -0
  104. transformers/models/clvp/modeling_clvp.py +14 -3
  105. transformers/models/code_llama/tokenization_code_llama.py +1 -1
  106. transformers/models/codegen/modeling_codegen.py +13 -4
  107. transformers/models/cohere/modeling_cohere.py +1 -1
  108. transformers/models/cohere2/modeling_cohere2.py +1 -1
  109. transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +0 -1
  110. transformers/models/cohere2_vision/modeling_cohere2_vision.py +7 -3
  111. transformers/models/conditional_detr/configuration_conditional_detr.py +1 -1
  112. transformers/models/conditional_detr/modeling_conditional_detr.py +4 -1
  113. transformers/models/convbert/modeling_convbert.py +3 -0
  114. transformers/models/convnext/image_processing_convnext.py +2 -2
  115. transformers/models/convnext/image_processing_convnext_fast.py +9 -13
  116. transformers/models/csm/generation_csm.py +19 -22
  117. transformers/models/csm/modeling_csm.py +3 -1
  118. transformers/models/csm/modular_csm.py +2 -0
  119. transformers/models/ctrl/modeling_ctrl.py +14 -2
  120. transformers/models/cvt/modeling_cvt.py +5 -1
  121. transformers/models/cwm/modeling_cwm.py +1 -1
  122. transformers/models/d_fine/configuration_d_fine.py +3 -4
  123. transformers/models/d_fine/modeling_d_fine.py +46 -39
  124. transformers/models/d_fine/modular_d_fine.py +15 -4
  125. transformers/models/dab_detr/configuration_dab_detr.py +2 -2
  126. transformers/models/dab_detr/modeling_dab_detr.py +1 -1
  127. transformers/models/dac/modeling_dac.py +4 -4
  128. transformers/models/data2vec/modeling_data2vec_text.py +7 -0
  129. transformers/models/data2vec/modular_data2vec_text.py +7 -0
  130. transformers/models/dbrx/configuration_dbrx.py +9 -1
  131. transformers/models/dbrx/modeling_dbrx.py +1 -1
  132. transformers/models/deberta/modeling_deberta.py +2 -0
  133. transformers/models/deberta_v2/modeling_deberta_v2.py +2 -0
  134. transformers/models/decision_transformer/modeling_decision_transformer.py +8 -5
  135. transformers/models/deepseek_v2/modeling_deepseek_v2.py +7 -4
  136. transformers/models/deepseek_v2/modular_deepseek_v2.py +4 -2
  137. transformers/models/deepseek_v3/modeling_deepseek_v3.py +9 -5
  138. transformers/models/deepseek_v3/modular_deepseek_v3.py +6 -2
  139. transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py +0 -1
  140. transformers/models/deepseek_vl/modeling_deepseek_vl.py +9 -5
  141. transformers/models/deepseek_vl/modular_deepseek_vl.py +3 -0
  142. transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py +0 -4
  143. transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py +9 -5
  144. transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py +9 -9
  145. transformers/models/deformable_detr/configuration_deformable_detr.py +2 -2
  146. transformers/models/deformable_detr/modeling_deformable_detr.py +1 -1
  147. transformers/models/depth_anything/configuration_depth_anything.py +2 -3
  148. transformers/models/depth_pro/image_processing_depth_pro_fast.py +0 -1
  149. transformers/models/detr/configuration_detr.py +1 -1
  150. transformers/models/detr/modeling_detr.py +8 -1
  151. transformers/models/dia/generation_dia.py +3 -10
  152. transformers/models/dia/modeling_dia.py +12 -1
  153. transformers/models/dia/modular_dia.py +11 -0
  154. transformers/models/dia/processing_dia.py +1 -1
  155. transformers/models/diffllama/modeling_diffllama.py +3 -3
  156. transformers/models/diffllama/modular_diffllama.py +2 -2
  157. transformers/models/dinov3_vit/image_processing_dinov3_vit_fast.py +0 -1
  158. transformers/models/dinov3_vit/modeling_dinov3_vit.py +3 -0
  159. transformers/models/dinov3_vit/modular_dinov3_vit.py +3 -0
  160. transformers/models/distilbert/modeling_distilbert.py +11 -9
  161. transformers/models/doge/modeling_doge.py +1 -1
  162. transformers/models/donut/image_processing_donut_fast.py +0 -1
  163. transformers/models/donut/modeling_donut_swin.py +16 -12
  164. transformers/models/dots1/modeling_dots1.py +14 -5
  165. transformers/models/dpt/configuration_dpt.py +1 -1
  166. transformers/models/dpt/image_processing_dpt_fast.py +1 -2
  167. transformers/models/dpt/modular_dpt.py +1 -2
  168. transformers/models/edgetam/configuration_edgetam.py +1 -1
  169. transformers/models/edgetam/modeling_edgetam.py +5 -2
  170. transformers/models/edgetam/modular_edgetam.py +15 -14
  171. transformers/models/edgetam_video/modeling_edgetam_video.py +55 -43
  172. transformers/models/edgetam_video/modular_edgetam_video.py +13 -19
  173. transformers/models/efficientloftr/image_processing_efficientloftr_fast.py +1 -2
  174. transformers/models/efficientloftr/modeling_efficientloftr.py +14 -1
  175. transformers/models/efficientnet/image_processing_efficientnet.py +5 -6
  176. transformers/models/efficientnet/image_processing_efficientnet_fast.py +1 -2
  177. transformers/models/efficientnet/modeling_efficientnet.py +5 -1
  178. transformers/models/electra/modeling_electra.py +7 -0
  179. transformers/models/emu3/modeling_emu3.py +8 -2
  180. transformers/models/emu3/modular_emu3.py +7 -1
  181. transformers/models/encodec/modeling_encodec.py +14 -0
  182. transformers/models/eomt/image_processing_eomt_fast.py +46 -14
  183. transformers/models/eomt/modeling_eomt.py +7 -0
  184. transformers/models/eomt/modular_eomt.py +7 -0
  185. transformers/models/ernie/modeling_ernie.py +6 -0
  186. transformers/models/ernie/modular_ernie.py +6 -0
  187. transformers/models/ernie4_5/modeling_ernie4_5.py +1 -1
  188. transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +16 -13
  189. transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +9 -35
  190. transformers/models/ernie4_5_vl_moe/__init__.py +31 -0
  191. transformers/models/ernie4_5_vl_moe/configuration_ernie4_5_vl_moe.py +330 -0
  192. transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe.py +456 -0
  193. transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe_fast.py +232 -0
  194. transformers/models/ernie4_5_vl_moe/modeling_ernie4_5_vl_moe.py +1898 -0
  195. transformers/models/ernie4_5_vl_moe/modular_ernie4_5_vl_moe.py +1904 -0
  196. transformers/models/ernie4_5_vl_moe/processing_ernie4_5_vl_moe.py +251 -0
  197. transformers/models/ernie4_5_vl_moe/video_processing_ernie4_5_vl_moe.py +594 -0
  198. transformers/models/esm/modeling_esm.py +6 -0
  199. transformers/models/esm/modeling_esmfold.py +6 -1
  200. transformers/models/evolla/modeling_evolla.py +9 -1
  201. transformers/models/evolla/modular_evolla.py +8 -0
  202. transformers/models/exaone4/modeling_exaone4.py +1 -1
  203. transformers/models/falcon/modeling_falcon.py +3 -3
  204. transformers/models/falcon_h1/modeling_falcon_h1.py +28 -23
  205. transformers/models/falcon_h1/modular_falcon_h1.py +7 -2
  206. transformers/models/falcon_mamba/modeling_falcon_mamba.py +6 -2
  207. transformers/models/falcon_mamba/modular_falcon_mamba.py +7 -2
  208. transformers/models/fast_vlm/modeling_fast_vlm.py +7 -3
  209. transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +23 -10
  210. transformers/models/fastspeech2_conformer/tokenization_fastspeech2_conformer.py +1 -0
  211. transformers/models/flaubert/modeling_flaubert.py +14 -15
  212. transformers/models/flava/image_processing_flava_fast.py +0 -2
  213. transformers/models/flava/modeling_flava.py +4 -1
  214. transformers/models/flex_olmo/modeling_flex_olmo.py +7 -4
  215. transformers/models/florence2/modeling_florence2.py +20 -3
  216. transformers/models/florence2/modular_florence2.py +13 -0
  217. transformers/models/fnet/modeling_fnet.py +7 -0
  218. transformers/models/fuyu/image_processing_fuyu.py +1 -1
  219. transformers/models/fuyu/modeling_fuyu.py +3 -1
  220. transformers/models/fuyu/processing_fuyu.py +16 -0
  221. transformers/models/gemma/modeling_gemma.py +10 -12
  222. transformers/models/gemma/modular_gemma.py +9 -11
  223. transformers/models/gemma2/modeling_gemma2.py +1 -1
  224. transformers/models/gemma2/modular_gemma2.py +1 -1
  225. transformers/models/gemma3/image_processing_gemma3_fast.py +0 -1
  226. transformers/models/gemma3/modeling_gemma3.py +28 -7
  227. transformers/models/gemma3/modular_gemma3.py +26 -6
  228. transformers/models/gemma3n/configuration_gemma3n.py +3 -0
  229. transformers/models/gemma3n/modeling_gemma3n.py +47 -9
  230. transformers/models/gemma3n/modular_gemma3n.py +51 -9
  231. transformers/models/git/modeling_git.py +181 -126
  232. transformers/models/glm/modeling_glm.py +1 -1
  233. transformers/models/glm4/modeling_glm4.py +1 -1
  234. transformers/models/glm46v/image_processing_glm46v.py +0 -4
  235. transformers/models/glm46v/modeling_glm46v.py +3 -1
  236. transformers/models/glm46v/modular_glm46v.py +3 -0
  237. transformers/models/glm4_moe/modeling_glm4_moe.py +9 -5
  238. transformers/models/glm4_moe/modular_glm4_moe.py +1 -1
  239. transformers/models/glm4v/image_processing_glm4v.py +0 -4
  240. transformers/models/glm4v/modeling_glm4v.py +15 -5
  241. transformers/models/glm4v/modular_glm4v.py +11 -3
  242. transformers/models/glm4v_moe/modeling_glm4v_moe.py +39 -23
  243. transformers/models/glm4v_moe/modular_glm4v_moe.py +12 -0
  244. transformers/models/glmasr/__init__.py +30 -0
  245. transformers/models/glmasr/configuration_glmasr.py +197 -0
  246. transformers/models/glmasr/modeling_glmasr.py +512 -0
  247. transformers/models/glmasr/modular_glmasr.py +433 -0
  248. transformers/models/glmasr/processing_glmasr.py +332 -0
  249. transformers/models/glpn/image_processing_glpn_fast.py +0 -1
  250. transformers/models/got_ocr2/image_processing_got_ocr2_fast.py +0 -1
  251. transformers/models/got_ocr2/modeling_got_ocr2.py +8 -3
  252. transformers/models/gpt2/modeling_gpt2.py +8 -5
  253. transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +3 -8
  254. transformers/models/gpt_neo/modeling_gpt_neo.py +15 -3
  255. transformers/models/gpt_neox/modeling_gpt_neox.py +1 -1
  256. transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +1 -1
  257. transformers/models/gpt_oss/configuration_gpt_oss.py +17 -0
  258. transformers/models/gpt_oss/modeling_gpt_oss.py +6 -9
  259. transformers/models/gpt_oss/modular_gpt_oss.py +5 -7
  260. transformers/models/gptj/modeling_gptj.py +15 -6
  261. transformers/models/granite/modeling_granite.py +1 -1
  262. transformers/models/granite_speech/modeling_granite_speech.py +15 -1
  263. transformers/models/granitemoe/modeling_granitemoe.py +2 -3
  264. transformers/models/granitemoe/modular_granitemoe.py +1 -2
  265. transformers/models/granitemoehybrid/configuration_granitemoehybrid.py +4 -0
  266. transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +33 -23
  267. transformers/models/granitemoehybrid/modular_granitemoehybrid.py +12 -2
  268. transformers/models/granitemoeshared/modeling_granitemoeshared.py +2 -3
  269. transformers/models/grounding_dino/configuration_grounding_dino.py +2 -3
  270. transformers/models/grounding_dino/modeling_grounding_dino.py +4 -4
  271. transformers/models/groupvit/modeling_groupvit.py +6 -1
  272. transformers/models/helium/modeling_helium.py +1 -1
  273. transformers/models/hgnet_v2/modeling_hgnet_v2.py +10 -0
  274. transformers/models/hgnet_v2/modular_hgnet_v2.py +10 -0
  275. transformers/models/hubert/modeling_hubert.py +4 -0
  276. transformers/models/hubert/modular_hubert.py +4 -0
  277. transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +1 -1
  278. transformers/models/hunyuan_v1_dense/modular_hunyuan_v1_dense.py +1 -1
  279. transformers/models/hunyuan_v1_moe/__init__.py +1 -1
  280. transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +12 -4
  281. transformers/models/hunyuan_v1_moe/modular_hunyuan_v1_moe.py +4 -2
  282. transformers/models/ibert/modeling_ibert.py +16 -0
  283. transformers/models/idefics/modeling_idefics.py +10 -0
  284. transformers/models/idefics2/modeling_idefics2.py +7 -1
  285. transformers/models/idefics3/modeling_idefics3.py +5 -1
  286. transformers/models/imagegpt/image_processing_imagegpt_fast.py +1 -5
  287. transformers/models/imagegpt/modeling_imagegpt.py +9 -2
  288. transformers/models/instructblip/modeling_instructblip.py +2 -0
  289. transformers/models/instructblipvideo/modeling_instructblipvideo.py +52 -50
  290. transformers/models/instructblipvideo/video_processing_instructblipvideo.py +0 -1
  291. transformers/models/internvl/modeling_internvl.py +11 -8
  292. transformers/models/internvl/modular_internvl.py +5 -9
  293. transformers/models/internvl/video_processing_internvl.py +0 -1
  294. transformers/models/jais2/__init__.py +27 -0
  295. transformers/models/jais2/configuration_jais2.py +152 -0
  296. transformers/models/jais2/modeling_jais2.py +486 -0
  297. transformers/models/jais2/modular_jais2.py +196 -0
  298. transformers/models/jamba/modeling_jamba.py +24 -19
  299. transformers/models/jamba/modular_jamba.py +17 -17
  300. transformers/models/janus/image_processing_janus_fast.py +0 -1
  301. transformers/models/janus/modeling_janus.py +15 -7
  302. transformers/models/janus/modular_janus.py +16 -7
  303. transformers/models/jetmoe/modeling_jetmoe.py +2 -2
  304. transformers/models/jetmoe/modular_jetmoe.py +1 -0
  305. transformers/models/kosmos2/modeling_kosmos2.py +14 -2
  306. transformers/models/kosmos2_5/image_processing_kosmos2_5_fast.py +2 -2
  307. transformers/models/kosmos2_5/modeling_kosmos2_5.py +10 -1
  308. transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +9 -3
  309. transformers/models/kyutai_speech_to_text/modular_kyutai_speech_to_text.py +9 -1
  310. transformers/models/lasr/configuration_lasr.py +4 -0
  311. transformers/models/lasr/modeling_lasr.py +3 -2
  312. transformers/models/lasr/modular_lasr.py +8 -1
  313. transformers/models/lasr/processing_lasr.py +0 -2
  314. transformers/models/layoutlm/modeling_layoutlm.py +5 -3
  315. transformers/models/layoutlmv2/image_processing_layoutlmv2_fast.py +0 -1
  316. transformers/models/layoutlmv2/modeling_layoutlmv2.py +12 -0
  317. transformers/models/layoutlmv2/tokenization_layoutlmv2.py +1 -0
  318. transformers/models/layoutlmv3/image_processing_layoutlmv3_fast.py +0 -1
  319. transformers/models/layoutlmv3/modeling_layoutlmv3.py +29 -5
  320. transformers/models/led/modeling_led.py +6 -0
  321. transformers/models/levit/modeling_levit.py +18 -0
  322. transformers/models/lfm2/modeling_lfm2.py +1 -1
  323. transformers/models/lfm2_moe/modeling_lfm2_moe.py +14 -4
  324. transformers/models/lfm2_moe/modular_lfm2_moe.py +5 -28
  325. transformers/models/lfm2_vl/configuration_lfm2_vl.py +4 -0
  326. transformers/models/lfm2_vl/modeling_lfm2_vl.py +11 -5
  327. transformers/models/lfm2_vl/modular_lfm2_vl.py +4 -2
  328. transformers/models/lfm2_vl/processing_lfm2_vl.py +82 -42
  329. transformers/models/lightglue/image_processing_lightglue_fast.py +1 -2
  330. transformers/models/lilt/modeling_lilt.py +19 -15
  331. transformers/models/llama/modeling_llama.py +1 -1
  332. transformers/models/llama4/image_processing_llama4_fast.py +1 -2
  333. transformers/models/llama4/modeling_llama4.py +8 -4
  334. transformers/models/llava/image_processing_llava_fast.py +0 -1
  335. transformers/models/llava/modeling_llava.py +12 -7
  336. transformers/models/llava_next/image_processing_llava_next_fast.py +0 -1
  337. transformers/models/llava_next/modeling_llava_next.py +7 -3
  338. transformers/models/llava_next_video/modeling_llava_next_video.py +7 -3
  339. transformers/models/llava_next_video/modular_llava_next_video.py +7 -3
  340. transformers/models/llava_onevision/image_processing_llava_onevision_fast.py +0 -1
  341. transformers/models/llava_onevision/modeling_llava_onevision.py +7 -3
  342. transformers/models/llava_onevision/modular_llava_onevision.py +7 -4
  343. transformers/models/longcat_flash/modeling_longcat_flash.py +2 -1
  344. transformers/models/longcat_flash/modular_longcat_flash.py +1 -0
  345. transformers/models/longt5/modeling_longt5.py +0 -4
  346. transformers/models/m2m_100/modeling_m2m_100.py +10 -0
  347. transformers/models/mamba/modeling_mamba.py +2 -1
  348. transformers/models/mamba2/modeling_mamba2.py +24 -23
  349. transformers/models/marian/configuration_marian.py +1 -1
  350. transformers/models/marian/modeling_marian.py +3 -0
  351. transformers/models/markuplm/modeling_markuplm.py +5 -8
  352. transformers/models/mask2former/configuration_mask2former.py +3 -3
  353. transformers/models/mask2former/image_processing_mask2former_fast.py +1 -4
  354. transformers/models/mask2former/modeling_mask2former.py +9 -0
  355. transformers/models/maskformer/configuration_maskformer.py +3 -3
  356. transformers/models/maskformer/image_processing_maskformer_fast.py +1 -4
  357. transformers/models/maskformer/modeling_maskformer.py +9 -1
  358. transformers/models/maskformer/modeling_maskformer_swin.py +19 -15
  359. transformers/models/mbart/configuration_mbart.py +1 -0
  360. transformers/models/mbart/modeling_mbart.py +7 -0
  361. transformers/models/megatron_bert/modeling_megatron_bert.py +2 -0
  362. transformers/models/metaclip_2/modeling_metaclip_2.py +2 -0
  363. transformers/models/metaclip_2/modular_metaclip_2.py +2 -0
  364. transformers/models/mimi/modeling_mimi.py +25 -4
  365. transformers/models/minimax/modeling_minimax.py +16 -3
  366. transformers/models/minimax/modular_minimax.py +12 -1
  367. transformers/models/ministral/modeling_ministral.py +1 -1
  368. transformers/models/ministral3/modeling_ministral3.py +1 -1
  369. transformers/models/mistral/modeling_mistral.py +1 -1
  370. transformers/models/mistral3/modeling_mistral3.py +10 -4
  371. transformers/models/mistral3/modular_mistral3.py +3 -1
  372. transformers/models/mixtral/modeling_mixtral.py +12 -4
  373. transformers/models/mixtral/modular_mixtral.py +6 -2
  374. transformers/models/mlcd/modeling_mlcd.py +6 -0
  375. transformers/models/mlcd/modular_mlcd.py +4 -0
  376. transformers/models/mllama/modeling_mllama.py +13 -2
  377. transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py +1 -2
  378. transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +4 -4
  379. transformers/models/mm_grounding_dino/modular_mm_grounding_dino.py +1 -2
  380. transformers/models/mobilebert/modeling_mobilebert.py +2 -0
  381. transformers/models/mobilenet_v2/image_processing_mobilenet_v2_fast.py +0 -1
  382. transformers/models/mobilevit/image_processing_mobilevit.py +5 -5
  383. transformers/models/mobilevit/image_processing_mobilevit_fast.py +1 -2
  384. transformers/models/mobilevit/modeling_mobilevit.py +4 -0
  385. transformers/models/mobilevitv2/modeling_mobilevitv2.py +4 -0
  386. transformers/models/modernbert/modeling_modernbert.py +12 -1
  387. transformers/models/modernbert/modular_modernbert.py +12 -1
  388. transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +9 -1
  389. transformers/models/modernbert_decoder/modular_modernbert_decoder.py +9 -1
  390. transformers/models/moonshine/modeling_moonshine.py +1 -1
  391. transformers/models/moshi/modeling_moshi.py +21 -51
  392. transformers/models/mpnet/modeling_mpnet.py +2 -0
  393. transformers/models/mra/modeling_mra.py +4 -1
  394. transformers/models/mt5/configuration_mt5.py +2 -3
  395. transformers/models/mt5/modeling_mt5.py +0 -10
  396. transformers/models/musicgen/modeling_musicgen.py +5 -9
  397. transformers/models/musicgen_melody/modeling_musicgen_melody.py +4 -0
  398. transformers/models/mvp/modeling_mvp.py +7 -0
  399. transformers/models/nanochat/modeling_nanochat.py +1 -1
  400. transformers/models/nemotron/modeling_nemotron.py +3 -3
  401. transformers/models/nllb_moe/configuration_nllb_moe.py +1 -0
  402. transformers/models/nllb_moe/modeling_nllb_moe.py +10 -0
  403. transformers/models/nougat/image_processing_nougat_fast.py +0 -1
  404. transformers/models/nougat/tokenization_nougat.py +11 -16
  405. transformers/models/nystromformer/modeling_nystromformer.py +7 -0
  406. transformers/models/olmo/modeling_olmo.py +1 -1
  407. transformers/models/olmo2/modeling_olmo2.py +1 -1
  408. transformers/models/olmo3/modeling_olmo3.py +1 -1
  409. transformers/models/olmoe/modeling_olmoe.py +12 -4
  410. transformers/models/olmoe/modular_olmoe.py +4 -2
  411. transformers/models/omdet_turbo/configuration_omdet_turbo.py +2 -2
  412. transformers/models/omdet_turbo/modeling_omdet_turbo.py +4 -0
  413. transformers/models/oneformer/configuration_oneformer.py +3 -3
  414. transformers/models/oneformer/modeling_oneformer.py +7 -38
  415. transformers/models/openai/modeling_openai.py +12 -0
  416. transformers/models/ovis2/image_processing_ovis2_fast.py +0 -1
  417. transformers/models/ovis2/modeling_ovis2.py +15 -3
  418. transformers/models/ovis2/modular_ovis2.py +8 -0
  419. transformers/models/owlv2/image_processing_owlv2_fast.py +0 -2
  420. transformers/models/owlv2/modeling_owlv2.py +7 -3
  421. transformers/models/owlv2/modular_owlv2.py +0 -2
  422. transformers/models/owlvit/modeling_owlvit.py +7 -3
  423. transformers/models/paddleocr_vl/image_processing_paddleocr_vl.py +3 -2
  424. transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +28 -14
  425. transformers/models/paddleocr_vl/modular_paddleocr_vl.py +22 -12
  426. transformers/models/paligemma/modeling_paligemma.py +25 -17
  427. transformers/models/parakeet/modeling_parakeet.py +5 -0
  428. transformers/models/parakeet/modular_parakeet.py +5 -0
  429. transformers/models/parakeet/{tokenization_parakeet_fast.py → tokenization_parakeet.py} +3 -3
  430. transformers/models/patchtsmixer/modeling_patchtsmixer.py +4 -0
  431. transformers/models/patchtst/modeling_patchtst.py +5 -4
  432. transformers/models/pe_audio/__init__.py +30 -0
  433. transformers/models/pe_audio/configuration_pe_audio.py +206 -0
  434. transformers/models/pe_audio/feature_extraction_pe_audio.py +162 -0
  435. transformers/models/pe_audio/modeling_pe_audio.py +820 -0
  436. transformers/models/pe_audio/modular_pe_audio.py +299 -0
  437. transformers/models/pe_audio/processing_pe_audio.py +24 -0
  438. transformers/models/pe_audio_video/__init__.py +29 -0
  439. transformers/models/pe_audio_video/configuration_pe_audio_video.py +225 -0
  440. transformers/models/pe_audio_video/modeling_pe_audio_video.py +972 -0
  441. transformers/models/pe_audio_video/modular_pe_audio_video.py +764 -0
  442. transformers/models/pe_audio_video/processing_pe_audio_video.py +25 -0
  443. transformers/models/pe_video/__init__.py +30 -0
  444. transformers/models/pe_video/configuration_pe_video.py +211 -0
  445. transformers/models/pe_video/modeling_pe_video.py +636 -0
  446. transformers/models/pe_video/modular_pe_video.py +219 -0
  447. transformers/models/pe_video/processing_pe_video.py +10 -0
  448. transformers/models/pe_video/video_processing_pe_video.py +66 -0
  449. transformers/models/pegasus/configuration_pegasus.py +1 -0
  450. transformers/models/pegasus/modeling_pegasus.py +3 -0
  451. transformers/models/pegasus_x/modeling_pegasus_x.py +1 -0
  452. transformers/models/perceiver/image_processing_perceiver_fast.py +0 -1
  453. transformers/models/perceiver/modeling_perceiver.py +5 -1
  454. transformers/models/perception_lm/image_processing_perception_lm_fast.py +0 -1
  455. transformers/models/perception_lm/modeling_perception_lm.py +7 -3
  456. transformers/models/perception_lm/modular_perception_lm.py +7 -3
  457. transformers/models/persimmon/modeling_persimmon.py +1 -1
  458. transformers/models/phi/modeling_phi.py +1 -1
  459. transformers/models/phi3/modeling_phi3.py +1 -1
  460. transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +4 -1
  461. transformers/models/phi4_multimodal/modular_phi4_multimodal.py +3 -0
  462. transformers/models/phi4_multimodal/processing_phi4_multimodal.py +0 -2
  463. transformers/models/phimoe/modeling_phimoe.py +12 -4
  464. transformers/models/phimoe/modular_phimoe.py +1 -1
  465. transformers/models/pix2struct/processing_pix2struct.py +0 -4
  466. transformers/models/pixio/__init__.py +30 -0
  467. transformers/models/pixio/configuration_pixio.py +151 -0
  468. transformers/models/pixio/modeling_pixio.py +507 -0
  469. transformers/models/pixio/modular_pixio.py +404 -0
  470. transformers/models/pixtral/modeling_pixtral.py +1 -1
  471. transformers/models/pixtral/processing_pixtral.py +3 -1
  472. transformers/models/plbart/configuration_plbart.py +1 -0
  473. transformers/models/plbart/modeling_plbart.py +7 -0
  474. transformers/models/plbart/modular_plbart.py +6 -0
  475. transformers/models/poolformer/image_processing_poolformer_fast.py +0 -1
  476. transformers/models/poolformer/modeling_poolformer.py +11 -1
  477. transformers/models/pop2piano/configuration_pop2piano.py +0 -1
  478. transformers/models/prompt_depth_anything/configuration_prompt_depth_anything.py +2 -3
  479. transformers/models/prophetnet/modeling_prophetnet.py +2 -1
  480. transformers/models/qwen2/modeling_qwen2.py +1 -1
  481. transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +104 -64
  482. transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +58 -18
  483. transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +18 -5
  484. transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +26 -22
  485. transformers/models/qwen2_audio/modeling_qwen2_audio.py +2 -2
  486. transformers/models/qwen2_moe/modeling_qwen2_moe.py +12 -4
  487. transformers/models/qwen2_vl/image_processing_qwen2_vl.py +3 -2
  488. transformers/models/qwen2_vl/modeling_qwen2_vl.py +17 -4
  489. transformers/models/qwen3/modeling_qwen3.py +1 -1
  490. transformers/models/qwen3_moe/modeling_qwen3_moe.py +12 -4
  491. transformers/models/qwen3_next/modeling_qwen3_next.py +4 -6
  492. transformers/models/qwen3_omni_moe/configuration_qwen3_omni_moe.py +4 -0
  493. transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +92 -46
  494. transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +48 -4
  495. transformers/models/qwen3_vl/configuration_qwen3_vl.py +5 -5
  496. transformers/models/qwen3_vl/modeling_qwen3_vl.py +17 -4
  497. transformers/models/qwen3_vl/modular_qwen3_vl.py +21 -10
  498. transformers/models/qwen3_vl/processing_qwen3_vl.py +3 -3
  499. transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +94 -112
  500. transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +32 -81
  501. transformers/models/rag/configuration_rag.py +0 -8
  502. transformers/models/rag/modeling_rag.py +7 -9
  503. transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +3 -2
  504. transformers/models/reformer/modeling_reformer.py +9 -1
  505. transformers/models/regnet/modeling_regnet.py +4 -0
  506. transformers/models/rembert/modeling_rembert.py +7 -1
  507. transformers/models/resnet/modeling_resnet.py +8 -3
  508. transformers/models/roberta/modeling_roberta.py +3 -0
  509. transformers/models/roberta/modular_roberta.py +3 -0
  510. transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py +3 -0
  511. transformers/models/roc_bert/modeling_roc_bert.py +3 -0
  512. transformers/models/rt_detr/configuration_rt_detr.py +1 -1
  513. transformers/models/rt_detr/modeling_rt_detr.py +4 -0
  514. transformers/models/rt_detr/modeling_rt_detr_resnet.py +8 -3
  515. transformers/models/rt_detr_v2/configuration_rt_detr_v2.py +2 -3
  516. transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +7 -0
  517. transformers/models/rt_detr_v2/modular_rt_detr_v2.py +8 -3
  518. transformers/models/rwkv/modeling_rwkv.py +1 -1
  519. transformers/models/sam/configuration_sam.py +1 -0
  520. transformers/models/sam/image_processing_sam_fast.py +0 -1
  521. transformers/models/sam/modeling_sam.py +4 -1
  522. transformers/models/sam2/configuration_sam2.py +1 -1
  523. transformers/models/sam2/modeling_sam2.py +5 -1
  524. transformers/models/sam2/modular_sam2.py +5 -1
  525. transformers/models/sam2_video/modeling_sam2_video.py +51 -43
  526. transformers/models/sam2_video/modular_sam2_video.py +31 -18
  527. transformers/models/sam3/configuration_sam3.py +21 -1
  528. transformers/models/sam3/modeling_sam3.py +23 -0
  529. transformers/models/sam3_tracker/modeling_sam3_tracker.py +2 -0
  530. transformers/models/sam3_tracker/modular_sam3_tracker.py +2 -0
  531. transformers/models/sam3_tracker_video/configuration_sam3_tracker_video.py +25 -0
  532. transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +26 -15
  533. transformers/models/sam3_tracker_video/modular_sam3_tracker_video.py +25 -2
  534. transformers/models/sam3_video/configuration_sam3_video.py +14 -0
  535. transformers/models/sam3_video/modeling_sam3_video.py +3 -3
  536. transformers/models/sam3_video/processing_sam3_video.py +1 -1
  537. transformers/models/sam_hq/configuration_sam_hq.py +1 -0
  538. transformers/models/sam_hq/modeling_sam_hq.py +26 -23
  539. transformers/models/seamless_m4t/modeling_seamless_m4t.py +27 -11
  540. transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +6 -0
  541. transformers/models/seed_oss/modeling_seed_oss.py +1 -1
  542. transformers/models/segformer/image_processing_segformer_fast.py +0 -1
  543. transformers/models/segformer/modeling_segformer.py +2 -2
  544. transformers/models/segformer/modular_segformer.py +0 -1
  545. transformers/models/shieldgemma2/modeling_shieldgemma2.py +1 -0
  546. transformers/models/siglip/modeling_siglip.py +24 -2
  547. transformers/models/siglip2/modeling_siglip2.py +63 -41
  548. transformers/models/smollm3/modeling_smollm3.py +1 -1
  549. transformers/models/smolvlm/modeling_smolvlm.py +5 -1
  550. transformers/models/smolvlm/video_processing_smolvlm.py +0 -1
  551. transformers/models/speech_to_text/modeling_speech_to_text.py +10 -0
  552. transformers/models/speecht5/modeling_speecht5.py +28 -0
  553. transformers/models/splinter/modeling_splinter.py +9 -3
  554. transformers/models/squeezebert/modeling_squeezebert.py +2 -0
  555. transformers/models/stablelm/modeling_stablelm.py +1 -1
  556. transformers/models/starcoder2/modeling_starcoder2.py +1 -1
  557. transformers/models/superglue/image_processing_superglue_fast.py +1 -2
  558. transformers/models/superpoint/image_processing_superpoint_fast.py +1 -2
  559. transformers/models/swiftformer/modeling_swiftformer.py +4 -0
  560. transformers/models/swin/modeling_swin.py +16 -12
  561. transformers/models/swin2sr/image_processing_swin2sr_fast.py +0 -1
  562. transformers/models/swin2sr/modeling_swin2sr.py +49 -33
  563. transformers/models/swinv2/modeling_swinv2.py +41 -33
  564. transformers/models/switch_transformers/modeling_switch_transformers.py +2 -8
  565. transformers/models/switch_transformers/modular_switch_transformers.py +2 -8
  566. transformers/models/t5/configuration_t5.py +7 -1
  567. transformers/models/t5/modeling_t5.py +1 -7
  568. transformers/models/t5gemma/modeling_t5gemma.py +1 -1
  569. transformers/models/t5gemma2/configuration_t5gemma2.py +6 -42
  570. transformers/models/t5gemma2/modeling_t5gemma2.py +13 -4
  571. transformers/models/t5gemma2/modular_t5gemma2.py +289 -4
  572. transformers/models/table_transformer/configuration_table_transformer.py +1 -1
  573. transformers/models/table_transformer/modeling_table_transformer.py +1 -1
  574. transformers/models/textnet/image_processing_textnet_fast.py +0 -1
  575. transformers/models/timesfm/modeling_timesfm.py +12 -0
  576. transformers/models/timesfm/modular_timesfm.py +12 -0
  577. transformers/models/timm_backbone/modeling_timm_backbone.py +13 -9
  578. transformers/models/timm_wrapper/configuration_timm_wrapper.py +3 -0
  579. transformers/models/timm_wrapper/modeling_timm_wrapper.py +19 -13
  580. transformers/models/trocr/modeling_trocr.py +1 -2
  581. transformers/models/tvp/configuration_tvp.py +5 -1
  582. transformers/models/tvp/modeling_tvp.py +4 -4
  583. transformers/models/udop/configuration_udop.py +1 -0
  584. transformers/models/udop/modeling_udop.py +3 -7
  585. transformers/models/umt5/configuration_umt5.py +2 -2
  586. transformers/models/umt5/modeling_umt5.py +0 -6
  587. transformers/models/vaultgemma/modeling_vaultgemma.py +1 -1
  588. transformers/models/video_llama_3/image_processing_video_llama_3.py +3 -2
  589. transformers/models/video_llama_3/modeling_video_llama_3.py +12 -1
  590. transformers/models/video_llama_3/modular_video_llama_3.py +10 -1
  591. transformers/models/video_llava/modeling_video_llava.py +7 -3
  592. transformers/models/vilt/configuration_vilt.py +2 -2
  593. transformers/models/vilt/modeling_vilt.py +7 -0
  594. transformers/models/vipllava/modeling_vipllava.py +7 -3
  595. transformers/models/visual_bert/modeling_visual_bert.py +2 -0
  596. transformers/models/vitmatte/configuration_vitmatte.py +1 -1
  597. transformers/models/vitmatte/image_processing_vitmatte_fast.py +0 -1
  598. transformers/models/vitmatte/modeling_vitmatte.py +4 -0
  599. transformers/models/vitpose/configuration_vitpose.py +1 -1
  600. transformers/models/vitpose/image_processing_vitpose_fast.py +0 -1
  601. transformers/models/voxtral/modeling_voxtral.py +2 -2
  602. transformers/models/voxtral/modular_voxtral.py +2 -2
  603. transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +16 -10
  604. transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +7 -0
  605. transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +21 -11
  606. transformers/models/wav2vec2_conformer/modular_wav2vec2_conformer.py +21 -11
  607. transformers/models/whisper/generation_whisper.py +1 -0
  608. transformers/models/whisper/modeling_whisper.py +5 -3
  609. transformers/models/x_clip/modeling_x_clip.py +2 -0
  610. transformers/models/xcodec/modeling_xcodec.py +5 -0
  611. transformers/models/xglm/modeling_xglm.py +10 -0
  612. transformers/models/xlm/modeling_xlm.py +13 -14
  613. transformers/models/xlm_roberta/modeling_xlm_roberta.py +109 -106
  614. transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py +3 -0
  615. transformers/models/xlnet/modeling_xlnet.py +3 -1
  616. transformers/models/xmod/modeling_xmod.py +3 -0
  617. transformers/models/yoso/modeling_yoso.py +4 -1
  618. transformers/models/zamba/modeling_zamba.py +2 -1
  619. transformers/models/zamba2/modeling_zamba2.py +3 -2
  620. transformers/models/zoedepth/configuration_zoedepth.py +1 -1
  621. transformers/models/zoedepth/image_processing_zoedepth_fast.py +1 -3
  622. transformers/models/zoedepth/modeling_zoedepth.py +7 -0
  623. transformers/pipelines/__init__.py +9 -6
  624. transformers/pipelines/automatic_speech_recognition.py +20 -12
  625. transformers/pipelines/base.py +1 -1
  626. transformers/pipelines/document_question_answering.py +1 -1
  627. transformers/pipelines/question_answering.py +1 -1
  628. transformers/pipelines/text_to_audio.py +2 -2
  629. transformers/processing_utils.py +127 -56
  630. transformers/quantizers/auto.py +2 -4
  631. transformers/quantizers/base.py +9 -64
  632. transformers/quantizers/quantizer_aqlm.py +1 -18
  633. transformers/quantizers/quantizer_auto_round.py +1 -10
  634. transformers/quantizers/quantizer_awq.py +3 -8
  635. transformers/quantizers/quantizer_bitnet.py +1 -6
  636. transformers/quantizers/quantizer_bnb_4bit.py +9 -49
  637. transformers/quantizers/quantizer_bnb_8bit.py +9 -19
  638. transformers/quantizers/quantizer_compressed_tensors.py +1 -4
  639. transformers/quantizers/quantizer_eetq.py +2 -12
  640. transformers/quantizers/quantizer_fbgemm_fp8.py +5 -14
  641. transformers/quantizers/quantizer_finegrained_fp8.py +15 -10
  642. transformers/quantizers/quantizer_fp_quant.py +4 -4
  643. transformers/quantizers/quantizer_gptq.py +1 -4
  644. transformers/quantizers/quantizer_higgs.py +2 -6
  645. transformers/quantizers/quantizer_mxfp4.py +2 -28
  646. transformers/quantizers/quantizer_quanto.py +14 -14
  647. transformers/quantizers/quantizer_spqr.py +3 -8
  648. transformers/quantizers/quantizer_torchao.py +28 -124
  649. transformers/quantizers/quantizer_vptq.py +1 -10
  650. transformers/testing_utils.py +28 -12
  651. transformers/tokenization_mistral_common.py +3 -2
  652. transformers/tokenization_utils_base.py +3 -2
  653. transformers/tokenization_utils_tokenizers.py +25 -2
  654. transformers/trainer.py +24 -2
  655. transformers/trainer_callback.py +8 -0
  656. transformers/trainer_seq2seq.py +4 -0
  657. transformers/training_args.py +8 -10
  658. transformers/utils/__init__.py +4 -0
  659. transformers/utils/attention_visualizer.py +4 -4
  660. transformers/utils/auto_docstring.py +34 -25
  661. transformers/utils/generic.py +20 -0
  662. transformers/utils/import_utils.py +51 -9
  663. transformers/utils/kernel_config.py +71 -18
  664. transformers/utils/quantization_config.py +8 -8
  665. transformers/video_processing_utils.py +16 -12
  666. {transformers-5.0.0rc1.dist-info → transformers-5.0.0rc2.dist-info}/METADATA +5 -6
  667. {transformers-5.0.0rc1.dist-info → transformers-5.0.0rc2.dist-info}/RECORD +671 -632
  668. {transformers-5.0.0rc1.dist-info → transformers-5.0.0rc2.dist-info}/WHEEL +0 -0
  669. {transformers-5.0.0rc1.dist-info → transformers-5.0.0rc2.dist-info}/entry_points.txt +0 -0
  670. {transformers-5.0.0rc1.dist-info → transformers-5.0.0rc2.dist-info}/licenses/LICENSE +0 -0
  671. {transformers-5.0.0rc1.dist-info → transformers-5.0.0rc2.dist-info}/top_level.txt +0 -0
transformers/utils/auto_docstring.py

@@ -17,7 +17,8 @@ import inspect
 import os
 import textwrap
 from pathlib import Path
-from typing import get_args
+from types import UnionType
+from typing import Union, get_args, get_origin
 
 import regex as re
 
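The new imports are needed because `typing.Optional[X]`/`typing.Union[...]` and the PEP 604 spelling `X | Y` report different origins under introspection, so both have to be checked. A minimal standalone sketch of the standard-library behavior the rewritten helper below relies on:

```python
from types import UnionType
from typing import Optional, Union, get_args, get_origin

# typing.Optional[int] is just Union[int, None]; its origin is typing.Union.
assert get_origin(Optional[int]) is Union
assert get_args(Optional[int]) == (int, type(None))

# The PEP 604 spelling produces a types.UnionType instead, so checking only
# typing.Union would miss annotations written as "int | None".
assert get_origin(int | None) is UnionType
assert get_args(int | None) == (int, type(None))
```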
@@ -1280,38 +1281,46 @@ def _get_model_info(func, parent_class):
     return model_name_lowercase, class_name, config_class
 
 
-def _process_parameter_type(param, param_name, func):
+def _process_parameter_type(param):
     """
     Process and format a parameter's type annotation.
 
     Args:
         param (`inspect.Parameter`): The parameter from the function signature
-        param_name (`str`): The name of the parameter
-        func (`function`): The function the parameter belongs to
     """
     optional = False
-    if param.annotation != inspect.Parameter.empty:
-        param_type = param.annotation
-        if "typing" in str(param_type):
-            param_type = "".join(str(param_type).split("typing.")).replace("transformers.", "~")
-        elif hasattr(param_type, "__module__"):
-            param_type = f"{param_type.__module__.replace('transformers.', '~').replace('builtins', '')}.{param.annotation.__name__}"
-            if param_type[0] == ".":
-                param_type = param_type[1:]
-        else:
-            if False:
-                print(
-                    f"[ERROR] {param_type} for {param_name} of {func.__qualname__} in file {func.__code__.co_filename} has an invalid type"
-                )
-        if "ForwardRef" in param_type:
-            param_type = re.sub(r"ForwardRef\('([\w.]+)'\)", r"\1", param_type)
-        if "Optional" in param_type:
-            param_type = re.sub(r"Optional\[(.*?)\]", r"\1", param_type)
+    if param.annotation == inspect.Parameter.empty:
+        return "", False
+    elif param.annotation is None:
+        return "None", True
+    # This is, astonishingly, the right way to do it: https://docs.python.org/3/library/typing.html#typing.Union
+    elif get_origin(param.annotation) is Union or get_origin(param.annotation) is UnionType:
+        subtypes = get_args(param.annotation)
+    else:
+        subtypes = [param.annotation]  # Just pretend it's a single-element union so we don't need two code paths
+    out_str = []
+    for subtype in subtypes:
+        if subtype is type(None):
             optional = True
+            continue
+        if hasattr(subtype, "__module__") and hasattr(subtype, "__name__"):
+            subtype = f"{subtype.__module__.replace('transformers.', '~').replace('builtins', '').replace('typing.', '')}.{subtype.__name__}".removeprefix(
+                "."
+            )
+        else:
+            subtype = str(subtype)  # Just give up
+        if "ForwardRef" in subtype:
+            subtype = re.sub(r"ForwardRef\('([\w.]+)'\)", r"\1", subtype)
+        out_str.append(subtype)
+
+    if param.default is not inspect.Parameter.empty:
+        optional = True
+    if not out_str:
+        return "", optional
+    elif len(out_str) == 1:
+        return out_str[0], optional
     else:
-        param_type = ""
-
-    return param_type, optional
+        return f"Union[{', '.join(out_str)}]", optional
 
 
 def _get_parameter_info(param_name, documented_params, source_args_dict, param_type, optional):
@@ -1392,7 +1401,7 @@ def _process_regular_parameters(
             continue
 
         # Process parameter type and optional status
-        param_type, optional = _process_parameter_type(param, param_name, func)
+        param_type, optional = _process_parameter_type(param)
 
         # Check for default value
         param_default = ""
transformers/utils/generic.py

@@ -155,6 +155,26 @@ def is_torch_dtype(x):
     return isinstance(x, torch.dtype)
 
 
+def _is_tensor_or_array_like(value):
+    """
+    Check if a value is array-like (includes ragged arrays)
+    """
+    if is_numpy_array(value):
+        return True
+    if is_torch_tensor(value):
+        return True
+    if isinstance(value, (int, float, bool, np.number)):
+        return True
+
+    if isinstance(value, (list, tuple)):
+        if len(value) == 0:
+            # consider empty list or nested list as array-like
+            return True
+        return _is_tensor_or_array_like(value[0])
+
+    return False
+
+
 def maybe_autocast(
     device_type: str,
     dtype: Optional["_dtype"] = None,
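Note that the check only ever recurses into the first element of a list or tuple, so ragged nested lists count as array-like as long as their first leaf is a number, array, or tensor. A hedged usage sketch, assuming the private helper stays importable from `transformers.utils.generic` as added above:

```python
import numpy as np
import torch

from transformers.utils.generic import _is_tensor_or_array_like

print(_is_tensor_or_array_like(torch.ones(2, 3)))      # True: torch tensor
print(_is_tensor_or_array_like(np.zeros((2, 2))))      # True: numpy array
print(_is_tensor_or_array_like([[1, 2, 3], [4, 5]]))   # True: ragged nested list of numbers
print(_is_tensor_or_array_like([]))                    # True: empty lists are treated as array-like
print(_is_tensor_or_array_like("not an array"))        # False: strings never match
print(_is_tensor_or_array_like([{"a": 1}]))            # False: decided by the first element only
```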
transformers/utils/import_utils.py

@@ -552,6 +552,11 @@ def is_torch_flex_attn_available() -> bool:
     return is_torch_available() and version.parse(get_torch_version()) >= version.parse("2.5.0")
 
 
+@lru_cache
+def is_grouped_mm_available() -> bool:
+    return is_torch_available() and version.parse(get_torch_version()) >= version.parse("2.9.0")
+
+
 @lru_cache
 def is_kenlm_available() -> bool:
     return _is_package_available("kenlm")
@@ -885,14 +890,17 @@ def is_flash_attn_2_available() -> bool:
 
     import torch
 
-    if torch.version.cuda:
-        return version.parse(flash_attn_version) >= version.parse("2.1.0")
-    elif torch.version.hip:
-        # TODO: Bump the requirement to 2.1.0 once released in https://github.com/ROCmSoftwarePlatform/flash-attention
-        return version.parse(flash_attn_version) >= version.parse("2.0.4")
-    elif is_torch_mlu_available():
-        return version.parse(flash_attn_version) >= version.parse("2.3.3")
-    else:
+    try:
+        if torch.version.cuda:
+            return version.parse(flash_attn_version) >= version.parse("2.1.0")
+        elif torch.version.hip:
+            # TODO: Bump the requirement to 2.1.0 once released in https://github.com/ROCmSoftwarePlatform/flash-attention
+            return version.parse(flash_attn_version) >= version.parse("2.0.4")
+        elif is_torch_mlu_available():
+            return version.parse(flash_attn_version) >= version.parse("2.3.3")
+        else:
+            return False
+    except packaging.version.InvalidVersion:
         return False
 
 
@@ -910,7 +918,12 @@ def is_flash_attn_greater_or_equal_2_10() -> bool:
 @lru_cache
 def is_flash_attn_greater_or_equal(library_version: str) -> bool:
     is_available, flash_attn_version = _is_package_available("flash_attn", return_version=True)
-    return is_available and version.parse(flash_attn_version) >= version.parse(library_version)
+    if not is_available:
+        return False
+    try:
+        return version.parse(flash_attn_version) >= version.parse(library_version)
+    except packaging.version.InvalidVersion:
+        return False
 
 
 @lru_cache
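Both flash-attention checks now swallow `packaging.version.InvalidVersion`, so a flash-attn build whose reported version string is not PEP 440 compliant (common for locally compiled wheels) makes the check return `False` instead of raising. A minimal sketch of the guarded-parse pattern; the helper name is made up for illustration:

```python
from packaging import version
from packaging.version import InvalidVersion


def at_least(installed: str, required: str) -> bool:
    """Return True only if `installed` parses as PEP 440 and satisfies `required`."""
    try:
        return version.parse(installed) >= version.parse(required)
    except InvalidVersion:
        return False


print(at_least("2.3.3", "2.1.0"))         # True
print(at_least("2.8.0.post2", "2.1.0"))   # True: still a valid PEP 440 version
print(at_least("custom-build", "2.1.0"))  # False instead of an InvalidVersion crash
```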
@@ -1071,6 +1084,11 @@ def is_pytest_available() -> bool:
     return _is_package_available("pytest")
 
 
+@lru_cache
+def is_pytest_order_available() -> bool:
+    return is_pytest_available() and _is_package_available("pytest_order")
+
+
 @lru_cache
 def is_spacy_available() -> bool:
     return _is_package_available("spacy")
@@ -1106,6 +1124,16 @@ def is_nltk_available() -> bool:
     return _is_package_available("nltk")
 
 
+@lru_cache
+def is_numba_available() -> bool:
+    is_available = _is_package_available("numba")
+    if not is_available:
+        return False
+
+    numpy_available, numpy_version = _is_package_available("numpy", return_version=True)
+    return not numpy_available or version.parse(numpy_version) < version.parse("2.2.0")
+
+
 @lru_cache
 def is_torchaudio_available() -> bool:
     return _is_package_available("torchaudio")
@@ -1825,6 +1853,20 @@ BACKENDS_MAPPING = OrderedDict(
 
 
 def requires_backends(obj, backends):
+    """
+    Method that automatically raises in case the specified backends are not available. It is often used during class
+    initialization to ensure the required dependencies are installed:
+
+    ```py
+    requires_backends(self, ["torch"])
+    ```
+
+    The backends should be defined in the `BACKEND_MAPPING` defined in `transformers.utils.import_utils`.
+
+    Args:
+        obj: object to be checked
+        backends: list or tuple of backends to check.
+    """
     if not isinstance(backends, (list, tuple)):
         backends = [backends]
 
@@ -71,14 +71,36 @@ def add_to_mapping(layer_name, device, repo_name, mode, compatible_mapping):
71
71
  }
72
72
 
73
73
 
74
+ def add_to_mapping_local(layer_name, device, repo_name, mode, compatible_mapping):
75
+ from pathlib import Path
76
+
77
+ from kernels import LocalLayerRepository
78
+
79
+ if device not in ["cuda", "rocm", "xpu", "npu"]:
80
+ raise ValueError(f"Only cuda, rocm, xpu and npu devices supported, got: {device}")
81
+ repo_layer_name = repo_name.split(":")[1]
82
+ repo_path = repo_name.split(":")[0]
83
+ repo_package_name = repo_path.split("/")[-1]
84
+ compatible_mapping[layer_name] = {
85
+ device: {
86
+ mode: LocalLayerRepository(
87
+ repo_path=Path(repo_path),
88
+ package_name=repo_package_name,
89
+ layer_name=repo_layer_name,
90
+ )
91
+ }
92
+ }
93
+
94
+
74
95
  class KernelConfig(PushToHubMixin):
75
96
  """
76
97
  Kernel configuration class. This class is used to configure the kernel mapping for a model.
77
98
  """
78
99
 
79
- def __init__(self, kernel_mapping={}):
100
+ def __init__(self, kernel_mapping={}, use_local_kernel=False):
80
101
  self.kernel_mapping = kernel_mapping
81
102
  self.registered_layer_names = {}
103
+ self.use_local_kernel = use_local_kernel
82
104
 
83
105
  def update_kernel(self, repo_id, registered_name, layer_name, device, mode, revision=None):
84
106
  from kernels import LayerRepository
@@ -105,6 +127,7 @@ class KernelConfig(PushToHubMixin):
105
127
  2. Each kernel value is either a string of the form 'org/repo:layer_name' or a dict mapping device types ("cuda", "rocm", "xpu", "npu") to such strings.
106
128
  3. Each device key in a dict is one of "cuda", "rocm", "xpu", or "npu".
107
129
  4. Each repo_name is a valid repository and layer name in the format 'org/repo:layer_name' (i.e., a string containing both a slash and a colon).
130
+ 5. If a local path is detected, it should be in the format '/abs/path:layer_name'. The absolute path must include the `package_name`, like "/home/user/layer_norm".
108
131
 
109
132
  Args:
110
133
  model: The model instance whose modules are checked for registered kernel_layer_name attributes.
@@ -114,14 +137,13 @@ class KernelConfig(PushToHubMixin):
114
137
  or if a repo_name is not a valid 'org/repo:layer_name' string.
115
138
  """
116
139
  MAPPING_FORMAT = """
140
+ For single device form remote
117
141
  {
118
142
  "RMSNorm":
119
143
  "kernels-community/layer_norm:LlamaRMSNorm",
120
144
  ...
121
145
  },
122
-
123
- or
124
-
146
+ For multiple devices form remote
125
147
  {
126
148
  "RMSNorm": {
127
149
  "cuda":
@@ -132,6 +154,23 @@ class KernelConfig(PushToHubMixin):
132
154
  },
133
155
  ...
134
156
  }
157
+ For single device form local
158
+ {
159
+ "RMSNorm":
160
+ "/abs/path:LlamaRMSNorm",
161
+ ...
162
+ },
163
+ For multiple devices form local
164
+ {
165
+ "RMSNorm": {
166
+ "cuda":
167
+ "/abs/path:LlamaRMSNorm",
168
+ "rocm":
169
+ "/abs/path:LlamaRMSNorm",
170
+ ...
171
+ },
172
+ ...
173
+ }
135
174
  """
136
175
  self.store_registered_layer_names(model)
137
176
  # Validate that the kernel mapping is a dict
@@ -149,7 +188,7 @@ class KernelConfig(PushToHubMixin):
149
188
  if isinstance(kernel, str):
150
189
  if "/" not in kernel or ":" not in kernel:
151
190
  raise ValueError(
152
- f"Kernel mapping for '{layer_name}' must be a valid repo name with a layer name (e.g., 'org/repo:layer_name'), got: {kernel}"
191
+ f"Kernel mapping for '{layer_name}' must be a valid repo name with a layer name (e.g., 'org/repo:layer_name' or '/abs/path:layer_name'), got: {kernel}"
153
192
  )
154
193
 
155
194
  elif isinstance(kernel, dict):
@@ -159,9 +198,8 @@ class KernelConfig(PushToHubMixin):
159
198
 
160
199
  if not isinstance(repo_name, str) or "/" not in repo_name or ":" not in repo_name:
161
200
  raise ValueError(
162
- f"Kernel mapping for '{layer_name}' must be a valid repo name with a layer name (e.g., 'org/repo:layer_name'), got: {repo_name}"
201
+ f"Kernel mapping for '{layer_name}' must be a valid repo name with a layer name (e.g., 'org/repo:layer_name' or '/abs/path:layer_name'), got: {repo_name}"
163
202
  )
164
-
165
203
  else:
166
204
  raise ValueError(f"Kernel mapping must follow the format: {MAPPING_FORMAT}, got: {kernel}")
 
@@ -174,18 +212,13 @@ class KernelConfig(PushToHubMixin):
           ...
        },
 
-        or
+        or for local path:
 
        {
-           "RMSNorm": {
-              "cuda":
-                  "kernels-community/layer_norm:LlamaRMSNorm",
-              "rocm":
-                  "kernels-community/layer_norm:LlamaRMSNorm",
-              ...
-           },
+           "RMSNorm":
+               "/home/user/liger_kernels:LigerRMSNorm",
           ...
-        }
+        },
 
        into a nested mapping:
 
@@ -200,6 +233,20 @@ class KernelConfig(PushToHubMixin):
           }
        }
 
+        or for local path:
+
+        {
+            "RMSNorm": {
+                "cuda": {
+                    Mode.INFERENCE: LocalLayerRepository(
+                        repo_path=Path("/home/user/liger_kernels"),
+                        package_name="liger_kernels",
+                        layer_name="LigerRMSNorm",
+                    )
+                }
+            }
+        }
+
        that's compatible with the kernels library.
 
        The device is inferred from the model's parameters if not provided.
@@ -217,11 +264,17 @@ class KernelConfig(PushToHubMixin):
 
             if isinstance(kernel, str):
                 repo_name = kernel
-                add_to_mapping(layer_name, current_device, repo_name, mode, compatible_mapping)
+                if not self.use_local_kernel:
+                    add_to_mapping(layer_name, current_device, repo_name, mode, compatible_mapping)
+                else:
+                    add_to_mapping_local(layer_name, current_device, repo_name, mode, compatible_mapping)
             elif isinstance(kernel, dict):
                 for device, repo_name in kernel.items():
                     if device != current_device:
                         continue
-                    add_to_mapping(layer_name, device, repo_name, mode, compatible_mapping)
+                    if not self.use_local_kernel:
+                        add_to_mapping(layer_name, device, repo_name, mode, compatible_mapping)
+                    else:
+                        add_to_mapping_local(layer_name, device, repo_name, mode, compatible_mapping)
 
         self.kernel_mapping = compatible_mapping
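The body of add_to_mapping_local is not shown in this diff. The sketch below is one plausible way a '/abs/path:layer_name' spec could be turned into a LocalLayerRepository entry, assuming the kernels package exposes LocalLayerRepository and Mode as used in the docstring above, and that the path ends with the package name; local_repo_from_spec is a hypothetical name, not the library's API.

    from pathlib import Path

    from kernels import LocalLayerRepository, Mode

    def local_repo_from_spec(spec: str, mode=Mode.INFERENCE) -> dict:
        # Split "/home/user/liger_kernels:LigerRMSNorm" into path and layer name.
        repo_path, layer_name = spec.rsplit(":", 1)
        return {
            mode: LocalLayerRepository(
                repo_path=Path(repo_path),
                package_name=Path(repo_path).name,  # the path is expected to end with the package name
                layer_name=layer_name,
            )
        }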
@@ -69,6 +69,7 @@ class AwqFormat(str, Enum):
     GEMM = "gemm"
     GEMV = "gemv"
     GEMV_FAST = "gemv_fast"
+    LLM_AWQ = "llm-awq"
 
 
 class AwqBackend(str, Enum):
@@ -838,14 +839,13 @@ class AwqConfig(GPTQConfig):
         r"""
        Safety checker that arguments are correct
        """
-        if self.format not in [
-            AwqFormat.GEMM,
-            AwqFormat.GEMV,
-            AwqFormat.GEMV_FAST,
-        ]:
-            raise ValueError(
-                f"Only supported versions are in [AWQLinearVersion.GEMM, AWQLinearVersion.GEMV, AWQLinearVersion.GEMV_FAST] - not recognized version {self.format}"
-            )
+
+        if self.backend == "llm-awq":
+            self.format = AwqFormat.LLM_AWQ
+            self.backend = AwqBackend.AUTO
+
+        if self.format not in AwqFormat.__members__.values():
+            raise ValueError(f"Invalid format '{self.format}'. Must be one of: {[b.value for b in AwqFormat]}")
 
         if self.backend not in AwqBackend.__members__.values():
             raise ValueError(f"Invalid backend '{self.backend}'. Must be one of: {[b.value for b in AwqBackend]}")
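Taken on its own, the new validation accepts the "llm-awq" format through the same enum-membership check used for backends. The sketch below re-declares a trimmed AwqFormat enum mirroring the diff rather than importing the real config class; validate_format is a hypothetical helper, not the library's API.

    from enum import Enum

    class AwqFormat(str, Enum):
        GEMM = "gemm"
        GEMV = "gemv"
        GEMV_FAST = "gemv_fast"
        LLM_AWQ = "llm-awq"

    def validate_format(fmt: str) -> None:
        # str-valued enum members compare equal to their values, so a plain string works here.
        if fmt not in AwqFormat.__members__.values():
            raise ValueError(f"Invalid format '{fmt}'. Must be one of: {[f.value for f in AwqFormat]}")

    validate_format("llm-awq")   # now accepted
    # validate_format("gemv2")   # raises ValueError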
@@ -175,7 +175,7 @@ class BaseVideoProcessor(BaseImageProcessorFast):
     def __init__(self, **kwargs: Unpack[VideosKwargs]) -> None:
         super().__init__()
 
-        self._processor_class = kwargs.pop("processor_class", None)
+        kwargs.pop("processor_class", None)
 
         # Additional attributes without default values
         for key, value in kwargs.items():
@@ -442,7 +442,6 @@ class BaseVideoProcessor(BaseImageProcessorFast):
             processed_videos_grouped[shape] = stacked_videos
 
         processed_videos = reorder_videos(processed_videos_grouped, grouped_videos_index)
-        processed_videos = torch.stack(processed_videos, dim=0) if return_tensors else processed_videos
 
         return BatchFeature(data={"pixel_values_videos": processed_videos}, tensor_type=return_tensors)
 
@@ -716,6 +715,7 @@ class BaseVideoProcessor(BaseImageProcessorFast):
             logger.info(
                 f"loading configuration file {video_processor_file} from cache at {resolved_video_processor_file}"
             )
+
         return video_processor_dict, kwargs
 
     @classmethod
@@ -771,11 +771,21 @@ class BaseVideoProcessor(BaseImageProcessorFast):
             `dict[str, Any]`: Dictionary of all the attributes that make up this video processor instance.
         """
         output = deepcopy(self.__dict__)
-        output.pop("model_valid_processing_keys", None)
-        output.pop("_valid_kwargs_names", None)
-        output["video_processor_type"] = self.__class__.__name__
+        filtered_dict = {}
+        for key, value in output.items():
+            if value is None:
+                class_default = getattr(type(self), key, "NOT_FOUND")
+                # Keep None if user explicitly set it (class default is non-None)
+                if class_default != "NOT_FOUND" and class_default is not None:
+                    filtered_dict[key] = value
+            else:
+                filtered_dict[key] = value
 
-        return output
+        filtered_dict.pop("model_valid_processing_keys", None)
+        filtered_dict.pop("_valid_kwargs_names", None)
+        filtered_dict["video_processor_type"] = self.__class__.__name__
+
+        return filtered_dict
 
     def to_json_string(self) -> str:
         """
@@ -790,12 +800,6 @@ class BaseVideoProcessor(BaseImageProcessorFast):
             if isinstance(value, np.ndarray):
                 dictionary[key] = value.tolist()
 
-        # make sure private name "_processor_class" is correctly
-        # saved as "processor_class"
-        _processor_class = dictionary.pop("_processor_class", None)
-        if _processor_class is not None:
-            dictionary["processor_class"] = _processor_class
-
         return json.dumps(dictionary, indent=2, sort_keys=True) + "\n"
 
     def to_json_file(self, json_file_path: Union[str, os.PathLike]):
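The None-filtering rule added to to_dict above keeps a None attribute only when the class-level default is itself non-None, i.e. when None can only have been set explicitly. A minimal sketch with a dummy class (not BaseVideoProcessor itself) illustrates the effect:

    from copy import deepcopy

    class DummyProcessor:
        size = {"height": 224, "width": 224}  # class default is non-None
        do_resize = None                      # class default is None

        def __init__(self):
            self.size = None       # explicitly overridden -> kept
            self.do_resize = None  # matches a None class default -> dropped
            self.fps = None        # no class default at all -> dropped

        def to_dict(self):
            output = deepcopy(self.__dict__)
            filtered = {}
            for key, value in output.items():
                if value is None:
                    class_default = getattr(type(self), key, "NOT_FOUND")
                    if class_default != "NOT_FOUND" and class_default is not None:
                        filtered[key] = value
                else:
                    filtered[key] = value
            return filtered

    assert DummyProcessor().to_dict() == {"size": None}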
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: transformers
-Version: 5.0.0rc1
+Version: 5.0.0rc2
 Summary: Transformers: the model-definition framework for state-of-the-art machine learning models in text, vision, audio, and multimodal models, for both inference and training.
 Home-page: https://github.com/huggingface/transformers
 Author: The Hugging Face team (past and future) with the help of all our contributors (https://github.com/huggingface/transformers/graphs/contributors)
@@ -11,7 +11,6 @@ Classifier: Development Status :: 5 - Production/Stable
 Classifier: Intended Audience :: Developers
 Classifier: Intended Audience :: Education
 Classifier: Intended Audience :: Science/Research
-Classifier: License :: OSI Approved :: Apache Software License
 Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.10
@@ -103,7 +102,7 @@ Requires-Dist: kenlm; extra == "torch-speech"
 Provides-Extra: vision
 Requires-Dist: Pillow<=15.0,>=10.0.1; extra == "vision"
 Provides-Extra: timm
-Requires-Dist: timm!=1.0.18,<=1.0.19; extra == "timm"
+Requires-Dist: timm>=1.0.23; extra == "timm"
 Provides-Extra: torch-vision
 Requires-Dist: torchvision; extra == "torch-vision"
 Requires-Dist: Pillow<=15.0,>=10.0.1; extra == "torch-vision"
@@ -232,7 +231,7 @@ Requires-Dist: Pillow<=15.0,>=10.0.1; extra == "all"
 Requires-Dist: kernels<0.11,>=0.10.2; extra == "all"
 Requires-Dist: optuna; extra == "all"
 Requires-Dist: ray[tune]>=2.7.0; extra == "all"
-Requires-Dist: timm!=1.0.18,<=1.0.19; extra == "all"
+Requires-Dist: timm>=1.0.23; extra == "all"
 Requires-Dist: torchvision; extra == "all"
 Requires-Dist: Pillow<=15.0,>=10.0.1; extra == "all"
 Requires-Dist: codecarbon>=2.8.1; extra == "all"
@@ -294,7 +293,7 @@ Requires-Dist: Pillow<=15.0,>=10.0.1; extra == "dev-torch"
 Requires-Dist: kernels<0.11,>=0.10.2; extra == "dev-torch"
 Requires-Dist: optuna; extra == "dev-torch"
 Requires-Dist: ray[tune]>=2.7.0; extra == "dev-torch"
-Requires-Dist: timm!=1.0.18,<=1.0.19; extra == "dev-torch"
+Requires-Dist: timm>=1.0.23; extra == "dev-torch"
 Requires-Dist: torchvision; extra == "dev-torch"
 Requires-Dist: Pillow<=15.0,>=10.0.1; extra == "dev-torch"
 Requires-Dist: codecarbon>=2.8.1; extra == "dev-torch"
@@ -330,7 +329,7 @@ Requires-Dist: Pillow<=15.0,>=10.0.1; extra == "dev"
 Requires-Dist: kernels<0.11,>=0.10.2; extra == "dev"
 Requires-Dist: optuna; extra == "dev"
 Requires-Dist: ray[tune]>=2.7.0; extra == "dev"
-Requires-Dist: timm!=1.0.18,<=1.0.19; extra == "dev"
+Requires-Dist: timm>=1.0.23; extra == "dev"
 Requires-Dist: torchvision; extra == "dev"
 Requires-Dist: Pillow<=15.0,>=10.0.1; extra == "dev"
 Requires-Dist: codecarbon>=2.8.1; extra == "dev"