PyPI - transformers - Versions diffs - 5.0.0rc3__py3-none-any.whl → 5.1.0__py3-none-any.whl - Mend

transformers 5.0.0rc3__py3-none-any.whl → 5.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (1021) hide show

transformers/utils/loading_report.py CHANGED Viewed

@@ -16,7 +16,7 @@ import re
 import shutil
 import sys
 from collections import OrderedDict, defaultdict
-from collections.abc import Iterable
+from dataclasses import dataclass
 from typing import Any
@@ -83,26 +83,6 @@ def update_key_name(mapping: dict[str, Any]) -> dict[str, Any]:
     return out
-# We have a class to simplify disabling ANSI colors
-class ANSI:
-    palette = {
-        "reset": "[0m",
-        "red": "[31m",
-        "yellow": "[33m",
-        "orange": "[38;5;208m",
-        "purple": "[35m",
-        "bold": "[1m",
-        "italic": "[3m",
-        "dim": "[2m",
-    }
-    def __init__(self, enable):
-        self.enable = enable
-    def __getitem__(self, key):
-        return self.palette[key] if self.enable else ""
 _ansi_re = re.compile(r"\x1b\[[0-9;]*m")
@@ -126,8 +106,24 @@ def _make_table(rows, headers):
     return "\n".join([header_line, sep_line] + body)
-def _color(s, color, ansi):
-    return f"{ansi[color]}{s}{ansi['reset']}"
+PALETTE = {
+    "reset": "[0m",
+    "red": "[31m",
+    "yellow": "[33m",
+    "orange": "[38;5;208m",
+    "purple": "[35m",
+    "bold": "[1m",
+    "italic": "[3m",
+    "dim": "[2m",
+}
+def _color(s, color):
+    """Return color-formatted input `s` if `sys.stdout` is interactive, e.g. connected to a terminal."""
+    if sys.stdout.isatty():
+        return f"{PALETTE[color]}{s}{PALETTE['reset']}"
+    else:
+        return s
 def _get_terminal_width(default=80):
@@ -137,21 +133,115 @@ def _get_terminal_width(default=80):
         return default
+@dataclass
+class LoadStateDictInfo:
+    """
+    Mutable container for state-dict loading results and diagnostics. Each entry in this structure is mutable,
+    and will usually be mutated in-place during the loading pipeline.
+    Attributes:
+        missing_keys (`set[str]`):
+            Keys that are missing from the loaded checkpoints but expected in the model's architecture.
+        unexpected_keys (`set[str]`):
+            Keys that are found in the checkpoints, but not expected in the model's architecture.
+        mismatched_keys (`set[tuple[str, tuple[int], tuple[int]]]`):
+            Keys that are found in the checkpoints and are expected in the model's architecture, but with a different shape.
+        error_msgs ( `list[str]`):
+            Some potential error messages.
+        conversion_errors (`dict[str, str]`):
+            Errors happening during the on-the-fly weight conversion process.
+    """
+    missing_keys: set[str]
+    unexpected_keys: set[str]
+    mismatched_keys: set[tuple[str, tuple[int], tuple[int]]]
+    error_msgs: list[str]
+    conversion_errors: dict[str, str]
+    def missing_and_mismatched(self):
+        """Return all effective missing keys, including `missing` and `mismatched` keys."""
+        return self.missing_keys | {k[0] for k in self.mismatched_keys}
+    def to_dict(self):
+        # Does not include the `conversion_errors` to be coherent with legacy reporting in the tests
+        return {
+            "missing_keys": self.missing_keys,
+            "unexpected_keys": self.unexpected_keys,
+            "mismatched_keys": self.mismatched_keys,
+            "error_msgs": self.error_msgs,
+        }
+    def create_loading_report(self) -> str | None:
+        """Generate the minimal table of a loading report."""
+        term_w = _get_terminal_width()
+        rows = []
+        tips = ""
+        if self.unexpected_keys:
+            tips += (
+                f"\n- {_color('UNEXPECTED', 'orange') + PALETTE['italic']}\t:can be ignored when loading from different "
+                "task/architecture; not ok if you expect identical arch."
+            )
+            for k in update_key_name(self.unexpected_keys):
+                status = _color("UNEXPECTED", "orange")
+                rows.append([k, status, "", ""])
+        if self.missing_keys:
+            tips += (
+                f"\n- {_color('MISSING', 'red') + PALETTE['italic']}\t:those params were newly initialized because missing "
+                "from the checkpoint. Consider training on your downstream task."
+            )
+            for k in update_key_name(self.missing_keys):
+                status = _color("MISSING", "red")
+                rows.append([k, status, ""])
+        if self.mismatched_keys:
+            tips += (
+                f"\n- {_color('MISMATCH', 'yellow') + PALETTE['italic']}\t:ckpt weights were loaded, but they did not match "
+                "the original empty weight shapes."
+            )
+            iterator = {a: (b, c) for a, b, c in self.mismatched_keys}
+            for key, (shape_ckpt, shape_model) in update_key_name(iterator).items():
+                status = _color("MISMATCH", "yellow")
+                data = [
+                    key,
+                    status,
+                    f"Reinit due to size mismatch - ckpt: {str(shape_ckpt)} vs model:{str(shape_model)}",
+                ]
+                rows.append(data)
+        if self.conversion_errors:
+            tips += f"\n- {_color('CONVERSION', 'purple') + PALETTE['italic']}\t:originate from the conversion scheme"
+            for k, v in update_key_name(self.conversion_errors).items():
+                status = _color("CONVERSION", "purple")
+                _details = f"\n\n{v}\n\n"
+                rows.append([k, status, _details])
+        # If nothing is wrong, return None
+        if len(rows) == 0:
+            return None
+        headers = ["Key", "Status"]
+        if term_w > 200:
+            headers += ["Details"]
+        else:
+            headers += ["", ""]
+        table = _make_table(rows, headers=headers)
+        tips = f"\n\n{PALETTE['italic']}Notes:{tips}{PALETTE['reset']}"
+        report = table + tips
+        return report
 def log_state_dict_report(
-    *,
     model,
-    pretrained_model_name_or_path,
+    pretrained_model_name_or_path: str,
+    ignore_mismatched_sizes: bool,
+    loading_info: LoadStateDictInfo,
     logger: logging.Logger | None = None,
-    error_msgs: Iterable[str] | None = None,
-    unexpected_keys=None,
-    missing_keys=None,
-    mismatched_keys=None,
-    mismatched_shapes=None,
-    ignore_mismatched_sizes=True,
-    conversion_errors=None,
-    color=True,  # allow disabling for plain logs
 ):
-    """Log a readable report about state_dict loading issues.
+    """
+    Log a readable report about state_dict loading issues.
     This version is terminal-size aware: for very small terminals it falls back to a compact
     Key | Status view so output doesn't wrap badly.
@@ -159,94 +249,32 @@ def log_state_dict_report(
     if logger is None:
         logger = logging.getLogger(__name__)
-    error_msgs = error_msgs or []
-    unexpected_keys = unexpected_keys or []
-    missing_keys = missing_keys or []
-    mismatched_keys = mismatched_keys or []
-    mismatched_shapes = mismatched_shapes or []
-    conversion_errors = conversion_errors or {}
-    # Detect whether the current stdout supports ANSI colors; allow callers to pass `color=False` to force no color
-    color_enabled = bool(color and sys.stdout.isatty())
-    ansi = ANSI(color_enabled)
     # Re-raise errors early if needed
-    if error_msgs:
-        error_msg = "\n\t".join(error_msgs)
+    if loading_info.error_msgs:
+        error_msg = "\n\t".join(loading_info.error_msgs)
         if "size mismatch" in error_msg:
             error_msg += (
                 "\n\tYou may consider adding `ignore_mismatched_sizes=True` to `from_pretrained(...)` if appropriate."
             )
         raise RuntimeError(f"Error(s) in loading state_dict for {model.__class__.__name__}:\n\t{error_msg}")
-    term_w = _get_terminal_width()
-    rows = []
-    if unexpected_keys:
-        for k in update_key_name(unexpected_keys):
-            status = "UNEXPECTED"
-            status = _color(status, "orange", ansi)
-            rows.append([k, status, "", ""])
-    if missing_keys:
-        for k in update_key_name(missing_keys):
-            status = "MISSING"
-            status = _color(status, "red", ansi)
-            rows.append([k, status, ""])
-    if mismatched_keys:
-        iterator = {a: (b, c) for a, b, c in mismatched_shapes}
-        for key, (shape_ckpt, shape_model) in update_key_name(iterator).items():
-            status = "MISMATCH"
-            status = _color(status, "yellow", ansi)
-            data = [key, status]
-            data.append(
-                " ".join(["Reinit due to size mismatch", f"ckpt: {str(shape_ckpt)} vs model:{str(shape_model)}"])
-            )
-            rows.append(data)
-    if conversion_errors:
-        for k, v in update_key_name(conversion_errors).items():
-            status = "CONVERSION"
-            status = _color(status, "purple", ansi)
-            _details = v[:term_w]
-            rows.append([k, status, _details])
-    if not rows:
+    # Create the report table
+    report = loading_info.create_loading_report()
+    if report is None:
         return
-    headers = ["Key", "Status"]
-    if term_w > 200:
-        headers += ["Details"]
-    else:
-        headers += ["", ""]
-    table = _make_table(rows, headers=headers)
-    prelude = (
-        f"{ansi['bold']}{model.__class__.__name__} LOAD REPORT{ansi['reset']} from: {pretrained_model_name_or_path}\n"
-    )
-    tips = f"\n\n{ansi['italic']}Notes:"
-    if unexpected_keys:
-        tips += f"\n- {_color('UNEXPECTED', 'orange', ansi) + ansi['italic']}\t:can be ignored when loading from different task/architecture; not ok if you expect identical arch."
-    if missing_keys:
-        tips += f"\n- {_color('MISSING', 'red', ansi) + ansi['italic']}\t:those params were newly initialized because missing from the checkpoint. Consider training on your downstream task."
-    if mismatched_keys:
-        tips += f"\n- {_color('MISMATCH', 'yellow', ansi) + ansi['italic']}\t:ckpt weights were loaded, but they did not match the original empty weight shapes."
-    if conversion_errors:
-        tips += f"\n- {_color('CONVERSION', 'purple', ansi) + ansi['italic']}\t:originate from the conversion scheme"
-    tips += f"{ansi['reset']}"
+    prelude = f"{PALETTE['bold']}{model.__class__.__name__} LOAD REPORT{PALETTE['reset']} from: {pretrained_model_name_or_path}\n"
     # Log the report as warning
-    logger.warning(prelude + table + tips)
+    logger.warning(prelude + report)
     # Re-raise in those case, after the report
-    if conversion_errors:
+    if loading_info.conversion_errors:
         raise RuntimeError(
             "We encountered some issues during automatic conversion of the weights. For details look at the `CONVERSION` entries of "
             "the above report!"
         )
-    if not ignore_mismatched_sizes and mismatched_keys:
+    if not ignore_mismatched_sizes and loading_info.mismatched_keys:
         raise RuntimeError(
             "You set `ignore_mismatched_sizes` to `False`, thus raising an error. For details look at the above report!"
         )
-    return prelude + table + tips

transformers/utils/quantization_config.py CHANGED Viewed

@@ -1488,25 +1488,6 @@ class TorchAoConfig(QuantizationConfigMixin):
     # int4_weight_only quant is only working with *torch.bfloat16* dtype right now
     model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cuda", dtype=torch.bfloat16, quantization_config=quantization_config)
-    # autoquant
-    # `autoquant` is a convenient way for users to search for the best quantization for each layer
-    # `min_sqnr` is an option to control the accuracy of the model, higher value means the model is more
-    # accurate, we can start with 30 and adjust it to larger or smaller (e.g. 40, 20)
-    # defaults to None, which means we'll try to get the best performing quantized model without
-    # considering accuracy
-    quantization_config = TorchAoConfig("autoquant", min_sqnr=30)
-    model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cuda", dtype=torch.bfloat16, quantization_config=quantization_config)
-    # run through example inputs, quantization methods will be selected based on the shape of example input
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    input_text = "What are we having for dinner?"
-    input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")
-    MAX_NEW_TOKENS = 1000
-    model.generate(**input_ids, max_new_tokens=MAX_NEW_TOKENS, cache_implementation="static")
-    # manually ran finalize_autoquant if needed
-    if hasattr(quantized_model, "finalize_autoquant"):
-      print("finalizing autoquant")
-      quantized_model.finalize_autoquant()
     ```
     """
@@ -1583,7 +1564,6 @@ class TorchAoConfig(QuantizationConfigMixin):
     def _get_torchao_quant_type_to_method(self):
         """Get mapping of quant_type strings to their corresponding methods."""
         from torchao.quantization import (
-            autoquant,
             int4_weight_only,
             int8_dynamic_activation_int8_weight,
             int8_weight_only,
@@ -1593,7 +1573,6 @@ class TorchAoConfig(QuantizationConfigMixin):
             "int4_weight_only": int4_weight_only,
             "int8_weight_only": int8_weight_only,
             "int8_dynamic_activation_int8_weight": int8_dynamic_activation_int8_weight,
-            "autoquant": autoquant,
         }
     def get_apply_tensor_subclass(self):

transformers/video_processing_utils.py CHANGED Viewed

@@ -68,7 +68,7 @@ if is_torch_available():
     import torch
 if is_torchvision_v2_available():
-    from torchvision.transforms.v2 import functional as F
+    import torchvision.transforms.v2.functional as tvF
 logger = logging.get_logger(__name__)
@@ -220,7 +220,7 @@ class BaseVideoProcessor(BaseImageProcessorFast):
             `torch.Tensor`: The converted video.
         """
-        video = F.grayscale_to_rgb(video)
+        video = tvF.grayscale_to_rgb(video)
         if video.shape[-3] == 3 or not (video[..., 3, :, :] < 255).any():
             return video
@@ -311,7 +311,7 @@ class BaseVideoProcessor(BaseImageProcessorFast):
             if isinstance(videos[0], list):
                 # Videos sometimes are passed as a list of image URLs, especially through templates
                 videos = [
-                    torch.stack([F.pil_to_tensor(image) for image in images], dim=0)
+                    torch.stack([tvF.pil_to_tensor(image) for image in images], dim=0)
                     for images in self.fetch_images(videos)
                 ]
                 if do_sample_frames:
@@ -336,7 +336,7 @@ class BaseVideoProcessor(BaseImageProcessorFast):
         for video in videos:
             # `make_batched_videos` always returns a 4D array per video
             if isinstance(video, np.ndarray):
-                # not using F.to_tensor as it doesn't handle (C, H, W) numpy arrays
+                # not using tvF.to_tensor as it doesn't handle (C, H, W) numpy arrays
                 video = torch.from_numpy(video).contiguous()
             # Infer the channel dimension format if not provided
@@ -405,7 +405,7 @@ class BaseVideoProcessor(BaseImageProcessorFast):
         do_convert_rgb: bool,
         do_resize: bool,
         size: SizeDict,
-        interpolation: Optional["F.InterpolationMode"],
+        interpolation: Optional["tvF.InterpolationMode"],
         do_center_crop: bool,
         crop_size: SizeDict,
         do_rescale: bool,

transformers/video_utils.py CHANGED Viewed

@@ -200,7 +200,9 @@ def make_batched_videos(videos) -> list[Union[np.ndarray, "torch.Tensor", "URL",
     except (IndexError, TypeError):
         pass
-    if isinstance(videos, str) or is_valid_video(videos):
+    if is_batched_video(videos):
+        return convert_pil_frames_to_video(list(videos))
+    elif isinstance(videos, str) or is_valid_video(videos):
         return convert_pil_frames_to_video([videos])
     # only one frame passed, thus we unsqueeze time dim
     elif is_valid_image(videos):

transformers 5.0.0rc3__py3-none-any.whl → 5.1.0__py3-none-any.whl

transformers 5.0.0rc3__py3-none-any.whl → 5.1.0__py3-none-any.whl