PyPI - vllm-cpu - Versions diffs - 0.12.0__cp313-cp313-manylinux_2_17_aarch64.whl - Mend

vllm-cpu 0.12.0__cp313-cp313-manylinux_2_17_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (1600) hide show

vllm/logger.py ADDED Viewed

@@ -0,0 +1,298 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Logging configuration for vLLM."""
+import datetime
+import json
+import logging
+import os
+import sys
+from collections.abc import Generator, Hashable
+from contextlib import contextmanager
+from functools import lru_cache, partial
+from logging import Logger
+from logging.config import dictConfig
+from os import path
+from types import MethodType
+from typing import Any, Literal, cast
+import vllm.envs as envs
+# Log line layout: env-provided prefix, level name, timestamp, source
+# location and message. "fileinfo" is not a standard LogRecord attribute; the
+# vllm formatters set it on each record before applying this format.
+_FORMAT = (
+    f"{envs.VLLM_LOGGING_PREFIX}%(levelname)s %(asctime)s "
+    "[%(fileinfo)s:%(lineno)d] %(message)s"
+)
+# strftime pattern used for %(asctime)s: month-day hour:minute:second.
+_DATE_FORMAT = "%m-%d %H:%M:%S"
+def _use_color() -> bool:
+    """Return whether log output should be colorized.
+    NO_COLOR or VLLM_LOGGING_COLOR == "0" disables color and
+    VLLM_LOGGING_COLOR == "1" forces it. Otherwise color is used only when
+    the configured logging stream is stdout/stderr and that stream reports
+    being a TTY.
+    """
+    if envs.NO_COLOR:
+        return False
+    color_setting = envs.VLLM_LOGGING_COLOR
+    if color_setting == "0":
+        return False
+    if color_setting == "1":
+        return True
+    # Auto-detect: only the two standard streams can be probed for a TTY.
+    known_streams = {
+        "ext://sys.stdout": sys.stdout,
+        "ext://sys.stderr": sys.stderr,
+    }
+    stream_name = envs.VLLM_LOGGING_STREAM
+    if stream_name not in known_streams:
+        return False
+    stream = known_streams[stream_name]
+    return hasattr(stream, "isatty") and stream.isatty()
+# Default configuration in logging.config.dictConfig format. It defines a
+# plain and a colored formatter, one stream handler, and the "vllm" logger.
+DEFAULT_LOGGING_CONFIG = {
+    "formatters": {
+        "vllm": {
+            "class": "vllm.logging_utils.NewLineFormatter",
+            "datefmt": _DATE_FORMAT,
+            "format": _FORMAT,
+        },
+        "vllm_color": {
+            "class": "vllm.logging_utils.ColoredFormatter",
+            "datefmt": _DATE_FORMAT,
+            "format": _FORMAT,
+        },
+    },
+    "handlers": {
+        "vllm": {
+            "class": "logging.StreamHandler",
+            # Choose formatter based on color setting.
+            "formatter": "vllm_color" if _use_color() else "vllm",
+            "level": envs.VLLM_LOGGING_LEVEL,
+            "stream": envs.VLLM_LOGGING_STREAM,
+        },
+    },
+    "loggers": {
+        "vllm": {
+            "handlers": ["vllm"],
+            "level": envs.VLLM_LOGGING_LEVEL,
+            # Records are not passed on to the root logger's handlers.
+            "propagate": False,
+        },
+    },
+    "version": 1,
+    # Loggers created before dictConfig runs stay enabled.
+    "disable_existing_loggers": False,
+}
+@lru_cache
+def _print_debug_once(logger: Logger, msg: str, *args: Hashable) -> None:
+    """Log ``msg`` at DEBUG level; identical repeat calls log nothing.
+    Deduplication comes from ``lru_cache``: a call with the same
+    ``(logger, msg, *args)`` is answered from the cache without running the
+    body. NOTE: bare ``@lru_cache`` uses functools' default bounded cache
+    size, so a key that has been evicted would log again.
+    """
+    # Set the stacklevel to 3 to print the original caller's line info
+    logger.debug(msg, *args, stacklevel=3)
+@lru_cache
+def _print_info_once(logger: Logger, msg: str, *args: Hashable) -> None:
+    """Log ``msg`` at INFO level; identical repeat calls log nothing.
+    Deduplication comes from ``lru_cache``: a call with the same
+    ``(logger, msg, *args)`` is answered from the cache without running the
+    body. NOTE: bare ``@lru_cache`` uses functools' default bounded cache
+    size, so a key that has been evicted would log again.
+    """
+    # Set the stacklevel to 3 to print the original caller's line info
+    logger.info(msg, *args, stacklevel=3)
+@lru_cache
+def _print_warning_once(logger: Logger, msg: str, *args: Hashable) -> None:
+    """Log ``msg`` at WARNING level; identical repeat calls log nothing.
+    Deduplication comes from ``lru_cache``: a call with the same
+    ``(logger, msg, *args)`` is answered from the cache without running the
+    body. NOTE: bare ``@lru_cache`` uses functools' default bounded cache
+    size, so a key that has been evicted would log again.
+    """
+    # Set the stacklevel to 3 to print the original caller's line info
+    logger.warning(msg, *args, stacklevel=3)
+# Scope controlling which processes emit a "*_once" message: "process" always
+# logs, "global" logs only on the global first rank, and "local" logs only on
+# the local first rank (see _should_log_with_scope).
+LogScope = Literal["process", "global", "local"]
+def _should_log_with_scope(scope: LogScope) -> bool:
+    """Return True when the current process may log for ``scope``.
+    "global" and "local" defer to the matching first-rank check in
+    ``vllm.distributed.parallel_state``; any other value (the default
+    "process" scope) always logs.
+    """
+    if scope not in ("global", "local"):
+        return True
+    # The rank helpers are imported only when a rank check is requested.
+    if scope == "global":
+        from vllm.distributed.parallel_state import (
+            is_global_first_rank as is_first_rank,
+        )
+    else:
+        from vllm.distributed.parallel_state import (
+            is_local_first_rank as is_first_rank,
+        )
+    return is_first_rank()
+class _VllmLogger(Logger):
+    """
+    Typing-only description of a logger that carries the ``*_once`` helpers.
+    Note:
+        This class is just to provide type information.
+        We actually patch the methods directly on the [`logging.Logger`][]
+        instance to avoid conflicting with other libraries such as
+        `intel_extension_for_pytorch.utils._logger`.
+    """
+    def debug_once(
+        self, msg: str, *args: Hashable, scope: LogScope = "process"
+    ) -> None:
+        """
+        As [`debug`][logging.Logger.debug], but a repeated call with the
+        same message and arguments is silently dropped. Nothing is logged
+        when ``scope`` excludes the current process.
+        """
+        # Call the cached helper directly: its stacklevel assumes exactly
+        # this call depth.
+        if _should_log_with_scope(scope):
+            _print_debug_once(self, msg, *args)
+    def info_once(self, msg: str, *args: Hashable, scope: LogScope = "process") -> None:
+        """
+        As [`info`][logging.Logger.info], but a repeated call with the
+        same message and arguments is silently dropped. Nothing is logged
+        when ``scope`` excludes the current process.
+        """
+        if _should_log_with_scope(scope):
+            _print_info_once(self, msg, *args)
+    def warning_once(
+        self, msg: str, *args: Hashable, scope: LogScope = "process"
+    ) -> None:
+        """
+        As [`warning`][logging.Logger.warning], but a repeated call with the
+        same message and arguments is silently dropped. Nothing is logged
+        when ``scope`` excludes the current process.
+        """
+        if _should_log_with_scope(scope):
+            _print_warning_once(self, msg, *args)
+# Pre-defined methods mapping to avoid repeated dictionary creation
+# Keys are the attribute names installed on each logger; values are the plain
+# functions from _VllmLogger that init_logger binds to the logger instance.
+_METHODS_TO_PATCH = {
+    "debug_once": _VllmLogger.debug_once,
+    "info_once": _VllmLogger.info_once,
+    "warning_once": _VllmLogger.warning_once,
+}
+def _configure_vllm_root_logger() -> None:
+    """Apply the logging configuration selected through environment settings.
+    With VLLM_CONFIGURE_LOGGING enabled, DEFAULT_LOGGING_CONFIG is used after
+    refreshing its level, stream and formatter from the current environment.
+    If VLLM_LOGGING_CONFIG_PATH is set, the JSON file at that path replaces
+    the default configuration entirely. When neither yields a configuration,
+    logging is left untouched.
+    Raises:
+        RuntimeError: if VLLM_LOGGING_CONFIG_PATH is set while
+            VLLM_CONFIGURE_LOGGING is false, or if the file does not exist.
+        ValueError: if the JSON document is not an object.
+    """
+    logging_config: dict[str, Any] = {}
+    config_path = envs.VLLM_LOGGING_CONFIG_PATH
+    if not envs.VLLM_CONFIGURE_LOGGING and config_path:
+        raise RuntimeError(
+            "VLLM_CONFIGURE_LOGGING evaluated to false, but "
+            "VLLM_LOGGING_CONFIG_PATH was given. VLLM_LOGGING_CONFIG_PATH "
+            "implies VLLM_CONFIGURE_LOGGING. Please enable "
+            "VLLM_CONFIGURE_LOGGING or unset VLLM_LOGGING_CONFIG_PATH."
+        )
+    if envs.VLLM_CONFIGURE_LOGGING:
+        # The module-level default is updated in place on purpose.
+        logging_config = DEFAULT_LOGGING_CONFIG
+        vllm_handler = logging_config["handlers"]["vllm"]
+        # Refresh these values in case env vars have changed.
+        vllm_handler["level"] = envs.VLLM_LOGGING_LEVEL
+        vllm_handler["stream"] = envs.VLLM_LOGGING_STREAM
+        vllm_handler["formatter"] = "vllm_color" if _use_color() else "vllm"
+        vllm_loggers = logging_config["loggers"]["vllm"]
+        vllm_loggers["level"] = envs.VLLM_LOGGING_LEVEL
+    if config_path:
+        if not path.exists(config_path):
+            # Exceptions do not interpolate "%s" arguments the way logging
+            # calls do, so the message is formatted before raising.
+            raise RuntimeError(
+                "Could not load logging config. File does not exist: "
+                f"{config_path}"
+            )
+        with open(config_path, encoding="utf-8") as file:
+            custom_config = json.loads(file.read())
+        if not isinstance(custom_config, dict):
+            raise ValueError(
+                "Invalid logging config. Expected dict, got "
+                f"{type(custom_config).__name__}."
+            )
+        logging_config = custom_config
+    for formatter in logging_config.get("formatters", {}).values():
+        # This provides backwards compatibility after #10134.
+        if formatter.get("class") == "vllm.logging.NewLineFormatter":
+            formatter["class"] = "vllm.logging_utils.NewLineFormatter"
+    if logging_config:
+        dictConfig(logging_config)
+def init_logger(name: str) -> _VllmLogger:
+    """Return the logger called ``name`` with the ``*_once`` helpers attached.
+    Obtaining loggers through this function, rather than calling
+    ``logging.getLogger`` directly, ensures this module has been imported and
+    therefore that the root vllm logger has already been configured.
+    """
+    named_logger = logging.getLogger(name)
+    for attr_name in _METHODS_TO_PATCH:
+        # Bind the plain function to this specific logger instance.
+        bound_method = MethodType(_METHODS_TO_PATCH[attr_name], named_logger)
+        setattr(named_logger, attr_name, bound_method)
+    return cast(_VllmLogger, named_logger)
+@contextmanager
+def suppress_logging(level: int = logging.INFO) -> Generator[None, Any, None]:
+    """Temporarily disable all log records at ``level`` and below.
+    The previous process-wide disable threshold is restored when the ``with``
+    block exits, including when the block raises.
+    Args:
+        level: Highest severity to suppress; passed to ``logging.disable``.
+    """
+    current_level = logging.root.manager.disable
+    logging.disable(level)
+    try:
+        yield
+    finally:
+        # Without this, an exception in the body would leave logging
+        # disabled for the rest of the process.
+        logging.disable(current_level)
+# The root logger is initialized when the module is imported.
+# A module body runs once per process: Python's import system caches the
+# module in sys.modules and serializes concurrent imports of the same module
+# with an import lock (this does not rely on the GIL).
+_configure_vllm_root_logger()
+logger = init_logger(__name__)
+def _trace_calls(log_path, root_dir, frame, event, arg=None):
+    """Trace function for ``sys.settrace`` that logs calls and returns.
+    For "call" and "return" events in files whose path starts with
+    ``root_dir``, one line naming the function and the frame that called it
+    is appended to ``log_path``. Returns None for call/return events outside
+    ``root_dir``; otherwise returns a tracer bound to the same ``log_path``
+    and ``root_dir``.
+    """
+    if event not in ("call", "return"):
+        return partial(_trace_calls, log_path, root_dir)
+    # Extract the filename, line number, function name, and the code object
+    filename = frame.f_code.co_filename
+    lineno = frame.f_lineno
+    func_name = frame.f_code.co_name
+    if not filename.startswith(root_dir):
+        # only log the functions in the vllm root_dir
+        return None
+    # Log every function call or return
+    try:
+        last_frame = frame.f_back
+        if last_frame is None:
+            # initial frame
+            last_filename, last_lineno, last_func_name = "", 0, ""
+        else:
+            last_filename = last_frame.f_code.co_filename
+            last_lineno = last_frame.f_lineno
+            last_func_name = last_frame.f_code.co_name
+        with open(log_path, "a") as f:
+            ts = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")
+            if event == "call":
+                entry = (
+                    f"{ts} Call to"
+                    f" {func_name} in {filename}:{lineno}"
+                    f" from {last_func_name} in {last_filename}:"
+                    f"{last_lineno}\n"
+                )
+            else:
+                entry = (
+                    f"{ts} Return from"
+                    f" {func_name} in {filename}:{lineno}"
+                    f" to {last_func_name} in {last_filename}:"
+                    f"{last_lineno}\n"
+                )
+            f.write(entry)
+    except NameError:
+        # modules are deleted during shutdown
+        pass
+    return partial(_trace_calls, log_path, root_dir)
+def enable_trace_function_call(log_file_path: str, root_dir: str | None = None):
+    """
+    Install a trace function that records calls and returns under `root_dir`.
+    This is useful for debugging hangs or crashes.
+    `log_file_path` is the file the trace lines are appended to.
+    `root_dir` limits tracing to files below that directory. If None, the
+    directory two levels above this file is used.
+    Note that this call is thread-level, any threads calling this function
+    will have the trace enabled. Other threads will not be affected.
+    """
+    logger.warning(
+        "VLLM_TRACE_FUNCTION is enabled. It will record every"
+        " function executed by Python. This will slow down the code. It "
+        "is suggested to be used for debugging hang or crashes only."
+    )
+    logger.info("Trace frame log is saved to %s", log_file_path)
+    # by default, this is the vllm root directory
+    traced_root = (
+        os.path.dirname(os.path.dirname(__file__)) if root_dir is None else root_dir
+    )
+    tracer = partial(_trace_calls, log_file_path, traced_root)
+    sys.settrace(tracer)

vllm/logging_utils/__init__.py ADDED Viewed

@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from vllm.logging_utils.formatter import ColoredFormatter, NewLineFormatter
+from vllm.logging_utils.lazy import lazy
+from vllm.logging_utils.log_time import logtime
+# Names re-exported from the submodules imported above.
+__all__ = [
+    "NewLineFormatter",
+    "ColoredFormatter",
+    "lazy",
+    "logtime",
+]

vllm/logging_utils/dump_input.py ADDED Viewed

@@ -0,0 +1,83 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import contextlib
+import enum
+import json
+import torch
+from vllm.config import VllmConfig
+from vllm.logger import init_logger
+from vllm.v1.core.sched.output import SchedulerOutput
+from vllm.v1.metrics.stats import SchedulerStats
+from vllm.version import __version__ as VLLM_VERSION
+logger = init_logger(__name__)
+def prepare_object_to_dump(obj) -> str:
+    """Render ``obj`` recursively as a compact, log-friendly string.
+    Strings are wrapped in single quotes; dicts, lists, sets and tuples are
+    rendered element by element; tensors are reduced to shape/device/dtype;
+    objects exposing ``anon_repr()`` use it; other objects with a
+    ``__dict__`` are rendered as ``TypeName(attr=value, ...)``. Anything else
+    is JSON-encoded, falling back to ``repr``.
+    Args:
+        obj: Any object.
+    Returns:
+        The string rendering of ``obj``.
+    """
+    if isinstance(obj, str):
+        return f"'{obj}'"  # wrapped in single quotes
+    if isinstance(obj, dict):
+        # Join from a generator, not a set: a set would emit the entries in
+        # hash order (different between runs) and merge equal renderings.
+        dict_str = ", ".join(
+            f"{str(k)}: {prepare_object_to_dump(v)}" for k, v in obj.items()
+        )
+        return f"{{{dict_str}}}"
+    if isinstance(obj, (list, set, tuple)):
+        # All three sequence-like containers share the bracketed rendering.
+        return f"[{', '.join(prepare_object_to_dump(v) for v in obj)}]"
+    if isinstance(obj, enum.Enum):
+        return repr(obj)
+    if isinstance(obj, torch.Tensor):
+        # We only print the 'draft' of the tensor to not expose sensitive data
+        # and to get some metadata in case of CUDA runtime crashed
+        return f"Tensor(shape={obj.shape}, device={obj.device},dtype={obj.dtype})"
+    if hasattr(obj, "anon_repr"):
+        return obj.anon_repr()
+    if hasattr(obj, "__dict__"):
+        dict_str = ", ".join(
+            f"{str(k)}={prepare_object_to_dump(v)}" for k, v in obj.__dict__.items()
+        )
+        return f"{type(obj).__name__}({dict_str})"
+    # Hacky way to make sure we can serialize the object in JSON format
+    try:
+        return json.dumps(obj)
+    except (TypeError, OverflowError):
+        return repr(obj)
+def dump_engine_exception(
+    config: VllmConfig,
+    scheduler_output: SchedulerOutput,
+    scheduler_stats: SchedulerStats | None,
+):
+    """Log the engine config and scheduler state on a best-effort basis.
+    Any exception raised while dumping is suppressed so that this diagnostic
+    step cannot itself raise.
+    """
+    # NOTE: ensure we can log extra info without risking raises
+    # unexpected errors during logging
+    with contextlib.suppress(Exception):
+        _dump_engine_exception(
+            config=config,
+            scheduler_output=scheduler_output,
+            scheduler_stats=scheduler_stats,
+        )
+def _dump_engine_exception(
+    config: VllmConfig,
+    scheduler_output: SchedulerOutput,
+    scheduler_stats: SchedulerStats | None,
+):
+    """Log the vLLM version and config, then the scheduler output and stats.
+    The scheduler output is rendered with ``prepare_object_to_dump``. A
+    failure while rendering or logging it is reported via
+    ``logger.exception`` instead of propagating.
+    """
+    logger.error(
+        "Dumping input data for V1 LLM engine (v%s) with config: %s, ",
+        VLLM_VERSION,
+        config,
+    )
+    try:
+        logger.error(
+            "Dumping scheduler output for model execution: %s",
+            prepare_object_to_dump(scheduler_output),
+        )
+        # Stats are optional and are logged as-is.
+        if scheduler_stats:
+            logger.error("Dumping scheduler stats: %s", scheduler_stats)
+    except Exception:
+        logger.exception("Error preparing object to dump")

vllm/logging_utils/formatter.py ADDED Viewed

@@ -0,0 +1,127 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import logging
+from pathlib import Path
+from vllm import envs
+class NewLineFormatter(logging.Formatter):
+    """Adds logging prefix to newlines to align multi-line messages."""
+    def __init__(self, fmt, datefmt=None, style="%"):
+        super().__init__(fmt, datefmt, style)
+        # Shortened relative paths are shown only at DEBUG logging level.
+        self.use_relpath = envs.VLLM_LOGGING_LEVEL == "DEBUG"
+        if self.use_relpath:
+            self.root_dir = Path(__file__).resolve().parent.parent.parent
+    @staticmethod
+    def _shrink_path(relpath: Path) -> str:
+        """
+        Shortens a file path for logging display:
+        - Removes leading 'vllm' folder if present.
+        - Keeps the first level (the first two if the path starts
+        with 'v1') as the head.
+        - If more than two levels follow the head, keeps only the
+        last two and collapses the rest as '...'.
+        - Otherwise returns the path as-is.
+        - Examples:
+        vllm/model_executor/layers/quantization/utils/fp8_utils.py ->
+        model_executor/.../utils/fp8_utils.py
+        vllm/model_executor/layers/quantization/awq.py ->
+        model_executor/.../quantization/awq.py
+        vllm/v1/attention/backends/mla/common.py ->
+        v1/attention/.../mla/common.py
+        vllm/logger.py ->
+        logger.py
+        Args:
+            relpath (Path): The relative path to be shortened.
+        Returns:
+            str: The shortened path string for display.
+        """
+        segments = list(relpath.parts)
+        if segments[:1] == ["vllm"]:
+            segments = segments[1:]
+        head_len = 2 if segments[:1] == ["v1"] else 1
+        head, tail = segments[:head_len], segments[head_len:]
+        if len(tail) > 2:
+            tail = ["..."] + tail[-2:]
+        return "/".join(head + tail)
+    def format(self, record):
+        """Format ``record``, repeating the line prefix after each newline."""
+        if not self.use_relpath:
+            record.fileinfo = record.filename
+        else:
+            relpath = None
+            abs_path = getattr(record, "pathname", None)
+            if abs_path:
+                try:
+                    relpath = Path(abs_path).resolve().relative_to(self.root_dir)
+                except Exception:
+                    # e.g. the file is not located under root_dir
+                    relpath = None
+            if relpath is None:
+                relpath = Path(record.filename)
+            record.fileinfo = self._shrink_path(relpath)
+        formatted = super().format(record)
+        if record.message == "":
+            return formatted
+        # Everything before the message text is the prefix to repeat.
+        line_prefix = formatted.split(record.message)[0]
+        return formatted.replace("\n", "\r\n" + line_prefix)
+class ColoredFormatter(NewLineFormatter):
+    """Adds ANSI color codes to log levels for terminal output.
+    This formatter adds colors by injecting them into the format string for
+    static elements (timestamp, filename, line number) and modifying the
+    levelname attribute for dynamic color selection.
+    """
+    # ANSI color codes
+    COLORS = {
+        "DEBUG": "\033[37m",  # White
+        "INFO": "\033[32m",  # Green
+        "WARNING": "\033[33m",  # Yellow
+        "ERROR": "\033[31m",  # Red
+        "CRITICAL": "\033[35m",  # Magenta
+    }
+    GREY = "\033[90m"  # Grey for timestamp and file info
+    RESET = "\033[0m"
+    def __init__(self, fmt, datefmt=None, style="%"):
+        # Inject grey color codes into format string for timestamp and file info
+        if fmt:
+            # Wrap %(asctime)s with grey
+            fmt = fmt.replace("%(asctime)s", f"{self.GREY}%(asctime)s{self.RESET}")
+            # Wrap [%(fileinfo)s:%(lineno)d] with grey
+            fmt = fmt.replace(
+                "[%(fileinfo)s:%(lineno)d]",
+                f"{self.GREY}[%(fileinfo)s:%(lineno)d]{self.RESET}",
+            )
+        # Call parent __init__ with potentially modified format string
+        super().__init__(fmt, datefmt, style)
+    def format(self, record):
+        """Format ``record`` with its level name wrapped in a color code.
+        The record's original ``levelname`` is restored before returning,
+        including when formatting raises, because the same record object may
+        be passed on to other handlers.
+        """
+        # Store original levelname to restore later (in case record is reused)
+        orig_levelname = record.levelname
+        # Only modify levelname - it needs dynamic color based on severity
+        if (color_code := self.COLORS.get(orig_levelname)) is not None:
+            record.levelname = f"{color_code}{orig_levelname}{self.RESET}"
+        try:
+            # Call parent format which will handle everything else
+            return super().format(record)
+        finally:
+            # Restore original levelname even if formatting failed
+            record.levelname = orig_levelname

vllm/logging_utils/lazy.py ADDED Viewed

@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from collections.abc import Callable
+from typing import Any
+class lazy:
+    """Defer a zero-argument callable until its string form is requested.
+    The callable runs each time ``str()`` or ``repr()`` is taken, e.g. when a
+    log record that received the wrapper as an argument is formatted.
+    """
+    __slots__ = ("_factory",)
+    def __init__(self, factory: Callable[[], Any]) -> None:
+        self._factory = factory
+    def __str__(self) -> str:
+        produced = self._factory()
+        return str(produced)
+    def __repr__(self) -> str:
+        # repr deliberately mirrors str so %r and %s render the same text.
+        return self.__str__()

vllm/logging_utils/log_time.py ADDED Viewed

@@ -0,0 +1,34 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Provides a timeslice logging decorator
+"""
+import functools
+import time
+def logtime(logger, msg=None):
+    """
+    Decorator factory that logs how long the decorated function took.
+    The elapsed wall time is reported through ``logger.debug`` after the
+    function returns. ``msg`` replaces the default
+    "Function '<module>.<qualname>'" prefix of the log line.
+    Always place it beneath other decorators.
+    """
+    def _decorate(func):
+        @functools.wraps(func)
+        def _timed(*args, **kwargs):
+            started_at = time.perf_counter()
+            outcome = func(*args, **kwargs)
+            elapsed = time.perf_counter() - started_at
+            if msg is None:
+                prefix = f"Function '{func.__module__}.{func.__qualname__}'"
+            else:
+                prefix = msg
+            logger.debug("%s: Elapsed time %.7f secs", prefix, elapsed)
+            return outcome
+        return _timed
+    return _decorate

vllm/logits_process.py ADDED Viewed

@@ -0,0 +1,121 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from collections.abc import Callable, Sequence
+from typing import TypeAlias
+import torch
+from vllm.tokenizers import TokenizerLike
+# Two accepted call shapes: (token_ids, logits) -> logits, or
+# (token_ids, token_ids, logits) -> logits.
+LogitsProcessor: TypeAlias = (
+    Callable[[list[int], torch.Tensor], torch.Tensor]
+    | Callable[[list[int], list[int], torch.Tensor], torch.Tensor]
+)
+"""LogitsProcessor is a function that takes a list
+of previously generated tokens, the logits tensor
+for the next token and, optionally, prompt tokens as a
+first argument, and returns a modified tensor of logits
+to sample from."""
+def get_bad_words_logits_processors(
+    bad_words: list[str], tokenizer: TokenizerLike
+) -> list[LogitsProcessor]:
+    """Build the logits processor that bans the given words.
+    Each word is tokenized without a leading space and, to prohibit it both
+    at the beginning and in the middle of text (related to the
+    add_prefix_space tokenizer parameter), with one. The space-prefixed
+    encoding is kept only if it has the same length as the plain encoding
+    and starts with a different token. Words that tokenize to nothing are
+    skipped.
+    Args:
+        bad_words: Words to prohibit.
+        tokenizer: Tokenizer used to encode the words, without special tokens.
+    Returns:
+        A single-element list holding a ``NoBadWordsLogitsProcessor``.
+    """
+    bad_words_ids: list[list[int]] = []
+    for bad_word in bad_words:
+        stripped_word = bad_word.lstrip()
+        plain_ids = tokenizer.encode(text=stripped_word, add_special_tokens=False)
+        if not plain_ids:
+            # An empty encoding bans nothing and would break the first-token
+            # comparison below.
+            continue
+        bad_words_ids.append(plain_ids)
+        prefixed_ids = tokenizer.encode(
+            text=" " + stripped_word, add_special_tokens=False
+        )
+        # Keep the prefixed form only if the prefix space produces a new
+        # word token.
+        if (
+            prefixed_ids
+            and prefixed_ids[0] != plain_ids[0]
+            and len(prefixed_ids) == len(plain_ids)
+        ):
+            bad_words_ids.append(prefixed_ids)
+    return [NoBadWordsLogitsProcessor(bad_words_ids=bad_words_ids)]
+class NoBadWordsLogitsProcessor:
+    """Logits processor that prevents banned token sequences.
+    Single-token bad words are always set to ``-inf``. For a multi-token bad
+    word, its last token is set to ``-inf`` only when the most recently
+    generated tokens equal the rest of the word. Empty sequences are ignored.
+    """
+    _SMALLEST_LOGIT = float("-inf")
+    _NEUTRAL_LOGIT = 0.0
+    def __init__(self, bad_words_ids: list[list[int]]):
+        self.bad_words_ids = bad_words_ids
+        # Built on the first call, once the vocabulary size and device are
+        # known from the logits.
+        self.word_bias: torch.Tensor | None = None
+    def __call__(
+        self,
+        past_tokens_ids: Sequence[int],
+        logits: torch.FloatTensor,
+    ) -> torch.Tensor:
+        """Return ``logits`` with every currently banned token set to -inf.
+        Args:
+            past_tokens_ids: Token ids generated so far.
+            logits: Logits for the next token, indexed by token id.
+        Raises:
+            ValueError: on first use, if a bad-word token id is outside the
+                vocabulary.
+        """
+        if self.word_bias is None:
+            self._init_word_bias(logits=logits)
+        last_token_bias = torch.zeros_like(logits)
+        for bad_word_ids in self.bad_words_ids:
+            # Empty sequences ban nothing; 1-token words are already
+            # covered by word_bias.
+            if len(bad_word_ids) < 2:
+                continue
+            # Not enough history yet to complete this word with one token.
+            if len(bad_word_ids) > len(past_tokens_ids) + 1:
+                continue
+            prefix_length = len(bad_word_ids) - 1
+            actual_prefix = past_tokens_ids[-prefix_length:]
+            expected_prefix = bad_word_ids[:prefix_length]
+            if tuple(actual_prefix) == tuple(expected_prefix):
+                last_token_bias[bad_word_ids[-1]] = self._SMALLEST_LOGIT
+        return logits + self.word_bias + last_token_bias
+    def _init_word_bias(self, logits: torch.FloatTensor) -> None:
+        """Validate the token ids and build the bias for 1-token bad words."""
+        # Code based on NoBadWordsLogitsProcessor and SequenceBiasLogitsProcessor  # noqa: E501
+        # from https://github.com/huggingface/transformers/blob/main/src/transformers/generation/logits_process.py
+        vocab_size = logits.shape[-1]
+        self._check_token_ids_bounds(vocab_size=vocab_size)
+        self.word_bias = torch.zeros(
+            (vocab_size,), dtype=torch.float, device=logits.device
+        )
+        for bad_word_ids in self.bad_words_ids:
+            if len(bad_word_ids) == 1:
+                self.word_bias[bad_word_ids[0]] = self._SMALLEST_LOGIT
+    def _check_token_ids_bounds(self, vocab_size: int) -> None:
+        """Raise ValueError if any bad-word token id is outside the vocabulary."""
+        invalid_token_ids = [
+            token_id
+            for bad_word_ids in self.bad_words_ids
+            for token_id in bad_word_ids
+            if token_id < 0 or token_id >= vocab_size
+        ]
+        if invalid_token_ids:
+            raise ValueError(
+                f"The model vocabulary size is {vocab_size},"
+                f" but the following tokens"
+                f" were specified as bad: {invalid_token_ids}."
+                f" All token id values should be integers satisfying:"
+                f" 0 <= token_id < {vocab_size}."
+            )