PyPI - vllm-cpu-amxbf16 - Versions diffs - 0.11.2.post2__cp310-cp310-manylinux_2_17_x86_64.whl - Mend

vllm-cpu-amxbf16 0.11.2.post2__cp310-cp310-manylinux_2_17_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (1536) hide show

vllm/v1/metrics/prometheus.py ADDED Viewed

@@ -0,0 +1,82 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import os
+import tempfile
+from prometheus_client import REGISTRY, CollectorRegistry, multiprocess
+from vllm.logger import init_logger
+logger = init_logger(__name__)
+# Global temporary directory for prometheus multiprocessing
+_prometheus_multiproc_dir: tempfile.TemporaryDirectory | None = None
+def setup_multiprocess_prometheus():
+    """Make sure PROMETHEUS_MULTIPROC_DIR is set for this process.
+    When the variable is absent, a fresh temporary directory is created,
+    remembered in the module-level ``_prometheus_multiproc_dir`` and exported
+    through ``os.environ``. When the variable is already present it is left
+    untouched and a warning is logged instead.
+    """
+    global _prometheus_multiproc_dir
+    if "PROMETHEUS_MULTIPROC_DIR" in os.environ:
+        # The caller owns the directory in this case; nothing is created here.
+        logger.warning(
+            "Found PROMETHEUS_MULTIPROC_DIR was set by user. "
+            "This directory must be wiped between vLLM runs or "
+            "you will find inaccurate metrics. Unset the variable "
+            "and vLLM will properly handle cleanup."
+        )
+        return
+    # The TemporaryDirectory is kept in a module global; per the original
+    # note, it will be automatically cleaned up upon exit.
+    tmp_dir = tempfile.TemporaryDirectory()
+    _prometheus_multiproc_dir = tmp_dir
+    os.environ["PROMETHEUS_MULTIPROC_DIR"] = tmp_dir.name
+    logger.debug("Created PROMETHEUS_MULTIPROC_DIR at %s", tmp_dir.name)
+def get_prometheus_registry() -> CollectorRegistry:
+    """Pick the prometheus registry matching the multiprocessing setup.
+    Returns:
+        CollectorRegistry: the global ``REGISTRY`` when
+        PROMETHEUS_MULTIPROC_DIR is unset; otherwise a newly created registry
+        with a ``MultiProcessCollector`` attached to it.
+    """
+    if os.getenv("PROMETHEUS_MULTIPROC_DIR") is None:
+        return REGISTRY
+    logger.debug("Using multiprocess registry for prometheus metrics")
+    multiproc_registry = CollectorRegistry()
+    # Constructing the collector with the registry as argument is the only
+    # wiring step; its return value is not needed.
+    multiprocess.MultiProcessCollector(multiproc_registry)
+    return multiproc_registry
+def unregister_vllm_metrics():
+    """Drop every vLLM collector from the global prometheus ``REGISTRY``.
+    Handy for tests and CI/CD, where the same metrics may get registered
+    several times across runs. In the multiprocess case the metrics also have
+    to be removed from the global registry.
+    """
+    # Iterate over a snapshot: unregister() is called while looping.
+    for candidate in list(REGISTRY._collector_to_names):
+        if not hasattr(candidate, "_name"):
+            continue
+        if "vllm" in candidate._name:
+            REGISTRY.unregister(candidate)
+def shutdown_prometheus():
+    """Mark this process's prometheus multiprocess metrics as dead.
+    Does nothing when ``_prometheus_multiproc_dir`` is None, i.e. when this
+    module did not create the multiprocess directory itself. Any failure is
+    logged and swallowed so that shutdown stays best-effort.
+    """
+    multiproc_dir = _prometheus_multiproc_dir
+    if multiproc_dir is None:
+        return
+    try:
+        pid = os.getpid()
+        # mark_process_dead() needs the directory *path*. The module global
+        # holds a tempfile.TemporaryDirectory object, which is not path-like,
+        # so its ``.name`` string has to be passed.
+        multiprocess.mark_process_dead(pid, multiproc_dir.name)
+        logger.debug("Marked Prometheus metrics for process %d as dead", pid)
+    except Exception as e:
+        logger.error("Error during metrics cleanup: %s", str(e))

vllm/v1/metrics/ray_wrappers.py ADDED Viewed

@@ -0,0 +1,169 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import time
+from vllm.distributed.kv_transfer.kv_connector.v1.metrics import KVConnectorPrometheus
+from vllm.v1.metrics.loggers import PrometheusStatLogger
+from vllm.v1.spec_decode.metrics import SpecDecodingProm
+try:
+    from ray.util import metrics as ray_metrics
+    from ray.util.metrics import Metric
+except ImportError:
+    ray_metrics = None
+import regex as re
+class RayPrometheusMetric:
+    """Common base of the Ray-backed metric wrappers.
+    Holds the wrapped Ray metric in ``self.metric`` and offers a
+    ``labels()`` method that forwards label values to it as default tags.
+    """
+    def __init__(self):
+        if ray_metrics is None:
+            raise ImportError("RayPrometheusMetric requires Ray to be installed.")
+        self.metric: Metric = None
+    def labels(self, *labels, **labelskwargs):
+        """Apply label values to the wrapped metric and return ``self``.
+        Keyword labels are converted to strings and installed as default
+        tags. Positional labels are paired with the metric's ``_tag_keys``
+        in order and installed the same way.
+        Raises:
+            ValueError: if the number of positional labels differs from the
+                number of tag keys.
+        """
+        if labelskwargs:
+            stringified = {
+                key: val if isinstance(val, str) else str(val)
+                for key, val in labelskwargs.items()
+            }
+            self.metric.set_default_tags(stringified)
+        if labels:
+            tag_keys = self.metric._tag_keys
+            expected, received = len(tag_keys), len(labels)
+            if received != expected:
+                raise ValueError(
+                    "Number of labels must match the number of tag keys. "
+                    f"Expected {expected}, got {received}"
+                )
+            self.metric.set_default_tags(dict(zip(tag_keys, labels)))
+        return self
+    @staticmethod
+    def _get_sanitized_opentelemetry_name(name: str) -> str:
+        """
+        Return ``name`` with every character outside a-z, A-Z, 0-9 and '_'
+        replaced by '_' (for example ':' becomes '_'), as required for
+        compatibility with Ray + OpenTelemetry metric names.
+        # ruff: noqa: E501
+        Ref: https://github.com/open-telemetry/opentelemetry-cpp/blob/main/sdk/src/metrics/instrument_metadata_validator.cc#L22-L23
+        Ref: https://github.com/ray-project/ray/blob/master/src/ray/stats/metric.cc#L107
+        """
+        sanitized = re.sub(r"[^a-zA-Z0-9_]", "_", name)
+        return sanitized
+class RayGaugeWrapper(RayPrometheusMetric):
+    """Wraps around ray.util.metrics.Gauge to provide same API as
+    prometheus_client.Gauge.
+    Args:
+        name: Metric name; sanitized with
+            ``_get_sanitized_opentelemetry_name`` before it is given to Ray.
+        documentation: Passed to Ray as the metric description.
+        labelnames: Label names, forwarded as Ray tag keys (None when empty).
+        multiprocess_mode: Accepted for signature compatibility and discarded.
+    Raises:
+        ImportError: if Ray is not installed.
+    """
+    def __init__(
+        self,
+        name: str,
+        documentation: str | None = "",
+        labelnames: list[str] | None = None,
+        multiprocess_mode: str | None = "",
+    ):
+        # Run the base initializer so that a missing Ray install fails with
+        # its explicit ImportError rather than an AttributeError on None.
+        super().__init__()
+        # All Ray metrics are keyed by WorkerId, so multiprocess modes like
+        # "mostrecent", "all", "sum" do not apply. This logic can be manually
+        # implemented at the observability layer (Prometheus/Grafana).
+        del multiprocess_mode
+        labelnames_tuple = tuple(labelnames) if labelnames else None
+        name = self._get_sanitized_opentelemetry_name(name)
+        self.metric = ray_metrics.Gauge(
+            name=name, description=documentation, tag_keys=labelnames_tuple
+        )
+    def set(self, value: int | float):
+        """Set the gauge to ``value``."""
+        return self.metric.set(value)
+    def set_to_current_time(self):
+        """Set the gauge to the current ``time.time()`` value."""
+        # Ray metrics has no set_to_current_time of its own, see
+        # https://docs.ray.io/en/latest/_modules/ray/util/metrics.html
+        return self.metric.set(time.time())
+class RayCounterWrapper(RayPrometheusMetric):
+    """Wraps around ray.util.metrics.Counter to provide same API as
+    prometheus_client.Counter.
+    Args:
+        name: Metric name; sanitized with
+            ``_get_sanitized_opentelemetry_name`` before it is given to Ray.
+        documentation: Passed to Ray as the metric description.
+        labelnames: Label names, forwarded as Ray tag keys (None when empty).
+    Raises:
+        ImportError: if Ray is not installed.
+    """
+    def __init__(
+        self,
+        name: str,
+        documentation: str | None = "",
+        labelnames: list[str] | None = None,
+    ):
+        # Run the base initializer so that a missing Ray install fails with
+        # its explicit ImportError rather than an AttributeError on None.
+        super().__init__()
+        labelnames_tuple = tuple(labelnames) if labelnames else None
+        name = self._get_sanitized_opentelemetry_name(name)
+        self.metric = ray_metrics.Counter(
+            name=name, description=documentation, tag_keys=labelnames_tuple
+        )
+    def inc(self, value: int | float = 1.0):
+        """Increment the counter by ``value``; a zero increment is skipped."""
+        # NOTE(review): presumably skipped because Ray's Counter.inc rejects
+        # non-positive values - confirm against the Ray API.
+        if value == 0:
+            return
+        return self.metric.inc(value)
+class RayHistogramWrapper(RayPrometheusMetric):
+    """Wraps around ray.util.metrics.Histogram to provide same API as
+    prometheus_client.Histogram.
+    Args:
+        name: Metric name; sanitized with
+            ``_get_sanitized_opentelemetry_name`` before it is given to Ray.
+        documentation: Passed to Ray as the metric description.
+        labelnames: Label names, forwarded as Ray tag keys (None when empty).
+        buckets: Bucket boundaries; an empty list is used when omitted.
+    Raises:
+        ImportError: if Ray is not installed.
+    """
+    def __init__(
+        self,
+        name: str,
+        documentation: str | None = "",
+        labelnames: list[str] | None = None,
+        buckets: list[float] | None = None,
+    ):
+        # Run the base initializer so that a missing Ray install fails with
+        # its explicit ImportError rather than an AttributeError on None.
+        super().__init__()
+        labelnames_tuple = tuple(labelnames) if labelnames else None
+        name = self._get_sanitized_opentelemetry_name(name)
+        boundaries = buckets if buckets else []
+        self.metric = ray_metrics.Histogram(
+            name=name,
+            description=documentation,
+            tag_keys=labelnames_tuple,
+            boundaries=boundaries,
+        )
+    def observe(self, value: int | float):
+        """Record one observation of ``value``."""
+        return self.metric.observe(value)
+class RaySpecDecodingProm(SpecDecodingProm):
+    """
+    Variant of SpecDecodingProm used by RayMetrics: it exposes the same
+    metrics, but builds its counters with RayCounterWrapper so they are
+    logged through Ray's util.metrics library.
+    """
+    # Counter class override; the parent class is defined outside this file.
+    _counter_cls = RayCounterWrapper
+class RayKVConnectorPrometheus(KVConnectorPrometheus):
+    """
+    Variant of KVConnectorPrometheus used by RayMetrics: it exposes the
+    same KV connector metrics, but builds them with the Ray wrapper classes
+    so they are logged through Ray's util.metrics library.
+    """
+    # Metric class overrides; the parent class is defined outside this file.
+    _histogram_cls = RayHistogramWrapper
+    _counter_cls = RayCounterWrapper
+    _gauge_cls = RayGaugeWrapper
+class RayPrometheusStatLogger(PrometheusStatLogger):
+    """PrometheusStatLogger variant that uses Ray metrics instead."""
+    # Ray-backed replacements for the metric classes.
+    _histogram_cls = RayHistogramWrapper
+    _counter_cls = RayCounterWrapper
+    _gauge_cls = RayGaugeWrapper
+    # Ray-backed replacements for the nested metric groups.
+    _kv_connector_cls = RayKVConnectorPrometheus
+    _spec_decoding_cls = RaySpecDecodingProm
+    @staticmethod
+    def _unregister_vllm_metrics():
+        """Do nothing; this is a no-op on purpose."""

vllm/v1/metrics/reader.py ADDED Viewed

@@ -0,0 +1,257 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from dataclasses import dataclass
+from prometheus_client import REGISTRY
+from prometheus_client import Metric as PromMetric
+from prometheus_client.samples import Sample
+@dataclass
+class Metric:
+    """Common base for the prometheus metric snapshots in this module.
+    A metric is identified by its name together with a set of key=value
+    labels; one vLLM instance can therefore report several metrics that
+    share a name and differ only in their labels.
+    """
+    # Metric name.
+    name: str
+    # key=value labels attached to this particular metric.
+    labels: dict[str, str]
+@dataclass
+class Counter(Metric):
+    """An integer counter that only ever increases."""
+    # Counter reading.
+    value: int
+@dataclass
+class Vector(Metric):
+    """An ordered list of integer counters.
+    Prometheus has no such type; it exists here to model exactly one
+    metric, vllm:spec_decode_num_accepted_tokens_per_pos.
+    """
+    # Counter readings, in order.
+    values: list[int]
+@dataclass
+class Gauge(Metric):
+    """A numeric reading that may rise or fall."""
+    # Gauge reading.
+    value: float
+@dataclass
+class Histogram(Metric):
+    """Observations sorted into configurable buckets.
+    ``buckets`` maps each bucket's upper limit to the count observed in
+    that bucket; a '+Inf' key is always present.
+    ``count`` is the total count over all buckets and equals the count of
+    the '+Inf' bucket.
+    ``sum`` is the total of all observed values.
+    """
+    # Total number of observations.
+    count: int
+    # Total of all observed values.
+    sum: float
+    # Upper limit (as a string) -> observed count.
+    buckets: dict[str, int]
+def get_metrics_snapshot() -> list[Metric]:
+    """An API for accessing in-memory Prometheus metrics.
+    Collects from the global prometheus_client ``REGISTRY`` and keeps only
+    metrics whose name starts with "vllm:".
+    Example:
+        >>> for metric in llm.get_metrics():
+        ...     if isinstance(metric, Counter):
+        ...         print(f"{metric} = {metric.value}")
+        ...     elif isinstance(metric, Gauge):
+        ...         print(f"{metric} = {metric.value}")
+        ...     elif isinstance(metric, Histogram):
+        ...         print(f"{metric}")
+        ...         print(f"    sum = {metric.sum}")
+        ...         print(f"    count = {metric.count}")
+        ...         for bucket_le, value in metric.buckets.items():
+        ...             print(f"    {bucket_le} = {value}")
+    Returns:
+        One Gauge, Counter, Vector or Histogram per labelled series.
+    Raises:
+        AssertionError: if a "vllm:" metric has a type other than gauge,
+            counter or histogram.
+    """
+    collected: list[Metric] = []
+    for metric in REGISTRY.collect():
+        if not metric.name.startswith("vllm:"):
+            continue
+        if metric.type == "gauge":
+            collected.extend(
+                Gauge(name=metric.name, labels=s.labels, value=s.value)
+                for s in _get_samples(metric)
+            )
+        elif metric.type == "counter":
+            # Only the '_total' samples carry the counter value.
+            samples = _get_samples(metric, "_total")
+            if metric.name == "vllm:spec_decode_num_accepted_tokens_per_pos":
+                #
+                # Ugly vllm:num_accepted_tokens_per_pos special case.
+                #
+                # This metric is a vector of counters - for each spec
+                # decoding token position, we observe the number of
+                # accepted tokens using a Counter labeled with 'position'.
+                # We convert these into a vector of integer values.
+                #
+                collected.extend(
+                    Vector(name=metric.name, labels=labels, values=values)
+                    for labels, values in _digest_num_accepted_by_pos_samples(
+                        samples
+                    )
+                )
+            else:
+                collected.extend(
+                    Counter(name=metric.name, labels=s.labels, value=int(s.value))
+                    for s in samples
+                )
+        elif metric.type == "histogram":
+            #
+            # A histogram has a number of '_bucket' samples where
+            # the 'le' label represents the upper limit of the bucket.
+            # We convert these bucketized values into a dict of values
+            # indexed by the value of the 'le' label. The 'le=+Inf'
+            # label is a special case, catching all values observed.
+            #
+            bucket_samples = _get_samples(metric, "_bucket")
+            count_samples = _get_samples(metric, "_count")
+            sum_samples = _get_samples(metric, "_sum")
+            collected.extend(
+                Histogram(
+                    name=metric.name,
+                    labels=labels,
+                    buckets=buckets,
+                    count=count_value,
+                    sum=sum_value,
+                )
+                for labels, buckets, count_value, sum_value in _digest_histogram(
+                    bucket_samples, count_samples, sum_samples
+                )
+            )
+        else:
+            raise AssertionError(f"Unknown metric type {metric.type}")
+    return collected
+def _get_samples(metric: PromMetric, suffix: str | None = None) -> list[Sample]:
+    """Return the samples of ``metric`` named ``metric.name`` plus ``suffix``.
+    With no suffix, the sample name must equal the metric name exactly.
+    """
+    wanted = metric.name if suffix is None else metric.name + suffix
+    return [sample for sample in metric.samples if sample.name == wanted]
+def _strip_label(labels: dict[str, str], key_to_remove: str) -> dict[str, str]:
+    """Return a copy of ``labels`` without ``key_to_remove``.
+    The input dict is left unchanged.
+    Raises:
+        KeyError: if ``key_to_remove`` is not present in ``labels``.
+    """
+    remaining = dict(labels)
+    del remaining[key_to_remove]
+    return remaining
+def _digest_histogram(
+    bucket_samples: list[Sample], count_samples: list[Sample], sum_samples: list[Sample]
+) -> list[tuple[dict[str, str], dict[str, int], int, float]]:
+    #
+    # Regroup flat histogram samples into one record per label set.
+    #
+    # In the DP case the samples arrive as a flat per-bucket-per-engine
+    # list of labelled samples, next to the count and sum samples:
+    #
+    # bucket_samples (in):
+    #   labels = {le: 100,  idx: 0}, value = 2
+    #   labels = {le: 200,  idx: 0}, value = 4
+    #   labels = {le: +Inf, idx: 0}, value = 10
+    #   labels = {le: 100,  idx: 1}, value = 1
+    #   labels = {le: 200,  idx: 1}, value = 5
+    #   labels = {le: +Inf, idx: 1}, value = 7
+    # count_samples (in):
+    #   labels = {idx: 0}, value = 10
+    #   labels = {idx: 1}, value = 7
+    # sum_samples (in):
+    #   labels = {idx: 0}, value = 2000
+    #   labels = {idx: 1}, value = 1200
+    #
+    # output: [
+    #   {idx: 0}, {"100": 2, "200": 4, "+Inf": 10}, 10, 2000
+    #   {idx: 1}, {"100": 1, "200": 5, "+Inf": 7},   7, 1200
+    # ]
+    #
+    # Label sets are used as dict keys, hence the frozenset of items.
+    per_series_buckets: dict[frozenset[tuple[str, str]], dict[str, int]] = {}
+    for sample in bucket_samples:
+        upper_limit = sample.labels["le"]
+        series = frozenset(_strip_label(sample.labels, "le").items())
+        per_series_buckets.setdefault(series, {})[upper_limit] = int(sample.value)
+    per_series_count: dict[frozenset[tuple[str, str]], int] = {
+        frozenset(sample.labels.items()): int(sample.value)
+        for sample in count_samples
+    }
+    per_series_sum: dict[frozenset[tuple[str, str]], float] = {
+        frozenset(sample.labels.items()): sample.value for sample in sum_samples
+    }
+    # Buckets, counts and sums must describe exactly the same label sets.
+    assert (
+        set(per_series_buckets.keys())
+        == set(per_series_count.keys())
+        == set(per_series_sum.keys())
+    )
+    return [
+        (dict(series), buckets, per_series_count[series], per_series_sum[series])
+        for series, buckets in per_series_buckets.items()
+    ]
+def _digest_num_accepted_by_pos_samples(
+    samples: list[Sample],
+) -> list[tuple[dict[str, str], list[int]]]:
+    #
+    # Regroup flat per-position samples into one vector per label set.
+    #
+    # In the DP case the samples arrive as a flat per-position-per-engine
+    # list of labelled samples:
+    #
+    # samples (in):
+    #   labels = {position: 0, idx: 0}, value = 10
+    #   labels = {position: 1, idx: 0}, value = 7
+    #   labels = {position: 2, idx: 0}, value = 2
+    #   labels = {position: 0, idx: 1}, value = 5
+    #   labels = {position: 1, idx: 1}, value = 3
+    #   labels = {position: 2, idx: 1}, value = 1
+    #
+    # output: [
+    #   {idx: 0}, [10, 7, 2]
+    #   {idx: 1}, [5, 3, 1]
+    # ]
+    #
+    highest_pos = 0
+    per_series: dict[frozenset[tuple[str, str]], dict[int, int]] = {}
+    for sample in samples:
+        pos = int(sample.labels["position"])
+        if pos > highest_pos:
+            highest_pos = pos
+        series = frozenset(_strip_label(sample.labels, "position").items())
+        per_series.setdefault(series, {})[pos] = int(sample.value)
+    # Every vector gets the same length; positions never seen stay at 0.
+    width = highest_pos + 1
+    result = []
+    for series, count_by_pos in per_series.items():
+        vector = [0] * width
+        for pos, count in count_by_pos.items():
+            vector[pos] = count
+        result.append((dict(series), vector))
+    return result