PyPI - spanforge - Versions diffs - 2.0.0__py3-none-any.whl - Mend

spanforge 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (101) hide show

spanforge/__init__.py +695 -0
spanforge/_batch_exporter.py +322 -0
spanforge/_cli.py +3081 -0
spanforge/_hooks.py +340 -0
spanforge/_server.py +953 -0
spanforge/_span.py +1015 -0
spanforge/_store.py +287 -0
spanforge/_stream.py +654 -0
spanforge/_trace.py +334 -0
spanforge/_tracer.py +253 -0
spanforge/actor.py +141 -0
spanforge/alerts.py +464 -0
spanforge/auto.py +181 -0
spanforge/baseline.py +336 -0
spanforge/config.py +460 -0
spanforge/consent.py +227 -0
spanforge/consumer.py +379 -0
spanforge/core/__init__.py +5 -0
spanforge/core/compliance_mapping.py +1060 -0
spanforge/cost.py +597 -0
spanforge/debug.py +514 -0
spanforge/drift.py +488 -0
spanforge/egress.py +63 -0
spanforge/eval.py +575 -0
spanforge/event.py +1052 -0
spanforge/exceptions.py +246 -0
spanforge/explain.py +181 -0
spanforge/export/__init__.py +50 -0
spanforge/export/append_only.py +342 -0
spanforge/export/cloud.py +349 -0
spanforge/export/datadog.py +495 -0
spanforge/export/grafana.py +331 -0
spanforge/export/jsonl.py +198 -0
spanforge/export/otel_bridge.py +291 -0
spanforge/export/otlp.py +817 -0
spanforge/export/otlp_bridge.py +231 -0
spanforge/export/redis_backend.py +282 -0
spanforge/export/webhook.py +302 -0
spanforge/exporters/__init__.py +29 -0
spanforge/exporters/console.py +271 -0
spanforge/exporters/jsonl.py +144 -0
spanforge/hitl.py +297 -0
spanforge/inspect.py +429 -0
spanforge/integrations/__init__.py +39 -0
spanforge/integrations/_pricing.py +277 -0
spanforge/integrations/anthropic.py +388 -0
spanforge/integrations/bedrock.py +306 -0
spanforge/integrations/crewai.py +251 -0
spanforge/integrations/gemini.py +349 -0
spanforge/integrations/groq.py +444 -0
spanforge/integrations/langchain.py +349 -0
spanforge/integrations/llamaindex.py +370 -0
spanforge/integrations/ollama.py +286 -0
spanforge/integrations/openai.py +370 -0
spanforge/integrations/together.py +485 -0
spanforge/metrics.py +393 -0
spanforge/metrics_export.py +342 -0
spanforge/migrate.py +278 -0
spanforge/model_registry.py +282 -0
spanforge/models.py +407 -0
spanforge/namespaces/__init__.py +215 -0
spanforge/namespaces/audit.py +253 -0
spanforge/namespaces/cache.py +209 -0
spanforge/namespaces/chain.py +74 -0
spanforge/namespaces/confidence.py +69 -0
spanforge/namespaces/consent.py +85 -0
spanforge/namespaces/cost.py +175 -0
spanforge/namespaces/decision.py +135 -0
spanforge/namespaces/diff.py +146 -0
spanforge/namespaces/drift.py +79 -0
spanforge/namespaces/eval_.py +232 -0
spanforge/namespaces/fence.py +180 -0
spanforge/namespaces/guard.py +104 -0
spanforge/namespaces/hitl.py +92 -0
spanforge/namespaces/latency.py +69 -0
spanforge/namespaces/prompt.py +185 -0
spanforge/namespaces/redact.py +172 -0
spanforge/namespaces/template.py +197 -0
spanforge/namespaces/tool_call.py +76 -0
spanforge/namespaces/trace.py +1006 -0
spanforge/normalizer.py +183 -0
spanforge/presidio_backend.py +149 -0
spanforge/processor.py +258 -0
spanforge/prompt_registry.py +415 -0
spanforge/py.typed +0 -0
spanforge/redact.py +780 -0
spanforge/sampling.py +500 -0
spanforge/schemas/v1.0/schema.json +170 -0
spanforge/schemas/v2.0/schema.json +536 -0
spanforge/signing.py +1152 -0
spanforge/stream.py +559 -0
spanforge/testing.py +376 -0
spanforge/trace.py +199 -0
spanforge/types.py +696 -0
spanforge/ulid.py +304 -0
spanforge/validate.py +383 -0
spanforge-2.0.0.dist-info/METADATA +1777 -0
spanforge-2.0.0.dist-info/RECORD +101 -0
spanforge-2.0.0.dist-info/WHEEL +4 -0
spanforge-2.0.0.dist-info/entry_points.txt +5 -0
spanforge-2.0.0.dist-info/licenses/LICENSE +21 -0

spanforge/normalizer.py ADDED Viewed

@@ -0,0 +1,183 @@
+"""spanforge.normalizer — ProviderNormalizer Protocol and GenericNormalizer.
+Defines the :class:`ProviderNormalizer` structural protocol (RFC-0001 §10.4)
+that provider-specific integration modules must satisfy, plus a
+:class:`GenericNormalizer` fallback that handles OpenAI-compatible,
+Anthropic-compatible, and raw ``dict`` response shapes without requiring
+any vendored SDK.
+Usage
+-----
+::
+    from spanforge.normalizer import GenericNormalizer
+    normalizer = GenericNormalizer()
+    token_usage, model_info, cost = normalizer.normalize_response(raw_response)
+RFC reference
+-------------
+RFC-0001-SPANFORGE §10.4 — Provider Normalizer interface mandate.
+"""
+from __future__ import annotations
+from typing import Any, Protocol, runtime_checkable
+from spanforge.namespaces.trace import CostBreakdown, ModelInfo, TokenUsage
+__all__: list[str] = ["ProviderNormalizer", "GenericNormalizer"]
+# ---------------------------------------------------------------------------
+# Protocol
+# ---------------------------------------------------------------------------
+@runtime_checkable
+class ProviderNormalizer(Protocol):
+    """Structural contract for provider-specific response normalizers.
+    The protocol consists of a single method, :meth:`normalize_response`.
+    Conformance is structural: an object qualifies simply by exposing that
+    method, and does not have to inherit from this class.
+    Because the protocol is ``runtime_checkable``, ``isinstance(obj,
+    ProviderNormalizer)`` is supported, but that check only looks for the
+    presence of ``normalize_response`` — it does not validate the method's
+    signature or its return value.
+    Implementors
+    ------------
+    * :class:`GenericNormalizer` — OpenAI-compatible + Anthropic-compatible
+      shapes; zero-dependency fallback.
+    * ``spanforge.integrations.openai.OpenAINormalizer`` (when available)
+    * ``spanforge.integrations.anthropic.AnthropicNormalizer`` (when available)
+    """
+    def normalize_response(
+        self, response: object
+    ) -> tuple[TokenUsage, ModelInfo, CostBreakdown | None]:
+        """Convert a raw provider response into typed SpanForge value objects.
+        Parameters
+        ----------
+        response:
+            Raw response object or dict returned by a provider SDK call.
+        Returns
+        -------
+        tuple[TokenUsage, ModelInfo, CostBreakdown | None]
+            Token usage, model information and, when pricing data is
+            available, a cost breakdown.  The third element is ``None``
+            when no pricing data is available.
+        """
+        ...  # pragma: no cover — Protocol method, never called directly.
+# ---------------------------------------------------------------------------
+# Generic fallback implementation
+# ---------------------------------------------------------------------------
+# Value stored in ``ModelInfo.system`` by GenericNormalizer, which has no way
+# of knowing which provider produced the response it is given.
+_UNKNOWN = "_custom"
+def _get(obj: Any, *keys: str, default: Any = None) -> Any:
+    """Follow *keys* through nested dicts/objects and return what is found.
+    Each step uses ``dict.get`` when the current value is a ``dict`` and
+    ``getattr`` otherwise, so SDK response objects and plain dicts can be
+    mixed freely along the path.  *default* is returned as soon as a step
+    yields ``None``; a missing key and an explicit ``None`` value are
+    therefore not distinguished.  Falsy non-``None`` values such as ``0``
+    are returned unchanged.
+    """
+    current = obj
+    for name in keys:
+        if current is None:
+            return default
+        current = (
+            current.get(name)
+            if isinstance(current, dict)
+            else getattr(current, name, None)
+        )
+    if current is None:
+        return default
+    return current
+class GenericNormalizer:
+    """Zero-dependency fallback normalizer for common LLM response shapes.
+    Supports three structural layouts without requiring any provider SDK:
+    1. **OpenAI-compatible** — ``response.usage.{prompt_tokens,
+       completion_tokens, total_tokens}``, ``response.model``, plus the
+       nested detail counters ``usage.prompt_tokens_details.cached_tokens``
+       and ``usage.completion_tokens_details.reasoning_tokens``.
+    2. **Anthropic-compatible** — ``response.usage.{input_tokens,
+       output_tokens, cache_read_input_tokens,
+       cache_creation_input_tokens}``, ``response.model``.
+    3. **Raw dict** — any dict with keys from either layout above.
+    When neither layout matches, zero-value defaults are used so the caller
+    always receives a :class:`~spanforge.namespaces.trace.TokenUsage`
+    regardless of the provider response shape.
+    """
+    @staticmethod
+    def _first_count(usage: Any, *paths: tuple[str, ...]) -> int:
+        """Return the first truthy counter found under *paths*, else ``0``.
+        Each path is a tuple of keys/attributes that is followed from
+        *usage*.  Paths are tried in the order given; ``None``, ``0`` and
+        missing entries are skipped.  The winning value is passed through
+        ``int()``, so a non-numeric value raises ``ValueError``/``TypeError``.
+        """
+        for path in paths:
+            value = _get(usage, *path)
+            if value:
+                return int(value)
+        return 0
+    def normalize_response(
+        self,
+        response: object,
+    ) -> tuple[TokenUsage, ModelInfo, CostBreakdown | None]:
+        """Normalise *response* into typed SpanForge value objects.
+        Parameters
+        ----------
+        response:
+            Raw provider response — may be a dataclass, SDK response object,
+            or plain ``dict``.
+        Returns
+        -------
+        tuple[TokenUsage, ModelInfo, CostBreakdown | None]
+            Typed value objects; ``CostBreakdown`` is always ``None`` (pricing
+            data requires a :class:`~spanforge.namespaces.trace.PricingTier`
+            which this generic normalizer does not possess).
+        """
+        usage = _get(response, "usage")
+        count = self._first_count
+        # ---------- token counts ----------
+        # OpenAI layout: prompt_tokens / completion_tokens / total_tokens
+        # Anthropic layout: input_tokens / output_tokens
+        input_tokens = count(usage, ("prompt_tokens",), ("input_tokens",))
+        output_tokens = count(usage, ("completion_tokens",), ("output_tokens",))
+        # Providers that omit the total (e.g. the Anthropic layout) get the sum.
+        total_tokens = count(usage, ("total_tokens",)) or (input_tokens + output_tokens)
+        # The flat key is tried first, so responses that already worked keep
+        # producing the same value.  OpenAI-compatible responses nest the
+        # cached / reasoning counters under ``*_tokens_details``.
+        cached_tokens = count(
+            usage,
+            ("cached_tokens",),
+            ("prompt_tokens_details", "cached_tokens"),
+            ("cache_read_input_tokens",),
+        )
+        cache_creation_tokens = count(usage, ("cache_creation_input_tokens",))
+        reasoning_tokens = count(
+            usage,
+            ("reasoning_tokens",),
+            ("completion_tokens_details", "reasoning_tokens"),
+        )
+        # Optional counters are reported as ``None`` rather than ``0`` when
+        # the provider did not supply them.
+        token_usage = TokenUsage(
+            input_tokens=input_tokens,
+            output_tokens=output_tokens,
+            total_tokens=total_tokens,
+            cached_tokens=cached_tokens or None,
+            cache_creation_tokens=cache_creation_tokens or None,
+            reasoning_tokens=reasoning_tokens or None,
+        )
+        # ---------- model info ----------
+        model_name: str = str(
+            _get(response, "model", default="")
+            or _get(response, "model_id", default="")
+            or "unknown"
+        )
+        model_info = ModelInfo(
+            system=_UNKNOWN,
+            name=model_name,
+            response_model=model_name,
+        )
+        return token_usage, model_info, None

spanforge/presidio_backend.py ADDED Viewed

@@ -0,0 +1,149 @@
+"""spanforge.presidio_backend — Optional Presidio-powered PII detection backend.
+Wraps Microsoft Presidio AnalyzerEngine to provide entity recognition that
+is more accurate than regex-only scanning.  Falls back gracefully if the
+``presidio-analyzer`` package is not installed.
+Install with::
+    pip install "spanforge[presidio]"
+Usage::
+    from spanforge.presidio_backend import presidio_scan_payload, is_available
+    if is_available():
+        result = presidio_scan_payload({"message": "My SSN is 123-45-6789"})
+        print(result.clean)  # False
+The result is a standard :class:`~spanforge.redact.PIIScanResult`, fully
+compatible with the built-in regex scanner.
+"""
+from __future__ import annotations
+from collections.abc import Mapping
+from typing import Any
+from spanforge.redact import PIIScanHit, PIIScanResult
+__all__ = [
+    "is_available",
+    "presidio_scan_payload",
+]
+# ---------------------------------------------------------------------------
+# Availability check
+# ---------------------------------------------------------------------------
+def is_available() -> bool:
+    """Report whether ``presidio-analyzer`` can be imported.
+    The check performs a real ``import presidio_analyzer``; only
+    ``ImportError`` is treated as "not available".
+    Returns:
+        ``True`` when the import succeeds, ``False`` when it raises
+        ``ImportError``.
+    """
+    try:
+        import presidio_analyzer  # type: ignore[import-untyped]  # noqa: PLC0415, F401
+    except ImportError:
+        return False
+    return True
+# Map Presidio entity types to SpanForge PII labels / sensitivity.
+# Key:   entity type string as reported by Presidio (``result.entity_type``).
+# Value: ``(pii_label, sensitivity)`` where sensitivity is one of the strings
+#        "low", "medium" or "high" used in this table.
+# Entity types that are not listed here are handled by presidio_scan_payload,
+# which falls back to the lower-cased entity type with "medium" sensitivity.
+_ENTITY_MAP: dict[str, tuple[str, str]] = {
+    "CREDIT_CARD": ("credit_card", "high"),
+    "CRYPTO": ("crypto_address", "medium"),
+    "EMAIL_ADDRESS": ("email", "medium"),
+    "IBAN_CODE": ("iban", "high"),
+    "IP_ADDRESS": ("ip_address", "low"),
+    "LOCATION": ("location", "low"),
+    "PERSON": ("person_name", "medium"),
+    "PHONE_NUMBER": ("phone", "medium"),
+    "US_SSN": ("ssn", "high"),
+    "UK_NHS": ("uk_nhs", "high"),
+    "US_DRIVER_LICENSE": ("us_driver_license", "high"),
+    "US_PASSPORT": ("us_passport", "high"),
+    "IN_AADHAAR": ("aadhaar", "high"),
+    "IN_PAN": ("pan", "high"),
+    "NRP": ("nationality", "low"),
+    "MEDICAL_LICENSE": ("medical_license", "medium"),
+    "URL": ("url", "low"),
+    "DATE_TIME": ("date_time", "low"),
+}
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+def presidio_scan_payload(
+    payload: dict[str, Any],
+    *,
+    language: str = "en",
+    score_threshold: float = 0.5,
+    max_depth: int = 10,
+) -> PIIScanResult:
+    """Scan a payload dict for PII using Microsoft Presidio.
+    The payload is traversed depth-first.  Mappings and lists/tuples are
+    descended into; every ``str`` value reached is analysed with a Presidio
+    ``AnalyzerEngine`` created for this call.  Values of any other type,
+    and mapping keys, are not analysed.  Nodes nested deeper than
+    *max_depth* are skipped.
+    **Security**: detected values are never returned — only the entity type,
+    path, count, and sensitivity level.
+    Args:
+        payload:          The dictionary to scan.
+        language:         Language code for analysis (default: ``"en"``).
+        score_threshold:  Minimum Presidio confidence score (default: 0.5).
+        max_depth:        Maximum nesting depth (default: 10).
+    Returns:
+        A :class:`~spanforge.redact.PIIScanResult` holding one hit per
+        (string value, entity type) pair and the number of strings analysed.
+    Raises:
+        ImportError: If ``presidio-analyzer`` is not installed.
+    """
+    try:
+        from presidio_analyzer import AnalyzerEngine  # type: ignore[import-untyped]  # noqa: PLC0415
+    except ImportError as exc:
+        raise ImportError(
+            "The 'presidio-analyzer' package is required for the Presidio backend.\n"
+            "Install it with: pip install 'spanforge[presidio]'"
+        ) from exc
+    engine = AnalyzerEngine()
+    found: list[PIIScanHit] = []
+    strings_seen = 0
+    def _scan_text(text: str, location: str) -> None:
+        """Analyse one string and record a hit per entity type detected in it."""
+        tally: dict[str, int] = {}
+        findings = engine.analyze(
+            text=text,
+            language=language,
+            score_threshold=score_threshold,
+        )
+        for finding in findings:
+            kind = finding.entity_type
+            tally[kind] = tally.get(kind, 0) + 1
+        for kind, occurrences in tally.items():
+            # Unmapped entity types keep their lower-cased Presidio name.
+            label, sensitivity = _ENTITY_MAP.get(kind, (kind.lower(), "medium"))
+            found.append(
+                PIIScanHit(
+                    pii_type=label,
+                    path=location,
+                    match_count=occurrences,
+                    sensitivity=sensitivity,
+                )
+            )
+    def _visit(node: Any, location: str, level: int) -> None:  # noqa: ANN401
+        """Depth-first walk; *location* is the dotted/indexed path to *node*."""
+        nonlocal strings_seen
+        if level > max_depth:
+            return
+        if isinstance(node, str):
+            strings_seen += 1
+            _scan_text(node, location)
+            return
+        if isinstance(node, Mapping):
+            for key, child in node.items():
+                # Top-level keys have no prefix; nested keys are dot-joined.
+                child_location = f"{location}.{key}" if location else str(key)
+                _visit(child, child_location, level + 1)
+            return
+        if isinstance(node, (list, tuple)):
+            for index, child in enumerate(node):
+                _visit(child, f"{location}[{index}]", level + 1)
+    _visit(payload, "", 0)
+    return PIIScanResult(hits=found, scanned=strings_seen)

spanforge/processor.py ADDED Viewed

@@ -0,0 +1,258 @@
+"""spanforge.processor — Span processor pipeline (RFC-0001 §18).
+Span processors let users hook into the span lifecycle **before** and
+**after** a span is exported.  Common uses:
+* Attribute enrichment (e.g. add ``k8s.pod_name`` to every span)
+* Redaction of sensitive fields (complementing built-in :class:`~spanforge.redact.RedactionPolicy`)
+* Custom metrics counters / latency histograms
+* Distributed context propagation helpers
+Usage::
+    from spanforge import configure
+    from spanforge.processor import SpanProcessor, ProcessorChain
+    class EnrichProcessor(SpanProcessor):
+        def on_start(self, span) -> None:
+            span.set_attribute("service.region", "us-east-1")
+        def on_end(self, span) -> None:
+            # span is already finalised with status / duration
+            if span.status == "error":
+                span.set_attribute("alert.triggered", True)
+    configure(span_processors=[EnrichProcessor()])
+Processors receive the *live* :class:`~spanforge._span.Span` object.
+Mutations made in ``on_start`` are visible to user code inside the ``with``
+block.  Mutations made in ``on_end`` appear in the exported payload.
+Thread-safety
+-------------
+Processors are called from the thread that owns the span context manager, so
+they run in the same thread/task as the user code.  Processors MUST NOT
+block the event loop; long-running work should be dispatched to a background
+thread or asyncio task.
+Error handling
+--------------
+Exceptions raised by a processor are caught rather than propagated, so that a
+buggy processor never aborts user code.  Each error is logged at ``WARNING`` level.
+"""
+from __future__ import annotations
+import logging
+import threading
+from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable
+if TYPE_CHECKING:
+    from spanforge._span import Span
+__all__ = [
+    "NoopSpanProcessor",
+    "ProcessorChain",
+    "SpanProcessor",
+    "add_processor",
+    "clear_processors",
+]
+_proc_logger = logging.getLogger("spanforge.processor")
+# ---------------------------------------------------------------------------
+# Protocol
+# ---------------------------------------------------------------------------
+@runtime_checkable
+class SpanProcessor(Protocol):
+    """Protocol describing the two span lifecycle hooks.
+    Both hooks have empty default bodies in this class, so a processor that
+    explicitly subclasses ``SpanProcessor`` may override only the hook it
+    needs and inherit a no-op for the other.
+    Because the protocol is ``runtime_checkable``, ``isinstance(obj,
+    SpanProcessor)`` is supported; that check requires *both* ``on_start``
+    and ``on_end`` to be present on the object and does not inspect their
+    signatures.
+    """
+    def on_start(self, span: "Span") -> None:
+        """Hook invoked synchronously right after the span is created.
+        The span has been pushed onto the context stack and its start time
+        recorded.  Attributes may be freely added or mutated here.
+        Args:
+            span: The newly created :class:`~spanforge._span.Span` (mutable).
+        """
+    def on_end(self, span: "Span") -> None:
+        """Hook invoked synchronously once the span is finalised, before export.
+        ``span.end_ns``, ``span.duration_ms``, and ``span.status`` are all
+        set by the time this method runs.  Attributes may still be mutated
+        and will appear in the exported :class:`~spanforge.namespaces.trace.SpanPayload`.
+        Args:
+            span: The finalised :class:`~spanforge._span.Span` (still mutable).
+        """
+# ---------------------------------------------------------------------------
+# No-op implementation (default)
+# ---------------------------------------------------------------------------
+class NoopSpanProcessor:
+    """Span processor whose hooks do nothing.  Used as the default."""
+    def on_start(self, span: "Span") -> None:
+        """Ignore the span-start notification."""
+        return None
+    def on_end(self, span: "Span") -> None:
+        """Ignore the span-end notification."""
+        return None
+# ---------------------------------------------------------------------------
+# Processor chain
+# ---------------------------------------------------------------------------
+class ProcessorChain:
+    """An ordered chain of :class:`SpanProcessor` implementations.
+    Processors are called in insertion order for ``on_start`` and in the
+    **same** order for ``on_end``.  Errors are caught per-processor so a
+    bug in one processor does not prevent subsequent processors from running.
+    A processor that does not define one of the two hooks is simply skipped
+    for that hook, so duck-typed processors implementing only ``on_start``
+    or only ``on_end`` can be used without triggering a warning per span.
+    The processor list is guarded by a lock; hooks are invoked on a snapshot
+    taken under that lock, never while holding it.
+    Args:
+        processors: Initial list of processors (copied; ``None`` means empty).
+    Example::
+        chain = ProcessorChain([EnrichProcessor(), RedactProcessor()])
+        chain.on_start(span)
+        # ... later ...
+        chain.on_end(span)
+    """
+    def __init__(self, processors: list[Any] | None = None) -> None:
+        self._processors: list[Any] = list(processors or [])
+        self._lock = threading.Lock()
+    def add(self, processor: Any) -> None:  # noqa: ANN401
+        """Append *processor* to the chain."""
+        with self._lock:
+            self._processors.append(processor)
+    def remove(self, processor: Any) -> None:  # noqa: ANN401
+        """Remove *processor* from the chain (no-op if not present)."""
+        with self._lock:
+            try:
+                self._processors.remove(processor)
+            except ValueError:
+                pass
+    def clear(self) -> None:
+        """Remove all processors from the chain."""
+        with self._lock:
+            self._processors.clear()
+    def _dispatch(self, hook: str, span: "Span") -> None:
+        """Call the method named *hook* on every processor, in insertion order.
+        Args:
+            hook: ``"on_start"`` or ``"on_end"``.
+            span: The span forwarded to each processor.
+        """
+        with self._lock:
+            procs = list(self._processors)  # snapshot to avoid holding lock during callbacks
+        for proc in procs:
+            try:
+                # Hooks are optional: skip processors that do not define this one.
+                callback = getattr(proc, hook, None)
+                if callback is not None:
+                    callback(span)
+            except Exception as exc:  # NOSONAR
+                _proc_logger.warning(
+                    "SpanProcessor.%s error in %r: %s", hook, type(proc).__name__, exc
+                )
+    def on_start(self, span: "Span") -> None:
+        """Fire ``on_start`` on all processors in order."""
+        self._dispatch("on_start", span)
+    def on_end(self, span: "Span") -> None:
+        """Fire ``on_end`` on all processors in order."""
+        self._dispatch("on_end", span)
+    def __len__(self) -> int:
+        """Return the number of processors currently in the chain."""
+        with self._lock:
+            return len(self._processors)
+    def __repr__(self) -> str:
+        """Return ``ProcessorChain([...])`` listing each processor's class name."""
+        with self._lock:
+            names = [type(p).__name__ for p in self._processors]
+        return f"ProcessorChain({names!r})"
+# ---------------------------------------------------------------------------
+# Module-level helpers — called from _span.py
+# ---------------------------------------------------------------------------
+def _run_on_start(span: "Span") -> None:
+    """Fire ``on_start`` on all processors registered in the active config.
+    Nothing raised here reaches the caller: if the config (or its processor
+    list) cannot be obtained the function returns silently, and an error
+    raised by an individual processor is logged at ``WARNING`` level before
+    moving on to the next one.  Processors that do not define ``on_start``
+    are skipped.
+    Args:
+        span: The span forwarded to each processor's ``on_start`` hook.
+    """
+    try:
+        # Imported lazily — presumably to avoid an import cycle with
+        # spanforge.config; TODO confirm.
+        from spanforge.config import get_config  # noqa: PLC0415
+        # Snapshot inside the guard: a processor may add/clear processors while
+        # we iterate, and an unset or non-iterable value must not escape
+        # into user code.
+        processors = list(get_config().span_processors or ())
+    except Exception:  # NOSONAR
+        return
+    for proc in processors:
+        try:
+            hook = getattr(proc, "on_start", None)
+            if hook is not None:
+                hook(span)
+        except Exception as exc:  # NOSONAR
+            _proc_logger.warning(
+                "SpanProcessor.on_start error in %r: %s", type(proc).__name__, exc
+            )
+def _run_on_end(span: "Span") -> None:
+    """Fire ``on_end`` on all processors registered in the active config.
+    Nothing raised here reaches the caller: if the config (or its processor
+    list) cannot be obtained the function returns silently, and an error
+    raised by an individual processor is logged at ``WARNING`` level before
+    moving on to the next one.  Processors that do not define ``on_end``
+    are skipped.
+    Args:
+        span: The span forwarded to each processor's ``on_end`` hook.
+    """
+    try:
+        # Imported lazily — presumably to avoid an import cycle with
+        # spanforge.config; TODO confirm.
+        from spanforge.config import get_config  # noqa: PLC0415
+        # Snapshot inside the guard: a processor may add/clear processors while
+        # we iterate, and an unset or non-iterable value must not escape
+        # into user code.
+        processors = list(get_config().span_processors or ())
+    except Exception:  # NOSONAR
+        return
+    for proc in processors:
+        try:
+            hook = getattr(proc, "on_end", None)
+            if hook is not None:
+                hook(span)
+        except Exception as exc:  # NOSONAR
+            _proc_logger.warning(
+                "SpanProcessor.on_end error in %r: %s", type(proc).__name__, exc
+            )
+def add_processor(processor: Any) -> None:  # noqa: ANN401
+    """Register *processor* at the end of the active config's processor list.
+    The list returned by ``get_config().span_processors`` is mutated in
+    place; no duplicate check is performed.
+    Convenience wrapper around ``configure(span_processors=[...])``.
+    Args:
+        processor: Any object implementing :class:`SpanProcessor` protocol.
+    Example::
+        from spanforge.processor import add_processor, SpanProcessor
+        class Enricher(SpanProcessor):
+            def on_start(self, span): span.set_attribute("region", "eu-west-1")
+            def on_end(self, span): pass
+        add_processor(Enricher())
+    """
+    from spanforge.config import get_config  # noqa: PLC0415
+    registered = get_config().span_processors
+    registered.append(processor)
+def clear_processors() -> None:
+    """Empty the active config's span processor list in place."""
+    from spanforge.config import get_config  # noqa: PLC0415
+    registered = get_config().span_processors
+    registered.clear()