traccia 0.1.2__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. traccia/__init__.py +73 -0
  2. traccia/auto.py +748 -0
  3. traccia/auto_instrumentation.py +74 -0
  4. traccia/cli.py +349 -0
  5. traccia/config.py +699 -0
  6. traccia/context/__init__.py +33 -0
  7. traccia/context/context.py +67 -0
  8. traccia/context/propagators.py +283 -0
  9. traccia/errors.py +48 -0
  10. traccia/exporter/__init__.py +8 -0
  11. traccia/exporter/console_exporter.py +31 -0
  12. traccia/exporter/file_exporter.py +178 -0
  13. traccia/exporter/http_exporter.py +214 -0
  14. traccia/exporter/otlp_exporter.py +190 -0
  15. traccia/instrumentation/__init__.py +26 -0
  16. traccia/instrumentation/anthropic.py +92 -0
  17. traccia/instrumentation/decorator.py +263 -0
  18. traccia/instrumentation/fastapi.py +38 -0
  19. traccia/instrumentation/http_client.py +21 -0
  20. traccia/instrumentation/http_server.py +25 -0
  21. traccia/instrumentation/openai.py +358 -0
  22. traccia/instrumentation/requests.py +68 -0
  23. traccia/integrations/__init__.py +39 -0
  24. traccia/integrations/langchain/__init__.py +14 -0
  25. traccia/integrations/langchain/callback.py +418 -0
  26. traccia/integrations/langchain/utils.py +129 -0
  27. traccia/integrations/openai_agents/__init__.py +73 -0
  28. traccia/integrations/openai_agents/processor.py +262 -0
  29. traccia/pricing_config.py +58 -0
  30. traccia/processors/__init__.py +35 -0
  31. traccia/processors/agent_enricher.py +159 -0
  32. traccia/processors/batch_processor.py +140 -0
  33. traccia/processors/cost_engine.py +71 -0
  34. traccia/processors/cost_processor.py +70 -0
  35. traccia/processors/drop_policy.py +44 -0
  36. traccia/processors/logging_processor.py +31 -0
  37. traccia/processors/rate_limiter.py +223 -0
  38. traccia/processors/sampler.py +22 -0
  39. traccia/processors/token_counter.py +216 -0
  40. traccia/runtime_config.py +127 -0
  41. traccia/tracer/__init__.py +15 -0
  42. traccia/tracer/otel_adapter.py +577 -0
  43. traccia/tracer/otel_utils.py +24 -0
  44. traccia/tracer/provider.py +155 -0
  45. traccia/tracer/span.py +286 -0
  46. traccia/tracer/span_context.py +16 -0
  47. traccia/tracer/tracer.py +243 -0
  48. traccia/utils/__init__.py +19 -0
  49. traccia/utils/helpers.py +95 -0
  50. {traccia-0.1.2.dist-info → traccia-0.1.6.dist-info}/METADATA +72 -15
  51. traccia-0.1.6.dist-info/RECORD +55 -0
  52. traccia-0.1.6.dist-info/top_level.txt +1 -0
  53. traccia-0.1.2.dist-info/RECORD +0 -6
  54. traccia-0.1.2.dist-info/top_level.txt +0 -1
  55. {traccia-0.1.2.dist-info → traccia-0.1.6.dist-info}/WHEEL +0 -0
  56. {traccia-0.1.2.dist-info → traccia-0.1.6.dist-info}/entry_points.txt +0 -0
  57. {traccia-0.1.2.dist-info → traccia-0.1.6.dist-info}/licenses/LICENSE +0 -0
traccia/processors/cost_engine.py
@@ -0,0 +1,71 @@
+"""Cost calculation based on model pricing and token usage."""
+
+from __future__ import annotations
+
+from typing import Dict, Optional, Tuple
+
+DEFAULT_PRICING: Dict[str, Dict[str, float]] = {
+    # prices per 1k tokens
+    "gpt-4": {"prompt": 0.03, "completion": 0.06},
+    "gpt-4o": {"prompt": 0.005, "completion": 0.015},
+    "gpt-3.5-turbo": {"prompt": 0.0015, "completion": 0.002},
+    "claude-3-opus": {"prompt": 0.015, "completion": 0.075},
+    "claude-3-sonnet": {"prompt": 0.003, "completion": 0.015},
+    # Add other models via pricing overrides:
+    # - env: AGENT_DASHBOARD_PRICING_JSON='{"model": {"prompt": x, "completion": y}}'
+    # - start_tracing(pricing_override={...})
+}
+
+def _lookup_price(model: str, table: Dict[str, Dict[str, float]]) -> Optional[Tuple[str, Dict[str, float]]]:
+    """
+    Return (matched_key, price_dict) for a given model name.
+
+    Supports exact matches and prefix matches for version-suffixed model names:
+    e.g. "claude-3-opus-20240229" -> "claude-3-opus"
+         "gpt-4o-2024-08-06" -> "gpt-4o"
+    """
+    if not model:
+        return None
+    m = str(model).strip()
+    if not m:
+        return None
+    # exact (case sensitive + lower)
+    if m in table:
+        return m, table[m]
+    ml = m.lower()
+    if ml in table:
+        return ml, table[ml]
+    # prefix match (longest key wins)
+    for key in sorted(table.keys(), key=len, reverse=True):
+        if ml.startswith(key.lower()):
+            return key, table[key]
+    return None
+
+
+def match_pricing_model_key(
+    model: str, pricing_table: Optional[Dict[str, Dict[str, float]]] = None
+) -> Optional[str]:
+    """Return the pricing table key that would be used for `model`, if any."""
+    table = pricing_table or DEFAULT_PRICING
+    matched = _lookup_price(model, table)
+    if not matched:
+        return None
+    key, _ = matched
+    return key
+
+
+def compute_cost(
+    model: str,
+    prompt_tokens: int,
+    completion_tokens: int,
+    pricing_table: Optional[Dict[str, Dict[str, float]]] = None,
+) -> Optional[float]:
+    table = pricing_table or DEFAULT_PRICING
+    matched = _lookup_price(model, table)
+    if not matched:
+        return None
+    _, price = matched
+    prompt_cost = (prompt_tokens / 1000.0) * price.get("prompt", 0.0)
+    completion_cost = (completion_tokens / 1000.0) * price.get("completion", 0.0)
+    return round(prompt_cost + completion_cost, 6)
+
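The pricing table above is expressed per 1,000 tokens, so compute_cost divides raw token counts by 1,000 before applying the per-model prompt and completion rates, and returns None for models it cannot match. A minimal usage sketch (illustrative only; the import path follows the file list above):

    from traccia.processors.cost_engine import compute_cost, match_pricing_model_key

    # Version-suffixed model names fall back to the longest matching prefix key.
    print(match_pricing_model_key("gpt-4o-2024-08-06"))   # "gpt-4o"

    # gpt-4: (1000 / 1000) * 0.03 + (500 / 1000) * 0.06 = 0.03 + 0.03 = 0.06 USD
    print(compute_cost("gpt-4", prompt_tokens=1000, completion_tokens=500))   # 0.06

    # Unknown models return None rather than guessing a price.
    print(compute_cost("some-unlisted-model", 10, 10))   # None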
traccia/processors/cost_processor.py
@@ -0,0 +1,70 @@
+"""Span processor that annotates spans with cost based on token usage and pricing."""
+
+from __future__ import annotations
+
+from typing import Dict, Optional
+
+from traccia.processors.cost_engine import compute_cost, match_pricing_model_key
+from traccia.tracer.provider import SpanProcessor
+
+
+class CostAnnotatingProcessor(SpanProcessor):
+    """
+    Adds `llm.cost.usd` to spans when token usage and model info are available.
+
+    Expects spans to carry:
+    - llm.model (model name)
+    - llm.usage.prompt_tokens
+    - llm.usage.completion_tokens
+    """
+
+    def __init__(
+        self,
+        pricing_table: Optional[Dict[str, Dict[str, float]]] = None,
+        *,
+        pricing_source: str = "default",
+    ) -> None:
+        self.pricing_table = pricing_table or {}
+        self.pricing_source = pricing_source
+
+    def on_end(self, span) -> None:
+        if "llm.cost.usd" in (span.attributes or {}):
+            return
+        model = span.attributes.get("llm.model")
+        prompt = span.attributes.get("llm.usage.prompt_tokens")
+        completion = span.attributes.get("llm.usage.completion_tokens")
+        # Anthropic-style names (also supported)
+        if prompt is None:
+            prompt = span.attributes.get("llm.usage.input_tokens")
+        if completion is None:
+            completion = span.attributes.get("llm.usage.output_tokens")
+        if not model or prompt is None or completion is None:
+            return
+        cost = compute_cost(
+            model,
+            int(prompt),
+            int(completion),
+            pricing_table=self.pricing_table,
+        )
+        if cost is not None:
+            span.set_attribute("llm.cost.usd", cost)
+            # Provenance for downstream analysis.
+            span.set_attribute("llm.cost.source", span.attributes.get("llm.usage.source", "unknown"))
+            span.set_attribute("llm.pricing.source", self.pricing_source)
+            key = match_pricing_model_key(model, self.pricing_table)
+            if key:
+                span.set_attribute("llm.pricing.model_key", key)
+
+    def shutdown(self) -> None:
+        return None
+
+    def force_flush(self, timeout: Optional[float] = None) -> None:
+        return None
+
+    def update_pricing_table(
+        self, pricing_table: Dict[str, Dict[str, float]], pricing_source: Optional[str] = None
+    ) -> None:
+        self.pricing_table = pricing_table
+        if pricing_source:
+            self.pricing_source = pricing_source
+
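CostAnnotatingProcessor only writes llm.cost.usd when a model name plus prompt and completion token counts (or their Anthropic-style input/output equivalents) are already on the span, and it records where the usage and pricing numbers came from. A rough sketch of the resulting attributes, using a hypothetical stand-in for the package's Span class (the real one lives in traccia/tracer/span.py):

    from traccia.processors.cost_processor import CostAnnotatingProcessor

    class FakeSpan:  # hypothetical stand-in, not part of traccia
        def __init__(self, attributes):
            self.attributes = attributes
        def set_attribute(self, key, value):
            self.attributes[key] = value

    span = FakeSpan({
        "llm.model": "claude-3-sonnet",
        "llm.usage.input_tokens": 2000,    # Anthropic-style names are accepted
        "llm.usage.output_tokens": 1000,
        "llm.usage.source": "provider_usage",
    })
    CostAnnotatingProcessor().on_end(span)
    # (2000 / 1000) * 0.003 + (1000 / 1000) * 0.015 = 0.021 USD
    print(span.attributes["llm.cost.usd"])           # 0.021
    print(span.attributes["llm.pricing.model_key"])  # "claude-3-sonnet"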
traccia/processors/drop_policy.py
@@ -0,0 +1,44 @@
+"""Queue overflow handling strategies for span buffering."""
+
+from collections import deque
+from typing import Deque
+
+from traccia.tracer.span import Span
+
+
+class DropPolicy:
+    """Base policy deciding how to handle span queue overflow."""
+
+    def handle(self, queue: Deque[Span], span: Span, max_size: int) -> bool:
+        """
+        Apply the drop policy.
+
+        Returns True if the span was enqueued, False if it was dropped.
+        """
+        raise NotImplementedError
+
+
+class DropOldestPolicy(DropPolicy):
+    """Drop the oldest span to make room for a new one."""
+
+    def handle(self, queue: Deque[Span], span: Span, max_size: int) -> bool:
+        if len(queue) >= max_size and queue:
+            queue.popleft()
+        if len(queue) < max_size:
+            queue.append(span)
+            return True
+        return False
+
+
+class DropNewestPolicy(DropPolicy):
+    """Drop the incoming span if the queue is full."""
+
+    def handle(self, queue: Deque[Span], span: Span, max_size: int) -> bool:
+        if len(queue) < max_size:
+            queue.append(span)
+            return True
+        return False
+
+
+DEFAULT_DROP_POLICY = DropOldestPolicy()
+
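Both policies receive the same (queue, span, max_size) call and differ only in which side of a full queue loses a span, as this sketch illustrates (strings stand in for Span objects here):

    from collections import deque
    from traccia.processors.drop_policy import DropNewestPolicy, DropOldestPolicy

    q = deque(["a", "b", "c"])
    DropOldestPolicy().handle(q, "d", max_size=3)   # evicts "a", enqueues "d" -> True
    print(list(q))   # ['b', 'c', 'd']

    q = deque(["a", "b", "c"])
    DropNewestPolicy().handle(q, "d", max_size=3)   # queue full, "d" is dropped -> False
    print(list(q))   # ['a', 'b', 'c']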
traccia/processors/logging_processor.py
@@ -0,0 +1,31 @@
+"""Span processor that logs spans when they end."""
+
+from __future__ import annotations
+
+import logging
+from typing import Optional
+
+from traccia.tracer.provider import SpanProcessor
+
+
+class LoggingSpanProcessor(SpanProcessor):
+    """Logs span summary on end using the standard logging module."""
+
+    def __init__(self, logger: Optional[logging.Logger] = None) -> None:
+        self.logger = logger or logging.getLogger("traccia.traces")
+
+    def on_end(self, span) -> None:
+        attrs = span.attributes or {}
+        msg = (
+            f"[trace] name={span.name} trace_id={span.context.trace_id} "
+            f"span_id={span.context.span_id} status={span.status.name} "
+            f"duration_ns={span.duration_ns} attrs={attrs}"
+        )
+        self.logger.info(msg)
+
+    def shutdown(self) -> None:
+        return None
+
+    def force_flush(self, timeout: Optional[float] = None) -> None:
+        return None
+
traccia/processors/rate_limiter.py
@@ -0,0 +1,223 @@
+"""Rate limiting processor for span export."""
+
+from __future__ import annotations
+
+import logging
+import threading
+import time
+from collections import deque
+from typing import Optional
+
+from opentelemetry.sdk.trace import ReadableSpan
+
+from traccia.errors import RateLimitError
+
+logger = logging.getLogger(__name__)
+
+
+class RateLimiter:
+    """
+    Token bucket rate limiter with hybrid blocking/dropping behavior.
+
+    Features:
+    - Token bucket algorithm for smooth rate limiting
+    - Short blocking period before dropping spans
+    - Detailed logging of dropped spans
+    - Thread-safe implementation
+    """
+
+    def __init__(
+        self,
+        max_spans_per_second: Optional[float] = None,
+        max_block_ms: int = 100,
+    ):
+        """
+        Initialize rate limiter.
+
+        Args:
+            max_spans_per_second: Maximum spans per second (None = unlimited)
+            max_block_ms: Maximum milliseconds to block before dropping
+        """
+        self.max_spans_per_second = max_spans_per_second
+        self.max_block_ms = max_block_ms
+        self.enabled = max_spans_per_second is not None and max_spans_per_second > 0
+
+        # Token bucket state
+        self._tokens: float = max_spans_per_second or 0
+        self._max_tokens: float = max_spans_per_second or 0
+        self._last_refill_time: float = time.time()
+        self._lock = threading.Lock()
+
+        # Stats
+        self._total_spans = 0
+        self._dropped_spans = 0
+        self._blocked_spans = 0
+
+        # Recent timestamps for sliding window (backup)
+        self._recent_timestamps: deque = deque()
+        self._window_seconds = 1.0
+
+    def acquire(self, span: Optional[ReadableSpan] = None) -> bool:
+        """
+        Try to acquire permission to process a span.
+
+        Returns True if span should be processed, False if it should be dropped.
+
+        Behavior:
+        1. If unlimited (disabled), always return True
+        2. Try to acquire a token immediately
+        3. If no token, block for up to max_block_ms
+        4. If still no token after blocking, drop and return False
+
+        Args:
+            span: Optional span for logging purposes
+
+        Returns:
+            True if span should be processed, False if dropped
+        """
+        if not self.enabled:
+            return True
+
+        self._total_spans += 1
+
+        with self._lock:
+            # Refill tokens based on elapsed time
+            self._refill_tokens()
+
+            # Try to acquire immediately
+            if self._tokens >= 1.0:
+                self._tokens -= 1.0
+                return True
+
+            # No tokens available, try blocking
+            if self.max_block_ms > 0:
+                block_start = time.time()
+                blocked_ms = 0
+
+                while blocked_ms < self.max_block_ms:
+                    # Release lock briefly to allow other threads
+                    self._lock.release()
+                    time.sleep(0.001)  # Sleep 1ms
+                    self._lock.acquire()
+
+                    # Refill and try again
+                    self._refill_tokens()
+                    if self._tokens >= 1.0:
+                        self._tokens -= 1.0
+                        self._blocked_spans += 1
+                        return True
+
+                    blocked_ms = (time.time() - block_start) * 1000
+
+            # Still no tokens after blocking - drop the span
+            self._dropped_spans += 1
+
+            # Log dropped span
+            span_name = span.name if span else "unknown"
+            logger.warning(
+                f"Rate limit exceeded - dropping span '{span_name}'. "
+                f"Total dropped: {self._dropped_spans}/{self._total_spans} "
+                f"({self._dropped_spans / self._total_spans * 100:.1f}%)"
+            )
+
+            return False
+
+    def _refill_tokens(self) -> None:
+        """Refill tokens based on elapsed time (token bucket algorithm)."""
+        now = time.time()
+        elapsed = now - self._last_refill_time
+
+        if elapsed > 0:
+            # Add tokens based on rate and elapsed time
+            new_tokens = elapsed * self.max_spans_per_second
+            self._tokens = min(self._max_tokens, self._tokens + new_tokens)
+            self._last_refill_time = now
+
+    def get_stats(self) -> dict:
+        """Get rate limiting statistics."""
+        with self._lock:
+            drop_rate = (self._dropped_spans / self._total_spans * 100) if self._total_spans > 0 else 0
+            return {
+                "enabled": self.enabled,
+                "max_spans_per_second": self.max_spans_per_second,
+                "total_spans": self._total_spans,
+                "dropped_spans": self._dropped_spans,
+                "blocked_spans": self._blocked_spans,
+                "drop_rate_percent": round(drop_rate, 2),
+                "current_tokens": round(self._tokens, 2),
+            }
+
+    def reset_stats(self) -> None:
+        """Reset statistics counters."""
+        with self._lock:
+            self._total_spans = 0
+            self._dropped_spans = 0
+            self._blocked_spans = 0
+
+
+class RateLimitingSpanProcessor:
+    """
+    Span processor that enforces rate limiting before passing to next processor.
+
+    This should be added early in the processor chain to drop spans before
+    they consume resources in downstream processors.
+    """
+
+    def __init__(
+        self,
+        next_processor,
+        max_spans_per_second: Optional[float] = None,
+        max_block_ms: int = 100,
+    ):
+        """
+        Initialize rate limiting processor.
+
+        Args:
+            next_processor: Next processor in the chain
+            max_spans_per_second: Maximum spans per second (None = unlimited)
+            max_block_ms: Maximum milliseconds to block before dropping
+        """
+        self.next_processor = next_processor
+        self.rate_limiter = RateLimiter(
+            max_spans_per_second=max_spans_per_second,
+            max_block_ms=max_block_ms,
+        )
+
+    def on_start(self, span, parent_context=None):
+        """Called when span starts - pass through to next processor."""
+        if self.next_processor and hasattr(self.next_processor, 'on_start'):
+            self.next_processor.on_start(span, parent_context)
+
+    def on_end(self, span):
+        """
+        Called when span ends - check rate limit before passing to next processor.
+
+        If rate limit is exceeded, span is dropped and not passed to next processor.
+        """
+        # Check rate limit
+        if not self.rate_limiter.acquire(span):
+            # Span dropped - don't pass to next processor
+            return
+
+        # Pass to next processor
+        if self.next_processor and hasattr(self.next_processor, 'on_end'):
+            self.next_processor.on_end(span)
+
+    def shutdown(self):
+        """Shutdown processor and log final stats."""
+        stats = self.rate_limiter.get_stats()
+        if stats["enabled"] and stats["dropped_spans"] > 0:
+            logger.info(
+                f"Rate limiter shutdown. Final stats: "
+                f"{stats['dropped_spans']}/{stats['total_spans']} spans dropped "
+                f"({stats['drop_rate_percent']}%)"
+            )
+
+        if self.next_processor and hasattr(self.next_processor, 'shutdown'):
+            self.next_processor.shutdown()
+
+    def force_flush(self, timeout_millis: int = 30000):
+        """Force flush - pass through to next processor."""
+        if self.next_processor and hasattr(self.next_processor, 'force_flush'):
+            return self.next_processor.force_flush(timeout_millis)
+        return True
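A limit of N spans per second means the bucket holds at most N tokens and refills at N tokens per second; once it drains, acquire() blocks in 1 ms steps for up to max_block_ms and then drops. A small illustrative sketch (the numbers are arbitrary, and importing the module assumes the opentelemetry-sdk dependency is installed since it imports ReadableSpan at module load):

    import time
    from traccia.processors.rate_limiter import RateLimiter

    limiter = RateLimiter(max_spans_per_second=5, max_block_ms=0)  # drop as soon as the bucket is empty

    accepted = sum(limiter.acquire() for _ in range(20))  # a burst of 20 spans
    print(accepted)              # roughly 5: the initial bucket capacity
    print(limiter.get_stats())   # dropped_spans, drop_rate_percent, current_tokens, ...

    time.sleep(1.0)              # the bucket refills at 5 tokens per second
    print(limiter.acquire())     # True again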
traccia/processors/sampler.py
@@ -0,0 +1,22 @@
+"""Sampling decisions for traces."""
+
+import random
+from dataclasses import dataclass
+
+
+@dataclass
+class SamplingResult:
+    sampled: bool
+
+
+class Sampler:
+    """Head-based sampler using a fixed probability."""
+
+    def __init__(self, sample_rate: float = 1.0) -> None:
+        if not 0.0 <= sample_rate <= 1.0:
+            raise ValueError("sample_rate must be between 0.0 and 1.0")
+        self.sample_rate = sample_rate
+
+    def should_sample(self) -> SamplingResult:
+        return SamplingResult(sampled=random.random() <= self.sample_rate)
+
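The sampler makes one Bernoulli draw per call, so over many traces the sampled fraction converges on sample_rate. For example (the observed ratio is illustrative):

    from traccia.processors.sampler import Sampler

    sampler = Sampler(sample_rate=0.25)
    hits = sum(sampler.should_sample().sampled for _ in range(10_000))
    print(hits / 10_000)   # ~0.25, within random noise

    print(Sampler(sample_rate=1.0).should_sample().sampled)   # always True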
traccia/processors/token_counter.py
@@ -0,0 +1,216 @@
+"""Token counting utilities and processor for spans with LLM usage.
+
+Best practice:
+- Prefer provider-reported usage tokens when available.
+- Otherwise, estimate with the vendor tokenizer when available (tiktoken for
+  OpenAI) and record the estimate source on the span.
+"""
+
+from __future__ import annotations
+
+from typing import Any, Dict, Optional, Tuple
+
+from traccia.tracer.provider import SpanProcessor
+
+try:  # optional dependency for accurate counting
+    import tiktoken  # type: ignore
+except Exception:  # pragma: no cover
+    tiktoken = None  # fallback to heuristic
+
+
+MODEL_TO_ENCODING = {
+    # OpenAI mappings (approximate; kept current as of gpt-4o family)
+    "gpt-4o": "o200k_base",
+    "gpt-4o-mini": "o200k_base",
+    "gpt-4": "cl100k_base",
+    "gpt-3.5-turbo": "cl100k_base",
+}
+
+
+def _encoding_for_model(model: Optional[str]):
+    if tiktoken is None:
+        return None
+    if not model:
+        return None
+    m = str(model)
+    # First try tiktoken's model registry (best when available).
+    try:
+        return tiktoken.encoding_for_model(m)
+    except Exception:
+        pass
+    # Then try our explicit mapping, supporting version-suffixed models by prefix.
+    encoding_name = MODEL_TO_ENCODING.get(m)
+    if encoding_name is None:
+        for key in sorted(MODEL_TO_ENCODING.keys(), key=len, reverse=True):
+            if m.startswith(key):
+                encoding_name = MODEL_TO_ENCODING[key]
+                break
+    if encoding_name:
+        try:
+            return tiktoken.get_encoding(encoding_name)
+        except Exception:
+            return None
+    return None
+
+
+def _count_with_tiktoken(text: str, model: Optional[str]) -> Optional[int]:
+    if tiktoken is None or not text:
+        return None
+    encoding = _encoding_for_model(model)
+    if encoding is None:
+        return None
+    try:
+        return len(encoding.encode(text))
+    except Exception:
+        return None
+
+
+def estimate_tokens_from_text(text: str, model: Optional[str] = None) -> int:
+    """
+    Estimate tokens. Prefer model-accurate count via tiktoken when available,
+    otherwise fall back to a rough whitespace split.
+    """
+    if not text:
+        return 0
+    exact = _count_with_tiktoken(text, model)
+    if exact is not None:
+        return exact
+    return len(text.split())
+
+
+def estimate_tokens_from_text_with_source(
+    text: str, model: Optional[str] = None
+) -> Tuple[int, str]:
+    """
+    Return (token_count, source) where source is:
+    - "estimated.tiktoken"
+    - "estimated.heuristic"
+    """
+    if not text:
+        return 0, "estimated.heuristic"
+    exact = _count_with_tiktoken(text, model)
+    if exact is not None:
+        return exact, "estimated.tiktoken"
+    return len(text.split()), "estimated.heuristic"
+
+
+def _openai_chat_overhead(model: Optional[str]) -> Tuple[int, int, int]:
+    """
+    Return (tokens_per_message, tokens_per_name, tokens_for_reply).
+
+    These constants are model-dependent in OpenAI's chat format. For estimation
+    we use a reasonable default that is close for many modern chat models.
+    """
+    # Defaults (works reasonably for gpt-4/4o families as an estimate)
+    return 3, 1, 3
+
+
+def estimate_openai_chat_prompt_tokens_with_source(
+    messages: Any, model: Optional[str] = None
+) -> Optional[Tuple[int, str]]:
+    """
+    Estimate prompt tokens from a list of chat messages.
+
+    This is best-effort and should be treated as an estimate unless provider
+    usage is available.
+    """
+    if not isinstance(messages, (list, tuple)) or not messages:
+        return None
+    if tiktoken is None:
+        # Heuristic fallback: count whitespace tokens across role/content.
+        parts = []
+        for msg in list(messages)[:50]:
+            if not isinstance(msg, dict):
+                continue
+            role = msg.get("role") or ""
+            content = msg.get("content") or ""
+            if not isinstance(content, str):
+                content = str(content)
+            parts.append(f"{role} {content}".strip())
+        text = "\n".join([p for p in parts if p])
+        return (len(text.split()), "estimated.chat_heuristic")
+    try:
+        encoding = _encoding_for_model(model) or tiktoken.get_encoding("cl100k_base")
+    except Exception:
+        return None
+
+    tokens_per_message, tokens_per_name, tokens_for_reply = _openai_chat_overhead(model)
+    total = 0
+    for msg in list(messages)[:50]:
+        if not isinstance(msg, dict):
+            continue
+        total += tokens_per_message
+        role = msg.get("role") or ""
+        name = msg.get("name")
+        content = msg.get("content") or ""
+        if not isinstance(content, str):
+            content = str(content)
+        total += len(encoding.encode(str(role)))
+        total += len(encoding.encode(content))
+        if name:
+            total += tokens_per_name
+            total += len(encoding.encode(str(name)))
+    total += tokens_for_reply
+    return total, "estimated.tiktoken_chat"
+
+
+class TokenCountingProcessor(SpanProcessor):
+    """
+    A processor that infers token counts when not provided by the LLM response.
+    It prefers a model-specific tokenizer (tiktoken) when available.
+    """
+
+    def on_end(self, span) -> None:
+        prompt = span.attributes.get("llm.prompt")
+        completion = span.attributes.get("llm.completion")
+        model = span.attributes.get("llm.model")
+        openai_messages = span.attributes.get("llm.openai.messages")
+
+        wrote_any = False
+        wrote_prompt = False
+        wrote_completion = False
+
+        if "llm.usage.prompt_tokens" not in span.attributes:
+            # Prefer chat-structure estimation when available.
+            est = estimate_openai_chat_prompt_tokens_with_source(openai_messages, model)
+            if est is not None:
+                count, source = est
+                span.set_attribute("llm.usage.prompt_tokens", count)
+                span.set_attribute("llm.usage.prompt_source", source)
+                wrote_any = True
+                wrote_prompt = True
+            elif isinstance(prompt, str):
+                count, source = estimate_tokens_from_text_with_source(prompt, model)
+                span.set_attribute("llm.usage.prompt_tokens", count)
+                span.set_attribute("llm.usage.prompt_source", source)
+                wrote_any = True
+                wrote_prompt = True
+
+        if "llm.usage.completion_tokens" not in span.attributes and isinstance(completion, str):
+            count, source = estimate_tokens_from_text_with_source(completion, model)
+            span.set_attribute("llm.usage.completion_tokens", count)
+            span.set_attribute("llm.usage.completion_source", source)
+            wrote_any = True
+            wrote_completion = True
+
+        # Synthesize overall usage source if not provided by instrumentation.
+        if wrote_any and "llm.usage.source" not in span.attributes:
+            ps = span.attributes.get("llm.usage.prompt_source")
+            cs = span.attributes.get("llm.usage.completion_source")
+            if ps and cs and ps == cs:
+                span.set_attribute("llm.usage.source", ps)
+            elif ps or cs:
+                span.set_attribute("llm.usage.source", "mixed")
+
+        # If provider already marked usage as provider_usage, and we filled any missing
+        # fields, mark it as mixed.
+        if wrote_any and span.attributes.get("llm.usage.source") == "provider_usage":
+            if wrote_prompt or wrote_completion:
+                span.set_attribute("llm.usage.source", "mixed")
+
+    def shutdown(self) -> None:
+        return None
+
+    def force_flush(self, timeout: Optional[float] = None) -> None:
+        return None
+
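When tiktoken is installed, the estimators use the model's encoding (falling back to cl100k_base for chat messages); otherwise they degrade to whitespace counting, and in every case the returned source string records which path produced the number. A hedged sketch (exact counts depend on the tokenizer version, so the values in the comments are indicative only):

    from traccia.processors.token_counter import (
        estimate_openai_chat_prompt_tokens_with_source,
        estimate_tokens_from_text_with_source,
    )

    print(estimate_tokens_from_text_with_source("hello traccia world", "gpt-4o"))
    # with tiktoken: (n, "estimated.tiktoken"); without it: (3, "estimated.heuristic")

    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Summarize the release notes."},
    ]
    print(estimate_openai_chat_prompt_tokens_with_source(messages, "gpt-4o"))
    # with tiktoken: (n, "estimated.tiktoken_chat"); without it: (m, "estimated.chat_heuristic")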