traccia-0.1.2-py3-none-any.whl → traccia-0.1.5-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- traccia/__init__.py +73 -0
- traccia/auto.py +736 -0
- traccia/auto_instrumentation.py +74 -0
- traccia/cli.py +349 -0
- traccia/config.py +693 -0
- traccia/context/__init__.py +33 -0
- traccia/context/context.py +67 -0
- traccia/context/propagators.py +283 -0
- traccia/errors.py +48 -0
- traccia/exporter/__init__.py +8 -0
- traccia/exporter/console_exporter.py +31 -0
- traccia/exporter/file_exporter.py +178 -0
- traccia/exporter/http_exporter.py +214 -0
- traccia/exporter/otlp_exporter.py +190 -0
- traccia/instrumentation/__init__.py +20 -0
- traccia/instrumentation/anthropic.py +92 -0
- traccia/instrumentation/decorator.py +263 -0
- traccia/instrumentation/fastapi.py +38 -0
- traccia/instrumentation/http_client.py +21 -0
- traccia/instrumentation/http_server.py +25 -0
- traccia/instrumentation/openai.py +178 -0
- traccia/instrumentation/requests.py +68 -0
- traccia/integrations/__init__.py +22 -0
- traccia/integrations/langchain/__init__.py +14 -0
- traccia/integrations/langchain/callback.py +418 -0
- traccia/integrations/langchain/utils.py +129 -0
- traccia/pricing_config.py +58 -0
- traccia/processors/__init__.py +35 -0
- traccia/processors/agent_enricher.py +159 -0
- traccia/processors/batch_processor.py +140 -0
- traccia/processors/cost_engine.py +71 -0
- traccia/processors/cost_processor.py +70 -0
- traccia/processors/drop_policy.py +44 -0
- traccia/processors/logging_processor.py +31 -0
- traccia/processors/rate_limiter.py +223 -0
- traccia/processors/sampler.py +22 -0
- traccia/processors/token_counter.py +216 -0
- traccia/runtime_config.py +106 -0
- traccia/tracer/__init__.py +15 -0
- traccia/tracer/otel_adapter.py +577 -0
- traccia/tracer/otel_utils.py +24 -0
- traccia/tracer/provider.py +155 -0
- traccia/tracer/span.py +286 -0
- traccia/tracer/span_context.py +16 -0
- traccia/tracer/tracer.py +243 -0
- traccia/utils/__init__.py +19 -0
- traccia/utils/helpers.py +95 -0
- {traccia-0.1.2.dist-info → traccia-0.1.5.dist-info}/METADATA +32 -15
- traccia-0.1.5.dist-info/RECORD +53 -0
- traccia-0.1.5.dist-info/top_level.txt +1 -0
- traccia-0.1.2.dist-info/RECORD +0 -6
- traccia-0.1.2.dist-info/top_level.txt +0 -1
- {traccia-0.1.2.dist-info → traccia-0.1.5.dist-info}/WHEEL +0 -0
- {traccia-0.1.2.dist-info → traccia-0.1.5.dist-info}/entry_points.txt +0 -0
- {traccia-0.1.2.dist-info → traccia-0.1.5.dist-info}/licenses/LICENSE +0 -0
traccia/processors/rate_limiter.py
@@ -0,0 +1,223 @@
"""Rate limiting processor for span export."""

from __future__ import annotations

import logging
import threading
import time
from collections import deque
from typing import Optional

from opentelemetry.sdk.trace import ReadableSpan

from traccia.errors import RateLimitError

logger = logging.getLogger(__name__)


class RateLimiter:
    """
    Token bucket rate limiter with hybrid blocking/dropping behavior.

    Features:
    - Token bucket algorithm for smooth rate limiting
    - Short blocking period before dropping spans
    - Detailed logging of dropped spans
    - Thread-safe implementation
    """

    def __init__(
        self,
        max_spans_per_second: Optional[float] = None,
        max_block_ms: int = 100,
    ):
        """
        Initialize rate limiter.

        Args:
            max_spans_per_second: Maximum spans per second (None = unlimited)
            max_block_ms: Maximum milliseconds to block before dropping
        """
        self.max_spans_per_second = max_spans_per_second
        self.max_block_ms = max_block_ms
        self.enabled = max_spans_per_second is not None and max_spans_per_second > 0

        # Token bucket state
        self._tokens: float = max_spans_per_second or 0
        self._max_tokens: float = max_spans_per_second or 0
        self._last_refill_time: float = time.time()
        self._lock = threading.Lock()

        # Stats
        self._total_spans = 0
        self._dropped_spans = 0
        self._blocked_spans = 0

        # Recent timestamps for sliding window (backup)
        self._recent_timestamps: deque = deque()
        self._window_seconds = 1.0

    def acquire(self, span: Optional[ReadableSpan] = None) -> bool:
        """
        Try to acquire permission to process a span.

        Returns True if span should be processed, False if it should be dropped.

        Behavior:
        1. If unlimited (disabled), always return True
        2. Try to acquire a token immediately
        3. If no token, block for up to max_block_ms
        4. If still no token after blocking, drop and return False

        Args:
            span: Optional span for logging purposes

        Returns:
            True if span should be processed, False if dropped
        """
        if not self.enabled:
            return True

        self._total_spans += 1

        with self._lock:
            # Refill tokens based on elapsed time
            self._refill_tokens()

            # Try to acquire immediately
            if self._tokens >= 1.0:
                self._tokens -= 1.0
                return True

            # No tokens available, try blocking
            if self.max_block_ms > 0:
                block_start = time.time()
                blocked_ms = 0

                while blocked_ms < self.max_block_ms:
                    # Release lock briefly to allow other threads
                    self._lock.release()
                    time.sleep(0.001)  # Sleep 1ms
                    self._lock.acquire()

                    # Refill and try again
                    self._refill_tokens()
                    if self._tokens >= 1.0:
                        self._tokens -= 1.0
                        self._blocked_spans += 1
                        return True

                    blocked_ms = (time.time() - block_start) * 1000

            # Still no tokens after blocking - drop the span
            self._dropped_spans += 1

            # Log dropped span
            span_name = span.name if span else "unknown"
            logger.warning(
                f"Rate limit exceeded - dropping span '{span_name}'. "
                f"Total dropped: {self._dropped_spans}/{self._total_spans} "
                f"({self._dropped_spans / self._total_spans * 100:.1f}%)"
            )

            return False

    def _refill_tokens(self) -> None:
        """Refill tokens based on elapsed time (token bucket algorithm)."""
        now = time.time()
        elapsed = now - self._last_refill_time

        if elapsed > 0:
            # Add tokens based on rate and elapsed time
            new_tokens = elapsed * self.max_spans_per_second
            self._tokens = min(self._max_tokens, self._tokens + new_tokens)
            self._last_refill_time = now

    def get_stats(self) -> dict:
        """Get rate limiting statistics."""
        with self._lock:
            drop_rate = (self._dropped_spans / self._total_spans * 100) if self._total_spans > 0 else 0
            return {
                "enabled": self.enabled,
                "max_spans_per_second": self.max_spans_per_second,
                "total_spans": self._total_spans,
                "dropped_spans": self._dropped_spans,
                "blocked_spans": self._blocked_spans,
                "drop_rate_percent": round(drop_rate, 2),
                "current_tokens": round(self._tokens, 2),
            }

    def reset_stats(self) -> None:
        """Reset statistics counters."""
        with self._lock:
            self._total_spans = 0
            self._dropped_spans = 0
            self._blocked_spans = 0


class RateLimitingSpanProcessor:
    """
    Span processor that enforces rate limiting before passing to next processor.

    This should be added early in the processor chain to drop spans before
    they consume resources in downstream processors.
    """

    def __init__(
        self,
        next_processor,
        max_spans_per_second: Optional[float] = None,
        max_block_ms: int = 100,
    ):
        """
        Initialize rate limiting processor.

        Args:
            next_processor: Next processor in the chain
            max_spans_per_second: Maximum spans per second (None = unlimited)
            max_block_ms: Maximum milliseconds to block before dropping
        """
        self.next_processor = next_processor
        self.rate_limiter = RateLimiter(
            max_spans_per_second=max_spans_per_second,
            max_block_ms=max_block_ms,
        )

    def on_start(self, span, parent_context=None):
        """Called when span starts - pass through to next processor."""
        if self.next_processor and hasattr(self.next_processor, 'on_start'):
            self.next_processor.on_start(span, parent_context)

    def on_end(self, span):
        """
        Called when span ends - check rate limit before passing to next processor.

        If rate limit is exceeded, span is dropped and not passed to next processor.
        """
        # Check rate limit
        if not self.rate_limiter.acquire(span):
            # Span dropped - don't pass to next processor
            return

        # Pass to next processor
        if self.next_processor and hasattr(self.next_processor, 'on_end'):
            self.next_processor.on_end(span)

    def shutdown(self):
        """Shutdown processor and log final stats."""
        stats = self.rate_limiter.get_stats()
        if stats["enabled"] and stats["dropped_spans"] > 0:
            logger.info(
                f"Rate limiter shutdown. Final stats: "
                f"{stats['dropped_spans']}/{stats['total_spans']} spans dropped "
                f"({stats['drop_rate_percent']}%)"
            )

        if self.next_processor and hasattr(self.next_processor, 'shutdown'):
            self.next_processor.shutdown()

    def force_flush(self, timeout_millis: int = 30000):
        """Force flush - pass through to next processor."""
        if self.next_processor and hasattr(self.next_processor, 'force_flush'):
            return self.next_processor.force_flush(timeout_millis)
        return True
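
For orientation, a minimal usage sketch of the rate-limiting processor added above. It is not part of the package diff: the CountingProcessor stub is hypothetical and stands in for whatever downstream processor (batch, export, logging) is actually configured; only the RateLimitingSpanProcessor API shown in the hunk is assumed.

    # Illustrative sketch only -- CountingProcessor is a made-up stand-in
    # for a real downstream span processor.
    from traccia.processors.rate_limiter import RateLimitingSpanProcessor

    class CountingProcessor:
        """Hypothetical downstream processor that just counts ended spans."""

        def __init__(self):
            self.ended = 0

        def on_end(self, span):
            self.ended += 1

        def shutdown(self):
            pass

    downstream = CountingProcessor()
    limited = RateLimitingSpanProcessor(
        next_processor=downstream,
        max_spans_per_second=50.0,  # token bucket refills at 50 tokens/s
        max_block_ms=10,            # wait up to 10 ms for a token before dropping
    )

    # Spans that obtain a token reach the downstream processor; the rest are
    # dropped (and logged) by the limiter.
    for _ in range(200):
        limited.on_end(None)

    print(downstream.ended, limited.rate_limiter.get_stats())
    limited.shutdown()
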
traccia/processors/sampler.py
@@ -0,0 +1,22 @@
"""Sampling decisions for traces."""

import random
from dataclasses import dataclass


@dataclass
class SamplingResult:
    sampled: bool


class Sampler:
    """Head-based sampler using a fixed probability."""

    def __init__(self, sample_rate: float = 1.0) -> None:
        if not 0.0 <= sample_rate <= 1.0:
            raise ValueError("sample_rate must be between 0.0 and 1.0")
        self.sample_rate = sample_rate

    def should_sample(self) -> SamplingResult:
        return SamplingResult(sampled=random.random() <= self.sample_rate)
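
A short sketch of how the head-based sampler might be used; only the Sampler API from the hunk above is assumed, and the printed count varies from run to run because the decision is random.

    # Illustrative sketch only: keep roughly one trace in four.
    from traccia.processors.sampler import Sampler

    sampler = Sampler(sample_rate=0.25)
    decisions = [sampler.should_sample().sampled for _ in range(10_000)]
    print(f"sampled {sum(decisions)} of {len(decisions)} traces")

    # The rate is validated at construction time.
    try:
        Sampler(sample_rate=1.5)
    except ValueError as exc:
        print(exc)  # sample_rate must be between 0.0 and 1.0
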
traccia/processors/token_counter.py
@@ -0,0 +1,216 @@
"""Token counting utilities and processor for spans with LLM usage.

Best practice:
- Prefer provider-reported usage tokens when available.
- Otherwise, estimate with the vendor tokenizer when available (tiktoken for
  OpenAI) and record the estimate source on the span.
"""

from __future__ import annotations

from typing import Any, Dict, Optional, Tuple

from traccia.tracer.provider import SpanProcessor

try:  # optional dependency for accurate counting
    import tiktoken  # type: ignore
except Exception:  # pragma: no cover
    tiktoken = None  # fallback to heuristic


MODEL_TO_ENCODING = {
    # OpenAI mappings (approximate; kept current as of gpt-4o family)
    "gpt-4o": "o200k_base",
    "gpt-4o-mini": "o200k_base",
    "gpt-4": "cl100k_base",
    "gpt-3.5-turbo": "cl100k_base",
}


def _encoding_for_model(model: Optional[str]):
    if tiktoken is None:
        return None
    if not model:
        return None
    m = str(model)
    # First try tiktoken's model registry (best when available).
    try:
        return tiktoken.encoding_for_model(m)
    except Exception:
        pass
    # Then try our explicit mapping, supporting version-suffixed models by prefix.
    encoding_name = MODEL_TO_ENCODING.get(m)
    if encoding_name is None:
        for key in sorted(MODEL_TO_ENCODING.keys(), key=len, reverse=True):
            if m.startswith(key):
                encoding_name = MODEL_TO_ENCODING[key]
                break
    if encoding_name:
        try:
            return tiktoken.get_encoding(encoding_name)
        except Exception:
            return None
    return None


def _count_with_tiktoken(text: str, model: Optional[str]) -> Optional[int]:
    if tiktoken is None or not text:
        return None
    encoding = _encoding_for_model(model)
    if encoding is None:
        return None
    try:
        return len(encoding.encode(text))
    except Exception:
        return None


def estimate_tokens_from_text(text: str, model: Optional[str] = None) -> int:
    """
    Estimate tokens. Prefer model-accurate count via tiktoken when available,
    otherwise fall back to a rough whitespace split.
    """
    if not text:
        return 0
    exact = _count_with_tiktoken(text, model)
    if exact is not None:
        return exact
    return len(text.split())


def estimate_tokens_from_text_with_source(
    text: str, model: Optional[str] = None
) -> Tuple[int, str]:
    """
    Return (token_count, source) where source is:
    - "estimated.tiktoken"
    - "estimated.heuristic"
    """
    if not text:
        return 0, "estimated.heuristic"
    exact = _count_with_tiktoken(text, model)
    if exact is not None:
        return exact, "estimated.tiktoken"
    return len(text.split()), "estimated.heuristic"


def _openai_chat_overhead(model: Optional[str]) -> Tuple[int, int, int]:
    """
    Return (tokens_per_message, tokens_per_name, tokens_for_reply).

    These constants are model-dependent in OpenAI's chat format. For estimation
    we use a reasonable default that is close for many modern chat models.
    """
    # Defaults (works reasonably for gpt-4/4o families as an estimate)
    return 3, 1, 3


def estimate_openai_chat_prompt_tokens_with_source(
    messages: Any, model: Optional[str] = None
) -> Optional[Tuple[int, str]]:
    """
    Estimate prompt tokens from a list of chat messages.

    This is best-effort and should be treated as an estimate unless provider
    usage is available.
    """
    if not isinstance(messages, (list, tuple)) or not messages:
        return None
    if tiktoken is None:
        # Heuristic fallback: count whitespace tokens across role/content.
        parts = []
        for msg in list(messages)[:50]:
            if not isinstance(msg, dict):
                continue
            role = msg.get("role") or ""
            content = msg.get("content") or ""
            if not isinstance(content, str):
                content = str(content)
            parts.append(f"{role} {content}".strip())
        text = "\n".join([p for p in parts if p])
        return (len(text.split()), "estimated.chat_heuristic")
    try:
        encoding = _encoding_for_model(model) or tiktoken.get_encoding("cl100k_base")
    except Exception:
        return None

    tokens_per_message, tokens_per_name, tokens_for_reply = _openai_chat_overhead(model)
    total = 0
    for msg in list(messages)[:50]:
        if not isinstance(msg, dict):
            continue
        total += tokens_per_message
        role = msg.get("role") or ""
        name = msg.get("name")
        content = msg.get("content") or ""
        if not isinstance(content, str):
            content = str(content)
        total += len(encoding.encode(str(role)))
        total += len(encoding.encode(content))
        if name:
            total += tokens_per_name
            total += len(encoding.encode(str(name)))
    total += tokens_for_reply
    return total, "estimated.tiktoken_chat"


class TokenCountingProcessor(SpanProcessor):
    """
    A processor that infers token counts when not provided by the LLM response.
    It prefers a model-specific tokenizer (tiktoken) when available.
    """

    def on_end(self, span) -> None:
        prompt = span.attributes.get("llm.prompt")
        completion = span.attributes.get("llm.completion")
        model = span.attributes.get("llm.model")
        openai_messages = span.attributes.get("llm.openai.messages")

        wrote_any = False
        wrote_prompt = False
        wrote_completion = False

        if "llm.usage.prompt_tokens" not in span.attributes:
            # Prefer chat-structure estimation when available.
            est = estimate_openai_chat_prompt_tokens_with_source(openai_messages, model)
            if est is not None:
                count, source = est
                span.set_attribute("llm.usage.prompt_tokens", count)
                span.set_attribute("llm.usage.prompt_source", source)
                wrote_any = True
                wrote_prompt = True
            elif isinstance(prompt, str):
                count, source = estimate_tokens_from_text_with_source(prompt, model)
                span.set_attribute("llm.usage.prompt_tokens", count)
                span.set_attribute("llm.usage.prompt_source", source)
                wrote_any = True
                wrote_prompt = True

        if "llm.usage.completion_tokens" not in span.attributes and isinstance(completion, str):
            count, source = estimate_tokens_from_text_with_source(completion, model)
            span.set_attribute("llm.usage.completion_tokens", count)
            span.set_attribute("llm.usage.completion_source", source)
            wrote_any = True
            wrote_completion = True

        # Synthesize overall usage source if not provided by instrumentation.
        if wrote_any and "llm.usage.source" not in span.attributes:
            ps = span.attributes.get("llm.usage.prompt_source")
            cs = span.attributes.get("llm.usage.completion_source")
            if ps and cs and ps == cs:
                span.set_attribute("llm.usage.source", ps)
            elif ps or cs:
                span.set_attribute("llm.usage.source", "mixed")

        # If provider already marked usage as provider_usage, and we filled any missing
        # fields, mark it as mixed.
        if wrote_any and span.attributes.get("llm.usage.source") == "provider_usage":
            if wrote_prompt or wrote_completion:
                span.set_attribute("llm.usage.source", "mixed")

    def shutdown(self) -> None:
        return None

    def force_flush(self, timeout: Optional[float] = None) -> None:
        return None
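
A sketch of the estimation helpers in use. Which path is taken depends on whether tiktoken is installed; the exact counts and source strings shown in the comments are indicative, not guaranteed.

    # Illustrative sketch only: token estimates fall back to a whitespace
    # heuristic when tiktoken is unavailable, and the source string says so.
    from traccia.processors.token_counter import (
        estimate_openai_chat_prompt_tokens_with_source,
        estimate_tokens_from_text_with_source,
    )

    count, source = estimate_tokens_from_text_with_source(
        "Explain token bucket rate limiting in one sentence.", model="gpt-4o"
    )
    print(count, source)  # e.g. "estimated.tiktoken" or "estimated.heuristic"

    messages = [
        {"role": "system", "content": "You are a terse assistant."},
        {"role": "user", "content": "Summarize the traccia rate limiter."},
    ]
    est = estimate_openai_chat_prompt_tokens_with_source(messages, model="gpt-4o")
    if est is not None:
        prompt_tokens, prompt_source = est
        print(prompt_tokens, prompt_source)  # e.g. "estimated.tiktoken_chat"
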
traccia/runtime_config.py
@@ -0,0 +1,106 @@
"""Runtime configuration state management."""

from typing import Optional, List

# Global runtime configuration state
_config = {
    "auto_instrument_tools": False,
    "tool_include": [],
    "max_tool_spans": 1000,
    "max_span_depth": 100,
    "session_id": None,
    "user_id": None,
    "tenant_id": None,
    "project_id": None,
    "agent_id": None,
    "debug": False,
    "attr_truncation_limit": 1000,
}


def set_auto_instrument_tools(value: bool) -> None:
    _config["auto_instrument_tools"] = value


def get_auto_instrument_tools() -> bool:
    return _config["auto_instrument_tools"]


def set_tool_include(value: List[str]) -> None:
    _config["tool_include"] = value


def get_tool_include() -> List[str]:
    return _config["tool_include"]


def set_max_tool_spans(value: int) -> None:
    _config["max_tool_spans"] = value


def get_max_tool_spans() -> int:
    return _config["max_tool_spans"]


def set_max_span_depth(value: int) -> None:
    _config["max_span_depth"] = value


def get_max_span_depth() -> int:
    return _config["max_span_depth"]


def set_session_id(value: Optional[str]) -> None:
    _config["session_id"] = value


def get_session_id() -> Optional[str]:
    return _config["session_id"]


def set_user_id(value: Optional[str]) -> None:
    _config["user_id"] = value


def get_user_id() -> Optional[str]:
    return _config["user_id"]


def set_tenant_id(value: Optional[str]) -> None:
    _config["tenant_id"] = value


def get_tenant_id() -> Optional[str]:
    return _config["tenant_id"]


def set_project_id(value: Optional[str]) -> None:
    _config["project_id"] = value


def get_project_id() -> Optional[str]:
    return _config["project_id"]


def set_agent_id(value: Optional[str]) -> None:
    _config["agent_id"] = value


def get_agent_id() -> Optional[str]:
    return _config["agent_id"]


def set_debug(value: bool) -> None:
    _config["debug"] = value


def get_debug() -> bool:
    return _config["debug"]


def set_attr_truncation_limit(value: int) -> None:
    _config["attr_truncation_limit"] = value


def get_attr_truncation_limit() -> int:
    return _config["attr_truncation_limit"]
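
A sketch of the runtime configuration API. The values live in a module-level dict, so each setter applies process-wide until changed; only the functions defined in the hunk above are assumed.

    # Illustrative sketch only: set a few runtime values and read them back.
    from traccia import runtime_config

    runtime_config.set_session_id("session-123")
    runtime_config.set_debug(True)
    runtime_config.set_attr_truncation_limit(2000)

    print(runtime_config.get_session_id())             # "session-123"
    print(runtime_config.get_debug())                  # True
    print(runtime_config.get_attr_truncation_limit())  # 2000
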
traccia/tracer/__init__.py
@@ -0,0 +1,15 @@
"""Tracer components for the tracing SDK."""

from traccia.tracer.provider import SpanProcessor, TracerProvider
from traccia.tracer.span import Span, SpanStatus
from traccia.tracer.span_context import SpanContext
from traccia.tracer.tracer import Tracer

__all__ = [
    "Span",
    "SpanStatus",
    "SpanContext",
    "Tracer",
    "TracerProvider",
    "SpanProcessor",
]
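
The re-exports above define the public surface of traccia.tracer. A trivial sketch, assuming the package is installed:

    # Illustrative sketch only: the primitives are importable from traccia.tracer.
    from traccia.tracer import Span, SpanContext, SpanProcessor, SpanStatus, Tracer, TracerProvider

    print([obj.__name__ for obj in (Span, SpanStatus, SpanContext, Tracer, TracerProvider, SpanProcessor)])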