traccia-0.1.2-py3-none-any.whl → traccia-0.1.5-py3-none-any.whl

This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
Files changed (55)
  1. traccia/__init__.py +73 -0
  2. traccia/auto.py +736 -0
  3. traccia/auto_instrumentation.py +74 -0
  4. traccia/cli.py +349 -0
  5. traccia/config.py +693 -0
  6. traccia/context/__init__.py +33 -0
  7. traccia/context/context.py +67 -0
  8. traccia/context/propagators.py +283 -0
  9. traccia/errors.py +48 -0
  10. traccia/exporter/__init__.py +8 -0
  11. traccia/exporter/console_exporter.py +31 -0
  12. traccia/exporter/file_exporter.py +178 -0
  13. traccia/exporter/http_exporter.py +214 -0
  14. traccia/exporter/otlp_exporter.py +190 -0
  15. traccia/instrumentation/__init__.py +20 -0
  16. traccia/instrumentation/anthropic.py +92 -0
  17. traccia/instrumentation/decorator.py +263 -0
  18. traccia/instrumentation/fastapi.py +38 -0
  19. traccia/instrumentation/http_client.py +21 -0
  20. traccia/instrumentation/http_server.py +25 -0
  21. traccia/instrumentation/openai.py +178 -0
  22. traccia/instrumentation/requests.py +68 -0
  23. traccia/integrations/__init__.py +22 -0
  24. traccia/integrations/langchain/__init__.py +14 -0
  25. traccia/integrations/langchain/callback.py +418 -0
  26. traccia/integrations/langchain/utils.py +129 -0
  27. traccia/pricing_config.py +58 -0
  28. traccia/processors/__init__.py +35 -0
  29. traccia/processors/agent_enricher.py +159 -0
  30. traccia/processors/batch_processor.py +140 -0
  31. traccia/processors/cost_engine.py +71 -0
  32. traccia/processors/cost_processor.py +70 -0
  33. traccia/processors/drop_policy.py +44 -0
  34. traccia/processors/logging_processor.py +31 -0
  35. traccia/processors/rate_limiter.py +223 -0
  36. traccia/processors/sampler.py +22 -0
  37. traccia/processors/token_counter.py +216 -0
  38. traccia/runtime_config.py +106 -0
  39. traccia/tracer/__init__.py +15 -0
  40. traccia/tracer/otel_adapter.py +577 -0
  41. traccia/tracer/otel_utils.py +24 -0
  42. traccia/tracer/provider.py +155 -0
  43. traccia/tracer/span.py +286 -0
  44. traccia/tracer/span_context.py +16 -0
  45. traccia/tracer/tracer.py +243 -0
  46. traccia/utils/__init__.py +19 -0
  47. traccia/utils/helpers.py +95 -0
  48. {traccia-0.1.2.dist-info → traccia-0.1.5.dist-info}/METADATA +32 -15
  49. traccia-0.1.5.dist-info/RECORD +53 -0
  50. traccia-0.1.5.dist-info/top_level.txt +1 -0
  51. traccia-0.1.2.dist-info/RECORD +0 -6
  52. traccia-0.1.2.dist-info/top_level.txt +0 -1
  53. {traccia-0.1.2.dist-info → traccia-0.1.5.dist-info}/WHEEL +0 -0
  54. {traccia-0.1.2.dist-info → traccia-0.1.5.dist-info}/entry_points.txt +0 -0
  55. {traccia-0.1.2.dist-info → traccia-0.1.5.dist-info}/licenses/LICENSE +0 -0
traccia/processors/rate_limiter.py
@@ -0,0 +1,223 @@
+ """Rate limiting processor for span export."""
+
+ from __future__ import annotations
+
+ import logging
+ import threading
+ import time
+ from collections import deque
+ from typing import Optional
+
+ from opentelemetry.sdk.trace import ReadableSpan
+
+ from traccia.errors import RateLimitError
+
+ logger = logging.getLogger(__name__)
+
+
+ class RateLimiter:
+     """
+     Token bucket rate limiter with hybrid blocking/dropping behavior.
+
+     Features:
+     - Token bucket algorithm for smooth rate limiting
+     - Short blocking period before dropping spans
+     - Detailed logging of dropped spans
+     - Thread-safe implementation
+     """
+
+     def __init__(
+         self,
+         max_spans_per_second: Optional[float] = None,
+         max_block_ms: int = 100,
+     ):
+         """
+         Initialize rate limiter.
+
+         Args:
+             max_spans_per_second: Maximum spans per second (None = unlimited)
+             max_block_ms: Maximum milliseconds to block before dropping
+         """
+         self.max_spans_per_second = max_spans_per_second
+         self.max_block_ms = max_block_ms
+         self.enabled = max_spans_per_second is not None and max_spans_per_second > 0
+
+         # Token bucket state
+         self._tokens: float = max_spans_per_second or 0
+         self._max_tokens: float = max_spans_per_second or 0
+         self._last_refill_time: float = time.time()
+         self._lock = threading.Lock()
+
+         # Stats
+         self._total_spans = 0
+         self._dropped_spans = 0
+         self._blocked_spans = 0
+
+         # Recent timestamps for sliding window (backup)
+         self._recent_timestamps: deque = deque()
+         self._window_seconds = 1.0
+
+     def acquire(self, span: Optional[ReadableSpan] = None) -> bool:
+         """
+         Try to acquire permission to process a span.
+
+         Returns True if span should be processed, False if it should be dropped.
+
+         Behavior:
+         1. If unlimited (disabled), always return True
+         2. Try to acquire a token immediately
+         3. If no token, block for up to max_block_ms
+         4. If still no token after blocking, drop and return False
+
+         Args:
+             span: Optional span for logging purposes
+
+         Returns:
+             True if span should be processed, False if dropped
+         """
+         if not self.enabled:
+             return True
+
+         self._total_spans += 1
+
+         with self._lock:
+             # Refill tokens based on elapsed time
+             self._refill_tokens()
+
+             # Try to acquire immediately
+             if self._tokens >= 1.0:
+                 self._tokens -= 1.0
+                 return True
+
+             # No tokens available, try blocking
+             if self.max_block_ms > 0:
+                 block_start = time.time()
+                 blocked_ms = 0
+
+                 while blocked_ms < self.max_block_ms:
+                     # Release lock briefly to allow other threads
+                     self._lock.release()
+                     time.sleep(0.001) # Sleep 1ms
+                     self._lock.acquire()
+
+                     # Refill and try again
+                     self._refill_tokens()
+                     if self._tokens >= 1.0:
+                         self._tokens -= 1.0
+                         self._blocked_spans += 1
+                         return True
+
+                     blocked_ms = (time.time() - block_start) * 1000
+
+             # Still no tokens after blocking - drop the span
+             self._dropped_spans += 1
+
+             # Log dropped span
+             span_name = span.name if span else "unknown"
+             logger.warning(
+                 f"Rate limit exceeded - dropping span '{span_name}'. "
+                 f"Total dropped: {self._dropped_spans}/{self._total_spans} "
+                 f"({self._dropped_spans / self._total_spans * 100:.1f}%)"
+             )
+
+             return False
+
+     def _refill_tokens(self) -> None:
+         """Refill tokens based on elapsed time (token bucket algorithm)."""
+         now = time.time()
+         elapsed = now - self._last_refill_time
+
+         if elapsed > 0:
+             # Add tokens based on rate and elapsed time
+             new_tokens = elapsed * self.max_spans_per_second
+             self._tokens = min(self._max_tokens, self._tokens + new_tokens)
+             self._last_refill_time = now
+
+     def get_stats(self) -> dict:
+         """Get rate limiting statistics."""
+         with self._lock:
+             drop_rate = (self._dropped_spans / self._total_spans * 100) if self._total_spans > 0 else 0
+             return {
+                 "enabled": self.enabled,
+                 "max_spans_per_second": self.max_spans_per_second,
+                 "total_spans": self._total_spans,
+                 "dropped_spans": self._dropped_spans,
+                 "blocked_spans": self._blocked_spans,
+                 "drop_rate_percent": round(drop_rate, 2),
+                 "current_tokens": round(self._tokens, 2),
+             }
+
+     def reset_stats(self) -> None:
+         """Reset statistics counters."""
+         with self._lock:
+             self._total_spans = 0
+             self._dropped_spans = 0
+             self._blocked_spans = 0
+
+
+ class RateLimitingSpanProcessor:
+     """
+     Span processor that enforces rate limiting before passing to next processor.
+
+     This should be added early in the processor chain to drop spans before
+     they consume resources in downstream processors.
+     """
+
+     def __init__(
+         self,
+         next_processor,
+         max_spans_per_second: Optional[float] = None,
+         max_block_ms: int = 100,
+     ):
+         """
+         Initialize rate limiting processor.
+
+         Args:
+             next_processor: Next processor in the chain
+             max_spans_per_second: Maximum spans per second (None = unlimited)
+             max_block_ms: Maximum milliseconds to block before dropping
+         """
+         self.next_processor = next_processor
+         self.rate_limiter = RateLimiter(
+             max_spans_per_second=max_spans_per_second,
+             max_block_ms=max_block_ms,
+         )
+
+     def on_start(self, span, parent_context=None):
+         """Called when span starts - pass through to next processor."""
+         if self.next_processor and hasattr(self.next_processor, 'on_start'):
+             self.next_processor.on_start(span, parent_context)
+
+     def on_end(self, span):
+         """
+         Called when span ends - check rate limit before passing to next processor.
+
+         If rate limit is exceeded, span is dropped and not passed to next processor.
+         """
+         # Check rate limit
+         if not self.rate_limiter.acquire(span):
+             # Span dropped - don't pass to next processor
+             return
+
+         # Pass to next processor
+         if self.next_processor and hasattr(self.next_processor, 'on_end'):
+             self.next_processor.on_end(span)
+
+     def shutdown(self):
+         """Shutdown processor and log final stats."""
+         stats = self.rate_limiter.get_stats()
+         if stats["enabled"] and stats["dropped_spans"] > 0:
+             logger.info(
+                 f"Rate limiter shutdown. Final stats: "
+                 f"{stats['dropped_spans']}/{stats['total_spans']} spans dropped "
+                 f"({stats['drop_rate_percent']}%)"
+             )
+
+         if self.next_processor and hasattr(self.next_processor, 'shutdown'):
+             self.next_processor.shutdown()
+
+     def force_flush(self, timeout_millis: int = 30000):
+         """Force flush - pass through to next processor."""
+         if self.next_processor and hasattr(self.next_processor, 'force_flush'):
+             return self.next_processor.force_flush(timeout_millis)
+         return True
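For orientation, a minimal usage sketch of the new processor (not part of the diff; the ConsoleDownstream stand-in and the 50 spans/s figure are illustrative assumptions, while the import path and constructor arguments come from the code above):

from traccia.processors.rate_limiter import RateLimitingSpanProcessor

class ConsoleDownstream:
    """Toy downstream processor for this sketch; any object with these hooks works."""
    def on_start(self, span, parent_context=None):
        pass
    def on_end(self, span):
        print(f"exporting span: {span.name}")
    def shutdown(self):
        pass
    def force_flush(self, timeout_millis=30000):
        return True

# Allow roughly 50 spans per second, blocking up to 100 ms before dropping.
limited = RateLimitingSpanProcessor(
    next_processor=ConsoleDownstream(),
    max_spans_per_second=50.0,
    max_block_ms=100,
)
# limited.rate_limiter.get_stats() exposes drop/block counters at any time.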
traccia/processors/sampler.py
@@ -0,0 +1,22 @@
+ """Sampling decisions for traces."""
+
+ import random
+ from dataclasses import dataclass
+
+
+ @dataclass
+ class SamplingResult:
+     sampled: bool
+
+
+ class Sampler:
+     """Head-based sampler using a fixed probability."""
+
+     def __init__(self, sample_rate: float = 1.0) -> None:
+         if not 0.0 <= sample_rate <= 1.0:
+             raise ValueError("sample_rate must be between 0.0 and 1.0")
+         self.sample_rate = sample_rate
+
+     def should_sample(self) -> SamplingResult:
+         return SamplingResult(sampled=random.random() <= self.sample_rate)
+
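A quick sketch of how the sampler might be used for head-based sampling (not part of the diff; the 0.25 rate is an arbitrary example):

from traccia.processors.sampler import Sampler

sampler = Sampler(sample_rate=0.25)  # keep roughly 25% of traces
decision = sampler.should_sample()
if decision.sampled:
    print("record this trace")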
traccia/processors/token_counter.py
@@ -0,0 +1,216 @@
+ """Token counting utilities and processor for spans with LLM usage.
+
+ Best practice:
+ - Prefer provider-reported usage tokens when available.
+ - Otherwise, estimate with the vendor tokenizer when available (tiktoken for
+   OpenAI) and record the estimate source on the span.
+ """
+
+ from __future__ import annotations
+
+ from typing import Any, Dict, Optional, Tuple
+
+ from traccia.tracer.provider import SpanProcessor
+
+ try: # optional dependency for accurate counting
+     import tiktoken # type: ignore
+ except Exception: # pragma: no cover
+     tiktoken = None # fallback to heuristic
+
+
+ MODEL_TO_ENCODING = {
+     # OpenAI mappings (approximate; kept current as of gpt-4o family)
+     "gpt-4o": "o200k_base",
+     "gpt-4o-mini": "o200k_base",
+     "gpt-4": "cl100k_base",
+     "gpt-3.5-turbo": "cl100k_base",
+ }
+
+
+ def _encoding_for_model(model: Optional[str]):
+     if tiktoken is None:
+         return None
+     if not model:
+         return None
+     m = str(model)
+     # First try tiktoken's model registry (best when available).
+     try:
+         return tiktoken.encoding_for_model(m)
+     except Exception:
+         pass
+     # Then try our explicit mapping, supporting version-suffixed models by prefix.
+     encoding_name = MODEL_TO_ENCODING.get(m)
+     if encoding_name is None:
+         for key in sorted(MODEL_TO_ENCODING.keys(), key=len, reverse=True):
+             if m.startswith(key):
+                 encoding_name = MODEL_TO_ENCODING[key]
+                 break
+     if encoding_name:
+         try:
+             return tiktoken.get_encoding(encoding_name)
+         except Exception:
+             return None
+     return None
+
+
+ def _count_with_tiktoken(text: str, model: Optional[str]) -> Optional[int]:
+     if tiktoken is None or not text:
+         return None
+     encoding = _encoding_for_model(model)
+     if encoding is None:
+         return None
+     try:
+         return len(encoding.encode(text))
+     except Exception:
+         return None
+
+
+ def estimate_tokens_from_text(text: str, model: Optional[str] = None) -> int:
+     """
+     Estimate tokens. Prefer model-accurate count via tiktoken when available,
+     otherwise fall back to a rough whitespace split.
+     """
+     if not text:
+         return 0
+     exact = _count_with_tiktoken(text, model)
+     if exact is not None:
+         return exact
+     return len(text.split())
+
+
+ def estimate_tokens_from_text_with_source(
+     text: str, model: Optional[str] = None
+ ) -> Tuple[int, str]:
+     """
+     Return (token_count, source) where source is:
+     - "estimated.tiktoken"
+     - "estimated.heuristic"
+     """
+     if not text:
+         return 0, "estimated.heuristic"
+     exact = _count_with_tiktoken(text, model)
+     if exact is not None:
+         return exact, "estimated.tiktoken"
+     return len(text.split()), "estimated.heuristic"
+
+
+ def _openai_chat_overhead(model: Optional[str]) -> Tuple[int, int, int]:
+     """
+     Return (tokens_per_message, tokens_per_name, tokens_for_reply).
+
+     These constants are model-dependent in OpenAI's chat format. For estimation
+     we use a reasonable default that is close for many modern chat models.
+     """
+     # Defaults (works reasonably for gpt-4/4o families as an estimate)
+     return 3, 1, 3
+
+
+ def estimate_openai_chat_prompt_tokens_with_source(
+     messages: Any, model: Optional[str] = None
+ ) -> Optional[Tuple[int, str]]:
+     """
+     Estimate prompt tokens from a list of chat messages.
+
+     This is best-effort and should be treated as an estimate unless provider
+     usage is available.
+     """
+     if not isinstance(messages, (list, tuple)) or not messages:
+         return None
+     if tiktoken is None:
+         # Heuristic fallback: count whitespace tokens across role/content.
+         parts = []
+         for msg in list(messages)[:50]:
+             if not isinstance(msg, dict):
+                 continue
+             role = msg.get("role") or ""
+             content = msg.get("content") or ""
+             if not isinstance(content, str):
+                 content = str(content)
+             parts.append(f"{role} {content}".strip())
+         text = "\n".join([p for p in parts if p])
+         return (len(text.split()), "estimated.chat_heuristic")
+     try:
+         encoding = _encoding_for_model(model) or tiktoken.get_encoding("cl100k_base")
+     except Exception:
+         return None
+
+     tokens_per_message, tokens_per_name, tokens_for_reply = _openai_chat_overhead(model)
+     total = 0
+     for msg in list(messages)[:50]:
+         if not isinstance(msg, dict):
+             continue
+         total += tokens_per_message
+         role = msg.get("role") or ""
+         name = msg.get("name")
+         content = msg.get("content") or ""
+         if not isinstance(content, str):
+             content = str(content)
+         total += len(encoding.encode(str(role)))
+         total += len(encoding.encode(content))
+         if name:
+             total += tokens_per_name
+             total += len(encoding.encode(str(name)))
+     total += tokens_for_reply
+     return total, "estimated.tiktoken_chat"
+
+
+ class TokenCountingProcessor(SpanProcessor):
+     """
+     A processor that infers token counts when not provided by the LLM response.
+     It prefers a model-specific tokenizer (tiktoken) when available.
+     """
+
+     def on_end(self, span) -> None:
+         prompt = span.attributes.get("llm.prompt")
+         completion = span.attributes.get("llm.completion")
+         model = span.attributes.get("llm.model")
+         openai_messages = span.attributes.get("llm.openai.messages")
+
+         wrote_any = False
+         wrote_prompt = False
+         wrote_completion = False
+
+         if "llm.usage.prompt_tokens" not in span.attributes:
+             # Prefer chat-structure estimation when available.
+             est = estimate_openai_chat_prompt_tokens_with_source(openai_messages, model)
+             if est is not None:
+                 count, source = est
+                 span.set_attribute("llm.usage.prompt_tokens", count)
+                 span.set_attribute("llm.usage.prompt_source", source)
+                 wrote_any = True
+                 wrote_prompt = True
+             elif isinstance(prompt, str):
+                 count, source = estimate_tokens_from_text_with_source(prompt, model)
+                 span.set_attribute("llm.usage.prompt_tokens", count)
+                 span.set_attribute("llm.usage.prompt_source", source)
+                 wrote_any = True
+                 wrote_prompt = True
+
+         if "llm.usage.completion_tokens" not in span.attributes and isinstance(completion, str):
+             count, source = estimate_tokens_from_text_with_source(completion, model)
+             span.set_attribute("llm.usage.completion_tokens", count)
+             span.set_attribute("llm.usage.completion_source", source)
+             wrote_any = True
+             wrote_completion = True
+
+         # Synthesize overall usage source if not provided by instrumentation.
+         if wrote_any and "llm.usage.source" not in span.attributes:
+             ps = span.attributes.get("llm.usage.prompt_source")
+             cs = span.attributes.get("llm.usage.completion_source")
+             if ps and cs and ps == cs:
+                 span.set_attribute("llm.usage.source", ps)
+             elif ps or cs:
+                 span.set_attribute("llm.usage.source", "mixed")
+
+         # If provider already marked usage as provider_usage, and we filled any missing
+         # fields, mark it as mixed.
+         if wrote_any and span.attributes.get("llm.usage.source") == "provider_usage":
+             if wrote_prompt or wrote_completion:
+                 span.set_attribute("llm.usage.source", "mixed")
+
+     def shutdown(self) -> None:
+         return None
+
+     def force_flush(self, timeout: Optional[float] = None) -> None:
+         return None
+
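To illustrate the estimation helpers above (not part of the diff; the text, messages, and model name are placeholders, and results depend on whether tiktoken is installed):

from traccia.processors.token_counter import (
    estimate_openai_chat_prompt_tokens_with_source,
    estimate_tokens_from_text_with_source,
)

# Plain-text estimate: tiktoken-based when available, whitespace heuristic otherwise.
count, source = estimate_tokens_from_text_with_source("Hello, tracing world!", model="gpt-4o")
print(count, source)  # source is "estimated.tiktoken" or "estimated.heuristic"

# Chat-structured estimate, including per-message overhead.
messages = [{"role": "user", "content": "Summarise this document."}]
est = estimate_openai_chat_prompt_tokens_with_source(messages, model="gpt-4o")
if est is not None:
    prompt_tokens, prompt_source = est
    print(prompt_tokens, prompt_source)  # "estimated.tiktoken_chat" or "estimated.chat_heuristic"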
traccia/runtime_config.py
@@ -0,0 +1,106 @@
+ """Runtime configuration state management."""
+
+ from typing import Optional, List
+
+ # Global runtime configuration state
+ _config = {
+     "auto_instrument_tools": False,
+     "tool_include": [],
+     "max_tool_spans": 1000,
+     "max_span_depth": 100,
+     "session_id": None,
+     "user_id": None,
+     "tenant_id": None,
+     "project_id": None,
+     "agent_id": None,
+     "debug": False,
+     "attr_truncation_limit": 1000,
+ }
+
+
+ def set_auto_instrument_tools(value: bool) -> None:
+     _config["auto_instrument_tools"] = value
+
+
+ def get_auto_instrument_tools() -> bool:
+     return _config["auto_instrument_tools"]
+
+
+ def set_tool_include(value: List[str]) -> None:
+     _config["tool_include"] = value
+
+
+ def get_tool_include() -> List[str]:
+     return _config["tool_include"]
+
+
+ def set_max_tool_spans(value: int) -> None:
+     _config["max_tool_spans"] = value
+
+
+ def get_max_tool_spans() -> int:
+     return _config["max_tool_spans"]
+
+
+ def set_max_span_depth(value: int) -> None:
+     _config["max_span_depth"] = value
+
+
+ def get_max_span_depth() -> int:
+     return _config["max_span_depth"]
+
+
+ def set_session_id(value: Optional[str]) -> None:
+     _config["session_id"] = value
+
+
+ def get_session_id() -> Optional[str]:
+     return _config["session_id"]
+
+
+ def set_user_id(value: Optional[str]) -> None:
+     _config["user_id"] = value
+
+
+ def get_user_id() -> Optional[str]:
+     return _config["user_id"]
+
+
+ def set_tenant_id(value: Optional[str]) -> None:
+     _config["tenant_id"] = value
+
+
+ def get_tenant_id() -> Optional[str]:
+     return _config["tenant_id"]
+
+
+ def set_project_id(value: Optional[str]) -> None:
+     _config["project_id"] = value
+
+
+ def get_project_id() -> Optional[str]:
+     return _config["project_id"]
+
+
+ def set_agent_id(value: Optional[str]) -> None:
+     _config["agent_id"] = value
+
+
+ def get_agent_id() -> Optional[str]:
+     return _config["agent_id"]
+
+
+ def set_debug(value: bool) -> None:
+     _config["debug"] = value
+
+
+ def get_debug() -> bool:
+     return _config["debug"]
+
+
+ def set_attr_truncation_limit(value: int) -> None:
+     _config["attr_truncation_limit"] = value
+
+
+ def get_attr_truncation_limit() -> int:
+     return _config["attr_truncation_limit"]
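The module is a plain getter/setter facade over a module-level dict; a minimal sketch of its use (the IDs below are placeholders):

from traccia import runtime_config

runtime_config.set_session_id("sess-123")
runtime_config.set_debug(True)

print(runtime_config.get_session_id())             # "sess-123"
print(runtime_config.get_attr_truncation_limit())  # 1000 by default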
traccia/tracer/__init__.py
@@ -0,0 +1,15 @@
+ """Tracer components for the tracing SDK."""
+
+ from traccia.tracer.provider import SpanProcessor, TracerProvider
+ from traccia.tracer.span import Span, SpanStatus
+ from traccia.tracer.span_context import SpanContext
+ from traccia.tracer.tracer import Tracer
+
+ __all__ = [
+     "Span",
+     "SpanStatus",
+     "SpanContext",
+     "Tracer",
+     "TracerProvider",
+     "SpanProcessor",
+ ]