traccia 0.1.2__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. traccia/__init__.py +73 -0
  2. traccia/auto.py +748 -0
  3. traccia/auto_instrumentation.py +74 -0
  4. traccia/cli.py +349 -0
  5. traccia/config.py +699 -0
  6. traccia/context/__init__.py +33 -0
  7. traccia/context/context.py +67 -0
  8. traccia/context/propagators.py +283 -0
  9. traccia/errors.py +48 -0
  10. traccia/exporter/__init__.py +8 -0
  11. traccia/exporter/console_exporter.py +31 -0
  12. traccia/exporter/file_exporter.py +178 -0
  13. traccia/exporter/http_exporter.py +214 -0
  14. traccia/exporter/otlp_exporter.py +190 -0
  15. traccia/instrumentation/__init__.py +26 -0
  16. traccia/instrumentation/anthropic.py +92 -0
  17. traccia/instrumentation/decorator.py +263 -0
  18. traccia/instrumentation/fastapi.py +38 -0
  19. traccia/instrumentation/http_client.py +21 -0
  20. traccia/instrumentation/http_server.py +25 -0
  21. traccia/instrumentation/openai.py +358 -0
  22. traccia/instrumentation/requests.py +68 -0
  23. traccia/integrations/__init__.py +39 -0
  24. traccia/integrations/langchain/__init__.py +14 -0
  25. traccia/integrations/langchain/callback.py +418 -0
  26. traccia/integrations/langchain/utils.py +129 -0
  27. traccia/integrations/openai_agents/__init__.py +73 -0
  28. traccia/integrations/openai_agents/processor.py +262 -0
  29. traccia/pricing_config.py +58 -0
  30. traccia/processors/__init__.py +35 -0
  31. traccia/processors/agent_enricher.py +159 -0
  32. traccia/processors/batch_processor.py +140 -0
  33. traccia/processors/cost_engine.py +71 -0
  34. traccia/processors/cost_processor.py +70 -0
  35. traccia/processors/drop_policy.py +44 -0
  36. traccia/processors/logging_processor.py +31 -0
  37. traccia/processors/rate_limiter.py +223 -0
  38. traccia/processors/sampler.py +22 -0
  39. traccia/processors/token_counter.py +216 -0
  40. traccia/runtime_config.py +127 -0
  41. traccia/tracer/__init__.py +15 -0
  42. traccia/tracer/otel_adapter.py +577 -0
  43. traccia/tracer/otel_utils.py +24 -0
  44. traccia/tracer/provider.py +155 -0
  45. traccia/tracer/span.py +286 -0
  46. traccia/tracer/span_context.py +16 -0
  47. traccia/tracer/tracer.py +243 -0
  48. traccia/utils/__init__.py +19 -0
  49. traccia/utils/helpers.py +95 -0
  50. {traccia-0.1.2.dist-info → traccia-0.1.6.dist-info}/METADATA +72 -15
  51. traccia-0.1.6.dist-info/RECORD +55 -0
  52. traccia-0.1.6.dist-info/top_level.txt +1 -0
  53. traccia-0.1.2.dist-info/RECORD +0 -6
  54. traccia-0.1.2.dist-info/top_level.txt +0 -1
  55. {traccia-0.1.2.dist-info → traccia-0.1.6.dist-info}/WHEEL +0 -0
  56. {traccia-0.1.2.dist-info → traccia-0.1.6.dist-info}/entry_points.txt +0 -0
  57. {traccia-0.1.2.dist-info → traccia-0.1.6.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,262 @@
1
+ """Traccia processor for OpenAI Agents SDK tracing."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from typing import Any, TYPE_CHECKING
7
+
8
+ if TYPE_CHECKING:
9
+ try:
10
+ from agents.tracing import Span, Trace
11
+ from agents.tracing.span_data import (
12
+ AgentSpanData,
13
+ FunctionSpanData,
14
+ GenerationSpanData,
15
+ HandoffSpanData,
16
+ GuardrailSpanData,
17
+ ResponseSpanData,
18
+ CustomSpanData,
19
+ )
20
+ except ImportError:
21
+ pass
22
+
23
+
24
class TracciaAgentsTracingProcessor:
    """
    Traccia processor for OpenAI Agents SDK.

    Implements the Agents SDK TracingProcessor interface to capture agent
    runs, tool calls, handoffs, and generations into Traccia spans.

    All callbacks deliberately swallow ``Exception`` so that tracing
    failures never break the instrumented agent run; handlers are narrowed
    from bare ``except:`` so ``SystemExit``/``KeyboardInterrupt`` still
    propagate.
    """

    def __init__(self):
        """Initialize the processor with empty bookkeeping maps."""
        self._trace_map = {}  # Map Agents trace_id -> Traccia trace context
        self._span_map = {}  # Map Agents span_id -> live Traccia span
        self._tracer = None  # created lazily by _get_tracer()

    def _get_tracer(self):
        """Get or create the Traccia tracer (imported lazily to avoid import cycles)."""
        if self._tracer is None:
            import traccia
            self._tracer = traccia.get_tracer("openai.agents")
        return self._tracer

    def on_trace_start(self, trace: Trace) -> None:
        """Called when an Agents trace starts."""
        try:
            # Store trace context for span correlation
            self._trace_map[trace.trace_id] = {
                "trace_id": trace.trace_id,
                "started_at": getattr(trace, "started_at", None),
            }
        except Exception:
            # Don't break agent execution on tracing errors
            pass

    def on_trace_end(self, trace: Trace) -> None:
        """Called when an Agents trace ends."""
        try:
            # Clean up trace mapping
            self._trace_map.pop(trace.trace_id, None)
        except Exception:
            pass

    def on_span_start(self, span: Span[Any]) -> None:
        """Called when an Agents span starts; opens a matching Traccia span."""
        try:
            tracer = self._get_tracer()
            span_data = span.span_data

            # Determine span name based on span type
            span_name = self._get_span_name(span_data)

            # Start Traccia span with initial attributes
            attributes = self._extract_attributes(span_data)
            traccia_span = tracer.start_span(span_name, attributes=attributes)

            # Store mapping so on_span_end can find and close it
            self._span_map[span.span_id] = traccia_span
        except Exception:
            # Don't break agent execution
            pass

    def on_span_end(self, span: Span[Any]) -> None:
        """Called when an Agents span ends; finalizes and ends the Traccia span."""
        # FIX: bind the name up-front so the except-path below cannot raise a
        # NameError when the failure happens before the pop() assignment
        # (previously this was only masked by a bare ``except:``).
        traccia_span = None
        try:
            traccia_span = self._span_map.pop(span.span_id, None)
            if not traccia_span:
                return

            # Update attributes with final data
            span_data = span.span_data
            self._update_span_attributes(traccia_span, span_data)

            # Record error if present
            error = getattr(span, "error", None)
            if error:
                from traccia.tracer.span import SpanStatus
                error_msg = str(error.get("message", "Unknown error") if isinstance(error, dict) else error)
                traccia_span.set_status(SpanStatus.ERROR, error_msg)

            # End the span
            traccia_span.end()
        except Exception:
            # Ensure span ends even if enrichment/status recording failed
            try:
                if traccia_span is not None:
                    traccia_span.end()
            except Exception:  # FIX: was a bare except
                pass

    def _get_span_name(self, span_data: Any) -> str:
        """Determine Traccia span name from Agents span data."""
        span_type = getattr(span_data, "type", "unknown")

        if span_type == "agent":
            agent_name = getattr(span_data, "name", "unknown")
            return f"agent.{agent_name}"
        elif span_type == "generation":
            return "llm.agents.generation"
        elif span_type == "function":
            func_name = getattr(span_data, "name", "unknown")
            return f"agent.tool.{func_name}"
        elif span_type == "handoff":
            return "agent.handoff"
        elif span_type == "guardrail":
            guardrail_name = getattr(span_data, "name", "unknown")
            return f"agent.guardrail.{guardrail_name}"
        elif span_type == "response":
            return "agent.response"
        elif span_type == "custom":
            custom_name = getattr(span_data, "name", "unknown")
            return f"agent.custom.{custom_name}"
        else:
            # Unknown span types still get a namespaced, greppable name.
            return f"agent.{span_type}"

    def _extract_attributes(self, span_data: Any) -> dict[str, Any]:
        """Extract initial attributes from Agents span data (called at span start)."""
        attrs = {
            "agent.span.type": getattr(span_data, "type", "unknown"),
        }

        span_type = getattr(span_data, "type", None)

        if span_type == "agent":
            attrs["agent.name"] = getattr(span_data, "name", None)
            # Serialized collections are truncated to keep attribute payloads bounded.
            tools = getattr(span_data, "tools", None)
            if tools:
                attrs["agent.tools"] = json.dumps(tools)[:500]
            handoffs = getattr(span_data, "handoffs", None)
            if handoffs:
                attrs["agent.handoffs"] = json.dumps(handoffs)[:500]
            output_type = getattr(span_data, "output_type", None)
            if output_type:
                attrs["agent.output_type"] = str(output_type)

        elif span_type == "generation":
            model = getattr(span_data, "model", None)
            if model:
                attrs["llm.model"] = str(model)
            model_config = getattr(span_data, "model_config", None)
            if model_config:
                attrs["llm.model_config"] = json.dumps(model_config)[:500]

        elif span_type == "function":
            func_name = getattr(span_data, "name", None)
            if func_name:
                attrs["agent.tool.name"] = func_name

        elif span_type == "handoff":
            from_agent = getattr(span_data, "from_agent", None)
            to_agent = getattr(span_data, "to_agent", None)
            if from_agent:
                attrs["agent.handoff.from"] = from_agent
            if to_agent:
                attrs["agent.handoff.to"] = to_agent

        elif span_type == "guardrail":
            guardrail_name = getattr(span_data, "name", None)
            if guardrail_name:
                attrs["agent.guardrail.name"] = guardrail_name

        return attrs

    def _update_span_attributes(self, traccia_span: Any, span_data: Any) -> None:
        """Update Traccia span with final attributes from completed Agents span."""
        try:
            span_type = getattr(span_data, "type", None)

            if span_type == "generation":
                # Add usage info; both input/prompt and output/completion key
                # aliases are set so downstream processors match either naming.
                usage = getattr(span_data, "usage", None)
                if usage and isinstance(usage, dict):
                    input_tokens = usage.get("input_tokens")
                    output_tokens = usage.get("output_tokens")
                    if input_tokens is not None:
                        traccia_span.set_attribute("llm.usage.input_tokens", input_tokens)
                        traccia_span.set_attribute("llm.usage.prompt_tokens", input_tokens)
                    if output_tokens is not None:
                        traccia_span.set_attribute("llm.usage.output_tokens", output_tokens)
                        traccia_span.set_attribute("llm.usage.completion_tokens", output_tokens)
                    if input_tokens is not None and output_tokens is not None:
                        traccia_span.set_attribute("llm.usage.total_tokens", input_tokens + output_tokens)

                # Add truncated input/output for observability
                input_data = getattr(span_data, "input", None)
                if input_data:
                    try:
                        input_str = json.dumps(input_data)[:1000]
                        traccia_span.set_attribute("llm.input", input_str)
                    except (TypeError, ValueError):  # FIX: was a bare except
                        traccia_span.set_attribute("llm.input", str(input_data)[:1000])

                output_data = getattr(span_data, "output", None)
                if output_data:
                    try:
                        output_str = json.dumps(output_data)[:1000]
                        traccia_span.set_attribute("llm.output", output_str)
                    except (TypeError, ValueError):  # FIX: was a bare except
                        traccia_span.set_attribute("llm.output", str(output_data)[:1000])

            elif span_type == "function":
                # Add function input/output (stringified and truncated)
                func_input = getattr(span_data, "input", None)
                if func_input:
                    traccia_span.set_attribute("agent.tool.input", str(func_input)[:500])

                func_output = getattr(span_data, "output", None)
                if func_output:
                    traccia_span.set_attribute("agent.tool.output", str(func_output)[:500])

                mcp_data = getattr(span_data, "mcp_data", None)
                if mcp_data:
                    traccia_span.set_attribute("agent.tool.mcp", json.dumps(mcp_data)[:500])

            elif span_type == "guardrail":
                triggered = getattr(span_data, "triggered", False)
                traccia_span.set_attribute("agent.guardrail.triggered", triggered)

            elif span_type == "response":
                response = getattr(span_data, "response", None)
                if response:
                    response_id = getattr(response, "id", None)
                    if response_id:
                        traccia_span.set_attribute("agent.response.id", response_id)

        except Exception:
            # Don't break tracing on attribute errors
            pass

    def shutdown(self) -> None:
        """Shutdown the processor and release bookkeeping state."""
        try:
            self._trace_map.clear()
            self._span_map.clear()
        except Exception:
            pass

    def force_flush(self) -> None:
        """Force flush any queued spans."""
        # Traccia handles flushing at the provider level
        pass
@@ -0,0 +1,58 @@
1
+ """Pricing configuration fetcher with optional env override.
2
+
3
+ Pricing should be treated as configuration, not source code: vendors update
4
+ prices and model versions frequently. The SDK therefore supports:
5
+ - defaults (stub)
6
+ - env override: AGENT_DASHBOARD_PRICING_JSON
7
+ - direct override via start_tracing(pricing_override=...)
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import json
13
+ import os
14
+ from typing import Dict, Literal, Tuple
15
+
16
+ from traccia.processors.cost_engine import DEFAULT_PRICING
17
+
18
+
19
def fetch_remote_pricing() -> Dict[str, Dict[str, float]]:
    """
    Stand-in for a remote pricing sync.

    A production build would pull this table from a backend service; this
    implementation simply hands back a shallow copy of the bundled defaults
    so callers can mutate the result safely.
    """
    return dict(DEFAULT_PRICING)
25
+
26
+
27
# Provenance tag for a pricing table: which layer supplied the final values.
PricingSource = Literal["default", "env", "override"]
28
+
29
+
30
def load_pricing_with_source(
    override: Dict[str, Dict[str, float]] | None = None,
) -> Tuple[Dict[str, Dict[str, float]], PricingSource]:
    """
    Build the effective pricing table and report where the last override came from.

    Layering (later wins): defaults from fetch_remote_pricing(), then the
    AGENT_DASHBOARD_PRICING_JSON env var (ignored when unset, unparseable,
    or not a JSON object), then the explicit *override* argument.

    Returns:
        (pricing_table, source_of_latest_override)
    """
    table = fetch_remote_pricing()
    origin: PricingSource = "default"

    raw_env = os.getenv("AGENT_DASHBOARD_PRICING_JSON")
    if raw_env:
        try:
            parsed = json.loads(raw_env)
        except Exception:
            parsed = None  # malformed env JSON is silently ignored
        if isinstance(parsed, dict):
            table.update(parsed)
            origin = "env"

    if override:
        table.update(override)
        origin = "override"

    return table, origin
52
+
53
+
54
def load_pricing(override: Dict[str, Dict[str, float]] | None = None) -> Dict[str, Dict[str, float]]:
    """Backward-compatible helper: the pricing table without its source tag."""
    table, _source = load_pricing_with_source(override)
    return table
58
+
@@ -0,0 +1,35 @@
1
+ """Span processors and supporting utilities."""
2
+
3
+ from traccia.processors.batch_processor import BatchSpanProcessor
4
+ from traccia.processors.drop_policy import (
5
+ DEFAULT_DROP_POLICY,
6
+ DropNewestPolicy,
7
+ DropOldestPolicy,
8
+ DropPolicy,
9
+ )
10
+ from traccia.processors.sampler import Sampler, SamplingResult
11
+ from traccia.processors.token_counter import TokenCountingProcessor, estimate_tokens_from_text
12
+ from traccia.processors.cost_engine import compute_cost, DEFAULT_PRICING
13
+ from traccia.processors.cost_processor import CostAnnotatingProcessor
14
+ from traccia.processors.logging_processor import LoggingSpanProcessor
15
+ from traccia.processors.agent_enricher import AgentEnrichmentProcessor
16
+ from traccia.processors.rate_limiter import RateLimiter, RateLimitingSpanProcessor
17
+
18
# Public re-exports of traccia.processors; kept in sync with the imports above.
__all__ = [
    "BatchSpanProcessor",
    "DropPolicy",
    "DropOldestPolicy",
    "DropNewestPolicy",
    "DEFAULT_DROP_POLICY",
    "Sampler",
    "SamplingResult",
    "TokenCountingProcessor",
    "estimate_tokens_from_text",
    "compute_cost",
    "DEFAULT_PRICING",
    "CostAnnotatingProcessor",
    "LoggingSpanProcessor",
    "AgentEnrichmentProcessor",
    "RateLimiter",
    "RateLimitingSpanProcessor",
]
@@ -0,0 +1,159 @@
1
+ """Span processor that enriches spans with agent metadata and cost."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import os
7
+ from typing import Any, Dict, Optional
8
+
9
+ from traccia.processors.cost_engine import compute_cost
10
+ from traccia.tracer.provider import SpanProcessor
11
+
12
+
13
+ def _load_agent_catalog(path: Optional[str]) -> Dict[str, Dict[str, Any]]:
14
+ """
15
+ Load agent metadata from a JSON file.
16
+ Supports:
17
+ { "agents": [ { "id": "...", "name": "...", ... } ] }
18
+ or { "agent-id": { "name": "...", ... }, ... }
19
+ """
20
+ if not path:
21
+ return {}
22
+ if not os.path.exists(path):
23
+ return {}
24
+ try:
25
+ with open(path, "r", encoding="utf-8") as f:
26
+ data = json.load(f)
27
+ except Exception:
28
+ return {}
29
+ if isinstance(data, dict) and "agents" in data and isinstance(data["agents"], list):
30
+ out = {}
31
+ for agent in data["agents"]:
32
+ if not isinstance(agent, dict):
33
+ continue
34
+ aid = agent.get("id")
35
+ if aid:
36
+ out[str(aid)] = agent
37
+ return out
38
+ if isinstance(data, dict):
39
+ return {str(k): v for k, v in data.items() if isinstance(v, dict)}
40
+ return {}
41
+
42
+
43
class AgentEnrichmentProcessor(SpanProcessor):
    """
    Enrich spans with agent metadata (id/name/env/owner/team/org) and compute llm.cost.usd if missing.

    Static metadata can come from:
    - span attributes (preferred)
    - environment variables (AGENT_DASHBOARD_AGENT_ID/NAME/ENV/OWNER/TEAM/ORG_ID/SUB_ORG_ID/DESCRIPTION)
    - JSON config file pointed by AGENT_DASHBOARD_AGENT_CONFIG

    Existing span attribute values always win: enrichment only fills gaps
    (see ``set_if_missing`` in :meth:`on_end`), except ``agent.id`` which is
    overwritten with the resolved id.
    """

    def __init__(
        self,
        *,
        agent_config_path: Optional[str] = None,
        default_agent_id: Optional[str] = None,
        default_env: str = "production",
    ) -> None:
        """
        Args:
            agent_config_path: Path to the JSON agent catalog; falls back to
                the AGENT_DASHBOARD_AGENT_CONFIG env var, then "agent_config.json".
            default_agent_id: Fallback agent id; the explicit argument wins
                over the AGENT_DASHBOARD_AGENT_ID env var.
            default_env: Environment label; here the AGENT_DASHBOARD_ENV env
                var wins over the parameter (opposite precedence to the id).
        """
        self.default_agent_id = default_agent_id or os.getenv("AGENT_DASHBOARD_AGENT_ID")
        self.default_env = os.getenv("AGENT_DASHBOARD_ENV") or default_env
        self.default_name = os.getenv("AGENT_DASHBOARD_AGENT_NAME")
        self.default_type = os.getenv("AGENT_DASHBOARD_AGENT_TYPE")
        self.default_owner = os.getenv("AGENT_DASHBOARD_AGENT_OWNER")
        self.default_team = os.getenv("AGENT_DASHBOARD_AGENT_TEAM")
        self.default_org = os.getenv("AGENT_DASHBOARD_ORG_ID")
        self.default_sub_org = os.getenv("AGENT_DASHBOARD_SUB_ORG_ID")
        self.default_description = os.getenv("AGENT_DASHBOARD_AGENT_DESCRIPTION")
        cfg_path = (
            agent_config_path
            or os.getenv("AGENT_DASHBOARD_AGENT_CONFIG")
            or "agent_config.json"
        )
        self.catalog = _load_agent_catalog(cfg_path)
        # If only one agent is declared, remember it for convenient fallback.
        self.single_agent_id: Optional[str] = None
        if len(self.catalog) == 1:
            self.single_agent_id = next(iter(self.catalog.keys()))

    def on_end(self, span) -> None:
        """Resolve an agent id for *span* and fill in missing metadata/cost attributes.

        Mutates ``span.attributes`` in place. Resolution order for the id:
        span attributes -> configured/env default -> tracer instrumentation
        scope -> sole catalog entry. Spans with no resolvable id are left
        untouched.
        """
        attrs = span.attributes
        # Resolve agent id
        agent_id = (
            attrs.get("agent.id")
            or attrs.get("agent")
            or self.default_agent_id
        )
        # Try using tracer instrumentation scope as a fallback id
        if not agent_id and getattr(span, "tracer", None) is not None:
            agent_id = getattr(span.tracer, "instrumentation_scope", None)
        # If not found in attributes/env/scope, and only one agent exists in catalog, use it
        if not agent_id and self.single_agent_id:
            agent_id = self.single_agent_id
        # If still missing, skip enrichment
        if not agent_id:
            return

        # Look up static metadata
        meta = self.catalog.get(agent_id, {})
        # If the resolved id is not in catalog but we have a single agent defined, use that entry
        # (NOTE: this re-maps agent_id itself to the catalog's single entry).
        if not meta and self.single_agent_id:
            agent_id = self.single_agent_id
            meta = self.catalog.get(agent_id, {})

        def set_if_missing(key: str, value: Any) -> None:
            # Only fill gaps: never overwrite a real value already on the span.
            if value is None:
                return
            if key not in attrs or attrs.get(key) in (None, ""):
                attrs[key] = value

        # agent.id is authoritative once resolved — always overwritten.
        attrs["agent.id"] = agent_id
        set_if_missing("agent.name", meta.get("name") or self.default_name or agent_id)
        set_if_missing("agent.type", meta.get("type") or self.default_type or "workflow")
        set_if_missing("agent.description", meta.get("description") or self.default_description or "")
        set_if_missing("owner", meta.get("owner") or self.default_owner)
        set_if_missing("team", meta.get("team") or self.default_team)
        set_if_missing("org.id", meta.get("org_id") or self.default_org)
        set_if_missing("sub_org.id", meta.get("sub_org_id") or self.default_sub_org)

        # Environment — written under both key spellings for consumer compatibility.
        set_if_missing("env", meta.get("env") or self.default_env)
        set_if_missing("environment", meta.get("env") or self.default_env)

        # Consumers (store as list)
        consumers = meta.get("consuming_teams")
        if consumers and "agent.consuming_teams" not in attrs:
            attrs["agent.consuming_teams"] = consumers

        # Cost: fill llm.cost.usd if we have tokens + model
        if "llm.cost.usd" not in attrs:
            model = attrs.get("llm.model")
            prompt_tokens = attrs.get("llm.usage.prompt_tokens") or 0
            completion_tokens = attrs.get("llm.usage.completion_tokens") or 0
            if model and (prompt_tokens or completion_tokens):
                try:
                    cost = compute_cost(
                        model=model,
                        prompt_tokens=int(prompt_tokens or 0),
                        completion_tokens=int(completion_tokens or 0),
                    )
                    if cost is not None:
                        attrs["llm.cost.usd"] = cost
                except Exception:
                    # Best-effort: unpriced models / bad token values are skipped.
                    pass

        # Span type inference if missing: LLM when a model is present, TOOL
        # when tool/http markers are present; otherwise leave unset.
        if "span.type" not in attrs and "type" not in attrs:
            span_type = None
            if attrs.get("llm.model"):
                span_type = "LLM"
            elif attrs.get("tool.name") or attrs.get("tool") or attrs.get("http.url"):
                span_type = "TOOL"
            if span_type:
                attrs["span.type"] = span_type

    def shutdown(self) -> None:
        """No resources to release; enrichment is stateless after __init__."""
        return

    def force_flush(self, timeout: Optional[float] = None) -> None:
        """Nothing buffered to flush; spans are enriched synchronously in on_end."""
        return
@@ -0,0 +1,140 @@
1
+ """Batching span processor with bounded queue and background flush."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import threading
6
+ import time
7
+ from collections import deque
8
+ from typing import Deque, Iterable, List, Optional
9
+
10
+ from traccia.processors.drop_policy import DEFAULT_DROP_POLICY, DropPolicy
11
+ from traccia.processors.sampler import Sampler
12
+ from traccia.tracer.provider import SpanProcessor
13
+ from traccia.tracer.span import Span
14
+
15
+
16
class BatchSpanProcessor(SpanProcessor):
    """
    Batch span processor that queues spans for export.

    Spans are enqueued from ``on_end`` into a bounded deque and exported in
    batches by a daemon worker thread, either when signalled or every
    ``schedule_delay_millis``.

    Note: This runs as an enrichment processor (before span.end()),
    but it queues spans and exports them after they end.
    When exporting, it extracts ReadableSpan from the OTel span.
    """

    def __init__(
        self,
        exporter=None,
        *,
        max_queue_size: int = 5000,
        max_export_batch_size: int = 512,
        schedule_delay_millis: int = 5000,
        drop_policy: Optional[DropPolicy] = None,
        sampler: Optional[Sampler] = None,
    ) -> None:
        """
        Args:
            exporter: Object with an ``export(spans)`` method; ``None`` disables export.
            max_queue_size: Bound on the pending-span queue.
            max_export_batch_size: Maximum spans handed to the exporter per flush.
            schedule_delay_millis: Background flush period in milliseconds.
            drop_policy: Queue-overflow policy (defaults to DEFAULT_DROP_POLICY).
            sampler: When set, spans whose trace_flags == 0 are dropped.
        """
        self.exporter = exporter
        self.max_queue_size = max_queue_size
        self.max_export_batch_size = max_export_batch_size
        self.schedule_delay = schedule_delay_millis / 1000.0
        self.drop_policy = drop_policy or DEFAULT_DROP_POLICY
        self.sampler = sampler

        self._queue: Deque[Span] = deque()
        self._lock = threading.Lock()
        self._event = threading.Event()
        self._shutdown = False
        # Daemon thread so a forgotten shutdown() never blocks interpreter exit.
        self._worker = threading.Thread(target=self._worker_loop, daemon=True)
        self._worker.start()

    def on_end(self, span: Span) -> None:
        """
        Called when a span ends (BEFORE span.end() is called).

        We queue the span here, but it hasn't ended yet; it ends after the
        enrichment processors run, so _export() filters on the ended flag.
        The span is marked as queued to prevent double-queuing.
        """
        if self._shutdown:
            return

        # Head-based sampling is recorded on SpanContext.trace_flags.
        # If a sampler is configured, traces marked as not-sampled (0) are dropped.
        if self.sampler and getattr(span.context, "trace_flags", 1) == 0:
            return

        with self._lock:
            # FIX: the double-queue check now happens under the lock. It was
            # previously done outside, so two concurrent on_end() calls could
            # both see the flag unset and enqueue the same span twice.
            if getattr(span, "_batch_queued", False):
                return
            enqueued = self.drop_policy.handle(self._queue, span, self.max_queue_size)
            if enqueued:
                span._batch_queued = True  # Mark as queued
                self._event.set()

    def force_flush(self, timeout: Optional[float] = None) -> None:
        """Synchronously flush pending spans until the queue drains or *timeout* elapses."""
        deadline = time.time() + timeout if timeout else None
        while True:
            flushed_any = self._flush_once()
            if not flushed_any:
                return
            if deadline and time.time() >= deadline:
                return

    def shutdown(self) -> None:
        """Stop the worker thread and drain whatever is still queued."""
        self._shutdown = True
        self._event.set()
        self._worker.join(timeout=self.schedule_delay * 2)
        # Final synchronous drain after the worker has stopped (or timed out).
        self.force_flush()

    # Internal
    def _worker_loop(self) -> None:
        """Background worker that periodically flushes spans."""
        while not self._shutdown:
            # Wakes early when on_end() signals new work; otherwise ticks
            # every schedule_delay seconds.
            self._event.wait(timeout=self.schedule_delay)
            self._event.clear()
            self._flush_once()

    def _flush_once(self) -> bool:
        """Flush one batch of spans. Returns True if anything was exported."""
        spans = self._drain_queue(self.max_export_batch_size)
        if not spans:
            return False
        self._export(spans)
        return True

    def _drain_queue(self, limit: int) -> List[Span]:
        """Drain spans from queue up to *limit*, under the lock."""
        items: List[Span] = []
        with self._lock:
            while self._queue and len(items) < limit:
                items.append(self._queue.popleft())
        return items

    def _export(self, spans: Iterable[Span]) -> None:
        """
        Export spans to exporter.

        Spans should be ended by the time they're flushed from the queue;
        any that aren't ended yet are filtered out (and silently lost —
        they were already dequeued).
        """
        if self.exporter is None:
            return

        try:
            # Filter to only ended spans. FIX: use getattr so a span object
            # without an _ended attribute cannot abort the whole batch.
            ended_spans = [span for span in spans if getattr(span, "_ended", False)]

            if not ended_spans:
                return

            # Export spans - exporter will handle conversion if needed
            self.exporter.export(ended_spans)
        except Exception:  # FIX: dropped the unused `as e` binding
            # Export errors are swallowed; resilience over strictness.
            import traceback
            traceback.print_exc()
            return