splunk-otel-util-genai 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. opentelemetry/util/genai/__init__.py +17 -0
  2. opentelemetry/util/genai/_fsspec_upload/__init__.py +39 -0
  3. opentelemetry/util/genai/_fsspec_upload/fsspec_hook.py +184 -0
  4. opentelemetry/util/genai/attributes.py +60 -0
  5. opentelemetry/util/genai/callbacks.py +24 -0
  6. opentelemetry/util/genai/config.py +184 -0
  7. opentelemetry/util/genai/debug.py +183 -0
  8. opentelemetry/util/genai/emitters/__init__.py +25 -0
  9. opentelemetry/util/genai/emitters/composite.py +186 -0
  10. opentelemetry/util/genai/emitters/configuration.py +324 -0
  11. opentelemetry/util/genai/emitters/content_events.py +153 -0
  12. opentelemetry/util/genai/emitters/evaluation.py +519 -0
  13. opentelemetry/util/genai/emitters/metrics.py +308 -0
  14. opentelemetry/util/genai/emitters/span.py +774 -0
  15. opentelemetry/util/genai/emitters/spec.py +48 -0
  16. opentelemetry/util/genai/emitters/utils.py +961 -0
  17. opentelemetry/util/genai/environment_variables.py +200 -0
  18. opentelemetry/util/genai/handler.py +1002 -0
  19. opentelemetry/util/genai/instruments.py +44 -0
  20. opentelemetry/util/genai/interfaces.py +58 -0
  21. opentelemetry/util/genai/plugins.py +114 -0
  22. opentelemetry/util/genai/span_context.py +80 -0
  23. opentelemetry/util/genai/types.py +440 -0
  24. opentelemetry/util/genai/upload_hook.py +119 -0
  25. opentelemetry/util/genai/utils.py +182 -0
  26. opentelemetry/util/genai/version.py +15 -0
  27. splunk_otel_util_genai-0.1.3.dist-info/METADATA +70 -0
  28. splunk_otel_util_genai-0.1.3.dist-info/RECORD +31 -0
  29. splunk_otel_util_genai-0.1.3.dist-info/WHEEL +4 -0
  30. splunk_otel_util_genai-0.1.3.dist-info/entry_points.txt +5 -0
  31. splunk_otel_util_genai-0.1.3.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,1002 @@
1
+ # Copyright The OpenTelemetry Authors
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """
16
+ Telemetry handler for GenAI invocations.
17
+
18
+ This module exposes the `TelemetryHandler` class, which manages the lifecycle of
19
+ GenAI (Generative AI) invocations and emits telemetry data (spans, metrics, and events).
19
+ It supports starting, stopping, and failing LLM, embedding, tool-call, workflow, agent, and step invocations.
21
+
22
+ Classes:
23
+ - TelemetryHandler: Manages GenAI invocation lifecycles and emits telemetry.
24
+
25
+ Functions:
26
+ - get_telemetry_handler: Returns a singleton `TelemetryHandler` instance.
27
+
28
+ Usage:
29
+ handler = get_telemetry_handler()
30
+
31
+ # Create an invocation object with your request data
32
+ invocation = LLMInvocation(
33
+ request_model="my-model",
34
+ input_messages=[...],
35
+ provider="my-provider",
36
+ attributes={"custom": "attr"},
37
+ )
38
+
39
+ # Start the invocation (opens a span)
40
+ handler.start_llm(invocation)
41
+
42
+ # Populate outputs and any additional attributes, then stop (closes the span)
43
+ invocation.output_messages = [...]
44
+ invocation.attributes.update({"more": "attrs"})
45
+ handler.stop_llm(invocation)
46
+
47
+ # Or, in case of error
48
+ # handler.fail_llm(invocation, Error(type="...", message="..."))
49
+ """
50
+
51
+ import logging
52
+ import os
53
+ import time
54
+ from typing import Any, Optional
55
+
56
# Optional debug hook: if importing the debug module fails for any reason,
# install a no-op replacement so call sites in this module keep working.
try:
    from opentelemetry.util.genai.debug import genai_debug_log
except Exception:  # pragma: no cover - fallback if debug module missing

    def genai_debug_log(*_args: Any, **_kwargs: Any) -> None:  # type: ignore
        """No-op stand-in used when the real debug logger cannot be imported."""
        return None
62
+
63
+
64
+ from opentelemetry import _events as _otel_events
65
+ from opentelemetry import trace as _trace_mod
66
+ from opentelemetry._logs import Logger, LoggerProvider, get_logger
67
+ from opentelemetry.metrics import MeterProvider, get_meter
68
+ from opentelemetry.sdk.trace.sampling import Decision, TraceIdRatioBased
69
+ from opentelemetry.semconv.attributes import (
70
+ error_attributes as ErrorAttributes,
71
+ )
72
+ from opentelemetry.semconv.schemas import Schemas
73
+ from opentelemetry.trace import (
74
+ TracerProvider,
75
+ get_tracer,
76
+ )
77
+ from opentelemetry.util.genai.emitters.configuration import (
78
+ build_emitter_pipeline,
79
+ )
80
+ from opentelemetry.util.genai.span_context import (
81
+ extract_span_context,
82
+ span_context_hex_ids,
83
+ store_span_context,
84
+ )
85
+ from opentelemetry.util.genai.types import (
86
+ AgentCreation,
87
+ AgentInvocation,
88
+ ContentCapturingMode,
89
+ EmbeddingInvocation,
90
+ Error,
91
+ EvaluationResult,
92
+ GenAI,
93
+ LLMInvocation,
94
+ Step,
95
+ ToolCall,
96
+ Workflow,
97
+ )
98
+ from opentelemetry.util.genai.utils import (
99
+ get_content_capturing_mode,
100
+ is_truthy_env,
101
+ load_completion_callbacks,
102
+ parse_callback_filter,
103
+ )
104
+ from opentelemetry.util.genai.version import __version__
105
+
106
+ from .callbacks import CompletionCallback
107
+ from .config import parse_env
108
+ from .environment_variables import (
109
+ OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT,
110
+ OTEL_INSTRUMENTATION_GENAI_COMPLETION_CALLBACKS,
111
+ OTEL_INSTRUMENTATION_GENAI_DISABLE_DEFAULT_COMPLETION_CALLBACKS,
112
+ )
113
+
114
+ _LOGGER = logging.getLogger(__name__)
115
+
116
+ _TRUTHY_VALUES = {"1", "true", "yes", "on"}
117
+
118
+
119
+ class TelemetryHandler:
120
+ """
121
+ High-level handler managing GenAI invocation lifecycles and emitting
122
+ them as spans, metrics, and events. Evaluation execution & emission is
123
+ delegated to EvaluationManager for extensibility (mirrors emitter design).
124
+ """
125
+
126
def __init__(
    self,
    tracer_provider: TracerProvider | None = None,
    logger_provider: LoggerProvider | None = None,
    meter_provider: MeterProvider | None = None,
):
    """Wire up tracer, logger, meter, sampler, emitters and callbacks.

    Args:
        tracer_provider: Provider passed to ``get_tracer``; may be ``None``.
        logger_provider: Provider passed to ``get_logger``; may be ``None``.
        meter_provider: Provider passed to ``get_meter``; also kept on the
            instance so the stop/fail methods can call ``force_flush`` on it.
    """
    self._tracer = get_tracer(
        __name__,
        __version__,
        tracer_provider,
        schema_url=Schemas.V1_37_0.value,
    )

    # Logger for content events (uses Logs API, not Events API)
    self._content_logger: Logger = get_logger(
        __name__,
        __version__,
        logger_provider=logger_provider,
        schema_url=Schemas.V1_37_0.value,
    )
    self._meter_provider = meter_provider
    meter = get_meter(
        __name__,
        __version__,
        meter_provider=meter_provider,
        schema_url=Schemas.V1_37_0.value,
    )

    self._event_logger = _otel_events.get_event_logger(__name__)

    # Environment-derived settings; reused below for the sampler and the
    # emitter pipeline.
    settings = parse_env()

    # Evaluation sampling is decided per trace id with a ratio-based sampler.
    evaluation_sample_rate = settings.evaluation_sample_rate
    self._sampler = TraceIdRatioBased(evaluation_sample_rate)

    # Fixed canonical evaluation histograms (no longer dynamic):
    # gen_ai.evaluation.(relevance|hallucination|sentiment|toxicity|bias)
    self._evaluation_histograms: dict[str, Any] = {}

    _CANONICAL_METRICS = {
        "relevance",
        "hallucination",
        "sentiment",
        "toxicity",
        "bias",
    }

    def _get_eval_histogram(canonical_name: str):
        """Return the (lazily created, cached) histogram for a canonical metric.

        Returns ``None`` for names outside ``_CANONICAL_METRICS`` or when the
        histogram cannot be created.
        """
        name = canonical_name.strip().lower()
        if name not in _CANONICAL_METRICS:
            return None  # ignore unknown metrics (no emission)
        full_name = f"gen_ai.evaluation.{name}"
        hist = self._evaluation_histograms.get(full_name)
        if hist is not None:
            return hist
        try:
            hist = meter.create_histogram(
                name=full_name,
                unit="1",
                description=f"GenAI evaluation metric '{name}' (0-1 score where applicable)",
            )
            self._evaluation_histograms[full_name] = hist
        except Exception:  # pragma: no cover - defensive
            return None
        return hist

    self._get_eval_histogram = _get_eval_histogram  # type: ignore[attr-defined]

    self._completion_callbacks: list[CompletionCallback] = []
    composite, capture_control = build_emitter_pipeline(
        tracer=self._tracer,
        meter=meter,
        event_logger=self._event_logger,
        content_logger=self._content_logger,
        evaluation_histogram=self._get_eval_histogram,
        settings=settings,
    )
    self._emitter = composite
    self._capture_control = capture_control
    # Set later by _initialize_default_callbacks when a loaded callback
    # exposes a ``manager`` attribute.
    self._evaluation_manager = None
    # Active agent identity stack (name, id) for implicit propagation to nested operations
    self._agent_context_stack: list[tuple[str, str]] = []
    # Span registry (run_id -> Span) to allow parenting even after original invocation ended.
    # We intentionally retain ended parent spans to preserve trace linkage for late children
    # (e.g., final LLM call after agent/workflow termination). A lightweight size cap can be
    # added later if memory pressure surfaces.
    self._span_registry: dict[str, _trace_mod.Span] = {}
    # Generic entity registry (run_id -> entity object) allowing instrumentation
    # layers to avoid storing lifecycle objects. This supports simplified
    # instrumentations that only pass run_id on end/error callbacks.
    self._entity_registry: dict[str, GenAI] = {}
    self._initialize_default_callbacks()
218
+
219
def _should_sample_for_evaluation(self, trace_id: Optional[int]) -> bool:
    """Decide whether an invocation is selected for evaluation.

    Args:
        trace_id: Trace id of the invocation, or a falsy value when unknown.

    Returns:
        ``True`` when the sampler's decision is ``Decision.RECORD_AND_SAMPLE``,
        when no trace id is available, or when the sampler raises;
        ``False`` otherwise.
    """
    try:
        if not trace_id:
            # TODO remove this branch when trace_id is set on all invocations
            # No exception is active here, so exc_info must not be requested:
            # it would attach a meaningless "NoneType: None" traceback.
            _LOGGER.debug(
                "Trace based sampling not applied as trace id is not set."
            )
            return True
        sampling_result = self._sampler.should_sample(
            trace_id=trace_id,
            parent_context=None,
            name="",
        )
        return bool(
            sampling_result
            and sampling_result.decision is Decision.RECORD_AND_SAMPLE
        )
    except Exception:
        # Fail open: a broken sampler must not suppress evaluation.
        _LOGGER.debug("Sampler raised an exception", exc_info=True)
        return True
243
+
244
def _refresh_capture_content(
    self,
):  # re-evaluate env each start in case singleton created before patching
    """Push the current content-capture mode onto span and content-event emitters.

    Best effort: any failure while reading the mode or updating an emitter
    is swallowed so that starting an invocation is never blocked.
    """
    try:
        capture_mode = get_content_capturing_mode()
        targets = list(
            self._emitter.iter_emitters(("span", "content_events"))
        )
        spans_requested = capture_mode in (
            ContentCapturingMode.SPAN_ONLY,
            ContentCapturingMode.SPAN_AND_EVENT,
        )
        # Respect the content capture mode for all generator kinds
        events_requested = capture_mode in (
            ContentCapturingMode.EVENT_ONLY,
            ContentCapturingMode.SPAN_AND_EVENT,
        )

        # Span capture may be vetoed by the capture control, unless the
        # capture-message-content environment variable is truthy.
        capture_control = getattr(self, "_capture_control", None)
        if capture_control is None:
            spans_permitted = True
        else:
            spans_permitted = capture_control.span_allowed
        env_value = os.environ.get(
            OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT
        )
        if is_truthy_env(env_value):
            spans_permitted = True
        span_flag = spans_requested and spans_permitted

        for emitter in targets:
            role = getattr(emitter, "role", None)
            if role == "content_event":
                if hasattr(emitter, "_capture_content"):
                    try:
                        emitter._capture_content = events_requested  # type: ignore[attr-defined]
                    except Exception:
                        pass
                continue
            if role not in ("span", "traceloop_compat"):
                continue
            if not hasattr(emitter, "set_capture_content"):
                continue
            try:
                if role == "traceloop_compat":
                    wanted = span_flag or events_requested
                else:
                    wanted = span_flag
                emitter.set_capture_content(wanted)  # type: ignore[attr-defined]
            except Exception:
                pass
    except Exception:
        pass
293
+
294
def start_llm(
    self,
    invocation: LLMInvocation,
) -> LLMInvocation:
    """Begin an LLM invocation: notify emitters and register it by run id.

    Missing ``agent_name`` / ``agent_id`` values are filled in from the top
    of the active agent stack. Returns the same ``invocation`` object.
    """
    # Ensure capture content settings are current
    self._refresh_capture_content()
    genai_debug_log("handler.start_llm.begin", invocation)

    # Implicit agent inheritance
    agent_stack = self._agent_context_stack
    if agent_stack and not (invocation.agent_name and invocation.agent_id):
        inherited_name, inherited_id = agent_stack[-1]
        if not invocation.agent_name:
            invocation.agent_name = inherited_name
        if not invocation.agent_id:
            invocation.agent_id = inherited_id

    # Start invocation span; tracer context propagation handles parent/child links
    self._emitter.on_start(invocation)

    run_key = str(invocation.run_id)
    created_span = getattr(invocation, "span", None)
    if created_span is not None:
        self._span_registry[run_key] = created_span
    # Register entity for later stop/fail by run_id
    self._entity_registry[run_key] = invocation

    # Debug tracing only; must never affect the caller.
    try:
        ctx = invocation.span_context
        if ctx is None and invocation.span is not None:
            ctx = extract_span_context(invocation.span)
            store_span_context(invocation, ctx)
        trace_hex, span_hex = span_context_hex_ids(ctx)
        if not (trace_hex and span_hex):
            genai_debug_log("handler.start_llm.no_span", invocation)
        else:
            genai_debug_log(
                "handler.start_llm.span_created",
                invocation,
                trace_id=trace_hex,
                span_id=span_hex,
            )
    except Exception:  # pragma: no cover
        pass
    return invocation
337
+
338
def stop_llm(self, invocation: LLMInvocation) -> LLMInvocation:
    """Complete an LLM invocation successfully.

    Stamps ``end_time``, records the evaluation-sampling decision, notifies
    emitters and completion callbacks, drops the entity registration and
    flushes the meter provider when one was supplied.
    """
    invocation.end_time = time.time()

    # Determine if this invocation should be sampled for evaluation
    sampled = self._should_sample_for_evaluation(invocation.trace_id)
    invocation.sample_for_evaluation = sampled

    self._emitter.on_end(invocation)
    self._notify_completion(invocation)
    self._entity_registry.pop(str(invocation.run_id), None)

    # Debug tracing only; must never affect the caller.
    try:
        ctx = invocation.span_context
        if ctx is None and invocation.span is not None:
            ctx = extract_span_context(invocation.span)
            store_span_context(invocation, ctx)
        trace_hex, span_hex = span_context_hex_ids(ctx)
        if invocation.end_time:
            elapsed_ms = round(
                (invocation.end_time - invocation.start_time) * 1000, 3
            )
        else:
            elapsed_ms = None
        genai_debug_log(
            "handler.stop_llm.complete",
            invocation,
            duration_ms=elapsed_ms,
            trace_id=trace_hex,
            span_id=span_hex,
        )
    except Exception:  # pragma: no cover
        pass

    # Force flush metrics if a custom provider with force_flush is present
    provider = getattr(self, "_meter_provider", None)
    if provider is not None:
        try:  # pragma: no cover - defensive
            provider.force_flush()  # type: ignore[attr-defined]
        except Exception:
            pass
    return invocation
379
+
380
def fail_llm(
    self, invocation: LLMInvocation, error: Error
) -> LLMInvocation:
    """Complete an LLM invocation with an error.

    Stamps ``end_time``, forwards the error to the emitters, notifies
    completion callbacks, drops the entity registration and flushes the
    meter provider when one was supplied.
    """
    invocation.end_time = time.time()
    self._emitter.on_error(error, invocation)
    self._notify_completion(invocation)
    self._entity_registry.pop(str(invocation.run_id), None)

    # Debug tracing only; must never affect the caller.
    try:
        ctx = invocation.span_context
        if ctx is None and invocation.span is not None:
            ctx = extract_span_context(invocation.span)
            store_span_context(invocation, ctx)
        trace_hex, span_hex = span_context_hex_ids(ctx)
        debug_fields = {
            "error_type": getattr(error, "type", None),
            "error_message": getattr(error, "message", None),
            "trace_id": trace_hex,
            "span_id": span_hex,
        }
        genai_debug_log("handler.fail_llm.error", invocation, **debug_fields)
    except Exception:  # pragma: no cover
        pass

    provider = getattr(self, "_meter_provider", None)
    if provider is not None:
        try:  # pragma: no cover
            provider.force_flush()  # type: ignore[attr-defined]
        except Exception:
            pass
    return invocation
413
+
414
def start_embedding(
    self, invocation: EmbeddingInvocation
) -> EmbeddingInvocation:
    """Begin an embedding invocation: notify emitters and register it by run id.

    Missing agent identity is inherited from the top of the active agent
    stack, and ``start_time`` is stamped before the emitters are notified.
    """
    self._refresh_capture_content()

    agent_stack = self._agent_context_stack
    if agent_stack and not (invocation.agent_name and invocation.agent_id):
        inherited_name, inherited_id = agent_stack[-1]
        if not invocation.agent_name:
            invocation.agent_name = inherited_name
        if not invocation.agent_id:
            invocation.agent_id = inherited_id

    invocation.start_time = time.time()
    self._emitter.on_start(invocation)

    run_key = str(invocation.run_id)
    created_span = getattr(invocation, "span", None)
    if created_span is not None:
        self._span_registry[run_key] = created_span
    self._entity_registry[run_key] = invocation
    return invocation
434
+
435
def stop_embedding(
    self, invocation: EmbeddingInvocation
) -> EmbeddingInvocation:
    """Complete an embedding invocation successfully and return it."""
    invocation.end_time = time.time()

    # Determine if this invocation should be sampled for evaluation
    sampled = self._should_sample_for_evaluation(invocation.trace_id)
    invocation.sample_for_evaluation = sampled

    self._emitter.on_end(invocation)
    self._notify_completion(invocation)
    self._entity_registry.pop(str(invocation.run_id), None)

    # Force flush metrics if a custom provider with force_flush is present
    provider = getattr(self, "_meter_provider", None)
    if provider is None:
        return invocation
    try:  # pragma: no cover
        provider.force_flush()  # type: ignore[attr-defined]
    except Exception:
        pass
    return invocation
459
+
460
def fail_embedding(
    self, invocation: EmbeddingInvocation, error: Error
) -> EmbeddingInvocation:
    """Complete an embedding invocation with an error and return it."""
    invocation.end_time = time.time()
    self._emitter.on_error(error, invocation)
    self._notify_completion(invocation)
    self._entity_registry.pop(str(invocation.run_id), None)

    provider = getattr(self, "_meter_provider", None)
    if provider is None:
        return invocation
    try:  # pragma: no cover
        provider.force_flush()  # type: ignore[attr-defined]
    except Exception:
        pass
    return invocation
477
+
478
+ # ToolCall lifecycle --------------------------------------------------
479
def start_tool_call(self, invocation: ToolCall) -> ToolCall:
    """Begin a tool call: notify emitters and register it by run id.

    Missing agent identity is inherited from the top of the active agent stack.
    """
    agent_stack = self._agent_context_stack
    if agent_stack and not (invocation.agent_name and invocation.agent_id):
        inherited_name, inherited_id = agent_stack[-1]
        if not invocation.agent_name:
            invocation.agent_name = inherited_name
        if not invocation.agent_id:
            invocation.agent_id = inherited_id

    self._emitter.on_start(invocation)

    run_key = str(invocation.run_id)
    created_span = getattr(invocation, "span", None)
    if created_span is not None:
        self._span_registry[run_key] = created_span
    self._entity_registry[run_key] = invocation
    return invocation
495
+
496
def stop_tool_call(self, invocation: ToolCall) -> ToolCall:
    """Complete a tool call successfully and return it."""
    invocation.end_time = time.time()

    # Determine if this invocation should be sampled for evaluation
    sampled = self._should_sample_for_evaluation(invocation.trace_id)
    invocation.sample_for_evaluation = sampled

    self._emitter.on_end(invocation)
    self._notify_completion(invocation)

    run_key = str(invocation.run_id)
    self._entity_registry.pop(run_key, None)
    return invocation
509
+
510
def fail_tool_call(self, invocation: ToolCall, error: Error) -> ToolCall:
    """Complete a tool call with an error and return it."""
    invocation.end_time = time.time()
    self._emitter.on_error(error, invocation)
    self._notify_completion(invocation)

    run_key = str(invocation.run_id)
    self._entity_registry.pop(run_key, None)
    return invocation
517
+
518
+ # Workflow lifecycle --------------------------------------------------
519
def start_workflow(self, workflow: Workflow) -> Workflow:
    """Begin a workflow: notify emitters and register it by run id."""
    self._refresh_capture_content()
    self._emitter.on_start(workflow)

    run_key = str(workflow.run_id)
    created_span = getattr(workflow, "span", None)
    if created_span is not None:
        self._span_registry[run_key] = created_span
    self._entity_registry[run_key] = workflow
    return workflow
528
+
529
def _handle_evaluation_results(
    self, invocation: GenAI, results: list[EvaluationResult]
) -> None:
    """Forward non-empty evaluation results to the emitter pipeline.

    Emission is best effort: an exception raised by the emitter is logged
    at debug level and suppressed so the caller is never interrupted.
    """
    if not results:
        return
    try:
        self._emitter.on_evaluation_results(results, invocation)
    except Exception:  # pragma: no cover - defensive
        _LOGGER.debug(
            "Emitter failed to handle evaluation results", exc_info=True
        )
538
+
539
def evaluation_results(
    self, invocation: GenAI, results: list[EvaluationResult]
) -> None:
    """Public hook for completion callbacks to report evaluation output."""

    def _trace(marker: str) -> None:
        # Debug tracing is best effort and must not affect result handling.
        try:
            genai_debug_log(
                marker,
                invocation,
                result_count=len(results),
            )
        except Exception:  # pragma: no cover - defensive
            pass

    _trace("handler.evaluation_results.begin")
    self._handle_evaluation_results(invocation, results)
    _trace("handler.evaluation_results.end")
561
+
562
def register_completion_callback(
    self, callback: CompletionCallback
) -> None:
    """Add ``callback`` to the completion callbacks unless already present."""
    registered = self._completion_callbacks
    if callback not in registered:
        registered.append(callback)
568
+
569
def unregister_completion_callback(
    self, callback: CompletionCallback
) -> None:
    """Remove ``callback`` from the completion callbacks; unknown callbacks are ignored."""
    registered = self._completion_callbacks
    if callback in registered:
        registered.remove(callback)
576
+
577
def _notify_completion(self, invocation: GenAI) -> None:
    """Invoke ``on_completion`` on every registered completion callback.

    A failing callback never prevents the remaining ones from running; the
    exception is logged at debug level and suppressed.
    """
    # Iterate over a snapshot so the registered list may change while
    # callbacks are being notified.
    for callback in list(self._completion_callbacks):
        try:
            callback.on_completion(invocation)
        except Exception:  # pragma: no cover - defensive
            _LOGGER.debug(
                "Completion callback %s raised; continuing with remaining callbacks",
                type(callback).__name__,
                exc_info=True,
            )
586
+
587
def _initialize_default_callbacks(self) -> None:
    """Load, bind and register the completion callbacks selected via environment.

    Nothing is loaded when the disable variable is truthy. A callback whose
    ``bind_handler`` raises or returns a falsy value is shut down (when it
    offers ``shutdown``) and skipped. A registered callback exposing a
    non-``None`` ``manager`` becomes the handler's evaluation manager.
    """
    disable_var = (
        OTEL_INSTRUMENTATION_GENAI_DISABLE_DEFAULT_COMPLETION_CALLBACKS
    )
    if is_truthy_env(os.getenv(disable_var)):
        _LOGGER.debug(
            "Default completion callbacks disabled via %s",
            disable_var,
        )
        return

    requested = parse_callback_filter(
        os.getenv(OTEL_INSTRUMENTATION_GENAI_COMPLETION_CALLBACKS)
    )
    loaded, found_names = load_completion_callbacks(requested)
    if requested:
        for absent in requested - found_names:
            _LOGGER.debug(
                "Completion callback '%s' not found in entry points",
                absent,
            )
    if not loaded:
        return

    def _shutdown_quietly(rejected: Any) -> None:
        # Give a rejected callback the chance to release its resources.
        closer = getattr(rejected, "shutdown", None)
        if callable(closer):
            try:
                closer()
            except Exception:  # pragma: no cover - defensive
                pass

    for name, callback in loaded:
        binder = getattr(callback, "bind_handler", None)
        if callable(binder):
            try:
                accepted = bool(binder(self))
            except Exception as exc:  # pragma: no cover - defensive
                _LOGGER.warning(
                    "Completion callback '%s' failed to bind: %s",
                    name,
                    exc,
                )
                _shutdown_quietly(callback)
                continue
            if not accepted:
                _shutdown_quietly(callback)
                continue
        manager = getattr(callback, "manager", None)
        if manager is not None:
            self._evaluation_manager = manager
        self.register_completion_callback(callback)
645
+
646
def stop_workflow(self, workflow: Workflow) -> Workflow:
    """Complete a workflow successfully and return it."""
    workflow.end_time = time.time()

    # Determine if this invocation should be sampled for evaluation
    sampled = self._should_sample_for_evaluation(workflow.trace_id)
    workflow.sample_for_evaluation = sampled

    self._emitter.on_end(workflow)
    self._notify_completion(workflow)
    self._entity_registry.pop(str(workflow.run_id), None)

    provider = getattr(self, "_meter_provider", None)
    if provider is not None:
        try:  # pragma: no cover
            provider.force_flush()  # type: ignore[attr-defined]
        except Exception:
            pass
    return workflow
667
+
668
def fail_workflow(self, workflow: Workflow, error: Error) -> Workflow:
    """Complete a workflow with an error and return it."""
    workflow.end_time = time.time()
    self._emitter.on_error(error, workflow)
    self._notify_completion(workflow)
    self._entity_registry.pop(str(workflow.run_id), None)

    provider = getattr(self, "_meter_provider", None)
    if provider is not None:
        try:  # pragma: no cover
            provider.force_flush()  # type: ignore[attr-defined]
        except Exception:
            pass
    return workflow
683
+
684
+ # Agent lifecycle -----------------------------------------------------
685
def start_agent(
    self, agent: AgentCreation | AgentInvocation
) -> AgentCreation | AgentInvocation:
    """Begin an agent operation (create or invoke) and register it by run id.

    For a named ``AgentInvocation`` the pair ``(name, run id)`` is pushed
    onto the agent stack so nested operations can inherit the identity.
    """
    self._refresh_capture_content()
    self._emitter.on_start(agent)

    run_key = str(agent.run_id)
    created_span = getattr(agent, "span", None)
    if created_span is not None:
        self._span_registry[run_key] = created_span
    self._entity_registry[run_key] = agent

    # Push agent identity context (use run_id as canonical id)
    if not isinstance(agent, AgentInvocation):
        return agent
    try:
        if agent.name:
            self._agent_context_stack.append((agent.name, run_key))
    except Exception:  # pragma: no cover - defensive
        pass
    return agent
705
+
706
def stop_agent(
    self, agent: AgentCreation | AgentInvocation
) -> AgentCreation | AgentInvocation:
    """Complete an agent operation successfully and return it.

    For an ``AgentInvocation`` the agent stack is popped only when its top
    entry belongs to this agent.
    """
    agent.end_time = time.time()

    # Determine if this invocation should be sampled for evaluation
    sampled = self._should_sample_for_evaluation(agent.trace_id)
    agent.sample_for_evaluation = sampled

    self._emitter.on_end(agent)
    self._notify_completion(agent)
    self._entity_registry.pop(str(agent.run_id), None)

    provider = getattr(self, "_meter_provider", None)
    if provider is not None:
        try:  # pragma: no cover
            provider.force_flush()  # type: ignore[attr-defined]
        except Exception:
            pass

    # Pop context if matches top
    if not isinstance(agent, AgentInvocation):
        return agent
    try:
        agent_stack = self._agent_context_stack
        if agent_stack:
            active_name, active_id = agent_stack[-1]
            if active_name == agent.name and active_id == str(agent.run_id):
                agent_stack.pop()
    except Exception:
        pass
    return agent
738
+
739
def fail_agent(
    self, agent: AgentCreation | AgentInvocation, error: Error
) -> AgentCreation | AgentInvocation:
    """Complete an agent operation with an error and return it.

    For an ``AgentInvocation`` the agent stack is popped only when its top
    entry belongs to this agent.
    """
    agent.end_time = time.time()
    self._emitter.on_error(error, agent)
    self._notify_completion(agent)
    self._entity_registry.pop(str(agent.run_id), None)

    provider = getattr(self, "_meter_provider", None)
    if provider is not None:
        try:  # pragma: no cover
            provider.force_flush()  # type: ignore[attr-defined]
        except Exception:
            pass

    # Pop context if this agent is active
    if not isinstance(agent, AgentInvocation):
        return agent
    try:
        agent_stack = self._agent_context_stack
        if agent_stack:
            active_name, active_id = agent_stack[-1]
            if active_name == agent.name and active_id == str(agent.run_id):
                agent_stack.pop()
    except Exception:
        pass
    return agent
765
+
766
+ # Step lifecycle ------------------------------------------------------
767
def start_step(self, step: Step) -> Step:
    """Begin a step: notify emitters and register it by run id."""
    self._refresh_capture_content()
    self._emitter.on_start(step)

    run_key = str(step.run_id)
    created_span = getattr(step, "span", None)
    if created_span is not None:
        self._span_registry[run_key] = created_span
    self._entity_registry[run_key] = step
    return step
776
+
777
def stop_step(self, step: Step) -> Step:
    """Complete a step successfully and return it."""
    step.end_time = time.time()

    # Determine if this invocation should be sampled for evaluation
    sampled = self._should_sample_for_evaluation(step.trace_id)
    step.sample_for_evaluation = sampled

    self._emitter.on_end(step)
    self._notify_completion(step)
    self._entity_registry.pop(str(step.run_id), None)

    provider = getattr(self, "_meter_provider", None)
    if provider is not None:
        try:  # pragma: no cover
            provider.force_flush()  # type: ignore[attr-defined]
        except Exception:
            pass
    return step
798
+
799
def fail_step(self, step: Step, error: Error) -> Step:
    """Complete a step with an error and return it."""
    step.end_time = time.time()
    self._emitter.on_error(error, step)
    self._notify_completion(step)
    self._entity_registry.pop(str(step.run_id), None)

    provider = getattr(self, "_meter_provider", None)
    if provider is not None:
        try:  # pragma: no cover
            provider.force_flush()  # type: ignore[attr-defined]
        except Exception:
            pass
    return step
814
+
815
def evaluate_llm(
    self,
    invocation: LLMInvocation,
    evaluators: Optional[list[str]] = None,
) -> list[EvaluationResult]:
    """Run the configured evaluators against an LLM invocation.

    Delegates to the evaluation manager's ``evaluate_now``. Returns an
    empty list when no manager is set or it reports no evaluators. A
    non-empty ``evaluators`` argument is ignored apart from a warning.
    """
    manager = getattr(self, "_evaluation_manager", None)
    if manager is not None and manager.has_evaluators:
        if evaluators:
            _LOGGER.warning(
                "Direct evaluator overrides are ignored; using configured evaluators"
            )
        return manager.evaluate_now(invocation)  # type: ignore[attr-defined]
    return []
834
+
835
def evaluate_agent(
    self,
    agent: AgentInvocation,
    evaluators: Optional[list[str]] = None,
) -> list[EvaluationResult]:
    """Run the configured evaluators against an ``AgentInvocation``.

    Returns an empty list for any other object type, when no manager is
    set, or when the manager reports no evaluators. A non-empty
    ``evaluators`` argument is ignored apart from a warning.
    """
    if not isinstance(agent, AgentInvocation):
        _LOGGER.debug(
            "Skipping agent evaluation for non-invocation type: %s",
            type(agent).__name__,
        )
        return []
    manager = getattr(self, "_evaluation_manager", None)
    if manager is not None and manager.has_evaluators:
        if evaluators:
            _LOGGER.warning(
                "Direct evaluator overrides are ignored; using configured evaluators"
            )
        return manager.evaluate_now(agent)  # type: ignore[attr-defined]
    return []
858
+
859
def wait_for_evaluations(self, timeout: Optional[float] = None) -> None:
    """Block until pending evaluations finish or ``timeout`` elapses.

    Mainly useful in tests, so that asynchronous evaluation work has
    completed before assertions run.  Does nothing when no evaluation
    manager is attached or the manager reports no evaluators.

    Args:
        timeout: Passed straight through to the manager's ``wait_for_all``.
    """
    evaluation_manager = getattr(self, "_evaluation_manager", None)
    if evaluation_manager is not None and evaluation_manager.has_evaluators:
        evaluation_manager.wait_for_all(timeout)  # type: ignore[attr-defined]
869
+
870
+ # Generic lifecycle API ------------------------------------------------
871
def start(self, obj: Any) -> Any:
    """Start telemetry for ``obj`` by routing it to the type-specific starter.

    The checks run in a fixed order and the first match wins.  Objects of
    an unrecognised type are returned untouched.

    Returns:
        Whatever the selected ``start_*`` method returns, or ``obj`` itself
        when no type matches.
    """
    if isinstance(obj, Workflow):
        starter = self.start_workflow
    elif isinstance(obj, (AgentCreation, AgentInvocation)):
        starter = self.start_agent
    elif isinstance(obj, Step):
        starter = self.start_step
    elif isinstance(obj, LLMInvocation):
        starter = self.start_llm
    elif isinstance(obj, EmbeddingInvocation):
        starter = self.start_embedding
    elif isinstance(obj, ToolCall):
        starter = self.start_tool_call
    else:
        return obj
    return starter(obj)
886
+
887
+ # ---- registry helpers -----------------------------------------------
888
def get_span_by_run_id(self, run_id: Any) -> Optional[_trace_mod.Span]:
    """Return the span registered under ``run_id``, if any.

    ``run_id`` may be a UUID or a string; it is stringified before the
    lookup in the span registry.

    Returns:
        The registered span, or ``None`` when the id cannot be converted
        to a string or nothing is registered under it.
    """
    try:
        registry_key = str(run_id)
    except Exception:
        return None
    else:
        return self._span_registry.get(registry_key)
896
+
897
def has_span(self, run_id: Any) -> bool:
    """Tell whether a span is registered under ``str(run_id)``.

    Any exception raised while stringifying the id or consulting the
    span registry is treated as "not registered".
    """
    try:
        registry_key = str(run_id)
        return registry_key in self._span_registry
    except Exception:
        return False
902
+
903
+ # ---- entity registry helpers ---------------------------------------
904
def get_entity(self, run_id: Any) -> Optional[GenAI]:
    """Return the entity registered under ``str(run_id)``, if any.

    Returns:
        The registered entity, or ``None`` when nothing is registered or
        any exception is raised during the lookup.
    """
    try:
        registry = self._entity_registry
        return registry.get(str(run_id))
    except Exception:
        return None
909
+
910
def finish_by_run_id(self, run_id: Any) -> None:
    """Finish whichever entity is registered under ``run_id``.

    The entity is resolved through ``get_entity`` and then handed to the
    generic ``finish`` method, which selects the matching ``stop_*`` call.
    Delegating keeps the type dispatch in one place instead of repeating
    the same ``isinstance`` chain here.

    Args:
        run_id: Identifier (UUID or string) the entity was registered with.

    Returns:
        Always ``None``; unknown ids are silently ignored.
    """
    entity = self.get_entity(run_id)
    if entity is not None:
        self.finish(entity)
926
+
927
def fail_by_run_id(self, run_id: Any, error: Error) -> None:
    """Fail whichever entity is registered under ``run_id``.

    The error type is first recorded on the entity's ``attributes`` under
    ``ErrorAttributes.ERROR_TYPE`` (the ``__qualname__`` of ``error.type``
    when it has one, otherwise ``str(error.type)``).  The entity is then
    handed to the generic ``fail`` method, which selects the matching
    ``fail_*`` call.  Delegating keeps the type dispatch in one place
    instead of repeating the same ``isinstance`` chain here.

    Args:
        run_id: Identifier (UUID or string) the entity was registered with.
        error: The error to report for the entity.

    Returns:
        Always ``None``; unknown ids are silently ignored.
    """
    entity = self.get_entity(run_id)
    if entity is None:
        return
    error_type_name = getattr(error.type, "__qualname__", str(error.type))
    entity.attributes.update({ErrorAttributes.ERROR_TYPE: error_type_name})
    self.fail(entity, error)
950
+
951
def finish(self, obj: Any) -> Any:
    """Finish telemetry for ``obj`` by routing it to the type-specific stopper.

    The checks run in a fixed order and the first match wins.  Objects of
    an unrecognised type are returned untouched.

    Returns:
        Whatever the selected ``stop_*`` method returns, or ``obj`` itself
        when no type matches.
    """
    if isinstance(obj, Workflow):
        stopper = self.stop_workflow
    elif isinstance(obj, (AgentCreation, AgentInvocation)):
        stopper = self.stop_agent
    elif isinstance(obj, Step):
        stopper = self.stop_step
    elif isinstance(obj, LLMInvocation):
        stopper = self.stop_llm
    elif isinstance(obj, EmbeddingInvocation):
        stopper = self.stop_embedding
    elif isinstance(obj, ToolCall):
        stopper = self.stop_tool_call
    else:
        return obj
    return stopper(obj)
966
+
967
def fail(self, obj: Any, error: Error) -> Any:
    """Fail telemetry for ``obj`` by routing it to the type-specific handler.

    The checks run in a fixed order and the first match wins.  Objects of
    an unrecognised type are returned untouched and ``error`` is not used.

    Returns:
        Whatever the selected ``fail_*`` method returns, or ``obj`` itself
        when no type matches.
    """
    if isinstance(obj, Workflow):
        failer = self.fail_workflow
    elif isinstance(obj, (AgentCreation, AgentInvocation)):
        failer = self.fail_agent
    elif isinstance(obj, Step):
        failer = self.fail_step
    elif isinstance(obj, LLMInvocation):
        failer = self.fail_llm
    elif isinstance(obj, EmbeddingInvocation):
        failer = self.fail_embedding
    elif isinstance(obj, ToolCall):
        failer = self.fail_tool_call
    else:
        return obj
    return failer(obj, error)
982
+
983
+
984
def get_telemetry_handler(
    tracer_provider: TracerProvider | None = None,
    meter_provider: MeterProvider | None = None,
    logger_provider: LoggerProvider | None = None,
) -> TelemetryHandler:
    """Return the shared ``TelemetryHandler``, creating it on first use.

    The instance is cached on this function as ``_default_handler``.  The
    provider arguments are only consulted when the handler is first
    created; once an instance is cached, later calls return it and ignore
    any providers passed in.
    """
    cached: Optional[TelemetryHandler] = getattr(
        get_telemetry_handler, "_default_handler", None
    )
    if cached is not None:
        return cached

    created = TelemetryHandler(
        tracer_provider=tracer_provider,
        meter_provider=meter_provider,
        logger_provider=logger_provider,
    )
    get_telemetry_handler._default_handler = created  # type: ignore[attr-defined]
    return created