splunk-otel-util-genai 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. opentelemetry/util/genai/__init__.py +17 -0
  2. opentelemetry/util/genai/_fsspec_upload/__init__.py +39 -0
  3. opentelemetry/util/genai/_fsspec_upload/fsspec_hook.py +184 -0
  4. opentelemetry/util/genai/attributes.py +60 -0
  5. opentelemetry/util/genai/callbacks.py +24 -0
  6. opentelemetry/util/genai/config.py +184 -0
  7. opentelemetry/util/genai/debug.py +183 -0
  8. opentelemetry/util/genai/emitters/__init__.py +25 -0
  9. opentelemetry/util/genai/emitters/composite.py +186 -0
  10. opentelemetry/util/genai/emitters/configuration.py +324 -0
  11. opentelemetry/util/genai/emitters/content_events.py +153 -0
  12. opentelemetry/util/genai/emitters/evaluation.py +519 -0
  13. opentelemetry/util/genai/emitters/metrics.py +308 -0
  14. opentelemetry/util/genai/emitters/span.py +774 -0
  15. opentelemetry/util/genai/emitters/spec.py +48 -0
  16. opentelemetry/util/genai/emitters/utils.py +961 -0
  17. opentelemetry/util/genai/environment_variables.py +200 -0
  18. opentelemetry/util/genai/handler.py +1002 -0
  19. opentelemetry/util/genai/instruments.py +44 -0
  20. opentelemetry/util/genai/interfaces.py +58 -0
  21. opentelemetry/util/genai/plugins.py +114 -0
  22. opentelemetry/util/genai/span_context.py +80 -0
  23. opentelemetry/util/genai/types.py +440 -0
  24. opentelemetry/util/genai/upload_hook.py +119 -0
  25. opentelemetry/util/genai/utils.py +182 -0
  26. opentelemetry/util/genai/version.py +15 -0
  27. splunk_otel_util_genai-0.1.3.dist-info/METADATA +70 -0
  28. splunk_otel_util_genai-0.1.3.dist-info/RECORD +31 -0
  29. splunk_otel_util_genai-0.1.3.dist-info/WHEEL +4 -0
  30. splunk_otel_util_genai-0.1.3.dist-info/entry_points.txt +5 -0
  31. splunk_otel_util_genai-0.1.3.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,961 @@
1
+ # Shared utility functions for GenAI emitters (migrated from generators/utils.py)
2
+ from __future__ import annotations
3
+
4
+ import json
5
+ from dataclasses import asdict
6
+ from typing import Any, Dict, Iterable, List, Mapping, Optional, Sequence
7
+
8
+ from opentelemetry import trace
9
+
10
+ # Removed unused Logger import (was only for backward compatibility)
11
+ from opentelemetry.metrics import Histogram
12
+ from opentelemetry.sdk._logs._internal import LogRecord as SDKLogRecord
13
+ from opentelemetry.semconv._incubating.attributes import (
14
+ gen_ai_attributes as GenAI,
15
+ )
16
+ from opentelemetry.semconv.attributes import (
17
+ server_attributes as ServerAttributes,
18
+ )
19
+ from opentelemetry.trace import Span
20
+ from opentelemetry.util.types import AttributeValue
21
+
22
+ from ..attributes import (
23
+ GEN_AI_EMBEDDINGS_DIMENSION_COUNT,
24
+ GEN_AI_EMBEDDINGS_INPUT_TEXTS,
25
+ GEN_AI_FRAMEWORK,
26
+ GEN_AI_REQUEST_ENCODING_FORMATS,
27
+ )
28
+ from ..span_context import (
29
+ build_otel_context,
30
+ extract_span_context,
31
+ store_span_context,
32
+ )
33
+ from ..types import (
34
+ AgentCreation,
35
+ AgentInvocation,
36
+ EmbeddingInvocation,
37
+ InputMessage,
38
+ LLMInvocation,
39
+ OutputMessage,
40
+ Step,
41
+ Text,
42
+ ToolCall,
43
+ ToolCallResponse,
44
+ Workflow,
45
+ )
46
+
47
# Backfill GenAI semconv constants that older releases of the incubating
# semantic-conventions package do not yet define; existing definitions win.
_MISSING_GEN_AI_ATTRS = {
    "GEN_AI_INPUT_MESSAGES": "gen_ai.input.messages",
    "GEN_AI_OUTPUT_MESSAGES": "gen_ai.output.messages",
    "GEN_AI_SYSTEM_INSTRUCTIONS": "gen_ai.system_instructions",
}
for _attr, _value in _MISSING_GEN_AI_ATTRS.items():
    if not hasattr(GenAI, _attr):
        setattr(GenAI, _attr, _value)

# Every "gen_ai."-prefixed string constant exposed by the semconv module.
# Used to filter arbitrary attribute mappings down to semconv-only keys.
_SEMCONV_GEN_AI_KEYS: set[str] = {
    candidate
    for candidate in GenAI.__dict__.values()
    if isinstance(candidate, str) and candidate.startswith("gen_ai.")
}
61
+
62
+
63
+ def _ensure_span_context(entity: Any) -> None:
64
+ """Populate cached span context metadata on the entity if missing."""
65
+
66
+ if entity is None:
67
+ return
68
+ if getattr(entity, "span_context", None) is not None:
69
+ return
70
+ span = getattr(entity, "span", None)
71
+ if span is None:
72
+ return
73
+ span_context = extract_span_context(span)
74
+ store_span_context(entity, span_context)
75
+
76
+
77
def _build_log_record(
    entity: Any,
    *,
    event_name: str,
    attributes: Dict[str, Any],
    body: Optional[Dict[str, Any]] = None,
) -> SDKLogRecord:
    """Build an SDK log record for any GenAI entity.

    Ensures the entity's span context is cached, derives the OTel context
    from its span / cached span-context pair, and copies any explicit
    trace/span identifiers the entity carries onto the record.
    """
    _ensure_span_context(entity)
    otel_context = build_otel_context(
        getattr(entity, "span", None),
        getattr(entity, "span_context", None),
    )
    record = SDKLogRecord(
        body=body or None,
        attributes=attributes,
        event_name=event_name,
        context=otel_context,
    )
    # Explicit ids on the entity override whatever the context carries.
    for ident in ("trace_id", "span_id", "trace_flags"):
        value = getattr(entity, ident, None)
        if value is not None:
            setattr(record, ident, value)
    return record
105
+
106
+
107
def _evaluation_to_log_record(
    invocation: Any,
    event_name: str,
    attributes: Dict[str, Any],
    body: Dict[str, Any] | None = None,
) -> SDKLogRecord:
    """Create an evaluation-result log record via the shared builder."""
    return _build_log_record(
        invocation,
        event_name=event_name,
        attributes=attributes,
        body=body,
    )
117
+
118
+
119
def filter_semconv_gen_ai_attributes(
    attributes: Optional[Mapping[str, Any]],
    *,
    extras: Iterable[str] = (),
) -> dict[str, Any]:
    """Return the subset of *attributes* limited to GenAI semconv keys.

    Args:
        attributes: Existing invocation attribute mapping (may be empty).
        extras: Supplemental keys (e.g. "gen_ai.framework") explicitly allowed.
    """
    if not attributes:
        return {}
    allowed = _SEMCONV_GEN_AI_KEYS | set(extras)
    return {key: value for key, value in attributes.items() if key in allowed}
142
+
143
+
144
def _flatten_message_parts(parts: Sequence[Any]) -> str:
    """Collapse message parts into one blank-line-separated text payload.

    Text parts contribute their raw content; tool calls and responses are
    JSON-encoded; anything else is JSON-encoded with a ``str()`` of the
    original part as fallback. Empty chunks are dropped.
    """

    def _dump(payload: Any, fallback: Any) -> str:
        # JSON-encode *payload*; fall back to str(fallback) on failure so
        # unserializable parts still produce something readable.
        try:
            return json.dumps(payload)
        except (TypeError, ValueError):
            return str(fallback)

    chunks: list[str] = []
    for part in parts:
        if isinstance(part, Text):
            chunks.append(part.content)
        elif isinstance(part, ToolCall):
            chunks.append(
                _dump(
                    {
                        "type": part.type,
                        "id": part.id,
                        "name": part.name,
                        "arguments": part.arguments,
                    },
                    part,
                )
            )
        elif isinstance(part, ToolCallResponse):
            chunks.append(
                _dump(
                    {
                        "type": part.type,
                        "id": part.id,
                        "response": part.response,
                    },
                    part,
                )
            )
        else:
            chunks.append(_dump(part, part))
    return "\n\n".join(chunk for chunk in chunks if chunk)
184
+
185
+
186
def build_prompt_enumeration(
    messages: Sequence[InputMessage],
) -> dict[str, Any]:
    """Flatten prompt messages into Traceloop enumerated attributes."""
    result: dict[str, Any] = {}
    for index, message in enumerate(messages):
        result[f"gen_ai.prompt.{index}.role"] = message.role
        flattened = _flatten_message_parts(message.parts)
        if flattened:
            result[f"gen_ai.prompt.{index}.content"] = flattened
    return result
198
+
199
+
200
def build_completion_enumeration(
    messages: Sequence[OutputMessage],
) -> dict[str, Any]:
    """Flatten completion messages into Traceloop enumerated attributes."""
    result: dict[str, Any] = {}
    for index, message in enumerate(messages):
        result[f"gen_ai.completion.{index}.role"] = message.role
        flattened = _flatten_message_parts(message.parts)
        if flattened:
            result[f"gen_ai.completion.{index}.content"] = flattened
        reason = getattr(message, "finish_reason", None)
        if reason:
            result[f"gen_ai.completion.{index}.finish_reason"] = reason
    return result
217
+
218
+
219
def _serialize_messages(
    messages: Sequence[InputMessage | OutputMessage],
    exclude_system: bool = False,
) -> Optional[str]:
    """Safely JSON serialize a sequence of dataclass messages.

    Uses the same format as events for consistency with semantic conventions:
    each message becomes ``{"role": ..., "parts": [...]}`` with typed part
    dicts (text / tool_call / tool_call_response).

    Args:
        messages: List of InputMessage or OutputMessage objects
        exclude_system: If True, exclude messages with role="system"

    Returns a JSON string or None on failure.
    """
    try:  # pragma: no cover - defensive
        serialized_msgs: list[dict[str, Any]] = []

        for msg in messages:
            # Handle both .role (standard) and .type (LangChain) attributes
            msg_role = getattr(msg, "role", None) or getattr(msg, "type", None)

            # Skip system messages if exclude_system is True
            if exclude_system and msg_role == "system":
                continue

            msg_dict: dict[str, Any] = {
                "role": msg_role,
                "parts": [],
            }  # parts: list[Any]

            # Add finish_reason for output messages
            if isinstance(
                msg, OutputMessage
            ):  # Only OutputMessage has finish_reason
                msg_dict["finish_reason"] = msg.finish_reason or "stop"

            # Process parts (text, tool_call, tool_call_response)
            for part in msg.parts:
                if isinstance(part, Text):
                    msg_dict["parts"].append(
                        {
                            "type": "text",
                            "content": part.content,
                        }
                    )
                elif isinstance(part, ToolCall):
                    msg_dict["parts"].append(
                        {
                            "type": "tool_call",
                            "id": part.id,
                            "name": part.name,
                            "arguments": part.arguments,
                        }
                    )
                elif isinstance(part, ToolCallResponse):
                    msg_dict["parts"].append(
                        {
                            "type": "tool_call_response",
                            "id": part.id,
                            "result": part.response,
                        }
                    )
                else:
                    # Unknown part type: serialize dataclasses via asdict,
                    # pass anything else through untouched.
                    msg_dict["parts"].append(
                        asdict(part)
                        if hasattr(part, "__dataclass_fields__")
                        else part
                    )

            serialized_msgs.append(msg_dict)

        return json.dumps(serialized_msgs)
    except (TypeError, ValueError):  # pragma: no cover
        return None
293
+
294
+
295
def _extract_system_instructions(
    messages: Sequence[InputMessage | OutputMessage],
) -> Optional[str]:
    """Extract and JSON-serialize the parts of all system-role messages.

    Accepts either ``.role`` (standard) or ``.type`` (LangChain) on a
    message. Uses the same part format as events for consistency.

    Returns a JSON string, or None when there are no system instructions
    or serialization fails.
    """
    try:  # pragma: no cover - defensive
        collected: list[Any] = []

        for message in messages:
            role = getattr(message, "role", None) or getattr(
                message, "type", None
            )
            if role != "system":
                continue
            for part in message.parts:
                if isinstance(part, Text):
                    collected.append(
                        {
                            "type": "text",
                            "content": part.content,
                        }
                    )
                elif hasattr(part, "__dataclass_fields__"):
                    # Fallback for other dataclass part types
                    collected.append(asdict(part))
                else:
                    collected.append(part)

        if collected:
            return json.dumps(collected)
        return None
    except (TypeError, ValueError):  # pragma: no cover
        return None
333
+
334
+
335
+ def _apply_function_definitions(
336
+ span: trace.Span, request_functions: Optional[List[dict[str, Any]]]
337
+ ) -> None:
338
+ """Apply request function definition attributes (idempotent).
339
+
340
+ Shared between span emitters to avoid duplicated loops.
341
+ """
342
+ if not request_functions:
343
+ return
344
+ for idx, fn in enumerate(request_functions):
345
+ try:
346
+ name = fn.get("name")
347
+ if name:
348
+ span.set_attribute(f"gen_ai.request.function.{idx}.name", name)
349
+ desc = fn.get("description")
350
+ if desc:
351
+ span.set_attribute(
352
+ f"gen_ai.request.function.{idx}.description", desc
353
+ )
354
+ params = fn.get("parameters")
355
+ if params is not None:
356
+ span.set_attribute(
357
+ f"gen_ai.request.function.{idx}.parameters", str(params)
358
+ )
359
+ except (
360
+ KeyError,
361
+ TypeError,
362
+ AttributeError,
363
+ ): # pragma: no cover - defensive
364
+ pass
365
+
366
+
367
def _apply_llm_finish_semconv(
    span: trace.Span, invocation: LLMInvocation
) -> None:
    """Apply finish-time semconv attributes for an LLMInvocation.

    Covers response model/id (set when truthy), usage tokens (set when not
    None), and function definitions (re-applied). Failures are swallowed
    defensively.
    """
    try:  # pragma: no cover - defensive
        # Truthy-gated response identity attributes.
        for key, value in (
            (GenAI.GEN_AI_RESPONSE_MODEL, invocation.response_model_name),
            (GenAI.GEN_AI_RESPONSE_ID, invocation.response_id),
        ):
            if value:
                span.set_attribute(key, value)
        # None-gated token counts (zero is a valid count).
        for key, value in (
            (GenAI.GEN_AI_USAGE_INPUT_TOKENS, invocation.input_tokens),
            (GenAI.GEN_AI_USAGE_OUTPUT_TOKENS, invocation.output_tokens),
        ):
            if value is not None:
                span.set_attribute(key, value)
        _apply_function_definitions(span, invocation.request_functions)
    except (AttributeError, TypeError):  # pragma: no cover
        pass
394
+
395
+
396
def _llm_invocation_to_log_record(
    invocation: LLMInvocation,
    capture_content: bool,
) -> Optional[SDKLogRecord]:
    """Create the "inference operation details" log record for an LLM call.

    Attributes carry metadata (framework, provider, models, token usage,
    agent context); the body carries input/output messages and system
    instructions in the unified parts format. When *capture_content* is
    False the message structure is kept but content/arguments/results are
    blanked.
    """
    _ensure_span_context(invocation)
    otel_context = build_otel_context(
        getattr(invocation, "span", None),
        getattr(invocation, "span_context", None),
    )
    trace_id = getattr(invocation, "trace_id", None)
    span_id = getattr(invocation, "span_id", None)
    trace_flags = getattr(invocation, "trace_flags", None)

    attributes: Dict[str, Any] = {
        "event.name": "gen_ai.client.inference.operation.details",
    }
    if invocation.framework:
        attributes[GEN_AI_FRAMEWORK] = invocation.framework
    if invocation.provider:
        attributes[GenAI.GEN_AI_PROVIDER_NAME] = invocation.provider
    if invocation.operation:
        attributes[GenAI.GEN_AI_OPERATION_NAME] = invocation.operation
    if invocation.request_model:
        attributes[GenAI.GEN_AI_REQUEST_MODEL] = invocation.request_model

    # Optional attributes from semantic conventions table
    if invocation.response_model_name:
        attributes[GenAI.GEN_AI_RESPONSE_MODEL] = (
            invocation.response_model_name
        )
    if invocation.response_id:
        attributes[GenAI.GEN_AI_RESPONSE_ID] = invocation.response_id
    if invocation.input_tokens is not None:
        attributes[GenAI.GEN_AI_USAGE_INPUT_TOKENS] = invocation.input_tokens
    if invocation.output_tokens is not None:
        attributes[GenAI.GEN_AI_USAGE_OUTPUT_TOKENS] = invocation.output_tokens
    # Invocation-specific semconv attributes take precedence over the
    # values derived above.
    semantic_attrs = invocation.semantic_convention_attributes()
    for key, value in semantic_attrs.items():
        attributes[key] = value

    # If choice count not in attributes, infer from output_messages length
    # (only when it differs from the default of 1).
    if (
        GenAI.GEN_AI_REQUEST_CHOICE_COUNT not in attributes
        and invocation.output_messages
        and len(invocation.output_messages) != 1
    ):
        attributes[GenAI.GEN_AI_REQUEST_CHOICE_COUNT] = len(
            invocation.output_messages
        )

    # Add agent context if available
    if invocation.agent_name:
        attributes[GenAI.GEN_AI_AGENT_NAME] = invocation.agent_name
    if invocation.agent_id:
        attributes[GenAI.GEN_AI_AGENT_ID] = invocation.agent_id

    body: Dict[str, Any] = {}
    system_instructions = []

    if invocation.input_messages:
        input_msgs = []
        for msg in invocation.input_messages:
            # Handle both .role (standard) and .type (LangChain) attributes
            msg_role = getattr(msg, "role", None) or getattr(msg, "type", None)
            if msg_role == "system":
                # System messages are diverted into system_instructions
                # rather than appearing in the input message list.
                for part in msg.parts:
                    if isinstance(part, Text):
                        part_dict = {
                            "type": "text",
                            "content": part.content if capture_content else "",
                        }
                        system_instructions.append(part_dict)
                    else:
                        try:
                            part_dict = (
                                asdict(part)
                                if hasattr(part, "__dataclass_fields__")
                                else part
                            )
                            if (
                                not capture_content
                                and isinstance(part_dict, dict)
                                and "content" in part_dict
                            ):
                                part_dict["content"] = ""
                            system_instructions.append(part_dict)
                        except (TypeError, ValueError, AttributeError):
                            pass
                continue  # Don't include in input_messages

            # Message structure: role and parts array
            input_msg = {"role": msg.role, "parts": []}

            # Process parts (text, tool_call, tool_call_response)
            for part in msg.parts:
                if isinstance(part, Text):
                    part_dict = {
                        "type": "text",
                        "content": part.content if capture_content else "",
                    }
                    input_msg["parts"].append(part_dict)
                elif isinstance(part, ToolCall):
                    tool_dict = {
                        "type": "tool_call",
                        "id": part.id,
                        "name": part.name,
                        "arguments": part.arguments if capture_content else {},
                    }
                    input_msg["parts"].append(tool_dict)
                elif isinstance(part, ToolCallResponse):
                    tool_response_dict = {
                        "type": "tool_call_response",
                        "id": part.id,
                        "result": part.response if capture_content else "",
                    }
                    input_msg["parts"].append(tool_response_dict)
                else:
                    try:
                        part_dict = (
                            asdict(part)
                            if hasattr(part, "__dataclass_fields__")
                            else part
                        )
                        if not capture_content and isinstance(part_dict, dict):
                            # Clear content fields
                            if "content" in part_dict:
                                part_dict["content"] = ""
                            if "arguments" in part_dict:
                                part_dict["arguments"] = {}
                            if "response" in part_dict:
                                part_dict["response"] = ""
                        input_msg["parts"].append(part_dict)
                    except (TypeError, ValueError, AttributeError):
                        pass

            input_msgs.append(input_msg)

        if input_msgs:
            body[GenAI.GEN_AI_INPUT_MESSAGES] = input_msgs

    if system_instructions:
        body[GenAI.GEN_AI_SYSTEM_INSTRUCTIONS] = system_instructions

    if invocation.output_messages:
        output_msgs = []

        for msg in invocation.output_messages:
            output_msg = {
                "role": msg.role,
                "parts": [],
                "finish_reason": msg.finish_reason or "stop",
            }

            # Process parts (text, tool_calls, etc.)
            for part in msg.parts:
                if isinstance(part, Text):
                    part_dict = {
                        "type": "text",
                        "content": part.content if capture_content else "",
                    }
                    output_msg["parts"].append(part_dict)
                elif isinstance(part, ToolCall):
                    tool_dict = {
                        "type": "tool_call",
                        "id": part.id,
                        "name": part.name,
                        "arguments": part.arguments if capture_content else {},
                    }
                    output_msg["parts"].append(tool_dict)
                else:
                    try:
                        part_dict = (
                            asdict(part)
                            if hasattr(part, "__dataclass_fields__")
                            else part
                        )
                        if not capture_content and isinstance(part_dict, dict):
                            # Clear content fields
                            if "content" in part_dict:
                                part_dict["content"] = ""
                            if "arguments" in part_dict:
                                part_dict["arguments"] = {}
                        output_msg["parts"].append(part_dict)
                    except (TypeError, ValueError, AttributeError):
                        pass

            output_msgs.append(output_msg)
        body[GenAI.GEN_AI_OUTPUT_MESSAGES] = output_msgs

    record = SDKLogRecord(
        body=body or None,
        attributes=attributes,
        event_name="gen_ai.client.inference.operation.details",
        context=otel_context,
    )
    if trace_id is not None:
        record.trace_id = trace_id
    if span_id is not None:
        record.span_id = span_id
    if trace_flags is not None:
        record.trace_flags = trace_flags
    return record
599
+
600
+
601
def _get_metric_attributes(
    request_model: Optional[str],
    response_model: Optional[str],
    operation_name: Optional[str],
    provider: Optional[str],
    framework: Optional[str],
    server_address: Optional[str] = None,
    server_port: Optional[int] = None,
) -> Dict[str, AttributeValue]:
    """Assemble the shared attribute set recorded on GenAI metrics.

    Only truthy values are included, except *framework*, which is added
    whenever it is not None.
    """
    attrs: Dict[str, AttributeValue] = {}
    if framework is not None:
        attrs[GEN_AI_FRAMEWORK] = framework
    for key, value in (
        (GenAI.GEN_AI_PROVIDER_NAME, provider),
        (GenAI.GEN_AI_OPERATION_NAME, operation_name),
        (GenAI.GEN_AI_REQUEST_MODEL, request_model),
        (GenAI.GEN_AI_RESPONSE_MODEL, response_model),
        (ServerAttributes.SERVER_ADDRESS, server_address),
        (ServerAttributes.SERVER_PORT, server_port),
    ):
        if value:
            attrs[key] = value
    return attrs
626
+
627
+
628
def _record_token_metrics(
    token_histogram: Histogram,
    prompt_tokens: Optional[AttributeValue],
    completion_tokens: Optional[AttributeValue],
    metric_attributes: Dict[str, AttributeValue],
    *,
    span: Optional[Span] = None,
) -> None:
    """Record prompt/completion token counts on the token histogram.

    Non-numeric token values are silently skipped. When *span* is given it
    is bound into the metric context (for exemplar linkage); binding
    failures fall back to no context.
    """
    context = None
    if span is not None:
        try:
            context = trace.set_span_in_context(span)
        except (TypeError, ValueError):  # pragma: no cover - defensive
            context = None

    # Input tokens first, then completion tokens — same order as before.
    for token_type, count in (
        (GenAI.GenAiTokenTypeValues.INPUT.value, prompt_tokens),
        (GenAI.GenAiTokenTypeValues.COMPLETION.value, completion_tokens),
    ):
        attrs: Dict[str, AttributeValue] = {
            GenAI.GEN_AI_TOKEN_TYPE: token_type
        }
        attrs.update(metric_attributes)
        if isinstance(count, (int, float)):
            token_histogram.record(count, attributes=attrs, context=context)
659
+
660
+
661
+ def _record_duration(
662
+ duration_histogram: Histogram,
663
+ invocation: LLMInvocation | EmbeddingInvocation | ToolCall,
664
+ metric_attributes: Dict[str, AttributeValue],
665
+ *,
666
+ span: Optional[Span] = None,
667
+ ) -> None:
668
+ if invocation.end_time is not None:
669
+ elapsed: float = invocation.end_time - invocation.start_time
670
+ context = None
671
+ if span is not None:
672
+ try:
673
+ context = trace.set_span_in_context(span)
674
+ except (
675
+ TypeError,
676
+ ValueError,
677
+ AttributeError,
678
+ ): # pragma: no cover - defensive
679
+ context = None
680
+ duration_histogram.record(
681
+ elapsed, attributes=metric_attributes, context=context
682
+ )
683
+
684
+
685
+ # Helper functions for agentic types
686
+ def _build_text_message(
687
+ role: str, text: str, *, capture: bool, finish_reason: Optional[str] = None
688
+ ) -> dict[str, Any]:
689
+ msg: dict[str, Any] = {
690
+ "role": role,
691
+ "parts": [{"type": "text", "content": text if capture else ""}],
692
+ }
693
+ if finish_reason is not None:
694
+ msg["finish_reason"] = finish_reason
695
+ return msg
696
+
697
+
698
def _workflow_to_log_record(
    workflow: Workflow, capture_content: bool
) -> Optional[SDKLogRecord]:
    """Create a workflow log record using the unified message format.

    Initial input / final output are rendered as standardized user /
    assistant messages; workflow.description doubles as the (always
    present) system-instructions content.
    """
    attributes: Dict[str, Any] = {
        # TODO: fixme in UI
        # "event.name": "gen_ai.client.workflow.operation.details",
        "event.name": "gen_ai.client.inference.operation.details",
        "gen_ai.workflow.name": workflow.name,
    }
    if workflow.workflow_type:
        attributes["gen_ai.workflow.type"] = workflow.workflow_type
    if workflow.description:
        attributes["gen_ai.workflow.description"] = workflow.description
    if workflow.framework:
        attributes[GEN_AI_FRAMEWORK] = workflow.framework

    body: Dict[str, Any] = {}
    if workflow.initial_input:
        body[GenAI.GEN_AI_INPUT_MESSAGES] = [
            _build_text_message(
                "user", workflow.initial_input, capture=capture_content
            )
        ]
    if workflow.final_output:
        body[GenAI.GEN_AI_OUTPUT_MESSAGES] = [
            _build_text_message(
                "assistant",
                workflow.final_output,
                capture=capture_content,
                finish_reason="stop",
            )
        ]
    # System-instructions key is always present (empty list if none);
    # workflow.description is the source of the instruction text.
    instructions: list[dict[str, Any]] = []
    if workflow.description:
        instructions.append(
            {
                "type": "text",
                "content": workflow.description if capture_content else "",
            }
        )
    body[GenAI.GEN_AI_SYSTEM_INSTRUCTIONS] = instructions
    # Ensure finish_reason present on all output messages (defensive)
    for message in body.get(GenAI.GEN_AI_OUTPUT_MESSAGES, ()):
        message.setdefault("finish_reason", "stop")
    return _build_log_record(
        workflow,
        # TODO: fixme in UI
        # event_name="gen_ai.client.workflow.operation.details",
        event_name="gen_ai.client.inference.operation.details",
        attributes=attributes,
        body=body or None,
    )
761
+
762
+
763
def _agent_to_log_record(
    agent: AgentCreation | AgentInvocation, capture_content: bool
) -> Optional[SDKLogRecord]:
    """Create a log record for an agent event using unified message format.

    Attributes carry agent identity and framework metadata; the body
    carries system instructions (key always present, possibly an empty
    list) plus optional input/output messages. Content is blanked when
    *capture_content* is False. Returns None when the body is empty.
    """
    attributes: Dict[str, Any] = {
        # TODO: fixme in UI
        # "event.name": "gen_ai.client.agent.operation.details",
        "event.name": "gen_ai.client.inference.operation.details",
    }
    if agent.framework:
        attributes[GEN_AI_FRAMEWORK] = agent.framework
    attributes[GenAI.GEN_AI_AGENT_NAME] = agent.name
    attributes[GenAI.GEN_AI_AGENT_ID] = str(agent.run_id)

    body: Dict[str, Any] = {}
    # System instructions treated similarly to LLM system messages.
    # Always include the key (empty list if none). The previous
    # `if agent.system_instructions: pass` dead branch has been removed.
    agent_instructions: list[dict[str, Any]] = []
    if agent.system_instructions:
        agent_instructions.append(
            {
                "type": "text",
                "content": agent.system_instructions
                if capture_content
                else "",
            }
        )
    body[GenAI.GEN_AI_SYSTEM_INSTRUCTIONS] = agent_instructions
    input_context = getattr(agent, "input_context", None)
    if input_context:
        body[GenAI.GEN_AI_INPUT_MESSAGES] = [
            _build_text_message("user", input_context, capture=capture_content)
        ]
    output_result = getattr(agent, "output_result", None)
    if output_result:
        body[GenAI.GEN_AI_OUTPUT_MESSAGES] = [
            _build_text_message(
                "assistant",
                output_result,
                capture=capture_content,
                finish_reason="stop",
            )
        ]
    # Ensure finish_reason present on all output messages (defensive)
    for message in body.get(GenAI.GEN_AI_OUTPUT_MESSAGES, ()):
        message.setdefault("finish_reason", "stop")
    if not body:  # pragma: no cover - instructions key keeps body non-empty
        return None
    return _build_log_record(
        agent,
        # TODO: fixme in UI
        # event_name="gen_ai.client.agent.operation.details",
        event_name="gen_ai.client.inference.operation.details",
        attributes=attributes,
        body=body,
    )
823
+
824
+
825
def _step_to_log_record(
    step: Step, capture_content: bool
) -> Optional[SDKLogRecord]:
    """Create a log record for a step event.

    Note: Step events are not yet in semantic conventions but follow
    the message structure pattern for consistency. Attributes carry
    metadata only; the body carries input/output data, blanked (but with
    keys preserved) when *capture_content* is False.
    """
    attributes: Dict[str, Any] = {
        "event.name": "gen_ai.client.step.operation.details",
        "gen_ai.step.name": step.name,
    }
    if step.step_type:
        attributes["gen_ai.step.type"] = step.step_type
    if step.objective:
        attributes["gen_ai.step.objective"] = step.objective
    if step.source:
        attributes["gen_ai.step.source"] = step.source
    if step.assigned_agent:
        attributes[GenAI.GEN_AI_AGENT_NAME] = step.assigned_agent
    if step.status:
        attributes["gen_ai.step.status"] = step.status

    # Body contains messages/content only (following semantic conventions
    # pattern); when capture is disabled, emit the structure with empty
    # content, mirroring LLM message redaction.
    body: Dict[str, Any] = {}
    if step.input_data:
        body["input_data"] = step.input_data if capture_content else ""
    if step.output_data:
        body["output_data"] = step.output_data if capture_content else ""

    # Delegate span-context caching, OTel context derivation, and trace-id
    # propagation to the shared builder instead of duplicating it here.
    return _build_log_record(
        step,
        event_name="gen_ai.client.step.operation.details",
        attributes=attributes,
        body=body or None,
    )
888
+
889
+
890
def _embedding_to_log_record(
    embedding: EmbeddingInvocation, capture_content: bool
) -> Optional[SDKLogRecord]:
    """Create a log record for an embedding event.

    Attributes carry operation/provider/model metadata plus optional
    dimension, token-usage, server, error, and agent details; the body
    carries the input texts (replaced by an empty list when
    *capture_content* is False).
    """
    # Attributes contain metadata (not content)
    attributes: Dict[str, Any] = {
        "event.name": "gen_ai.client.embedding.operation.details",
    }

    # Core attributes
    if embedding.operation_name:
        attributes[GenAI.GEN_AI_OPERATION_NAME] = embedding.operation_name
    if embedding.provider:
        attributes[GenAI.GEN_AI_PROVIDER_NAME] = embedding.provider
    if embedding.request_model:
        attributes[GenAI.GEN_AI_REQUEST_MODEL] = embedding.request_model

    # Optional attributes
    if embedding.dimension_count:
        attributes[GEN_AI_EMBEDDINGS_DIMENSION_COUNT] = (
            embedding.dimension_count
        )
    if embedding.input_tokens is not None:
        attributes[GenAI.GEN_AI_USAGE_INPUT_TOKENS] = embedding.input_tokens
    if embedding.server_address:
        attributes[ServerAttributes.SERVER_ADDRESS] = embedding.server_address
    if embedding.server_port:
        attributes[ServerAttributes.SERVER_PORT] = embedding.server_port
    if embedding.encoding_formats:
        attributes[GEN_AI_REQUEST_ENCODING_FORMATS] = (
            embedding.encoding_formats
        )
    if embedding.error_type:
        attributes["error.type"] = embedding.error_type

    # Add agent context if available
    if embedding.agent_name:
        attributes[GenAI.GEN_AI_AGENT_NAME] = embedding.agent_name
    if embedding.agent_id:
        attributes[GenAI.GEN_AI_AGENT_ID] = embedding.agent_id

    # Body contains content (input texts); structure is kept with empty
    # content when capture is disabled.
    body: Dict[str, Any] = {}
    if embedding.input_texts:
        body[GEN_AI_EMBEDDINGS_INPUT_TEXTS] = (
            embedding.input_texts if capture_content else []
        )

    # Delegate span-context caching, OTel context derivation, and trace-id
    # propagation to the shared builder instead of duplicating the
    # SDKLogRecord construction here.
    return _build_log_record(
        embedding,
        event_name="gen_ai.client.embedding.operation.details",
        attributes=attributes,
        body=body or None,
    )