splunk-otel-util-genai 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. opentelemetry/util/genai/__init__.py +17 -0
  2. opentelemetry/util/genai/_fsspec_upload/__init__.py +39 -0
  3. opentelemetry/util/genai/_fsspec_upload/fsspec_hook.py +184 -0
  4. opentelemetry/util/genai/attributes.py +60 -0
  5. opentelemetry/util/genai/callbacks.py +24 -0
  6. opentelemetry/util/genai/config.py +184 -0
  7. opentelemetry/util/genai/debug.py +183 -0
  8. opentelemetry/util/genai/emitters/__init__.py +25 -0
  9. opentelemetry/util/genai/emitters/composite.py +186 -0
  10. opentelemetry/util/genai/emitters/configuration.py +324 -0
  11. opentelemetry/util/genai/emitters/content_events.py +153 -0
  12. opentelemetry/util/genai/emitters/evaluation.py +519 -0
  13. opentelemetry/util/genai/emitters/metrics.py +308 -0
  14. opentelemetry/util/genai/emitters/span.py +774 -0
  15. opentelemetry/util/genai/emitters/spec.py +48 -0
  16. opentelemetry/util/genai/emitters/utils.py +961 -0
  17. opentelemetry/util/genai/environment_variables.py +200 -0
  18. opentelemetry/util/genai/handler.py +1002 -0
  19. opentelemetry/util/genai/instruments.py +44 -0
  20. opentelemetry/util/genai/interfaces.py +58 -0
  21. opentelemetry/util/genai/plugins.py +114 -0
  22. opentelemetry/util/genai/span_context.py +80 -0
  23. opentelemetry/util/genai/types.py +440 -0
  24. opentelemetry/util/genai/upload_hook.py +119 -0
  25. opentelemetry/util/genai/utils.py +182 -0
  26. opentelemetry/util/genai/version.py +15 -0
  27. splunk_otel_util_genai-0.1.3.dist-info/METADATA +70 -0
  28. splunk_otel_util_genai-0.1.3.dist-info/RECORD +31 -0
  29. splunk_otel_util_genai-0.1.3.dist-info/WHEEL +4 -0
  30. splunk_otel_util_genai-0.1.3.dist-info/entry_points.txt +5 -0
  31. splunk_otel_util_genai-0.1.3.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,961 @@
1
+ # Shared utility functions for GenAI emitters (migrated from generators/utils.py)
2
+ from __future__ import annotations
3
+
4
+ import json
5
+ from dataclasses import asdict
6
+ from typing import Any, Dict, Iterable, List, Mapping, Optional, Sequence
7
+
8
+ from opentelemetry import trace
9
+
10
+ # Removed unused Logger import (was only for backward compatibility)
11
+ from opentelemetry.metrics import Histogram
12
+ from opentelemetry.sdk._logs._internal import LogRecord as SDKLogRecord
13
+ from opentelemetry.semconv._incubating.attributes import (
14
+ gen_ai_attributes as GenAI,
15
+ )
16
+ from opentelemetry.semconv.attributes import (
17
+ server_attributes as ServerAttributes,
18
+ )
19
+ from opentelemetry.trace import Span
20
+ from opentelemetry.util.types import AttributeValue
21
+
22
+ from ..attributes import (
23
+ GEN_AI_EMBEDDINGS_DIMENSION_COUNT,
24
+ GEN_AI_EMBEDDINGS_INPUT_TEXTS,
25
+ GEN_AI_FRAMEWORK,
26
+ GEN_AI_REQUEST_ENCODING_FORMATS,
27
+ )
28
+ from ..span_context import (
29
+ build_otel_context,
30
+ extract_span_context,
31
+ store_span_context,
32
+ )
33
+ from ..types import (
34
+ AgentCreation,
35
+ AgentInvocation,
36
+ EmbeddingInvocation,
37
+ InputMessage,
38
+ LLMInvocation,
39
+ OutputMessage,
40
+ Step,
41
+ Text,
42
+ ToolCall,
43
+ ToolCallResponse,
44
+ Workflow,
45
+ )
46
+
47
# Backfill GenAI semconv constants that older releases of the incubating
# semantic-conventions package do not yet define; existing definitions win.
_MISSING_GEN_AI_ATTRS = {
    "GEN_AI_INPUT_MESSAGES": "gen_ai.input.messages",
    "GEN_AI_OUTPUT_MESSAGES": "gen_ai.output.messages",
    "GEN_AI_SYSTEM_INSTRUCTIONS": "gen_ai.system_instructions",
}
for _attr, _value in _MISSING_GEN_AI_ATTRS.items():
    if not hasattr(GenAI, _attr):
        setattr(GenAI, _attr, _value)

# Every "gen_ai."-prefixed string constant exposed by the semconv module.
# Used to filter arbitrary attribute mappings down to semconv-only keys.
_SEMCONV_GEN_AI_KEYS: set[str] = {
    candidate
    for candidate in GenAI.__dict__.values()
    if isinstance(candidate, str) and candidate.startswith("gen_ai.")
}
61
+
62
+
63
+ def _ensure_span_context(entity: Any) -> None:
64
+ """Populate cached span context metadata on the entity if missing."""
65
+
66
+ if entity is None:
67
+ return
68
+ if getattr(entity, "span_context", None) is not None:
69
+ return
70
+ span = getattr(entity, "span", None)
71
+ if span is None:
72
+ return
73
+ span_context = extract_span_context(span)
74
+ store_span_context(entity, span_context)
75
+
76
+
77
def _build_log_record(
    entity: Any,
    *,
    event_name: str,
    attributes: Dict[str, Any],
    body: Optional[Dict[str, Any]] = None,
) -> SDKLogRecord:
    """Build an SDK log record for any GenAI entity.

    Ensures the entity's span context is cached, derives the OTel context
    from its span / cached span-context pair, and copies any explicit
    trace/span identifiers the entity carries onto the record.
    """
    _ensure_span_context(entity)
    otel_context = build_otel_context(
        getattr(entity, "span", None),
        getattr(entity, "span_context", None),
    )
    record = SDKLogRecord(
        body=body or None,
        attributes=attributes,
        event_name=event_name,
        context=otel_context,
    )
    # Explicit ids on the entity override whatever the context carries.
    for ident in ("trace_id", "span_id", "trace_flags"):
        value = getattr(entity, ident, None)
        if value is not None:
            setattr(record, ident, value)
    return record
105
+
106
+
107
def _evaluation_to_log_record(
    invocation: Any,
    event_name: str,
    attributes: Dict[str, Any],
    body: Dict[str, Any] | None = None,
) -> SDKLogRecord:
    """Create an evaluation-result log record via the shared builder."""
    return _build_log_record(
        invocation,
        event_name=event_name,
        attributes=attributes,
        body=body,
    )
117
+
118
+
119
def filter_semconv_gen_ai_attributes(
    attributes: Optional[Mapping[str, Any]],
    *,
    extras: Iterable[str] = (),
) -> dict[str, Any]:
    """Return the subset of *attributes* limited to GenAI semconv keys.

    Args:
        attributes: Existing invocation attribute mapping (may be empty).
        extras: Supplemental keys (e.g. "gen_ai.framework") explicitly allowed.
    """
    if not attributes:
        return {}
    allowed = _SEMCONV_GEN_AI_KEYS | set(extras)
    return {key: value for key, value in attributes.items() if key in allowed}
142
+
143
+
144
def _flatten_message_parts(parts: Sequence[Any]) -> str:
    """Collapse message parts into one blank-line-separated text payload.

    Text parts contribute their raw content; tool calls and responses are
    JSON-encoded; anything else is JSON-encoded with a ``str()`` of the
    original part as fallback. Empty chunks are dropped.
    """

    def _dump(payload: Any, fallback: Any) -> str:
        # JSON-encode *payload*; fall back to str(fallback) on failure so
        # unserializable parts still produce something readable.
        try:
            return json.dumps(payload)
        except (TypeError, ValueError):
            return str(fallback)

    chunks: list[str] = []
    for part in parts:
        if isinstance(part, Text):
            chunks.append(part.content)
        elif isinstance(part, ToolCall):
            chunks.append(
                _dump(
                    {
                        "type": part.type,
                        "id": part.id,
                        "name": part.name,
                        "arguments": part.arguments,
                    },
                    part,
                )
            )
        elif isinstance(part, ToolCallResponse):
            chunks.append(
                _dump(
                    {
                        "type": part.type,
                        "id": part.id,
                        "response": part.response,
                    },
                    part,
                )
            )
        else:
            chunks.append(_dump(part, part))
    return "\n\n".join(chunk for chunk in chunks if chunk)
184
+
185
+
186
def build_prompt_enumeration(
    messages: Sequence[InputMessage],
) -> dict[str, Any]:
    """Flatten prompt messages into Traceloop enumerated attributes."""
    result: dict[str, Any] = {}
    for index, message in enumerate(messages):
        result[f"gen_ai.prompt.{index}.role"] = message.role
        flattened = _flatten_message_parts(message.parts)
        if flattened:
            result[f"gen_ai.prompt.{index}.content"] = flattened
    return result
198
+
199
+
200
def build_completion_enumeration(
    messages: Sequence[OutputMessage],
) -> dict[str, Any]:
    """Flatten completion messages into Traceloop enumerated attributes."""
    result: dict[str, Any] = {}
    for index, message in enumerate(messages):
        result[f"gen_ai.completion.{index}.role"] = message.role
        flattened = _flatten_message_parts(message.parts)
        if flattened:
            result[f"gen_ai.completion.{index}.content"] = flattened
        reason = getattr(message, "finish_reason", None)
        if reason:
            result[f"gen_ai.completion.{index}.finish_reason"] = reason
    return result
217
+
218
+
219
def _serialize_messages(
    messages: Sequence[InputMessage | OutputMessage],
    exclude_system: bool = False,
) -> Optional[str]:
    """Safely JSON serialize a sequence of dataclass messages.

    Uses the same format as events for consistency with semantic conventions:
    each message becomes ``{"role": ..., "parts": [...]}`` with typed part
    dicts (text / tool_call / tool_call_response).

    Args:
        messages: List of InputMessage or OutputMessage objects
        exclude_system: If True, exclude messages with role="system"

    Returns a JSON string or None on failure.
    """
    try:  # pragma: no cover - defensive
        serialized_msgs: list[dict[str, Any]] = []

        for msg in messages:
            # Handle both .role (standard) and .type (LangChain) attributes
            msg_role = getattr(msg, "role", None) or getattr(msg, "type", None)

            # Skip system messages if exclude_system is True
            if exclude_system and msg_role == "system":
                continue

            msg_dict: dict[str, Any] = {
                "role": msg_role,
                "parts": [],
            }  # parts: list[Any]

            # Add finish_reason for output messages
            if isinstance(
                msg, OutputMessage
            ):  # Only OutputMessage has finish_reason
                msg_dict["finish_reason"] = msg.finish_reason or "stop"

            # Process parts (text, tool_call, tool_call_response)
            for part in msg.parts:
                if isinstance(part, Text):
                    msg_dict["parts"].append(
                        {
                            "type": "text",
                            "content": part.content,
                        }
                    )
                elif isinstance(part, ToolCall):
                    msg_dict["parts"].append(
                        {
                            "type": "tool_call",
                            "id": part.id,
                            "name": part.name,
                            "arguments": part.arguments,
                        }
                    )
                elif isinstance(part, ToolCallResponse):
                    msg_dict["parts"].append(
                        {
                            "type": "tool_call_response",
                            "id": part.id,
                            "result": part.response,
                        }
                    )
                else:
                    # Unknown part type: serialize dataclasses via asdict,
                    # pass anything else through untouched.
                    msg_dict["parts"].append(
                        asdict(part)
                        if hasattr(part, "__dataclass_fields__")
                        else part
                    )

            serialized_msgs.append(msg_dict)

        return json.dumps(serialized_msgs)
    except (TypeError, ValueError):  # pragma: no cover
        return None
293
+
294
+
295
def _extract_system_instructions(
    messages: Sequence[InputMessage | OutputMessage],
) -> Optional[str]:
    """Extract and JSON-serialize the parts of all system-role messages.

    Accepts either ``.role`` (standard) or ``.type`` (LangChain) on a
    message. Uses the same part format as events for consistency.

    Returns a JSON string, or None when there are no system instructions
    or serialization fails.
    """
    try:  # pragma: no cover - defensive
        collected: list[Any] = []

        for message in messages:
            role = getattr(message, "role", None) or getattr(
                message, "type", None
            )
            if role != "system":
                continue
            for part in message.parts:
                if isinstance(part, Text):
                    collected.append(
                        {
                            "type": "text",
                            "content": part.content,
                        }
                    )
                elif hasattr(part, "__dataclass_fields__"):
                    # Fallback for other dataclass part types
                    collected.append(asdict(part))
                else:
                    collected.append(part)

        if collected:
            return json.dumps(collected)
        return None
    except (TypeError, ValueError):  # pragma: no cover
        return None
333
+
334
+
335
+ def _apply_function_definitions(
336
+ span: trace.Span, request_functions: Optional[List[dict[str, Any]]]
337
+ ) -> None:
338
+ """Apply request function definition attributes (idempotent).
339
+
340
+ Shared between span emitters to avoid duplicated loops.
341
+ """
342
+ if not request_functions:
343
+ return
344
+ for idx, fn in enumerate(request_functions):
345
+ try:
346
+ name = fn.get("name")
347
+ if name:
348
+ span.set_attribute(f"gen_ai.request.function.{idx}.name", name)
349
+ desc = fn.get("description")
350
+ if desc:
351
+ span.set_attribute(
352
+ f"gen_ai.request.function.{idx}.description", desc
353
+ )
354
+ params = fn.get("parameters")
355
+ if params is not None:
356
+ span.set_attribute(
357
+ f"gen_ai.request.function.{idx}.parameters", str(params)
358
+ )
359
+ except (
360
+ KeyError,
361
+ TypeError,
362
+ AttributeError,
363
+ ): # pragma: no cover - defensive
364
+ pass
365
+
366
+
367
def _apply_llm_finish_semconv(
    span: trace.Span, invocation: LLMInvocation
) -> None:
    """Apply finish-time semconv attributes for an LLMInvocation.

    Covers response model/id (set when truthy), usage tokens (set when not
    None), and function definitions (re-applied). Failures are swallowed
    defensively.
    """
    try:  # pragma: no cover - defensive
        # Truthy-gated response identity attributes.
        for key, value in (
            (GenAI.GEN_AI_RESPONSE_MODEL, invocation.response_model_name),
            (GenAI.GEN_AI_RESPONSE_ID, invocation.response_id),
        ):
            if value:
                span.set_attribute(key, value)
        # None-gated token counts (zero is a valid count).
        for key, value in (
            (GenAI.GEN_AI_USAGE_INPUT_TOKENS, invocation.input_tokens),
            (GenAI.GEN_AI_USAGE_OUTPUT_TOKENS, invocation.output_tokens),
        ):
            if value is not None:
                span.set_attribute(key, value)
        _apply_function_definitions(span, invocation.request_functions)
    except (AttributeError, TypeError):  # pragma: no cover
        pass
394
+
395
+
396
def _llm_invocation_to_log_record(
    invocation: LLMInvocation,
    capture_content: bool,
) -> Optional[SDKLogRecord]:
    """Create the "inference operation details" log record for an LLM call.

    Attributes carry metadata (framework, provider, models, token usage,
    agent context); the body carries input/output messages and system
    instructions in the unified parts format. When *capture_content* is
    False the message structure is kept but content/arguments/results are
    blanked.
    """
    _ensure_span_context(invocation)
    otel_context = build_otel_context(
        getattr(invocation, "span", None),
        getattr(invocation, "span_context", None),
    )
    trace_id = getattr(invocation, "trace_id", None)
    span_id = getattr(invocation, "span_id", None)
    trace_flags = getattr(invocation, "trace_flags", None)

    attributes: Dict[str, Any] = {
        "event.name": "gen_ai.client.inference.operation.details",
    }
    if invocation.framework:
        attributes[GEN_AI_FRAMEWORK] = invocation.framework
    if invocation.provider:
        attributes[GenAI.GEN_AI_PROVIDER_NAME] = invocation.provider
    if invocation.operation:
        attributes[GenAI.GEN_AI_OPERATION_NAME] = invocation.operation
    if invocation.request_model:
        attributes[GenAI.GEN_AI_REQUEST_MODEL] = invocation.request_model

    # Optional attributes from semantic conventions table
    if invocation.response_model_name:
        attributes[GenAI.GEN_AI_RESPONSE_MODEL] = (
            invocation.response_model_name
        )
    if invocation.response_id:
        attributes[GenAI.GEN_AI_RESPONSE_ID] = invocation.response_id
    if invocation.input_tokens is not None:
        attributes[GenAI.GEN_AI_USAGE_INPUT_TOKENS] = invocation.input_tokens
    if invocation.output_tokens is not None:
        attributes[GenAI.GEN_AI_USAGE_OUTPUT_TOKENS] = invocation.output_tokens
    # Invocation-specific semconv attributes take precedence over the
    # values derived above.
    semantic_attrs = invocation.semantic_convention_attributes()
    for key, value in semantic_attrs.items():
        attributes[key] = value

    # If choice count not in attributes, infer from output_messages length
    # (only when it differs from the default of 1).
    if (
        GenAI.GEN_AI_REQUEST_CHOICE_COUNT not in attributes
        and invocation.output_messages
        and len(invocation.output_messages) != 1
    ):
        attributes[GenAI.GEN_AI_REQUEST_CHOICE_COUNT] = len(
            invocation.output_messages
        )

    # Add agent context if available
    if invocation.agent_name:
        attributes[GenAI.GEN_AI_AGENT_NAME] = invocation.agent_name
    if invocation.agent_id:
        attributes[GenAI.GEN_AI_AGENT_ID] = invocation.agent_id

    body: Dict[str, Any] = {}
    system_instructions = []

    if invocation.input_messages:
        input_msgs = []
        for msg in invocation.input_messages:
            # Handle both .role (standard) and .type (LangChain) attributes
            msg_role = getattr(msg, "role", None) or getattr(msg, "type", None)
            if msg_role == "system":
                # System messages are diverted into system_instructions
                # rather than appearing in the input message list.
                for part in msg.parts:
                    if isinstance(part, Text):
                        part_dict = {
                            "type": "text",
                            "content": part.content if capture_content else "",
                        }
                        system_instructions.append(part_dict)
                    else:
                        try:
                            part_dict = (
                                asdict(part)
                                if hasattr(part, "__dataclass_fields__")
                                else part
                            )
                            if (
                                not capture_content
                                and isinstance(part_dict, dict)
                                and "content" in part_dict
                            ):
                                part_dict["content"] = ""
                            system_instructions.append(part_dict)
                        except (TypeError, ValueError, AttributeError):
                            pass
                continue  # Don't include in input_messages

            # Message structure: role and parts array
            input_msg = {"role": msg.role, "parts": []}

            # Process parts (text, tool_call, tool_call_response)
            for part in msg.parts:
                if isinstance(part, Text):
                    part_dict = {
                        "type": "text",
                        "content": part.content if capture_content else "",
                    }
                    input_msg["parts"].append(part_dict)
                elif isinstance(part, ToolCall):
                    tool_dict = {
                        "type": "tool_call",
                        "id": part.id,
                        "name": part.name,
                        "arguments": part.arguments if capture_content else {},
                    }
                    input_msg["parts"].append(tool_dict)
                elif isinstance(part, ToolCallResponse):
                    tool_response_dict = {
                        "type": "tool_call_response",
                        "id": part.id,
                        "result": part.response if capture_content else "",
                    }
                    input_msg["parts"].append(tool_response_dict)
                else:
                    try:
                        part_dict = (
                            asdict(part)
                            if hasattr(part, "__dataclass_fields__")
                            else part
                        )
                        if not capture_content and isinstance(part_dict, dict):
                            # Clear content fields
                            if "content" in part_dict:
                                part_dict["content"] = ""
                            if "arguments" in part_dict:
                                part_dict["arguments"] = {}
                            if "response" in part_dict:
                                part_dict["response"] = ""
                        input_msg["parts"].append(part_dict)
                    except (TypeError, ValueError, AttributeError):
                        pass

            input_msgs.append(input_msg)

        if input_msgs:
            body[GenAI.GEN_AI_INPUT_MESSAGES] = input_msgs

    if system_instructions:
        body[GenAI.GEN_AI_SYSTEM_INSTRUCTIONS] = system_instructions

    if invocation.output_messages:
        output_msgs = []

        for msg in invocation.output_messages:
            output_msg = {
                "role": msg.role,
                "parts": [],
                "finish_reason": msg.finish_reason or "stop",
            }

            # Process parts (text, tool_calls, etc.)
            for part in msg.parts:
                if isinstance(part, Text):
                    part_dict = {
                        "type": "text",
                        "content": part.content if capture_content else "",
                    }
                    output_msg["parts"].append(part_dict)
                elif isinstance(part, ToolCall):
                    tool_dict = {
                        "type": "tool_call",
                        "id": part.id,
                        "name": part.name,
                        "arguments": part.arguments if capture_content else {},
                    }
                    output_msg["parts"].append(tool_dict)
                else:
                    try:
                        part_dict = (
                            asdict(part)
                            if hasattr(part, "__dataclass_fields__")
                            else part
                        )
                        if not capture_content and isinstance(part_dict, dict):
                            # Clear content fields
                            if "content" in part_dict:
                                part_dict["content"] = ""
                            if "arguments" in part_dict:
                                part_dict["arguments"] = {}
                        output_msg["parts"].append(part_dict)
                    except (TypeError, ValueError, AttributeError):
                        pass

            output_msgs.append(output_msg)
        body[GenAI.GEN_AI_OUTPUT_MESSAGES] = output_msgs

    record = SDKLogRecord(
        body=body or None,
        attributes=attributes,
        event_name="gen_ai.client.inference.operation.details",
        context=otel_context,
    )
    if trace_id is not None:
        record.trace_id = trace_id
    if span_id is not None:
        record.span_id = span_id
    if trace_flags is not None:
        record.trace_flags = trace_flags
    return record
599
+
600
+
601
def _get_metric_attributes(
    request_model: Optional[str],
    response_model: Optional[str],
    operation_name: Optional[str],
    provider: Optional[str],
    framework: Optional[str],
    server_address: Optional[str] = None,
    server_port: Optional[int] = None,
) -> Dict[str, AttributeValue]:
    """Assemble the shared attribute set recorded on GenAI metrics.

    Only truthy values are included, except *framework*, which is added
    whenever it is not None.
    """
    attrs: Dict[str, AttributeValue] = {}
    if framework is not None:
        attrs[GEN_AI_FRAMEWORK] = framework
    for key, value in (
        (GenAI.GEN_AI_PROVIDER_NAME, provider),
        (GenAI.GEN_AI_OPERATION_NAME, operation_name),
        (GenAI.GEN_AI_REQUEST_MODEL, request_model),
        (GenAI.GEN_AI_RESPONSE_MODEL, response_model),
        (ServerAttributes.SERVER_ADDRESS, server_address),
        (ServerAttributes.SERVER_PORT, server_port),
    ):
        if value:
            attrs[key] = value
    return attrs
626
+
627
+
628
def _record_token_metrics(
    token_histogram: Histogram,
    prompt_tokens: Optional[AttributeValue],
    completion_tokens: Optional[AttributeValue],
    metric_attributes: Dict[str, AttributeValue],
    *,
    span: Optional[Span] = None,
) -> None:
    """Record prompt/completion token counts on the token histogram.

    Non-numeric token values are silently skipped. When *span* is given it
    is bound into the metric context (for exemplar linkage); binding
    failures fall back to no context.
    """
    context = None
    if span is not None:
        try:
            context = trace.set_span_in_context(span)
        except (TypeError, ValueError):  # pragma: no cover - defensive
            context = None

    # Input tokens first, then completion tokens — same order as before.
    for token_type, count in (
        (GenAI.GenAiTokenTypeValues.INPUT.value, prompt_tokens),
        (GenAI.GenAiTokenTypeValues.COMPLETION.value, completion_tokens),
    ):
        attrs: Dict[str, AttributeValue] = {
            GenAI.GEN_AI_TOKEN_TYPE: token_type
        }
        attrs.update(metric_attributes)
        if isinstance(count, (int, float)):
            token_histogram.record(count, attributes=attrs, context=context)
659
+
660
+
661
+ def _record_duration(
662
+ duration_histogram: Histogram,
663
+ invocation: LLMInvocation | EmbeddingInvocation | ToolCall,
664
+ metric_attributes: Dict[str, AttributeValue],
665
+ *,
666
+ span: Optional[Span] = None,
667
+ ) -> None:
668
+ if invocation.end_time is not None:
669
+ elapsed: float = invocation.end_time - invocation.start_time
670
+ context = None
671
+ if span is not None:
672
+ try:
673
+ context = trace.set_span_in_context(span)
674
+ except (
675
+ TypeError,
676
+ ValueError,
677
+ AttributeError,
678
+ ): # pragma: no cover - defensive
679
+ context = None
680
+ duration_histogram.record(
681
+ elapsed, attributes=metric_attributes, context=context
682
+ )
683
+
684
+
685
+ # Helper functions for agentic types
686
+ def _build_text_message(
687
+ role: str, text: str, *, capture: bool, finish_reason: Optional[str] = None
688
+ ) -> dict[str, Any]:
689
+ msg: dict[str, Any] = {
690
+ "role": role,
691
+ "parts": [{"type": "text", "content": text if capture else ""}],
692
+ }
693
+ if finish_reason is not None:
694
+ msg["finish_reason"] = finish_reason
695
+ return msg
696
+
697
+
698
def _workflow_to_log_record(
    workflow: Workflow, capture_content: bool
) -> Optional[SDKLogRecord]:
    """Create a workflow log record using the unified message format.

    Initial input / final output are rendered as standardized user /
    assistant messages; workflow.description doubles as the (always
    present) system-instructions content.
    """
    attributes: Dict[str, Any] = {
        # TODO: fixme in UI
        # "event.name": "gen_ai.client.workflow.operation.details",
        "event.name": "gen_ai.client.inference.operation.details",
        "gen_ai.workflow.name": workflow.name,
    }
    if workflow.workflow_type:
        attributes["gen_ai.workflow.type"] = workflow.workflow_type
    if workflow.description:
        attributes["gen_ai.workflow.description"] = workflow.description
    if workflow.framework:
        attributes[GEN_AI_FRAMEWORK] = workflow.framework

    body: Dict[str, Any] = {}
    if workflow.initial_input:
        body[GenAI.GEN_AI_INPUT_MESSAGES] = [
            _build_text_message(
                "user", workflow.initial_input, capture=capture_content
            )
        ]
    if workflow.final_output:
        body[GenAI.GEN_AI_OUTPUT_MESSAGES] = [
            _build_text_message(
                "assistant",
                workflow.final_output,
                capture=capture_content,
                finish_reason="stop",
            )
        ]
    # System-instructions key is always present (empty list if none);
    # workflow.description is the source of the instruction text.
    instructions: list[dict[str, Any]] = []
    if workflow.description:
        instructions.append(
            {
                "type": "text",
                "content": workflow.description if capture_content else "",
            }
        )
    body[GenAI.GEN_AI_SYSTEM_INSTRUCTIONS] = instructions
    # Ensure finish_reason present on all output messages (defensive)
    for message in body.get(GenAI.GEN_AI_OUTPUT_MESSAGES, ()):
        message.setdefault("finish_reason", "stop")
    return _build_log_record(
        workflow,
        # TODO: fixme in UI
        # event_name="gen_ai.client.workflow.operation.details",
        event_name="gen_ai.client.inference.operation.details",
        attributes=attributes,
        body=body or None,
    )
761
+
762
+
763
def _agent_to_log_record(
    agent: AgentCreation | AgentInvocation, capture_content: bool
) -> Optional[SDKLogRecord]:
    """Create a log record for an agent event using unified message format.

    Attributes carry agent identity and framework metadata; the body
    carries system instructions (key always present, possibly an empty
    list) plus optional input/output messages. Content is blanked when
    *capture_content* is False. Returns None when the body is empty.
    """
    attributes: Dict[str, Any] = {
        # TODO: fixme in UI
        # "event.name": "gen_ai.client.agent.operation.details",
        "event.name": "gen_ai.client.inference.operation.details",
    }
    if agent.framework:
        attributes[GEN_AI_FRAMEWORK] = agent.framework
    attributes[GenAI.GEN_AI_AGENT_NAME] = agent.name
    attributes[GenAI.GEN_AI_AGENT_ID] = str(agent.run_id)

    body: Dict[str, Any] = {}
    # System instructions treated similarly to LLM system messages.
    # Always include the key (empty list if none). The previous
    # `if agent.system_instructions: pass` dead branch has been removed.
    agent_instructions: list[dict[str, Any]] = []
    if agent.system_instructions:
        agent_instructions.append(
            {
                "type": "text",
                "content": agent.system_instructions
                if capture_content
                else "",
            }
        )
    body[GenAI.GEN_AI_SYSTEM_INSTRUCTIONS] = agent_instructions
    input_context = getattr(agent, "input_context", None)
    if input_context:
        body[GenAI.GEN_AI_INPUT_MESSAGES] = [
            _build_text_message("user", input_context, capture=capture_content)
        ]
    output_result = getattr(agent, "output_result", None)
    if output_result:
        body[GenAI.GEN_AI_OUTPUT_MESSAGES] = [
            _build_text_message(
                "assistant",
                output_result,
                capture=capture_content,
                finish_reason="stop",
            )
        ]
    # Ensure finish_reason present on all output messages (defensive)
    for message in body.get(GenAI.GEN_AI_OUTPUT_MESSAGES, ()):
        message.setdefault("finish_reason", "stop")
    if not body:  # pragma: no cover - instructions key keeps body non-empty
        return None
    return _build_log_record(
        agent,
        # TODO: fixme in UI
        # event_name="gen_ai.client.agent.operation.details",
        event_name="gen_ai.client.inference.operation.details",
        attributes=attributes,
        body=body,
    )
823
+
824
+
825
def _step_to_log_record(
    step: Step, capture_content: bool
) -> Optional[SDKLogRecord]:
    """Create a log record for a step event.

    Note: Step events are not yet in semantic conventions but follow
    the message structure pattern for consistency. Attributes carry
    metadata only; the body carries input/output data, blanked (but with
    keys preserved) when *capture_content* is False.
    """
    attributes: Dict[str, Any] = {
        "event.name": "gen_ai.client.step.operation.details",
        "gen_ai.step.name": step.name,
    }
    if step.step_type:
        attributes["gen_ai.step.type"] = step.step_type
    if step.objective:
        attributes["gen_ai.step.objective"] = step.objective
    if step.source:
        attributes["gen_ai.step.source"] = step.source
    if step.assigned_agent:
        attributes[GenAI.GEN_AI_AGENT_NAME] = step.assigned_agent
    if step.status:
        attributes["gen_ai.step.status"] = step.status

    # Body contains messages/content only (following semantic conventions
    # pattern); when capture is disabled, emit the structure with empty
    # content, mirroring LLM message redaction.
    body: Dict[str, Any] = {}
    if step.input_data:
        body["input_data"] = step.input_data if capture_content else ""
    if step.output_data:
        body["output_data"] = step.output_data if capture_content else ""

    # Delegate span-context caching, OTel context derivation, and trace-id
    # propagation to the shared builder instead of duplicating it here.
    return _build_log_record(
        step,
        event_name="gen_ai.client.step.operation.details",
        attributes=attributes,
        body=body or None,
    )
888
+
889
+
890
def _embedding_to_log_record(
    embedding: EmbeddingInvocation, capture_content: bool
) -> Optional[SDKLogRecord]:
    """Create a log record for an embedding event.

    Attributes carry operation/provider/model metadata plus optional
    dimension, token-usage, server, error, and agent details; the body
    carries the input texts (replaced by an empty list when
    *capture_content* is False).
    """
    # Attributes contain metadata (not content)
    attributes: Dict[str, Any] = {
        "event.name": "gen_ai.client.embedding.operation.details",
    }

    # Core attributes
    if embedding.operation_name:
        attributes[GenAI.GEN_AI_OPERATION_NAME] = embedding.operation_name
    if embedding.provider:
        attributes[GenAI.GEN_AI_PROVIDER_NAME] = embedding.provider
    if embedding.request_model:
        attributes[GenAI.GEN_AI_REQUEST_MODEL] = embedding.request_model

    # Optional attributes
    if embedding.dimension_count:
        attributes[GEN_AI_EMBEDDINGS_DIMENSION_COUNT] = (
            embedding.dimension_count
        )
    if embedding.input_tokens is not None:
        attributes[GenAI.GEN_AI_USAGE_INPUT_TOKENS] = embedding.input_tokens
    if embedding.server_address:
        attributes[ServerAttributes.SERVER_ADDRESS] = embedding.server_address
    if embedding.server_port:
        attributes[ServerAttributes.SERVER_PORT] = embedding.server_port
    if embedding.encoding_formats:
        attributes[GEN_AI_REQUEST_ENCODING_FORMATS] = (
            embedding.encoding_formats
        )
    if embedding.error_type:
        attributes["error.type"] = embedding.error_type

    # Add agent context if available
    if embedding.agent_name:
        attributes[GenAI.GEN_AI_AGENT_NAME] = embedding.agent_name
    if embedding.agent_id:
        attributes[GenAI.GEN_AI_AGENT_ID] = embedding.agent_id

    # Body contains content (input texts); structure is kept with empty
    # content when capture is disabled.
    body: Dict[str, Any] = {}
    if embedding.input_texts:
        body[GEN_AI_EMBEDDINGS_INPUT_TEXTS] = (
            embedding.input_texts if capture_content else []
        )

    # Delegate span-context caching, OTel context derivation, and trace-id
    # propagation to the shared builder instead of duplicating the
    # SDKLogRecord construction here.
    return _build_log_record(
        embedding,
        event_name="gen_ai.client.embedding.operation.details",
        attributes=attributes,
        body=body or None,
    )