strands-agents-evals 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. strands_agents_evals-0.1.0.dist-info/METADATA +408 -0
  2. strands_agents_evals-0.1.0.dist-info/RECORD +68 -0
  3. strands_agents_evals-0.1.0.dist-info/WHEEL +4 -0
  4. strands_agents_evals-0.1.0.dist-info/licenses/LICENSE +175 -0
  5. strands_agents_evals-0.1.0.dist-info/licenses/NOTICE +1 -0
  6. strands_evals/__init__.py +22 -0
  7. strands_evals/case.py +53 -0
  8. strands_evals/display/display_console.py +150 -0
  9. strands_evals/evaluators/__init__.py +23 -0
  10. strands_evals/evaluators/evaluator.py +182 -0
  11. strands_evals/evaluators/faithfulness_evaluator.py +116 -0
  12. strands_evals/evaluators/goal_success_rate_evaluator.py +90 -0
  13. strands_evals/evaluators/harmfulness_evaluator.py +135 -0
  14. strands_evals/evaluators/helpfulness_evaluator.py +148 -0
  15. strands_evals/evaluators/interactions_evaluator.py +244 -0
  16. strands_evals/evaluators/output_evaluator.py +72 -0
  17. strands_evals/evaluators/prompt_templates/case_prompt_template.py +63 -0
  18. strands_evals/evaluators/prompt_templates/faithfulness/__init__.py +11 -0
  19. strands_evals/evaluators/prompt_templates/faithfulness/faithfulness_v0.py +30 -0
  20. strands_evals/evaluators/prompt_templates/goal_success_rate/__init__.py +11 -0
  21. strands_evals/evaluators/prompt_templates/goal_success_rate/goal_success_rate_v0.py +17 -0
  22. strands_evals/evaluators/prompt_templates/harmfulness/__init__.py +11 -0
  23. strands_evals/evaluators/prompt_templates/harmfulness/harmfulness_v0.py +8 -0
  24. strands_evals/evaluators/prompt_templates/helpfulness/__init__.py +11 -0
  25. strands_evals/evaluators/prompt_templates/helpfulness/helpfulness_v0.py +38 -0
  26. strands_evals/evaluators/prompt_templates/prompt_templates.py +176 -0
  27. strands_evals/evaluators/prompt_templates/tool_parameter_accuracy/__init__.py +11 -0
  28. strands_evals/evaluators/prompt_templates/tool_parameter_accuracy/tool_parameter_accuracy_v0.py +40 -0
  29. strands_evals/evaluators/prompt_templates/tool_selection_accuracy/__init__.py +11 -0
  30. strands_evals/evaluators/prompt_templates/tool_selection_accuracy/tool_selection_accuracy_v0.py +23 -0
  31. strands_evals/evaluators/tool_parameter_accuracy_evaluator.py +112 -0
  32. strands_evals/evaluators/tool_selection_accuracy_evaluator.py +112 -0
  33. strands_evals/evaluators/trajectory_evaluator.py +100 -0
  34. strands_evals/experiment.py +652 -0
  35. strands_evals/extractors/__init__.py +3 -0
  36. strands_evals/extractors/graph_extractor.py +30 -0
  37. strands_evals/extractors/swarm_extractor.py +73 -0
  38. strands_evals/extractors/tools_use_extractor.py +164 -0
  39. strands_evals/extractors/trace_extractor.py +166 -0
  40. strands_evals/generators/__init__.py +3 -0
  41. strands_evals/generators/experiment_generator.py +498 -0
  42. strands_evals/generators/prompt_template/prompt_templates.py +75 -0
  43. strands_evals/generators/topic_planner.py +60 -0
  44. strands_evals/mappers/__init__.py +6 -0
  45. strands_evals/mappers/session_mapper.py +27 -0
  46. strands_evals/mappers/strands_in_memory_session_mapper.py +473 -0
  47. strands_evals/simulation/README.md +323 -0
  48. strands_evals/simulation/__init__.py +6 -0
  49. strands_evals/simulation/actor_simulator.py +292 -0
  50. strands_evals/simulation/profiles/__init__.py +5 -0
  51. strands_evals/simulation/profiles/actor_profile.py +26 -0
  52. strands_evals/simulation/prompt_templates/__init__.py +11 -0
  53. strands_evals/simulation/prompt_templates/actor_profile_extraction.py +25 -0
  54. strands_evals/simulation/prompt_templates/actor_system_prompt.py +64 -0
  55. strands_evals/simulation/prompt_templates/goal_completion.py +27 -0
  56. strands_evals/simulation/tools/__init__.py +5 -0
  57. strands_evals/simulation/tools/goal_completion.py +93 -0
  58. strands_evals/telemetry/__init__.py +15 -0
  59. strands_evals/telemetry/_cloudwatch_logger.py +209 -0
  60. strands_evals/telemetry/config.py +207 -0
  61. strands_evals/telemetry/tracer.py +38 -0
  62. strands_evals/tools/evaluation_tools.py +67 -0
  63. strands_evals/types/__init__.py +11 -0
  64. strands_evals/types/evaluation.py +105 -0
  65. strands_evals/types/evaluation_report.py +244 -0
  66. strands_evals/types/simulation/__init__.py +5 -0
  67. strands_evals/types/simulation/actor.py +34 -0
  68. strands_evals/types/trace.py +205 -0
@@ -0,0 +1,473 @@
1
+ import json
2
+ import logging
3
+ from collections import defaultdict
4
+ from datetime import datetime, timezone
5
+ from enum import Enum
6
+ from typing import Any
7
+
8
+ from opentelemetry.sdk.trace import ReadableSpan
9
+
10
+ from ..types.trace import (
11
+ AgentInvocationSpan,
12
+ AssistantMessage,
13
+ InferenceSpan,
14
+ Session,
15
+ SpanInfo,
16
+ TextContent,
17
+ ToolCall,
18
+ ToolCallContent,
19
+ ToolConfig,
20
+ ToolExecutionSpan,
21
+ ToolResult,
22
+ ToolResultContent,
23
+ Trace,
24
+ UserMessage,
25
+ )
26
+ from .session_mapper import SessionMapper
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+
31
class GenAIConventionVersion(Enum):
    """GenAI semantic convention versions following OTEL_SEMCONV_STABILITY_OPT_IN.

    This enum aligns with OpenTelemetry's semantic convention stability options
    as defined in OTEL_SEMCONV_STABILITY_OPT_IN environment variable.

    Attributes:
        LEGACY: Use legacy conventions (v1.36.0 or prior) with gen_ai.system attribute
            and separate message events (gen_ai.user.message, gen_ai.choice, etc.)
        LATEST_EXPERIMENTAL: Use latest experimental conventions (v1.37+) with
            gen_ai.provider.name attribute and unified gen_ai.client.inference.operation.details events.
            Corresponds to OTEL's "gen_ai_latest_experimental" stability option.
    """

    # Member values match the OTEL_SEMCONV_STABILITY_OPT_IN option strings
    # (LATEST_EXPERIMENTAL mirrors "gen_ai_latest_experimental" exactly).
    LEGACY = "legacy"
    LATEST_EXPERIMENTAL = "gen_ai_latest_experimental"
47
+
48
+
49
class StrandsInMemorySessionMapper(SessionMapper):
    """Maps OpenTelemetry in-memory spans to Session format for evaluation.

    Supports both legacy and latest GenAI semantic conventions:
    - Latest (v1.37+): gen_ai.provider.name with unified gen_ai.client.inference.operation.details events
    - Legacy: gen_ai.system with separate message events (gen_ai.user.message, gen_ai.choice, etc.)

    The mapper automatically detects the convention version. Default to Legacy.
    """

    def __init__(self) -> None:
        super().__init__()
        # Assume legacy conventions until map_to_session inspects a span and
        # detects otherwise (see _detect_convention_version).
        self._convention_version = GenAIConventionVersion.LEGACY

    def map_to_session(
        self,
        otel_spans: list[ReadableSpan],
        session_id: str,
    ) -> Session:
        """Group OTel spans by trace id and convert them into a Session.

        Spans are filtered by session identifier only when at least one span
        carries a ``session.id`` or ``gen_ai.conversation.id`` attribute;
        otherwise every span is included.

        Args:
            otel_spans: In-memory spans exported by the OpenTelemetry SDK.
            session_id: Session identifier used for filtering and stamped onto
                every produced Trace/SpanInfo.

        Returns:
            A Session with one Trace per OTel trace id; traces whose spans all
            failed conversion are dropped.
        """
        if otel_spans:
            self._convention_version = self._detect_convention_version(otel_spans[0])

        # Check if any spans have session.id or gen_ai.conversation.id attribute
        any_span_has_session_id = any(
            span.attributes and ("session.id" in span.attributes or "gen_ai.conversation.id" in span.attributes)
            for span in otel_spans
        )

        # Build traces: if no spans have session IDs, include all; otherwise filter by matching session_id
        traces_by_id = defaultdict(list)
        for span in otel_spans:
            should_include = False

            if not any_span_has_session_id:
                # If no spans have session IDs, include all spans
                should_include = True
            else:
                # Check if this span's session ID matches
                if span.attributes:
                    span_session_id = None
                    # Check for gen_ai.conversation.id first
                    if "gen_ai.conversation.id" in span.attributes:
                        span_session_id = str(span.attributes["gen_ai.conversation.id"])
                    # Then check for session.id
                    elif "session.id" in span.attributes:
                        span_session_id = str(span.attributes["session.id"])

                    # Include if session ID matches the provided session_id
                    should_include = span_session_id == session_id

            if should_include:
                trace_id_extracted = format(span.context.trace_id, "032x")
                traces_by_id[trace_id_extracted].append(span)

        traces: list[Trace] = []
        for trace_id_extracted, spans in traces_by_id.items():
            trace = self._convert_trace(trace_id_extracted, spans, session_id)
            if trace.spans:
                traces.append(trace)

        return Session(traces=traces, session_id=session_id)

    def _detect_convention_version(self, span: ReadableSpan) -> GenAIConventionVersion:
        """Detect which GenAI semantic convention version is being used.

        Returns:
            GenAIConventionVersion.LATEST_EXPERIMENTAL if using latest conventions,
            GenAIConventionVersion.LEGACY otherwise
        """
        # gen_ai.provider.name only exists in the latest (v1.37+) conventions.
        if span.attributes and "gen_ai.provider.name" in span.attributes:
            return GenAIConventionVersion.LATEST_EXPERIMENTAL

        return GenAIConventionVersion.LEGACY

    def _use_latest_conventions(self) -> bool:
        """Helper method to determine if latest conventions should be used.

        Returns:
            True if LATEST_EXPERIMENTAL, False if LEGACY
        """
        return self._convention_version == GenAIConventionVersion.LATEST_EXPERIMENTAL

    def _convert_trace(self, trace_id: str, otel_spans: list[ReadableSpan], session_id: str) -> Trace:
        """Convert one trace's spans, dispatching on gen_ai.operation.name.

        A span that raises during conversion is logged and skipped so the rest
        of the trace still converts; spans with unrecognized operation names
        are silently ignored.
        """
        converted_spans: list[InferenceSpan | ToolExecutionSpan | AgentInvocationSpan] = []

        for span in otel_spans:
            try:
                operation_name = span.attributes.get("gen_ai.operation.name", "") if span.attributes else ""

                if operation_name == "chat":
                    inference_span = self._convert_inference_span(span, session_id)
                    # Drop inference spans that yielded no messages.
                    if inference_span.messages:
                        converted_spans.append(inference_span)
                elif operation_name == "execute_tool":
                    converted_spans.append(self._convert_tool_execution_span(span, session_id))
                elif operation_name == "invoke_agent":
                    converted_spans.append(self._convert_agent_invocation_span(span, session_id))
            except Exception as e:
                # Lazy %-formatting: only rendered if the warning is emitted.
                logger.warning("Failed to convert span: %s", e)

        return Trace(spans=converted_spans, trace_id=trace_id, session_id=session_id)

    def _create_span_info(self, span: ReadableSpan, session_id: str) -> SpanInfo:
        """Build a SpanInfo from OTel ids and nanosecond epoch timestamps.

        Timestamps are converted to timezone-aware UTC datetimes; a missing
        start/end time becomes the epoch (0 ns).
        """
        start_time = span.start_time or 0
        end_time = span.end_time or 0

        return SpanInfo(
            # OTel trace ids are 128-bit, span ids 64-bit; render as fixed-width hex.
            trace_id=format(span.context.trace_id, "032x"),
            span_id=format(span.context.span_id, "016x"),
            session_id=session_id,
            parent_span_id=format(span.parent.span_id, "016x") if span.parent else None,
            start_time=datetime.fromtimestamp(start_time / 1e9, tz=timezone.utc),
            end_time=datetime.fromtimestamp(end_time / 1e9, tz=timezone.utc),
        )

    def _parse_json_attr(self, attributes: Any, key: str, default: str = "[]") -> Any:
        """Parse a JSON-encoded attribute value.

        Falls back to parsing *default* if the attribute is missing, not a
        mapping, or not valid JSON.
        """
        try:
            value = attributes.get(key, default)
            return json.loads(str(value))
        except (AttributeError, TypeError, json.JSONDecodeError):
            return json.loads(default)

    def _process_user_message(self, content_list: list[dict[str, Any]]) -> list[TextContent | ToolResultContent]:
        """Extract TextContent items from a user-message content list; non-text items are ignored."""
        return [TextContent(text=item["text"]) for item in content_list if "text" in item]

    def _process_assistant_content(self, content_list: list[dict[str, Any]]) -> list[TextContent | ToolCallContent]:
        """Convert assistant content items ("text" / "toolUse") to internal content types."""
        result: list[TextContent | ToolCallContent] = []
        for item in content_list:
            if "text" in item:
                result.append(TextContent(text=item["text"]))
            elif "toolUse" in item:
                tool_use = item["toolUse"]
                result.append(
                    ToolCallContent(
                        name=tool_use["name"],
                        arguments=tool_use.get("input", {}),
                        tool_call_id=tool_use.get("toolUseId"),
                    )
                )
        return result

    def _process_tool_results(self, content_list: list[dict[str, Any]]) -> list[TextContent | ToolResultContent]:
        """Convert "toolResult" content items to ToolResultContent; other items are skipped."""
        result: list[TextContent | ToolResultContent] = []
        for item in content_list:
            if "toolResult" not in item:
                continue

            tool_result = item["toolResult"]
            result_text = ""
            if "content" in tool_result and tool_result["content"]:
                content = tool_result["content"]
                # List-shaped content: only the first element's text is used.
                result_text = content[0].get("text", "") if isinstance(content, list) else str(content)

            result.append(
                ToolResultContent(
                    content=result_text,
                    error=tool_result.get("error"),
                    tool_call_id=tool_result.get("toolUseId"),
                )
            )
        return result

    def _convert_inference_span(self, span: ReadableSpan, session_id: str) -> InferenceSpan:
        """Convert a "chat" span into an InferenceSpan, choosing the event format by convention version."""
        span_info = self._create_span_info(span, session_id)

        if self._use_latest_conventions():
            messages = self._extract_messages_from_inference_details(span)
        else:
            messages = self._extract_messages_from_events(span)

        return InferenceSpan(span_info=span_info, messages=messages, metadata={})

    def _extract_messages_from_events(self, span: ReadableSpan) -> list[UserMessage | AssistantMessage]:
        """Extract messages from legacy event format (gen_ai.user.message, etc.)."""
        messages: list[UserMessage | AssistantMessage] = []

        for event in span.events:
            try:
                if event.name == "gen_ai.user.message":
                    content_list = self._parse_json_attr(event.attributes, "content")
                    user_content = self._process_user_message(content_list)
                    if user_content:
                        messages.append(UserMessage(content=user_content))

                elif event.name == "gen_ai.assistant.message":
                    content_list = self._parse_json_attr(event.attributes, "content")
                    assistant_content = self._process_assistant_content(content_list)
                    if assistant_content:
                        messages.append(AssistantMessage(content=assistant_content))

                elif event.name == "gen_ai.tool.message":
                    content_list = self._parse_json_attr(event.attributes, "content")
                    tool_result_content = self._process_tool_results(content_list)
                    if tool_result_content:
                        # Tool results are carried as a UserMessage, mirroring
                        # the Bedrock/Converse message shape.
                        messages.append(UserMessage(content=tool_result_content))

                elif event.name == "gen_ai.choice":
                    message_list = self._parse_json_attr(event.attributes, "message")
                    assistant_content = self._process_assistant_content(message_list)
                    if assistant_content:
                        messages.append(AssistantMessage(content=assistant_content))
            except Exception as e:
                logger.warning("Failed to process event %s: %s", event.name, e)

        return messages

    def _extract_messages_from_inference_details(self, span: ReadableSpan) -> list[UserMessage | AssistantMessage]:
        """Extract messages from latest event format (gen_ai.client.inference.operation.details)."""
        messages: list[UserMessage | AssistantMessage] = []

        for event in span.events:
            try:
                if event.name == "gen_ai.client.inference.operation.details":
                    event_attributes = event.attributes
                    if not event_attributes:
                        continue
                    # Check for input messages
                    if "gen_ai.input.messages" in event_attributes:
                        input_messages = self._parse_json_attr(event_attributes, "gen_ai.input.messages")
                        for msg in input_messages:
                            input_content = self._convert_inference_messages(msg)
                            if input_content:
                                messages.append(input_content)

                    # Check for output messages
                    if "gen_ai.output.messages" in event_attributes:
                        output_messages = self._parse_json_attr(event_attributes, "gen_ai.output.messages")
                        for msg in output_messages:
                            output_content = self._convert_inference_messages(msg)
                            if output_content:
                                messages.append(output_content)
            except Exception as e:
                logger.warning("Failed to process inference details event: %s", e)

        return messages

    def _convert_inference_messages(self, otel_msg: dict[str, Any]) -> UserMessage | AssistantMessage | None:
        """Convert OTEL message format (with parts) to internal message types.

        Args:
            otel_msg: Message in OTEL format with 'role' and 'parts' fields

        Returns:
            UserMessage or AssistantMessage, or None if conversion fails
        """
        try:
            role = otel_msg.get("role", "")
            parts = otel_msg.get("parts", [])

            if role == "assistant":
                assistant_content: list[TextContent | ToolCallContent] = []

                for part in parts:
                    part_type = part.get("type", "")

                    if part_type == "text":
                        assistant_content.append(TextContent(text=part.get("content", "")))

                    elif part_type == "tool_call":
                        assistant_content.append(
                            ToolCallContent(
                                name=part.get("name", ""),
                                arguments=part.get("arguments", {}),
                                tool_call_id=part.get("id"),
                            )
                        )
                return AssistantMessage(content=assistant_content) if assistant_content else None

            # Tool messages are represented as UserMessage with ToolResultContent
            content: list[TextContent | ToolResultContent] = []

            for part in parts:
                part_type = part.get("type", "")

                if part_type == "text":
                    content.append(TextContent(text=part.get("content", "")))

                if part_type == "tool_call_response":
                    # Extract text from response array if present
                    response = part.get("response", [])
                    response_text = ""

                    ## To-do: Compare the differences for multiple toolResults
                    if isinstance(response, list) and response:
                        response_text = (
                            response[0].get("text", "") if isinstance(response[0], dict) else str(response[0])
                        )
                    elif isinstance(response, str):
                        response_text = response

                    content.append(
                        ToolResultContent(
                            content=response_text,
                            tool_call_id=part.get("id"),
                        )
                    )
            return UserMessage(content=content) if content else None

        except Exception as e:
            logger.warning("Failed to convert OTEL message: %s", e)
            return None

    def _convert_tool_execution_span(self, span: ReadableSpan, session_id: str) -> ToolExecutionSpan:
        """Convert an "execute_tool" span into a ToolExecutionSpan.

        Tool name/call-id/status come from span attributes; arguments and the
        result text come from span events, parsed per convention version.
        """
        span_info = self._create_span_info(span, session_id)
        attrs = span.attributes or {}

        tool_name = str(attrs.get("gen_ai.tool.name", ""))
        tool_call_id = str(attrs.get("gen_ai.tool.call.id", ""))
        tool_status = attrs.get("gen_ai.tool.status", attrs.get("tool.status", ""))
        # "success" means no error; any other non-empty status is treated as the error text.
        tool_error = None if tool_status == "success" else (str(tool_status) if tool_status else None)

        tool_arguments = {}
        tool_result_content = ""

        if self._use_latest_conventions():
            # Extract from gen_ai.client.inference.operation.details events
            for event in span.events:
                try:
                    if event.name == "gen_ai.client.inference.operation.details":
                        event_attributes = event.attributes
                        if not event_attributes:
                            continue
                        if "gen_ai.input.messages" in event_attributes:
                            input_messages = self._parse_json_attr(event_attributes, "gen_ai.input.messages")
                            if input_messages and input_messages[0].get("parts"):
                                part = input_messages[0]["parts"][0]
                                if part.get("type") == "tool_call":
                                    tool_arguments = part.get("arguments", {})

                        if "gen_ai.output.messages" in event_attributes:
                            output_messages = self._parse_json_attr(event_attributes, "gen_ai.output.messages")
                            if output_messages and output_messages[0].get("parts"):
                                part = output_messages[0]["parts"][0]
                                if part.get("type") == "tool_call_response":
                                    response = part.get("response", [])
                                    if isinstance(response, list) and response:
                                        tool_result_content = (
                                            response[0].get("text", "")
                                            if isinstance(response[0], dict)
                                            else str(response[0])
                                        )
                                    elif isinstance(response, str):
                                        tool_result_content = response
                except Exception as e:
                    logger.warning("Failed to process tool event %s: %s", event.name, e)
        else:
            for event in span.events:
                try:
                    event_attributes = event.attributes
                    if not event_attributes:
                        continue
                    if event.name == "gen_ai.tool.message":
                        tool_arguments = self._parse_json_attr(event_attributes, "content", "{}")
                    elif event.name == "gen_ai.choice":
                        message_list = self._parse_json_attr(event_attributes, "message")
                        tool_result_content = message_list[0].get("text", "") if message_list else ""
                except Exception as e:
                    logger.warning("Failed to process tool event %s: %s", event.name, e)

        tool_call = ToolCall(name=tool_name, arguments=tool_arguments, tool_call_id=tool_call_id)
        tool_result = ToolResult(content=tool_result_content, error=tool_error, tool_call_id=tool_call_id)

        return ToolExecutionSpan(span_info=span_info, tool_call=tool_call, tool_result=tool_result, metadata={})

    def _convert_agent_invocation_span(self, span: ReadableSpan, session_id: str) -> AgentInvocationSpan:
        """Convert an "invoke_agent" span into an AgentInvocationSpan.

        Extracts the user prompt, the agent's text response, and the agent's
        available tools (from the gen_ai.agent.tools attribute).
        """
        span_info = self._create_span_info(span, session_id)

        user_prompt = ""
        agent_response = ""
        available_tools: list[ToolConfig] = []

        try:
            tool_names = self._parse_json_attr(span.attributes, "gen_ai.agent.tools")
            available_tools = [ToolConfig(name=name) for name in tool_names]
        except Exception as e:
            logger.warning("Failed to parse available tools: %s", e)

        if self._use_latest_conventions():
            for event in span.events:
                try:
                    if event.name == "gen_ai.client.inference.operation.details":
                        event_attributes = event.attributes
                        if not event_attributes:
                            continue
                        if "gen_ai.input.messages" in event_attributes:
                            input_messages = self._parse_json_attr(event_attributes, "gen_ai.input.messages")
                            if input_messages and input_messages[0].get("parts"):
                                parts = input_messages[0]["parts"]
                                # First text part of the first message wins.
                                for part in parts:
                                    if part.get("type") == "text":
                                        user_prompt = part.get("content", "")
                                        break

                        if "gen_ai.output.messages" in event_attributes:
                            output_messages = self._parse_json_attr(event_attributes, "gen_ai.output.messages")
                            if output_messages and output_messages[0].get("parts"):
                                parts = output_messages[0]["parts"]
                                for part in parts:
                                    if part.get("type") == "text":
                                        agent_response = part.get("content", "")
                                        break
                except Exception as e:
                    logger.warning("Failed to process agent event %s: %s", event.name, e)
        else:
            for event in span.events:
                try:
                    event_attributes = event.attributes
                    if not event_attributes:
                        continue
                    if event.name == "gen_ai.user.message":
                        content_list = self._parse_json_attr(event_attributes, "content")
                        user_prompt = content_list[0].get("text", "") if content_list else ""
                    elif event.name == "gen_ai.choice":
                        # event_attributes is guaranteed truthy here (guarded above).
                        msg = event_attributes.get("message", "")
                        agent_response = str(msg)
                except Exception as e:
                    logger.warning("Failed to process agent event %s: %s", event.name, e)

        return AgentInvocationSpan(
            span_info=span_info,
            user_prompt=user_prompt,
            agent_response=agent_response,
            available_tools=available_tools,
            metadata={},
        )