strands-agents-evals 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: strands-agents-evals
3
- Version: 0.1.3
3
+ Version: 0.1.4
4
4
  Summary: Evaluation framework for Strands
5
5
  Author-email: AWS <opensource@amazon.com>
6
6
  License: Apache-2.0
@@ -31,7 +31,7 @@ strands_evals/extractors/__init__.py,sha256=Jmlrk-m8sSS_LwmCVSloIkg3BjOgRzNEezja
31
31
  strands_evals/extractors/graph_extractor.py,sha256=TeT-58JB9roqSvy2ELz1kg8WF5YO-cfLlGZTO0F9s_4,1105
32
32
  strands_evals/extractors/swarm_extractor.py,sha256=Sm1XFCkAGVdF3XDyO3iF-20I8C6sAQ8JPNP5fgotOFU,2682
33
33
  strands_evals/extractors/tools_use_extractor.py,sha256=emLL63LKldL2IA2u5wZL0ZhklZJqX0KLr5xFRt-S4i4,6600
34
- strands_evals/extractors/trace_extractor.py,sha256=l7gk5rUFoUcxQduPJz49OX66SdgeK1MLt81aF1yr4Lc,6653
34
+ strands_evals/extractors/trace_extractor.py,sha256=TJKl0OdjFhh-htlV1Wxzem8TQdb0rxa-efkq_e0pAdo,7287
35
35
  strands_evals/generators/__init__.py,sha256=B1F30DAIf0kPyBdE4PAZvSby-dTelqb_7hFJoATqVb0,89
36
36
  strands_evals/generators/experiment_generator.py,sha256=6wLTL0iG2b0YAiu0w8dDiaBxOIy7p_Fs7l3hCjgQc0w,22655
37
37
  strands_evals/generators/topic_planner.py,sha256=FtgTVDlV9hWJyO8E4Z__nEWvvrOJzmTW4y6yZ9Alx1A,2436
@@ -61,8 +61,8 @@ strands_evals/types/evaluation_report.py,sha256=vT86zO4Qn9CQbULo3aziGMdG-1qWLdcB
61
61
  strands_evals/types/trace.py,sha256=BFoEylzAlENyPH702T5MDz-_H21-Wfx-FFTSXX1tDfY,4844
62
62
  strands_evals/types/simulation/__init__.py,sha256=-mz5lW6qFfIMm4dJGaP9pXY3xeiefLbB0XevjdFykkU,133
63
63
  strands_evals/types/simulation/actor.py,sha256=ESTV8165c3Ad5QT4yYmjm-A-oZdwZ0Rf0Lq7zokjTPo,1163
64
- strands_agents_evals-0.1.3.dist-info/METADATA,sha256=GnFR1FmK9no2J7NWeuV8efHNvw2eoMzl8VbSJHeRdLg,17721
65
- strands_agents_evals-0.1.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
66
- strands_agents_evals-0.1.3.dist-info/licenses/LICENSE,sha256=yIWKWnZEC7ykhOE0z330Y4XQkN0YssWOZQ-TkliALls,10141
67
- strands_agents_evals-0.1.3.dist-info/licenses/NOTICE,sha256=Eg13ogOmcI7JpMjxniFnKG81vwU3X8X7P_IlpvVg5RU,66
68
- strands_agents_evals-0.1.3.dist-info/RECORD,,
64
+ strands_agents_evals-0.1.4.dist-info/METADATA,sha256=VQm_tm1Umm3fi_HfujW0Ovm_XyvQQCjEJrAL4-dGjKQ,17721
65
+ strands_agents_evals-0.1.4.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
66
+ strands_agents_evals-0.1.4.dist-info/licenses/LICENSE,sha256=yIWKWnZEC7ykhOE0z330Y4XQkN0YssWOZQ-TkliALls,10141
67
+ strands_agents_evals-0.1.4.dist-info/licenses/NOTICE,sha256=Eg13ogOmcI7JpMjxniFnKG81vwU3X8X7P_IlpvVg5RU,66
68
+ strands_agents_evals-0.1.4.dist-info/RECORD,,
@@ -45,9 +45,11 @@ class TraceExtractor:
45
45
  def _extract_trace_level(self, session: Session) -> list[TraceLevelInput]:
46
46
  """Extract trace-level inputs with session history up to each turn."""
47
47
  evaluation_inputs: list[TraceLevelInput] = []
48
- previous_turns: list[Union[UserMessage, AssistantMessage]] = []
48
+ previous_turns: list[Union[UserMessage, list[ToolExecution], AssistantMessage]] = []
49
49
 
50
50
  for trace in session.traces:
51
+ tool_spans = self._find_tool_execution_spans(trace)
52
+
51
53
  for span in trace.spans:
52
54
  if not isinstance(span, AgentInvocationSpan):
53
55
  continue
@@ -59,6 +61,17 @@ class TraceExtractor:
59
61
  logger.warning(f"Failed to create user message: {e}")
60
62
  continue
61
63
 
64
+ # Include tool executions in session history
65
+ if tool_spans:
66
+ try:
67
+ tool_executions = [
68
+ ToolExecution(tool_call=ts.tool_call, tool_result=ts.tool_result)
69
+ for ts in tool_spans
70
+ ]
71
+ previous_turns.append(tool_executions)
72
+ except (AttributeError, TypeError, ValueError) as e:
73
+ logger.warning(f"Failed to create tool executions: {e}")
74
+
62
75
  trace_input = TraceLevelInput(
63
76
  span_info=span.span_info,
64
77
  agent_response=TextContent(text=span.agent_response),