strands-agents-evals 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- strands_agents_evals-0.1.0.dist-info/METADATA +408 -0
- strands_agents_evals-0.1.0.dist-info/RECORD +68 -0
- strands_agents_evals-0.1.0.dist-info/WHEEL +4 -0
- strands_agents_evals-0.1.0.dist-info/licenses/LICENSE +175 -0
- strands_agents_evals-0.1.0.dist-info/licenses/NOTICE +1 -0
- strands_evals/__init__.py +22 -0
- strands_evals/case.py +53 -0
- strands_evals/display/display_console.py +150 -0
- strands_evals/evaluators/__init__.py +23 -0
- strands_evals/evaluators/evaluator.py +182 -0
- strands_evals/evaluators/faithfulness_evaluator.py +116 -0
- strands_evals/evaluators/goal_success_rate_evaluator.py +90 -0
- strands_evals/evaluators/harmfulness_evaluator.py +135 -0
- strands_evals/evaluators/helpfulness_evaluator.py +148 -0
- strands_evals/evaluators/interactions_evaluator.py +244 -0
- strands_evals/evaluators/output_evaluator.py +72 -0
- strands_evals/evaluators/prompt_templates/case_prompt_template.py +63 -0
- strands_evals/evaluators/prompt_templates/faithfulness/__init__.py +11 -0
- strands_evals/evaluators/prompt_templates/faithfulness/faithfulness_v0.py +30 -0
- strands_evals/evaluators/prompt_templates/goal_success_rate/__init__.py +11 -0
- strands_evals/evaluators/prompt_templates/goal_success_rate/goal_success_rate_v0.py +17 -0
- strands_evals/evaluators/prompt_templates/harmfulness/__init__.py +11 -0
- strands_evals/evaluators/prompt_templates/harmfulness/harmfulness_v0.py +8 -0
- strands_evals/evaluators/prompt_templates/helpfulness/__init__.py +11 -0
- strands_evals/evaluators/prompt_templates/helpfulness/helpfulness_v0.py +38 -0
- strands_evals/evaluators/prompt_templates/prompt_templates.py +176 -0
- strands_evals/evaluators/prompt_templates/tool_parameter_accuracy/__init__.py +11 -0
- strands_evals/evaluators/prompt_templates/tool_parameter_accuracy/tool_parameter_accuracy_v0.py +40 -0
- strands_evals/evaluators/prompt_templates/tool_selection_accuracy/__init__.py +11 -0
- strands_evals/evaluators/prompt_templates/tool_selection_accuracy/tool_selection_accuracy_v0.py +23 -0
- strands_evals/evaluators/tool_parameter_accuracy_evaluator.py +112 -0
- strands_evals/evaluators/tool_selection_accuracy_evaluator.py +112 -0
- strands_evals/evaluators/trajectory_evaluator.py +100 -0
- strands_evals/experiment.py +652 -0
- strands_evals/extractors/__init__.py +3 -0
- strands_evals/extractors/graph_extractor.py +30 -0
- strands_evals/extractors/swarm_extractor.py +73 -0
- strands_evals/extractors/tools_use_extractor.py +164 -0
- strands_evals/extractors/trace_extractor.py +166 -0
- strands_evals/generators/__init__.py +3 -0
- strands_evals/generators/experiment_generator.py +498 -0
- strands_evals/generators/prompt_template/prompt_templates.py +75 -0
- strands_evals/generators/topic_planner.py +60 -0
- strands_evals/mappers/__init__.py +6 -0
- strands_evals/mappers/session_mapper.py +27 -0
- strands_evals/mappers/strands_in_memory_session_mapper.py +473 -0
- strands_evals/simulation/README.md +323 -0
- strands_evals/simulation/__init__.py +6 -0
- strands_evals/simulation/actor_simulator.py +292 -0
- strands_evals/simulation/profiles/__init__.py +5 -0
- strands_evals/simulation/profiles/actor_profile.py +26 -0
- strands_evals/simulation/prompt_templates/__init__.py +11 -0
- strands_evals/simulation/prompt_templates/actor_profile_extraction.py +25 -0
- strands_evals/simulation/prompt_templates/actor_system_prompt.py +64 -0
- strands_evals/simulation/prompt_templates/goal_completion.py +27 -0
- strands_evals/simulation/tools/__init__.py +5 -0
- strands_evals/simulation/tools/goal_completion.py +93 -0
- strands_evals/telemetry/__init__.py +15 -0
- strands_evals/telemetry/_cloudwatch_logger.py +209 -0
- strands_evals/telemetry/config.py +207 -0
- strands_evals/telemetry/tracer.py +38 -0
- strands_evals/tools/evaluation_tools.py +67 -0
- strands_evals/types/__init__.py +11 -0
- strands_evals/types/evaluation.py +105 -0
- strands_evals/types/evaluation_report.py +244 -0
- strands_evals/types/simulation/__init__.py +5 -0
- strands_evals/types/simulation/actor.py +34 -0
- strands_evals/types/trace.py +205 -0
|
@@ -0,0 +1,323 @@
|
|
|
1
|
+
# Actor Simulator
|
|
2
|
+
|
|
3
|
+
A framework for simulating realistic multi-turn conversations with AI-powered actors for agent evaluation.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
ActorSimulator creates realistic actor personas that interact with agents in multi-turn conversations. It automatically generates actor profiles from test cases, maintains conversation context, and produces contextually appropriate responses aligned with the actor's goals and traits.
|
|
8
|
+
|
|
9
|
+
## Quick Start
|
|
10
|
+
|
|
11
|
+
```python
|
|
12
|
+
from strands import Agent
|
|
13
|
+
from strands_evals import ActorSimulator, Case
|
|
14
|
+
|
|
15
|
+
# Create agent under test
|
|
16
|
+
agent = Agent(system_prompt="You are a helpful travel assistant.", callback_handler=None)
|
|
17
|
+
|
|
18
|
+
# Create test case
|
|
19
|
+
case = Case(
|
|
20
|
+
input="I want to plan a trip to Tokyo with hotel and activities",
|
|
21
|
+
metadata={"task_description": "Complete travel package arranged"}
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
# Create user simulator with max_turns
|
|
25
|
+
user_sim = ActorSimulator.from_case_for_user_simulator(case=case, max_turns=5)
|
|
26
|
+
|
|
27
|
+
# Run conversation
|
|
28
|
+
user_message = case.input
|
|
29
|
+
while user_sim.has_next():
|
|
30
|
+
agent_response = agent(user_message)
|
|
31
|
+
user_result = user_sim.act(str(agent_response))
|
|
32
|
+
user_message = str(user_result.structured_output.message)
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## How It Works
|
|
36
|
+
|
|
37
|
+
1. **Profile Generation**: Creates a realistic actor profile with traits, context, and goals from the test case
|
|
38
|
+
2. **Conversation Initialization**: Sets up conversation with a greeting and the actor's initial query
|
|
39
|
+
3. **Contextual Responses**: Generates responses that maintain consistency with the actor's profile and goals
|
|
40
|
+
4. **Goal Tracking**: Built-in tool allows actors to assess progress toward their goals
|
|
41
|
+
|
|
42
|
+
## API Reference
|
|
43
|
+
|
|
44
|
+
### ActorSimulator
|
|
45
|
+
|
|
46
|
+
Main class for simulating actor behavior in conversations.
|
|
47
|
+
|
|
48
|
+
#### Factory Method (Recommended)
|
|
49
|
+
|
|
50
|
+
```python
|
|
51
|
+
ActorSimulator.from_case_for_user_simulator(
|
|
52
|
+
case: Case,
|
|
53
|
+
system_prompt_template: str | None = None,
|
|
54
|
+
tools: list | None = None,
|
|
55
|
+
model: str | None = None,
|
|
56
|
+
max_turns: int = 10
|
|
57
|
+
) -> ActorSimulator
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
Creates an ActorSimulator configured as a user simulator from a test case. Automatically generates a realistic actor profile from `case.input` and optionally `case.metadata["task_description"]`.
|
|
61
|
+
|
|
62
|
+
**Parameters:**
|
|
63
|
+
- `case`: Test case with input (initial query) and optional task_description in metadata
|
|
64
|
+
- `system_prompt_template`: Custom system prompt template (uses default if None)
|
|
65
|
+
- `tools`: Additional tools for the actor (defaults to goal completion tool only)
|
|
66
|
+
- `model`: Model identifier (uses Strands default if None)
|
|
67
|
+
- `max_turns`: Maximum number of conversation turns (default: 10)
|
|
68
|
+
|
|
69
|
+
**Example:**
|
|
70
|
+
```python
|
|
71
|
+
case = Case(
|
|
72
|
+
input="I need help booking a flight to Paris",
|
|
73
|
+
metadata={"task_description": "Book round-trip flight under $800"}
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
user_sim = ActorSimulator.from_case_for_user_simulator(
|
|
77
|
+
case=case,
|
|
78
|
+
max_turns=5
|
|
79
|
+
)
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
#### Direct Initialization
|
|
83
|
+
|
|
84
|
+
```python
|
|
85
|
+
ActorSimulator(
|
|
86
|
+
actor_profile: ActorProfile,
|
|
87
|
+
initial_query: str,
|
|
88
|
+
system_prompt_template: str,
|
|
89
|
+
tools: list | None = None,
|
|
90
|
+
model: str | None = None,
|
|
91
|
+
max_turns: int = 10
|
|
92
|
+
)
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
Initialize with an existing actor profile. Use this when you have a pre-defined profile instead of generating one from a test case.
|
|
96
|
+
|
|
97
|
+
**Parameters:**
|
|
98
|
+
- `actor_profile`: ActorProfile object with traits, context, and actor_goal
|
|
99
|
+
- `initial_query`: The actor's first query or message
|
|
100
|
+
- `system_prompt_template`: Template string for actor behavior (formatted with profile)
|
|
101
|
+
- `tools`: Additional tools for the actor
|
|
102
|
+
- `model`: Model identifier
|
|
103
|
+
- `max_turns`: Maximum number of conversation turns (default: 10)
|
|
104
|
+
|
|
105
|
+
#### Methods
|
|
106
|
+
|
|
107
|
+
**`act(agent_message: str) -> AgentResult`**
|
|
108
|
+
|
|
109
|
+
Generate the actor's next message in response to the agent's message.
|
|
110
|
+
|
|
111
|
+
**Parameters:**
|
|
112
|
+
- `agent_message`: The agent's response to react to
|
|
113
|
+
|
|
114
|
+
**Returns:**
|
|
115
|
+
- `AgentResult` containing the actor's structured response with reasoning and message
|
|
116
|
+
|
|
117
|
+
**Example:**
|
|
118
|
+
```python
|
|
119
|
+
agent_response = agent("I can help you book that flight")
|
|
120
|
+
user_result = user_sim.act(str(agent_response))
|
|
121
|
+
user_message = str(user_result.structured_output.message)
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
**`has_next() -> bool`**
|
|
125
|
+
|
|
126
|
+
Check if the conversation should continue. Returns False if the stop token (`<stop/>`) is present in the last message or if the maximum number of turns has been reached.
|
|
127
|
+
|
|
128
|
+
**Returns:**
|
|
129
|
+
- `True` if the conversation should continue, `False` otherwise
|
|
130
|
+
|
|
131
|
+
**Example:**
|
|
132
|
+
```python
|
|
133
|
+
while user_sim.has_next():
|
|
134
|
+
agent_response = agent(user_message)
|
|
135
|
+
user_result = user_sim.act(str(agent_response))
|
|
136
|
+
user_message = str(user_result.structured_output.message)
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
### Data Models
|
|
140
|
+
|
|
141
|
+
**ActorProfile:**
|
|
142
|
+
```python
|
|
143
|
+
class ActorProfile(BaseModel):
|
|
144
|
+
traits: dict[str, Any] # Actor characteristics and personality
|
|
145
|
+
context: str # Background information and situation
|
|
146
|
+
actor_goal: str # What the actor wants to achieve
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
**ActorResponse:**
|
|
150
|
+
```python
|
|
151
|
+
class ActorResponse(BaseModel):
|
|
152
|
+
reasoning: str # Actor's internal reasoning process
|
|
153
|
+
message: str # The actual message to send
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
## Usage Examples
|
|
157
|
+
|
|
158
|
+
### Complete Multi-Turn Conversation Example
|
|
159
|
+
|
|
160
|
+
```python
|
|
161
|
+
from strands import Agent
|
|
162
|
+
from strands_evals import ActorSimulator, Case
|
|
163
|
+
|
|
164
|
+
# Create agent under test
|
|
165
|
+
agent = Agent(system_prompt="You are a helpful travel assistant.", callback_handler=None)
|
|
166
|
+
|
|
167
|
+
# Create test case
|
|
168
|
+
case = Case(
|
|
169
|
+
input="I want to plan a trip to Tokyo with hotel and activities",
|
|
170
|
+
metadata={"task_description": "Complete travel package arranged"}
|
|
171
|
+
)
|
|
172
|
+
|
|
173
|
+
# Create user simulator
|
|
174
|
+
user_sim = ActorSimulator.from_case_for_user_simulator(case=case, max_turns=5)
|
|
175
|
+
|
|
176
|
+
# Run conversation
|
|
177
|
+
conversation = []
|
|
178
|
+
user_message = case.input
|
|
179
|
+
|
|
180
|
+
while user_sim.has_next():
|
|
181
|
+
# Agent responds
|
|
182
|
+
agent_response = agent(user_message)
|
|
183
|
+
agent_message = str(agent_response)
|
|
184
|
+
conversation.append({"role": "assistant", "content": agent_message})
|
|
185
|
+
|
|
186
|
+
# User responds
|
|
187
|
+
user_result = user_sim.act(agent_message)
|
|
188
|
+
user_message = str(user_result.structured_output.message)
|
|
189
|
+
conversation.append({"role": "user", "content": user_message})
|
|
190
|
+
|
|
191
|
+
print(f"Conversation completed in {len(conversation) // 2} turns")
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
### Custom Actor Profile
|
|
195
|
+
|
|
196
|
+
```python
|
|
197
|
+
from strands_evals.types.simulation import ActorProfile
|
|
198
|
+
from strands_evals.simulation.prompt_templates.actor_system_prompt import (
|
|
199
|
+
DEFAULT_USER_SIMULATOR_PROMPT_TEMPLATE
|
|
200
|
+
)
|
|
201
|
+
|
|
202
|
+
# Create custom actor profile
|
|
203
|
+
actor_profile = ActorProfile(
|
|
204
|
+
traits={
|
|
205
|
+
"personality": "analytical and detail-oriented",
|
|
206
|
+
"communication_style": "direct and concise",
|
|
207
|
+
"technical_level": "expert"
|
|
208
|
+
},
|
|
209
|
+
context="Experienced business traveler with elite status",
|
|
210
|
+
actor_goal="Book business class flight with specific seat preferences"
|
|
211
|
+
)
|
|
212
|
+
|
|
213
|
+
# Initialize with custom profile
|
|
214
|
+
user_sim = ActorSimulator(
|
|
215
|
+
actor_profile=actor_profile,
|
|
216
|
+
initial_query="I need to book a business class flight to London",
|
|
217
|
+
system_prompt_template=DEFAULT_USER_SIMULATOR_PROMPT_TEMPLATE,
|
|
218
|
+
max_turns=15
|
|
219
|
+
)
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
## Tools
|
|
223
|
+
|
|
224
|
+
### Built-in Goal Completion Tool
|
|
225
|
+
|
|
226
|
+
ActorSimulator automatically includes a goal completion assessment tool that actors can use to evaluate their progress:
|
|
227
|
+
|
|
228
|
+
```python
|
|
229
|
+
from strands_evals.simulation.tools.goal_completion import (
|
|
230
|
+
get_conversation_goal_completion
|
|
231
|
+
)
|
|
232
|
+
|
|
233
|
+
# The actor can call this tool during conversation to assess progress
|
|
234
|
+
assessment = get_conversation_goal_completion(
|
|
235
|
+
initial_goal="Book a flight to Tokyo",
|
|
236
|
+
conversation=[
|
|
237
|
+
{"role": "user", "content": "I need a flight to Tokyo"},
|
|
238
|
+
{"role": "assistant", "content": "I can help with that..."}
|
|
239
|
+
]
|
|
240
|
+
)
|
|
241
|
+
# Returns assessment with score and reasoning
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
### Adding Custom Tools
|
|
245
|
+
|
|
246
|
+
Extend actor capabilities with custom tools:
|
|
247
|
+
|
|
248
|
+
```python
|
|
249
|
+
from strands import tool
|
|
250
|
+
|
|
251
|
+
@tool
|
|
252
|
+
def check_booking_status(booking_id: str) -> str:
|
|
253
|
+
"""Check the status of a booking."""
|
|
254
|
+
return f"Booking {booking_id} is confirmed"
|
|
255
|
+
|
|
256
|
+
# Add custom tools to the simulator
|
|
257
|
+
user_sim = ActorSimulator.from_case_for_user_simulator(
|
|
258
|
+
case=case,
|
|
259
|
+
tools=[check_booking_status]
|
|
260
|
+
)
|
|
261
|
+
```
|
|
262
|
+
|
|
263
|
+
## Advanced Configuration
|
|
264
|
+
|
|
265
|
+
### Custom System Prompt Templates
|
|
266
|
+
|
|
267
|
+
Customize actor behavior with a custom system prompt template. The template must contain an `{actor_profile}` placeholder, which is filled in with the actor profile (as a dictionary) via `str.format`:
|
|
268
|
+
|
|
269
|
+
```python
|
|
270
|
+
custom_prompt_template = """
|
|
271
|
+
You are simulating a user with the following profile:
|
|
272
|
+
{actor_profile}
|
|
273
|
+
|
|
274
|
+
Behavior guidelines:
|
|
275
|
+
- Be persistent but professional
|
|
276
|
+
- Express concerns clearly
|
|
277
|
+
- Stay focused on your goal
|
|
278
|
+
|
|
279
|
+
Respond naturally based on your profile and the conversation context.
|
|
280
|
+
"""
|
|
281
|
+
|
|
282
|
+
user_sim = ActorSimulator.from_case_for_user_simulator(
|
|
283
|
+
case=case,
|
|
284
|
+
system_prompt_template=custom_prompt_template
|
|
285
|
+
)
|
|
286
|
+
```
|
|
287
|
+
|
|
288
|
+
### Conversation Initialization
|
|
289
|
+
|
|
290
|
+
ActorSimulator automatically initializes conversations with a random greeting from a predefined set:
|
|
291
|
+
|
|
292
|
+
```python
|
|
293
|
+
# Built-in greetings:
|
|
294
|
+
# - "hi! how can I help you today?"
|
|
295
|
+
# - "hello! what can I assist you with?"
|
|
296
|
+
# - "hi there! how may I help you?"
|
|
297
|
+
# - "good day! what can I do for you?"
|
|
298
|
+
# - "hello! what would you like to know?"
|
|
299
|
+
|
|
300
|
+
# The conversation starts with (roles are recorded from the simulated actor's point of view, so they appear inverted):
|
|
301
|
+
# 1. Random greeting (as user message)
|
|
302
|
+
# 2. Actor's initial query (as assistant message)
|
|
303
|
+
```
|
|
304
|
+
|
|
305
|
+
### Model Selection
|
|
306
|
+
|
|
307
|
+
Specify a custom model for the actor simulator:
|
|
308
|
+
|
|
309
|
+
```python
|
|
310
|
+
user_sim = ActorSimulator.from_case_for_user_simulator(
|
|
311
|
+
case=case,
|
|
312
|
+
model="anthropic.claude-3-5-sonnet-20241022-v2:0",
|
|
313
|
+
max_turns=10
|
|
314
|
+
)
|
|
315
|
+
```
|
|
316
|
+
|
|
317
|
+
## Best Practices
|
|
318
|
+
|
|
319
|
+
1. **Include Task Description**: Add `task_description` in case metadata for better goal generation
|
|
320
|
+
2. **Set max_turns**: Configure `max_turns` during initialization to prevent infinite conversations
|
|
321
|
+
3. **Use has_next()**: Always use `has_next()` in your conversation loop to respect turn limits and stop tokens
|
|
322
|
+
4. **Track Conversation**: Append messages to a conversation list for evaluation and debugging
|
|
323
|
+
5. **Access Structured Output**: Use `result.structured_output.message` to get the actor's message and `result.structured_output.reasoning` to see internal reasoning
|
|
@@ -0,0 +1,292 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import random
|
|
3
|
+
|
|
4
|
+
from strands import Agent
|
|
5
|
+
from strands.agent.agent_result import AgentResult
|
|
6
|
+
from strands.types.content import Message
|
|
7
|
+
from typing_extensions import cast
|
|
8
|
+
|
|
9
|
+
from strands_evals.case import Case
|
|
10
|
+
from strands_evals.simulation.profiles.actor_profile import DEFAULT_USER_PROFILE_SCHEMA
|
|
11
|
+
from strands_evals.simulation.prompt_templates.actor_profile_extraction import ACTOR_PROFILE_PROMPT_TEMPLATE
|
|
12
|
+
from strands_evals.simulation.prompt_templates.actor_system_prompt import DEFAULT_USER_SIMULATOR_PROMPT_TEMPLATE
|
|
13
|
+
from strands_evals.simulation.tools.goal_completion import get_conversation_goal_completion
|
|
14
|
+
from strands_evals.types.simulation import ActorProfile, ActorResponse
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class ActorSimulator:
    """
    Simulates an actor in multi-turn conversations for agent evaluation.

    ActorSimulator wraps a Strands Agent configured to behave as a specific actor
    (typically a user) in conversation scenarios. It maintains conversation history,
    generates contextually appropriate responses, and can assess goal completion.

    Attributes:
        agent: The underlying Strands Agent configured with actor behavior.
        actor_profile: The actor's profile containing traits, context, and goal.
        initial_query: The actor's first query in the conversation.
        conversation_history: List of conversation messages in Strands format.
        model_id: Model identifier for the underlying agent.
    """

    # Opening lines used to seed the conversation; one is picked at random per simulator.
    INITIAL_GREETINGS = [
        "hi! how can I help you today?",
        "hello! what can I assist you with?",
        "hi there! how may I help you?",
        "good day! what can I do for you?",
        "hello! what would you like to know?",
    ]

    @classmethod
    def from_case_for_user_simulator(
        cls,
        case: Case,
        system_prompt_template: str | None = None,
        tools: list | None = None,
        model: str | None = None,
        max_turns: int = 10,
    ) -> "ActorSimulator":
        """
        Create an ActorSimulator configured as a user simulator from a test case.

        Generates a realistic user profile and goal from case.input and optionally
        case.metadata["task_description"], then configures the simulator with
        user-specific defaults. If you already have a profile, use __init__() directly.

        Args:
            case: Test case containing input (initial query) and optional metadata with "task_description".
            system_prompt_template: Custom system prompt template. Uses DEFAULT_USER_SIMULATOR_PROMPT_TEMPLATE if None.
            tools: Additional tools available to the user. Defaults to goal completion tool only.
            model: Model identifier used both for generating the profile and for the underlying
                actor agent. Uses Strands default if None.
            max_turns: Maximum number of conversation turns before stopping (default: 10).

        Returns:
            ActorSimulator configured for user simulation.

        Example:
            ```python
            from strands_evals import Case, ActorSimulator
            from strands import Agent

            # Create test case
            case = Case(
                input="I need to book a flight to Paris",
                metadata={"task_description": "Flight booking confirmed"}
            )

            # Create user simulator
            user_sim = ActorSimulator.from_case_for_user_simulator(
                case=case,
                max_turns=5
            )

            # Create target agent to evaluate
            agent = Agent(system_prompt="You are a travel assistant.")

            # Run conversation
            user_message = case.input
            while user_sim.has_next():
                agent_response = agent(user_message)
                user_result = user_sim.act(str(agent_response))
                user_message = str(user_result.structured_output.message)
            ```
        """
        # Generate the profile with the same model the caller selected for the simulator,
        # so a custom model choice is honoured for every LLM call made here.
        actor_profile = cls._generate_profile_from_case(case, model=model)

        if system_prompt_template is None:
            system_prompt_template = DEFAULT_USER_SIMULATOR_PROMPT_TEMPLATE

        return cls(
            actor_profile=actor_profile,
            initial_query=case.input,
            system_prompt_template=system_prompt_template,
            tools=tools,
            model=model,
            max_turns=max_turns,
        )

    @staticmethod
    def _generate_profile_from_case(case: Case, model: str | None = None) -> ActorProfile:
        """
        Generate user profile from case.

        Private helper for from_case_for_user_simulator factory method.
        Uses case.input and optionally case.metadata["task_description"] if present.

        Args:
            case: Test case with input and optional task_description in metadata.
            model: Model identifier for the profile-generation agent. Uses Strands default if None.

        Returns:
            ActorProfile with generated traits, context, and goal.
        """
        initial_query = case.input
        # Missing or empty metadata simply means there is no task description to add.
        task_description = case.metadata.get("task_description", "") if case.metadata else ""

        profile_prompt = ACTOR_PROFILE_PROMPT_TEMPLATE.format(
            initial_query=initial_query,
            task_description=task_description,
            example=DEFAULT_USER_PROFILE_SCHEMA,
        )
        profile_agent = Agent(model=model, callback_handler=None)
        result = profile_agent(profile_prompt, structured_output_model=ActorProfile)
        return cast(ActorProfile, result.structured_output)

    def __init__(
        self,
        actor_profile: ActorProfile,
        initial_query: str,
        system_prompt_template: str,
        tools: list | None = None,
        model: str | None = None,
        max_turns: int = 10,
    ):
        """
        Initialize an ActorSimulator with profile and goal.

        Use this constructor when you have a pre-defined ActorProfile. For automatic
        profile generation from test cases, use from_case_for_user_simulator() instead.

        Args:
            actor_profile: ActorProfile object containing traits, context, and actor_goal.
            initial_query: The actor's first query or message.
            system_prompt_template: Template string for system prompt. Must include {actor_profile} placeholder.
            tools: Additional tools available to the actor. Defaults to goal completion tool only.
            model: Model identifier for the underlying agent. Uses Strands default if None.
            max_turns: Maximum number of conversation turns before stopping (default: 10).

        Example:
            ```python
            from strands_evals.simulation import ActorSimulator
            from strands_evals.types.simulation import ActorProfile

            # Define custom actor profile
            profile = ActorProfile(
                traits={
                    "expertise_level": "expert",
                    "communication_style": "technical"
                },
                context="A software engineer debugging a production issue.",
                actor_goal="Identify and resolve the memory leak."
            )

            # Create simulator with custom profile
            simulator = ActorSimulator(
                actor_profile=profile,
                initial_query="Our service is experiencing high memory usage.",
                system_prompt_template="You are simulating: {actor_profile}",
                max_turns=15
            )
            ```
        """
        self.actor_profile = actor_profile
        self.initial_query = initial_query
        self.conversation_history: list[Message] = []
        self.model_id = model
        self._turn_count = 0
        self._last_message = ""
        self._max_turns = max_turns

        system_prompt = system_prompt_template.format(actor_profile=actor_profile.model_dump())

        # The goal completion tool is always available; caller-supplied tools are appended to it.
        all_tools = [get_conversation_goal_completion]
        if tools:
            all_tools.extend(tools)

        # Seed the history before the agent is created so the agent starts with it.
        self._initialize_conversation()

        self.agent = Agent(
            system_prompt=system_prompt,
            messages=self.conversation_history,
            tools=all_tools,
            model=self.model_id,
            callback_handler=None,
        )

    def _initialize_conversation(self):
        """
        Initialize the conversation history with a greeting and initial query.

        Appends a random greeting followed by the actor's initial query. Roles are
        recorded from the simulator agent's point of view: the greeting is stored with
        role "user" and the actor's own initial query with role "assistant".

        Note: This is a private method called during initialization.
        """
        selected_greeting = random.choice(self.INITIAL_GREETINGS)
        greeting_message = {"role": "user", "content": [{"text": selected_greeting}]}
        self.conversation_history.append(greeting_message)

        initial_query_message = {"role": "assistant", "content": [{"text": self.initial_query.strip()}]}
        self.conversation_history.append(initial_query_message)

    def act(self, agent_message: str) -> AgentResult:
        """
        Generate the next actor message in the conversation.

        Processes the agent's message and generates a contextually appropriate
        response from the actor's perspective, maintaining consistency with the actor's
        profile and goal. The response includes reasoning about the actor's thought
        process and the actual message to send.

        Args:
            agent_message: The agent's response to react to (required).

        Returns:
            AgentResult containing the actor's structured response with:
            - structured_output.reasoning: Actor's internal reasoning
            - structured_output.message: Actor's response message

        Example:
            ```python
            # Agent responds to user
            agent_response = agent("I need help booking a flight")

            # User simulator generates next message
            user_result = user_sim.act(str(agent_response))

            # Access the response
            print(user_result.structured_output.reasoning)  # Why the actor responded this way
            print(user_result.structured_output.message)  # The actual message

            # Continue conversation
            next_message = str(user_result.structured_output.message)
            ```
        """
        response = self.agent(agent_message.strip(), structured_output_model=ActorResponse)
        self._turn_count += 1
        # Remember the latest actor message so has_next() can look for the stop token.
        self._last_message = str(cast(ActorResponse, response.structured_output).message)
        return response

    def has_next(self) -> bool:
        """
        Check if the conversation should continue.

        Returns False if the stop token (<stop/>) is present in the last message or if
        the maximum number of turns has been reached. Use this in a loop to control
        multi-turn conversations.

        Returns:
            True if the conversation should continue, False otherwise.

        Example:
            ```python
            user_message = case.input

            # Continue conversation until completion
            while user_sim.has_next():
                agent_response = agent(user_message)
                user_result = user_sim.act(str(agent_response))
                user_message = str(user_result.structured_output.message)

            # Conversation ended either by:
            # - Actor including <stop/> token in message
            # - Reaching max_turns limit
            ```
        """
        if self._turn_count >= self._max_turns:
            return False
        return "<stop/>" not in self._last_message
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Actor profile templates for simulation.
|
|
3
|
+
|
|
4
|
+
This module provides actor profile structures used as templates
|
|
5
|
+
for generating realistic actor profiles in conversation simulation.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
# Example profile that is formatted into the profile-generation prompt. Its top-level
# keys match the ActorProfile fields (traits, context, actor_goal), because the prompt
# asks the model to return exactly the keys shown in this example.
DEFAULT_USER_PROFILE_SCHEMA = {
    "traits": {
        "personal_profile": {
            "identity": {
                "first_name": "User",
                "last_name": "Default",
                "preferred_name": "User",
                "gender": "other",
                "birthdate": "1990-01-01",
                "email": "user@example.com",
            },
            "location": {"address1": "123 Main St", "city": "Default City", "province": "CA", "country": "USA"},
            "languages": [{"language": "English", "proficiency": "Advanced"}],
        },
        "persona": "Friendly and helpful user seeking assistance with general topics.",
        "supplementary_profile": "Default user profile for simulation.",
    },
    "context": "some context",
    "actor_goal": "some goal",
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
"""Prompt templates for actor simulation."""
|
|
2
|
+
|
|
3
|
+
from .actor_profile_extraction import ACTOR_PROFILE_PROMPT_TEMPLATE
|
|
4
|
+
from .actor_system_prompt import DEFAULT_USER_SIMULATOR_PROMPT_TEMPLATE
|
|
5
|
+
from .goal_completion import GOAL_COMPLETION_PROMPT
|
|
6
|
+
|
|
7
|
+
# Names re-exported as the public API of the prompt-template package.
__all__ = ["ACTOR_PROFILE_PROMPT_TEMPLATE", "DEFAULT_USER_SIMULATOR_PROMPT_TEMPLATE", "GOAL_COMPLETION_PROMPT"]
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Prompt template for actor profile generation.
|
|
3
|
+
|
|
4
|
+
This module contains the prompt template used to generate realistic actor profiles
|
|
5
|
+
from scenario information for conversation simulation.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from textwrap import dedent
|
|
9
|
+
|
|
10
|
+
# Prompt used to generate an actor profile. It is filled in with str.format and
# expects three fields: `initial_query`, `task_description`, and `example`.
ACTOR_PROFILE_PROMPT_TEMPLATE = dedent("""Generate exactly 1 realistic actor profile for the following task:

Actor's Initial Query: {initial_query}
Task Description: {task_description}

Generate a complete actor profile with the following structure:
1. Traits: Key traits (as key-value pairs)
2. Context: Background context (as a paragraph in 2-3 sentences)
3. Actor Goal: What the actor ultimately wants to achieve in this interaction - should be
specific, actionable, and written from the actor's perspective

IMPORTANT: Return JSON in the following format! IT MUST HAVE THE EXACT STRUCTURE YOU SEE HERE WITH EXACTLY THESE KEYS.

{example}

Be specific and realistic.""")
|