synkro-0.4.12-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. synkro/__init__.py +179 -0
  2. synkro/advanced.py +186 -0
  3. synkro/cli.py +128 -0
  4. synkro/core/__init__.py +7 -0
  5. synkro/core/checkpoint.py +250 -0
  6. synkro/core/dataset.py +402 -0
  7. synkro/core/policy.py +337 -0
  8. synkro/errors.py +178 -0
  9. synkro/examples/__init__.py +148 -0
  10. synkro/factory.py +276 -0
  11. synkro/formatters/__init__.py +12 -0
  12. synkro/formatters/qa.py +98 -0
  13. synkro/formatters/sft.py +90 -0
  14. synkro/formatters/tool_call.py +127 -0
  15. synkro/generation/__init__.py +9 -0
  16. synkro/generation/follow_ups.py +134 -0
  17. synkro/generation/generator.py +220 -0
  18. synkro/generation/golden_responses.py +244 -0
  19. synkro/generation/golden_scenarios.py +276 -0
  20. synkro/generation/golden_tool_responses.py +416 -0
  21. synkro/generation/logic_extractor.py +126 -0
  22. synkro/generation/multiturn_responses.py +177 -0
  23. synkro/generation/planner.py +131 -0
  24. synkro/generation/responses.py +189 -0
  25. synkro/generation/scenarios.py +90 -0
  26. synkro/generation/tool_responses.py +376 -0
  27. synkro/generation/tool_simulator.py +114 -0
  28. synkro/interactive/__init__.py +12 -0
  29. synkro/interactive/hitl_session.py +77 -0
  30. synkro/interactive/logic_map_editor.py +173 -0
  31. synkro/interactive/rich_ui.py +205 -0
  32. synkro/llm/__init__.py +7 -0
  33. synkro/llm/client.py +235 -0
  34. synkro/llm/rate_limits.py +95 -0
  35. synkro/models/__init__.py +43 -0
  36. synkro/models/anthropic.py +26 -0
  37. synkro/models/google.py +19 -0
  38. synkro/models/openai.py +31 -0
  39. synkro/modes/__init__.py +15 -0
  40. synkro/modes/config.py +66 -0
  41. synkro/modes/qa.py +18 -0
  42. synkro/modes/sft.py +18 -0
  43. synkro/modes/tool_call.py +18 -0
  44. synkro/parsers.py +442 -0
  45. synkro/pipeline/__init__.py +20 -0
  46. synkro/pipeline/phases.py +592 -0
  47. synkro/pipeline/runner.py +424 -0
  48. synkro/pipelines.py +123 -0
  49. synkro/prompts/__init__.py +57 -0
  50. synkro/prompts/base.py +167 -0
  51. synkro/prompts/golden_templates.py +474 -0
  52. synkro/prompts/interactive_templates.py +65 -0
  53. synkro/prompts/multiturn_templates.py +156 -0
  54. synkro/prompts/qa_templates.py +97 -0
  55. synkro/prompts/templates.py +281 -0
  56. synkro/prompts/tool_templates.py +201 -0
  57. synkro/quality/__init__.py +14 -0
  58. synkro/quality/golden_refiner.py +163 -0
  59. synkro/quality/grader.py +153 -0
  60. synkro/quality/multiturn_grader.py +150 -0
  61. synkro/quality/refiner.py +137 -0
  62. synkro/quality/tool_grader.py +126 -0
  63. synkro/quality/tool_refiner.py +128 -0
  64. synkro/quality/verifier.py +228 -0
  65. synkro/reporting.py +537 -0
  66. synkro/schemas.py +472 -0
  67. synkro/types/__init__.py +41 -0
  68. synkro/types/core.py +126 -0
  69. synkro/types/dataset_type.py +30 -0
  70. synkro/types/logic_map.py +345 -0
  71. synkro/types/tool.py +94 -0
  72. synkro-0.4.12.data/data/examples/__init__.py +148 -0
  73. synkro-0.4.12.dist-info/METADATA +258 -0
  74. synkro-0.4.12.dist-info/RECORD +77 -0
  75. synkro-0.4.12.dist-info/WHEEL +4 -0
  76. synkro-0.4.12.dist-info/entry_points.txt +2 -0
  77. synkro-0.4.12.dist-info/licenses/LICENSE +21 -0
synkro/generation/golden_scenarios.py
@@ -0,0 +1,276 @@
"""Golden Scenario Generator - The Adversary.

Generates typed scenarios (positive, negative, edge_case, irrelevant)
with explicit rule targeting. This is Stage 2 of the Golden Trace pipeline.
"""

import asyncio
from typing import Literal

from synkro.llm.client import LLM
from synkro.models import Model, OpenAI
from synkro.schemas import GoldenScenariosArray
from synkro.types.core import Category
from synkro.types.logic_map import LogicMap, GoldenScenario, ScenarioType
from synkro.prompts.golden_templates import (
    GOLDEN_SCENARIO_PROMPT,
    POSITIVE_SCENARIO_INSTRUCTIONS,
    NEGATIVE_SCENARIO_INSTRUCTIONS,
    EDGE_CASE_SCENARIO_INSTRUCTIONS,
    IRRELEVANT_SCENARIO_INSTRUCTIONS,
)


# Default scenario type distribution
DEFAULT_DISTRIBUTION = {
    ScenarioType.POSITIVE: 0.35,    # 35% happy path
    ScenarioType.NEGATIVE: 0.30,    # 30% violations
    ScenarioType.EDGE_CASE: 0.25,   # 25% edge cases
    ScenarioType.IRRELEVANT: 0.10,  # 10% out of scope
}
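# With the default split above, _calculate_type_distribution() (defined below)
# turns a request for 10 scenarios into POSITIVE=4, NEGATIVE=3, EDGE_CASE=2,
# IRRELEVANT=1: each ratio is rounded per type and the last type absorbs the
# remainder so the counts always sum to the requested total. For requests of
# 3 or fewer, the priority order is filled one scenario at a time, so the
# IRRELEVANT type is dropped entirely.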

TYPE_INSTRUCTIONS = {
    ScenarioType.POSITIVE: POSITIVE_SCENARIO_INSTRUCTIONS,
    ScenarioType.NEGATIVE: NEGATIVE_SCENARIO_INSTRUCTIONS,
    ScenarioType.EDGE_CASE: EDGE_CASE_SCENARIO_INSTRUCTIONS,
    ScenarioType.IRRELEVANT: IRRELEVANT_SCENARIO_INSTRUCTIONS,
}


class GoldenScenarioGenerator:
    """
    The Adversary - Generates typed scenarios with rule targeting.

    Produces scenarios across four types:
    - POSITIVE (35%): Happy path, all criteria met
    - NEGATIVE (30%): Violation, exactly one criterion fails
    - EDGE_CASE (25%): Boundary conditions, exact limits
    - IRRELEVANT (10%): Outside policy scope

    Each scenario includes:
    - Target rule IDs it's designed to test
    - Expected outcome based on the rules
    - Scenario type for classification

    Examples:
        >>> generator = GoldenScenarioGenerator(llm=LLM(model=OpenAI.GPT_4O_MINI))
        >>> scenarios = await generator.generate(
        ...     policy_text="...",
        ...     logic_map=logic_map,
        ...     category=category,
        ...     count=10,
        ... )
    """

    def __init__(
        self,
        llm: LLM | None = None,
        model: Model = OpenAI.GPT_4O_MINI,
        distribution: dict[ScenarioType, float] | None = None,
    ):
        """
        Initialize the Golden Scenario Generator.

        Args:
            llm: LLM client to use (creates one if not provided)
            model: Model to use if creating LLM
            distribution: Custom scenario type distribution (defaults to 35/30/25/10)
        """
        self.llm = llm or LLM(model=model, temperature=0.8)
        self.distribution = distribution or DEFAULT_DISTRIBUTION

    async def generate(
        self,
        policy_text: str,
        logic_map: LogicMap,
        category: Category,
        count: int,
    ) -> list[GoldenScenario]:
        """
        Generate scenarios for a category with balanced type distribution.

        Args:
            policy_text: The policy document text
            logic_map: The extracted Logic Map (DAG of rules)
            category: The category to generate scenarios for
            count: Total number of scenarios to generate

        Returns:
            List of GoldenScenarios with type distribution
        """
        # Calculate counts per type based on distribution
        type_counts = self._calculate_type_distribution(count)

        # Generate scenarios for each type in parallel
        tasks = []
        for scenario_type, type_count in type_counts.items():
            if type_count > 0:
                task = self._generate_type(
                    policy_text=policy_text,
                    logic_map=logic_map,
                    category=category,
                    scenario_type=scenario_type,
                    count=type_count,
                )
                tasks.append(task)

        # Gather all results
        results = await asyncio.gather(*tasks)

        # Flatten and return
        scenarios = []
        for batch in results:
            scenarios.extend(batch)

        return scenarios

    def _calculate_type_distribution(self, total: int) -> dict[ScenarioType, int]:
        """Calculate how many scenarios of each type to generate."""
        counts = {}
        remaining = total

        # For small counts, prioritize non-IRRELEVANT types
        # IRRELEVANT should only appear when we have enough scenarios
        priority_order = [
            ScenarioType.POSITIVE,
            ScenarioType.NEGATIVE,
            ScenarioType.EDGE_CASE,
            ScenarioType.IRRELEVANT,  # Last priority
        ]

        if total <= 3:
            # For very small counts, assign one to each priority type until exhausted
            for stype in priority_order:
                if remaining > 0:
                    counts[stype] = 1
                    remaining -= 1
                else:
                    counts[stype] = 0
        else:
            # Normal distribution for larger counts
            for i, (stype, ratio) in enumerate(self.distribution.items()):
                if i == len(self.distribution) - 1:
                    # Last type gets remaining to ensure total is exact
                    counts[stype] = remaining
                else:
                    count = round(total * ratio)
                    counts[stype] = count
                    remaining -= count

        return counts

    async def _generate_type(
        self,
        policy_text: str,
        logic_map: LogicMap,
        category: Category,
        scenario_type: ScenarioType,
        count: int,
    ) -> list[GoldenScenario]:
        """Generate scenarios of a specific type."""
        # Get type-specific instructions
        type_instructions = TYPE_INSTRUCTIONS[scenario_type]

        # Format Logic Map for prompt
        logic_map_str = self._format_logic_map(logic_map)

        # Build prompt
        prompt = GOLDEN_SCENARIO_PROMPT.format(
            scenario_type=scenario_type.value.upper(),
            policy_text=policy_text,
            logic_map=logic_map_str,
            category=category.name,
            count=count,
            type_specific_instructions=type_instructions,
        )

        # Generate structured output
        result = await self.llm.generate_structured(prompt, GoldenScenariosArray)

        # Convert to domain models
        scenarios = []
        for s in result.scenarios:
            scenario = GoldenScenario(
                description=s.description,
                context=s.context,
                category=category.name,
                scenario_type=ScenarioType(s.scenario_type),
                target_rule_ids=s.target_rule_ids,
                expected_outcome=s.expected_outcome,
            )
            scenarios.append(scenario)

        # Enforce requested count (LLM may return more or fewer)
        return scenarios[:count]

    def _format_logic_map(self, logic_map: LogicMap) -> str:
        """Format Logic Map for prompt inclusion."""
        lines = []
        lines.append("RULES:")
        for rule in logic_map.rules:
            deps = f" (depends on: {', '.join(rule.dependencies)})" if rule.dependencies else ""
            lines.append(
                f" {rule.rule_id} [{rule.category.value}]: {rule.text}{deps}"
            )

        lines.append("\nROOT RULES (Entry Points):")
        lines.append(f" {', '.join(logic_map.root_rules)}")

        return "\n".join(lines)

    async def generate_for_categories(
        self,
        policy_text: str,
        logic_map: LogicMap,
        categories: list[Category],
    ) -> tuple[list[GoldenScenario], dict[str, int]]:
        """
        Generate scenarios for multiple categories with distribution tracking.

        Args:
            policy_text: The policy document text
            logic_map: The extracted Logic Map
            categories: List of categories with counts

        Returns:
            Tuple of (all scenarios, type distribution counts)
        """
        # Generate for each category in parallel
        tasks = [
            self.generate(policy_text, logic_map, cat, cat.count)
            for cat in categories
        ]
        results = await asyncio.gather(*tasks)

        # Flatten scenarios
        all_scenarios = []
        for batch in results:
            all_scenarios.extend(batch)

        # Calculate distribution
        distribution = {
            ScenarioType.POSITIVE.value: 0,
            ScenarioType.NEGATIVE.value: 0,
            ScenarioType.EDGE_CASE.value: 0,
            ScenarioType.IRRELEVANT.value: 0,
        }
        for s in all_scenarios:
            distribution[s.scenario_type.value] += 1

        return all_scenarios, distribution

    def get_distribution_summary(self, scenarios: list[GoldenScenario]) -> dict[str, int]:
        """Get a summary of scenario type distribution."""
        distribution = {
            "positive": 0,
            "negative": 0,
            "edge_case": 0,
            "irrelevant": 0,
        }
        for s in scenarios:
            distribution[s.scenario_type.value] += 1
        return distribution


__all__ = ["GoldenScenarioGenerator", "DEFAULT_DISTRIBUTION"]
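For orientation, a minimal usage sketch of the generator above (not part of the packaged file): the policy text, LogicMap, and Category objects are placeholders assumed to come from the earlier pipeline stages, and their construction is not shown.

from synkro.llm.client import LLM
from synkro.models import OpenAI
from synkro.generation.golden_scenarios import GoldenScenarioGenerator

async def build_scenarios(policy_text, logic_map, categories):
    # policy_text (str), logic_map (LogicMap), and categories (list[Category])
    # are placeholders produced by earlier stages of the pipeline.
    generator = GoldenScenarioGenerator(llm=LLM(model=OpenAI.GPT_4O_MINI))
    scenarios, distribution = await generator.generate_for_categories(
        policy_text, logic_map, categories
    )
    # distribution maps each scenario type to how many scenarios were produced,
    # e.g. {"positive": 4, "negative": 3, "edge_case": 2, "irrelevant": 1}.
    return scenarios, distribution

# Run with: asyncio.run(build_scenarios(policy_text, logic_map, categories))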
synkro/generation/golden_tool_responses.py
@@ -0,0 +1,416 @@
"""Golden Tool Response Generator - The Thinker for Tool Calls.

Generates tool call traces with grounded reasoning and rule citations.
This is Stage 3 of the Golden Trace pipeline for TOOL_CALL datasets.
"""

import json
import uuid
import asyncio
from typing import TYPE_CHECKING

from pydantic import BaseModel, Field

from synkro.llm.client import LLM
from synkro.models import Model, OpenAI
from synkro.types.core import Trace, Message, Scenario
from synkro.types.tool import ToolDefinition, ToolCall, ToolFunction
from synkro.types.logic_map import LogicMap, GoldenScenario
from synkro.prompts.golden_templates import GOLDEN_TOOL_TRACE_PROMPT

if TYPE_CHECKING:
    from synkro.generation.tool_simulator import ToolSimulator


# =============================================================================
# Pydantic models for structured JSON output
# =============================================================================

class GoldenToolCallRequest(BaseModel):
    """A tool call request with rule citation."""

    name: str = Field(description="Name of the tool to call")
    arguments: str = Field(description="Arguments as JSON string")
    rule_id: str = Field(description="Rule ID that requires this tool call")
    reasoning: str = Field(description="Why this tool is needed for the rule")


class GoldenToolDecision(BaseModel):
    """Structured output for tool calling decision with rule grounding."""

    needs_tool: bool = Field(description="Whether a tool call is needed")
    reasoning: str = Field(description="Rule-based explanation of decision")
    rule_ids_evaluated: list[str] = Field(
        default_factory=list,
        description="Rule IDs that were evaluated"
    )
    tool_calls: list[GoldenToolCallRequest] = Field(
        default_factory=list,
        description="Tool calls with rule citations"
    )
    direct_response: str | None = Field(
        default=None,
        description="Direct response if no tool needed"
    )


class GoldenToolSynthesis(BaseModel):
    """Structured output for synthesizing tool results."""

    response: str = Field(description="Natural response incorporating tool results")
    rules_applied: list[str] = Field(
        default_factory=list,
        description="Rule IDs applied in the response"
    )
    rules_excluded: list[str] = Field(
        default_factory=list,
        description="Rule IDs explicitly excluded"
    )


# =============================================================================
# Golden Tool Call Response Generator
# =============================================================================

class GoldenToolCallResponseGenerator:
    """
    The Thinker for Tool Calls - Generates tool traces with grounded reasoning.

    Produces tool call traces with:
    - Rule citations for tool selection decisions
    - Explicit reasoning linking rules to tool usage
    - DAG-compliant evaluation order
    - Verification-ready metadata

    Examples:
        >>> generator = GoldenToolCallResponseGenerator(
        ...     tools=[web_search_tool],
        ...     llm=LLM(model=OpenAI.GPT_4O_MINI),
        ...     simulator=tool_simulator,
        ... )
        >>> trace = await generator.generate_single(
        ...     policy_text="...",
        ...     logic_map=logic_map,
        ...     scenario=scenario,
        ... )
    """

    def __init__(
        self,
        tools: list[ToolDefinition],
        llm: LLM | None = None,
        simulator: "ToolSimulator | None" = None,
        model: Model = OpenAI.GPT_4O_MINI,
    ):
        """
        Initialize the Golden Tool Call Response Generator.

        Args:
            tools: List of available tool definitions
            llm: LLM client to use (creates one if not provided)
            simulator: Tool simulator for generating tool responses
            model: Model to use if creating LLM
        """
        self.tools = tools
        self.tools_by_name = {t.name: t for t in tools}
        self.llm = llm or LLM(model=model, temperature=0.7)
        self.simulator = simulator

    def _get_tools_description(self) -> str:
        """Get formatted description of all tools."""
        descriptions = []
        for tool in self.tools:
            descriptions.append(tool.to_system_prompt())
        return "\n\n".join(descriptions)

    def _generate_call_id(self) -> str:
        """Generate a unique tool call ID."""
        return f"call_{uuid.uuid4().hex[:12]}"

    def _format_logic_map(self, logic_map: LogicMap) -> str:
        """Format Logic Map for prompt inclusion."""
        lines = []
        lines.append("RULES:")
        for rule in logic_map.rules:
            deps = f" [depends on: {', '.join(rule.dependencies)}]" if rule.dependencies else ""
            lines.append(
                f" {rule.rule_id} ({rule.category.value}): {rule.text}{deps}"
            )
            lines.append(f" IF: {rule.condition}")
            lines.append(f" THEN: {rule.action}")
        return "\n".join(lines)

    async def generate_single(
        self,
        policy_text: str,
        logic_map: LogicMap,
        scenario: GoldenScenario,
        target_turns: int = 1,
    ) -> Trace:
        """
        Generate a single tool call trace with grounded reasoning.

        Args:
            policy_text: The policy document text
            logic_map: The extracted Logic Map (DAG of rules)
            scenario: The golden scenario to respond to
            target_turns: Number of conversation turns (currently single-turn only)

        Returns:
            Trace with proper tool calling format and rule citations
        """
        # TODO: Implement multi-turn tool calling support
        tools_desc = self._get_tools_description()
        logic_map_str = self._format_logic_map(logic_map)

        # Step 1: Get LLM decision on tool usage with rule grounding
        decision = await self._get_tool_decision(
            policy_text, logic_map_str, scenario, tools_desc
        )

        # Step 2: Build the message sequence
        messages = await self._build_message_sequence(
            policy_text, logic_map_str, scenario, tools_desc, decision
        )

        # Convert GoldenScenario to base Scenario
        base_scenario = scenario.to_base_scenario()

        return Trace(messages=messages, scenario=base_scenario)

    async def _get_tool_decision(
        self,
        policy_text: str,
        logic_map_str: str,
        scenario: GoldenScenario,
        tools_desc: str,
    ) -> GoldenToolDecision:
        """Get the LLM's rule-grounded decision on tool usage."""
        prompt = f"""You are a customer support agent deciding whether to use tools.
Your decisions must be GROUNDED in the Logic Map rules.

AVAILABLE TOOLS:
{tools_desc}

LOGIC MAP (Rules to Apply):
{logic_map_str}

POLICY GUIDELINES:
{policy_text}

SCENARIO:
Type: {scenario.scenario_type.value.upper()}
Request: {scenario.description}
Context: {scenario.context}
Target Rules: {', '.join(scenario.target_rule_ids)}

YOUR TASK:
1. Evaluate which rules from the Logic Map apply to this scenario
2. Determine if any rule requires information that a tool can provide
3. If tools are needed, specify which rule requires each tool call
4. If no tools needed, explain based on which rules why direct response is sufficient

TOOL CALLING RULES:
- Only call a tool if a SPECIFIC RULE requires information the tool can provide
- Cite the Rule ID that necessitates each tool call
- If the scenario is IRRELEVANT type, no tools should be needed
- If information is already in the context, don't call a tool for it"""

        return await self.llm.generate_structured(prompt, GoldenToolDecision)

    async def _build_message_sequence(
        self,
        policy_text: str,
        logic_map_str: str,
        scenario: GoldenScenario,
        tools_desc: str,
        decision: GoldenToolDecision,
    ) -> list[Message]:
        """Build the full message sequence based on the tool decision."""
        messages = []

        # System message with tool descriptions
        system_content = f"""You are a helpful customer support agent. You have access to the following tools:

{tools_desc}

Follow the policy guidelines to assist customers effectively."""

        messages.append(Message(role="system", content=system_content))

        # User message
        messages.append(Message(role="user", content=scenario.description))

        if decision.needs_tool and decision.tool_calls:
            # Assistant message with tool_calls
            tool_calls = []
            for tc in decision.tool_calls:
                call_id = self._generate_call_id()
                tool_calls.append(ToolCall(
                    id=call_id,
                    type="function",
                    function=ToolFunction(
                        name=tc.name,
                        arguments=tc.arguments
                    )
                ))

            messages.append(Message(
                role="assistant",
                content=None,
                tool_calls=tool_calls
            ))

            # Tool response messages
            tool_results = []
            for tc in tool_calls:
                result = await self._simulate_tool_call(tc)
                tool_results.append(result)

                messages.append(Message(
                    role="tool",
                    content=result,
                    tool_call_id=tc.id
                ))

            # Final assistant message synthesizing results
            final_response = await self._synthesize_response(
                scenario, tool_calls, tool_results, decision, policy_text, logic_map_str
            )
            messages.append(Message(role="assistant", content=final_response))

        else:
            # Direct response without tools
            response = decision.direct_response or await self._generate_direct_response(
                policy_text, logic_map_str, scenario
            )
            messages.append(Message(role="assistant", content=response))

        return messages
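    # The sequence built above follows the standard tool-calling chat layout:
    #   system    -> tool descriptions + policy framing
    #   user      -> scenario.description
    #   assistant -> content=None with tool_calls, when a rule requires a tool
    #   tool      -> one message per call, linked via tool_call_id
    #   assistant -> final synthesized answer
    # or simply system / user / assistant when the decision needs no tools.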

    async def _simulate_tool_call(self, tool_call: ToolCall) -> str:
        """Simulate a tool response."""
        if self.simulator:
            return await self.simulator.simulate(tool_call)

        # Fallback: generate a mock response based on tool definition
        tool_name = tool_call.function.name
        if tool_name in self.tools_by_name:
            tool = self.tools_by_name[tool_name]
            if tool.mock_responses:
                import random
                return random.choice(tool.mock_responses)

        # Default mock response
        args = json.loads(tool_call.function.arguments)
        return json.dumps({
            "status": "success",
            "result": f"Simulated response for {tool_name}",
            "query": args
        })

    async def _synthesize_response(
        self,
        scenario: GoldenScenario,
        tool_calls: list[ToolCall],
        tool_results: list[str],
        decision: GoldenToolDecision,
        policy_text: str,
        logic_map_str: str,
    ) -> str:
        """Synthesize a natural response from tool results with rule grounding."""
        # Build context of tool calls and results
        tools_context = []
        for tc, result in zip(tool_calls, tool_results):
            tools_context.append(f"Tool: {tc.function.name}")
            tools_context.append(f"Arguments: {tc.function.arguments}")
            tools_context.append(f"Result: {result}")
            tools_context.append("")

        prompt = f"""Based on the tool results and rules, provide a helpful response.

USER REQUEST:
{scenario.description}

SCENARIO TYPE: {scenario.scenario_type.value.upper()}
TARGET RULES: {', '.join(scenario.target_rule_ids)}

TOOL RESULTS:
{chr(10).join(tools_context)}

LOGIC MAP:
{logic_map_str}

RULES EVALUATED: {', '.join(decision.rule_ids_evaluated)}

Synthesize the tool results into a natural, helpful response.
- Apply the relevant rules from the Logic Map
- Incorporate the information from the tool results
- Don't expose raw JSON or technical details
- Be conversational and helpful"""

        synthesis = await self.llm.generate_structured(prompt, GoldenToolSynthesis)
        return synthesis.response

    async def _generate_direct_response(
        self,
        policy_text: str,
        logic_map_str: str,
        scenario: GoldenScenario,
    ) -> str:
        """Generate a direct response when no tools are needed."""
        prompt = f"""Provide a helpful response based on the rules.

USER REQUEST:
{scenario.description}

CONTEXT:
{scenario.context}

SCENARIO TYPE: {scenario.scenario_type.value.upper()}
TARGET RULES: {', '.join(scenario.target_rule_ids)}

LOGIC MAP:
{logic_map_str}

POLICY GUIDELINES:
{policy_text}

No tools are needed for this request. Provide a direct, helpful response
applying the relevant rules from the Logic Map."""

        synthesis = await self.llm.generate_structured(prompt, GoldenToolSynthesis)
        return synthesis.response

    async def generate(
        self,
        policy_text: str,
        logic_map: LogicMap,
        scenarios: list[GoldenScenario],
        target_turns: int = 1,
    ) -> list[Trace]:
        """
        Generate traces for multiple scenarios.

        Args:
            policy_text: The policy document text
            logic_map: The extracted Logic Map
            scenarios: List of golden scenarios
            target_turns: Number of conversation turns

        Returns:
            List of traces with tool calling format
        """
        tasks = [
            self.generate_single(policy_text, logic_map, s, target_turns)
            for s in scenarios
        ]
        return await asyncio.gather(*tasks)


__all__ = [
    "GoldenToolCallResponseGenerator",
    "GoldenToolDecision",
    "GoldenToolCallRequest",
    "GoldenToolSynthesis",
]
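A matching minimal sketch for the tool-call generator above (not part of the packaged file): the tool definitions, simulator, policy text, Logic Map, and scenarios are placeholders assumed to exist, for example the scenarios produced by GoldenScenarioGenerator earlier in the pipeline.

from synkro.llm.client import LLM
from synkro.models import OpenAI
from synkro.generation.golden_tool_responses import GoldenToolCallResponseGenerator

async def build_tool_traces(tools, simulator, policy_text, logic_map, scenarios):
    # tools (list[ToolDefinition]), simulator (ToolSimulator or None),
    # logic_map (LogicMap), and scenarios (list[GoldenScenario]) are placeholders.
    generator = GoldenToolCallResponseGenerator(
        tools=tools,
        llm=LLM(model=OpenAI.GPT_4O_MINI),
        simulator=simulator,
    )
    traces = await generator.generate(policy_text, logic_map, scenarios)
    for trace in traces:
        # Each Trace holds the system/user/assistant/tool message sequence
        # plus the base scenario it was generated from.
        print(len(trace.messages))
    return traces

# Run with: asyncio.run(build_tool_traces(tools, simulator, policy_text, logic_map, scenarios))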