synkro-0.4.36-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of synkro has been flagged as possibly problematic.
- synkro/__init__.py +331 -0
- synkro/advanced.py +184 -0
- synkro/cli.py +156 -0
- synkro/core/__init__.py +7 -0
- synkro/core/checkpoint.py +250 -0
- synkro/core/dataset.py +432 -0
- synkro/core/policy.py +337 -0
- synkro/errors.py +178 -0
- synkro/examples/__init__.py +148 -0
- synkro/factory.py +291 -0
- synkro/formatters/__init__.py +18 -0
- synkro/formatters/chatml.py +121 -0
- synkro/formatters/langfuse.py +98 -0
- synkro/formatters/langsmith.py +98 -0
- synkro/formatters/qa.py +112 -0
- synkro/formatters/sft.py +90 -0
- synkro/formatters/tool_call.py +127 -0
- synkro/generation/__init__.py +9 -0
- synkro/generation/follow_ups.py +134 -0
- synkro/generation/generator.py +314 -0
- synkro/generation/golden_responses.py +269 -0
- synkro/generation/golden_scenarios.py +333 -0
- synkro/generation/golden_tool_responses.py +791 -0
- synkro/generation/logic_extractor.py +126 -0
- synkro/generation/multiturn_responses.py +177 -0
- synkro/generation/planner.py +131 -0
- synkro/generation/responses.py +189 -0
- synkro/generation/scenarios.py +90 -0
- synkro/generation/tool_responses.py +625 -0
- synkro/generation/tool_simulator.py +114 -0
- synkro/interactive/__init__.py +16 -0
- synkro/interactive/hitl_session.py +205 -0
- synkro/interactive/intent_classifier.py +94 -0
- synkro/interactive/logic_map_editor.py +176 -0
- synkro/interactive/rich_ui.py +459 -0
- synkro/interactive/scenario_editor.py +198 -0
- synkro/llm/__init__.py +7 -0
- synkro/llm/client.py +309 -0
- synkro/llm/rate_limits.py +99 -0
- synkro/models/__init__.py +50 -0
- synkro/models/anthropic.py +26 -0
- synkro/models/google.py +19 -0
- synkro/models/local.py +104 -0
- synkro/models/openai.py +31 -0
- synkro/modes/__init__.py +13 -0
- synkro/modes/config.py +66 -0
- synkro/modes/conversation.py +35 -0
- synkro/modes/tool_call.py +18 -0
- synkro/parsers.py +442 -0
- synkro/pipeline/__init__.py +20 -0
- synkro/pipeline/phases.py +592 -0
- synkro/pipeline/runner.py +769 -0
- synkro/pipelines.py +136 -0
- synkro/prompts/__init__.py +57 -0
- synkro/prompts/base.py +167 -0
- synkro/prompts/golden_templates.py +533 -0
- synkro/prompts/interactive_templates.py +198 -0
- synkro/prompts/multiturn_templates.py +156 -0
- synkro/prompts/templates.py +281 -0
- synkro/prompts/tool_templates.py +318 -0
- synkro/quality/__init__.py +14 -0
- synkro/quality/golden_refiner.py +163 -0
- synkro/quality/grader.py +153 -0
- synkro/quality/multiturn_grader.py +150 -0
- synkro/quality/refiner.py +137 -0
- synkro/quality/tool_grader.py +126 -0
- synkro/quality/tool_refiner.py +128 -0
- synkro/quality/verifier.py +228 -0
- synkro/reporting.py +464 -0
- synkro/schemas.py +521 -0
- synkro/types/__init__.py +43 -0
- synkro/types/core.py +153 -0
- synkro/types/dataset_type.py +33 -0
- synkro/types/logic_map.py +348 -0
- synkro/types/tool.py +94 -0
- synkro-0.4.36.data/data/examples/__init__.py +148 -0
- synkro-0.4.36.dist-info/METADATA +507 -0
- synkro-0.4.36.dist-info/RECORD +81 -0
- synkro-0.4.36.dist-info/WHEEL +4 -0
- synkro-0.4.36.dist-info/entry_points.txt +2 -0
- synkro-0.4.36.dist-info/licenses/LICENSE +21 -0
synkro/generation/golden_tool_responses.py
@@ -0,0 +1,791 @@
"""Golden Tool Response Generator - The Thinker for Tool Calls.

Generates tool call traces with grounded reasoning and rule citations.
This is Stage 3 of the Golden Trace pipeline for TOOL_CALL datasets.
"""

import json
import uuid
import asyncio
from typing import TYPE_CHECKING

from pydantic import BaseModel, Field

from synkro.llm.client import LLM
from synkro.models import Model, OpenAI
from synkro.types.core import Trace, Message, Scenario
from synkro.types.tool import ToolDefinition, ToolCall, ToolFunction
from synkro.types.logic_map import LogicMap, GoldenScenario
from synkro.prompts.golden_templates import GOLDEN_TOOL_TRACE_PROMPT
from synkro.prompts.tool_templates import (
    GOLDEN_MULTI_TURN_TOOL_DECISION_PROMPT,
    GOLDEN_MULTI_TURN_TOOL_SYNTHESIS_PROMPT,
)

if TYPE_CHECKING:
    from synkro.generation.tool_simulator import ToolSimulator
    from synkro.generation.follow_ups import FollowUpGenerator

# =============================================================================
# Pydantic models for structured JSON output
# =============================================================================

class GoldenToolCallRequest(BaseModel):
    """A tool call request with rule citation."""

    name: str = Field(description="Name of the tool to call")
    arguments: str = Field(description="Arguments as JSON string")
    rule_id: str = Field(description="Rule ID that requires this tool call")
    reasoning: str = Field(description="Why this tool is needed for the rule")


class GoldenToolDecision(BaseModel):
    """Structured output for tool calling decision with rule grounding."""

    needs_tool: bool = Field(description="Whether a tool call is needed")
    reasoning: str = Field(description="Rule-based explanation of decision")
    rule_ids_evaluated: list[str] = Field(
        default_factory=list,
        description="Rule IDs that were evaluated"
    )
    tool_calls: list[GoldenToolCallRequest] = Field(
        default_factory=list,
        description="Tool calls with rule citations"
    )
    direct_response: str | None = Field(
        default=None,
        description="Direct response if no tool needed"
    )


class GoldenToolSynthesis(BaseModel):
    """Structured output for synthesizing tool results."""

    response: str = Field(description="Natural response incorporating tool results")
    rules_applied: list[str] = Field(
        default_factory=list,
        description="Rule IDs applied in the response"
    )
    rules_excluded: list[str] = Field(
        default_factory=list,
        description="Rule IDs explicitly excluded"
    )


class GoldenMultiTurnToolDecision(BaseModel):
    """Tool decision for a follow-up turn with rule grounding."""

    needs_tool: bool = Field(description="Whether a tool call is needed")
    reasoning: str = Field(description="Rule-based explanation of decision")
    rule_ids_evaluated: list[str] = Field(
        default_factory=list,
        description="Rule IDs evaluated for this turn"
    )
    tool_calls: list[GoldenToolCallRequest] = Field(
        default_factory=list,
        description="Tool calls with rule citations"
    )
    direct_response: str | None = Field(
        default=None,
        description="Direct response if no tool needed"
    )
    rules_applied_this_turn: list[str] = Field(
        default_factory=list,
        description="Rules applied in this turn's response"
    )
    rules_excluded_this_turn: list[str] = Field(
        default_factory=list,
        description="Rules excluded in this turn"
    )


class GoldenMultiTurnToolSynthesis(BaseModel):
    """Structured output for synthesizing follow-up responses with rule tracking."""

    response: str = Field(description="Natural response for follow-up")
    rules_applied_this_turn: list[str] = Field(
        default_factory=list,
        description="Rule IDs applied in this turn"
    )
    rules_excluded_this_turn: list[str] = Field(
        default_factory=list,
        description="Rule IDs excluded in this turn"
    )

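# Illustrative only (not part of the package source): for a refund scenario,
# the structured decision returned by the LLM might deserialize to
#
#   GoldenToolDecision(
#       needs_tool=True,
#       reasoning="R2 requires checking order status before approving a refund",
#       rule_ids_evaluated=["R1", "R2"],
#       tool_calls=[
#           GoldenToolCallRequest(
#               name="lookup_order",
#               arguments='{"order_id": "A-123"}',
#               rule_id="R2",
#               reasoning="Order status is needed to evaluate R2",
#           )
#       ],
#       direct_response=None,
#   )
#
# where the rule IDs, tool name, and arguments are hypothetical.
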
# =============================================================================
# Golden Tool Call Response Generator
# =============================================================================

class GoldenToolCallResponseGenerator:
    """
    The Thinker for Tool Calls - Generates tool traces with grounded reasoning.

    Produces tool call traces with:
    - Rule citations for tool selection decisions
    - Explicit reasoning linking rules to tool usage
    - DAG-compliant evaluation order
    - Verification-ready metadata

    Examples:
        >>> generator = GoldenToolCallResponseGenerator(
        ...     tools=[web_search_tool],
        ...     llm=LLM(model=OpenAI.GPT_4O_MINI),
        ...     simulator=tool_simulator,
        ... )
        >>> trace = await generator.generate_single(
        ...     policy_text="...",
        ...     logic_map=logic_map,
        ...     scenario=scenario,
        ... )
    """

    # Instruction to inject when thinking mode is enabled
    THINKING_INSTRUCTION = """
THINKING MODE:
Your assistant response MUST include reasoning wrapped in <think> and </think> tags.
Place your step-by-step reasoning inside the think tags BEFORE your actual response.

Format:
<think>
[Your reasoning about which rules apply, tool usage decisions, etc.]
</think>

[Your actual response to the user]
"""

    def __init__(
        self,
        tools: list[ToolDefinition],
        llm: LLM | None = None,
        simulator: "ToolSimulator | None" = None,
        model: Model = OpenAI.GPT_4O_MINI,
        thinking: bool = False,
    ):
        """
        Initialize the Golden Tool Call Response Generator.

        Args:
            tools: List of available tool definitions
            llm: LLM client to use (creates one if not provided)
            simulator: Tool simulator for generating tool responses
            model: Model to use if creating LLM
            thinking: Enable thinking mode with <think> tags in responses
        """
        self.tools = tools
        self.tools_by_name = {t.name: t for t in tools}
        self.llm = llm or LLM(model=model, temperature=0.7)
        self.simulator = simulator
        self.thinking = thinking
        self._follow_up_gen: "FollowUpGenerator | None" = None

    @property
    def follow_up_generator(self) -> "FollowUpGenerator":
        """Lazy initialization of follow-up generator for multi-turn."""
        if self._follow_up_gen is None:
            from synkro.generation.follow_ups import FollowUpGenerator
            self._follow_up_gen = FollowUpGenerator(llm=self.llm)
        return self._follow_up_gen

    def _get_tools_description(self) -> str:
        """Get formatted description of all tools."""
        descriptions = []
        for tool in self.tools:
            descriptions.append(tool.to_system_prompt())
        return "\n\n".join(descriptions)

    def _generate_call_id(self) -> str:
        """Generate a unique tool call ID."""
        return f"call_{uuid.uuid4().hex[:12]}"

    def _format_logic_map(self, logic_map: LogicMap) -> str:
        """Format Logic Map for prompt inclusion."""
        lines = []
        lines.append("RULES:")
        for rule in logic_map.rules:
            deps = f" [depends on: {', '.join(rule.dependencies)}]" if rule.dependencies else ""
            lines.append(
                f" {rule.rule_id} ({rule.category.value}): {rule.text}{deps}"
            )
            lines.append(f" IF: {rule.condition}")
            lines.append(f" THEN: {rule.action}")
        return "\n".join(lines)

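    # Illustrative only: for a logic map with a single rule, the formatted
    # block produced above would look like (rule ID, category, and text are
    # hypothetical):
    #
    #   RULES:
    #    R1 (conditional): Refunds over $100 need approval [depends on: R0]
    #    IF: refund_amount > 100
    #    THEN: escalate to a supervisor
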
    async def generate_single(
        self,
        policy_text: str,
        logic_map: LogicMap,
        scenario: GoldenScenario,
        target_turns: int = 1,
    ) -> Trace:
        """
        Generate a single tool call trace with grounded reasoning.

        Args:
            policy_text: The policy document text
            logic_map: The extracted Logic Map (DAG of rules)
            scenario: The golden scenario to respond to
            target_turns: Number of conversation turns (1 for single-turn,
                >1 for multi-turn with follow-up questions)

        Returns:
            Trace with proper tool calling format and rule citations
        """
        if target_turns > 1:
            return await self._generate_multi_turn(
                policy_text, logic_map, scenario, target_turns
            )

        # Single-turn generation
        tools_desc = self._get_tools_description()
        logic_map_str = self._format_logic_map(logic_map)

        # Step 1: Get LLM decision on tool usage with rule grounding
        decision = await self._get_tool_decision(
            policy_text, logic_map_str, scenario, tools_desc
        )

        # Step 2: Build the message sequence
        messages = await self._build_message_sequence(
            policy_text, logic_map_str, scenario, tools_desc, decision
        )

        # Convert GoldenScenario to base Scenario
        base_scenario = scenario.to_base_scenario()

        return Trace(messages=messages, scenario=base_scenario)

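    # Illustrative only: when the decision requires tools, the trace built by
    # _build_message_sequence below follows the OpenAI-style tool calling order:
    #
    #   system    -> agent role + tool descriptions
    #   user      -> scenario.description
    #   assistant -> content=None, tool_calls=[...]
    #   tool      -> one message per call, keyed by tool_call_id
    #   assistant -> final synthesized, rule-grounded answer
    #
    # With needs_tool=False the trace is just system / user / assistant.
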
    async def _get_tool_decision(
        self,
        policy_text: str,
        logic_map_str: str,
        scenario: GoldenScenario,
        tools_desc: str,
    ) -> GoldenToolDecision:
        """Get the LLM's rule-grounded decision on tool usage."""
        prompt = f"""You are a customer support agent deciding whether to use tools.
Your decisions must be GROUNDED in the Logic Map rules.

AVAILABLE TOOLS:
{tools_desc}

LOGIC MAP (Rules to Apply):
{logic_map_str}

POLICY GUIDELINES:
{policy_text}

SCENARIO:
Type: {scenario.scenario_type.value.upper()}
Request: {scenario.description}
Context: {scenario.context}
Target Rules: {', '.join(scenario.target_rule_ids)}

YOUR TASK:
1. Evaluate which rules from the Logic Map apply to this scenario
2. Determine if any rule requires information that a tool can provide
3. If tools are needed, specify which rule requires each tool call
4. If no tools are needed, explain which rules make a direct response sufficient

TOOL CALLING RULES:
- Only call a tool if a SPECIFIC RULE requires information the tool can provide
- Cite the Rule ID that necessitates each tool call
- If the scenario is IRRELEVANT type, no tools should be needed
- If information is already in the context, don't call a tool for it"""

        return await self.llm.generate_structured(prompt, GoldenToolDecision)

    async def _build_message_sequence(
        self,
        policy_text: str,
        logic_map_str: str,
        scenario: GoldenScenario,
        tools_desc: str,
        decision: GoldenToolDecision,
    ) -> list[Message]:
        """Build the full message sequence based on the tool decision."""
        messages = []

        # System message with tool descriptions
        system_content = f"""You are a helpful customer support agent. You have access to the following tools:

{tools_desc}

Follow the policy guidelines to assist customers effectively."""

        messages.append(Message(role="system", content=system_content))

        # User message
        messages.append(Message(role="user", content=scenario.description))

        if decision.needs_tool and decision.tool_calls:
            # Assistant message with tool_calls
            tool_calls = []
            for tc in decision.tool_calls:
                call_id = self._generate_call_id()
                tool_calls.append(ToolCall(
                    id=call_id,
                    type="function",
                    function=ToolFunction(
                        name=tc.name,
                        arguments=tc.arguments
                    )
                ))

            messages.append(Message(
                role="assistant",
                content=None,
                tool_calls=tool_calls
            ))

            # Tool response messages
            tool_results = []
            for tc in tool_calls:
                result = await self._simulate_tool_call(tc)
                tool_results.append(result)

                messages.append(Message(
                    role="tool",
                    content=result,
                    tool_call_id=tc.id
                ))

            # Final assistant message synthesizing results
            final_response = await self._synthesize_response(
                scenario, tool_calls, tool_results, decision, policy_text, logic_map_str
            )
            messages.append(Message(role="assistant", content=final_response))

        else:
            # Direct response without tools
            response = decision.direct_response or await self._generate_direct_response(
                policy_text, logic_map_str, scenario
            )
            messages.append(Message(role="assistant", content=response))

        return messages

    async def _simulate_tool_call(self, tool_call: ToolCall) -> str:
        """Simulate a tool response."""
        if self.simulator:
            return await self.simulator.simulate(tool_call)

        # Fallback: generate a mock response based on tool definition
        tool_name = tool_call.function.name
        if tool_name in self.tools_by_name:
            tool = self.tools_by_name[tool_name]
            if tool.mock_responses:
                import random
                return random.choice(tool.mock_responses)

        # Default mock response
        args = json.loads(tool_call.function.arguments)
        return json.dumps({
            "status": "success",
            "result": f"Simulated response for {tool_name}",
            "query": args
        })

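    # Illustrative only: with no simulator attached and no mock_responses on
    # the tool definition, a hypothetical lookup_order call with arguments
    # '{"order_id": "A-123"}' falls through to the default payload:
    #
    #   {"status": "success",
    #    "result": "Simulated response for lookup_order",
    #    "query": {"order_id": "A-123"}}
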
    async def _synthesize_response(
        self,
        scenario: GoldenScenario,
        tool_calls: list[ToolCall],
        tool_results: list[str],
        decision: GoldenToolDecision,
        policy_text: str,
        logic_map_str: str,
    ) -> str:
        """Synthesize a natural response from tool results with rule grounding."""
        # Build context of tool calls and results
        tools_context = []
        for tc, result in zip(tool_calls, tool_results):
            tools_context.append(f"Tool: {tc.function.name}")
            tools_context.append(f"Arguments: {tc.function.arguments}")
            tools_context.append(f"Result: {result}")
            tools_context.append("")

        prompt = f"""Based on the tool results and rules, provide a helpful response.

USER REQUEST:
{scenario.description}

SCENARIO TYPE: {scenario.scenario_type.value.upper()}
TARGET RULES: {', '.join(scenario.target_rule_ids)}

TOOL RESULTS:
{chr(10).join(tools_context)}

LOGIC MAP:
{logic_map_str}

RULES EVALUATED: {', '.join(decision.rule_ids_evaluated)}

Synthesize the tool results into a natural, helpful response.
- Apply the relevant rules from the Logic Map
- Incorporate the information from the tool results
- Don't expose raw JSON or technical details
- Be conversational and helpful"""

        # Inject thinking instruction if enabled
        if self.thinking:
            prompt = prompt + self.THINKING_INSTRUCTION

        synthesis = await self.llm.generate_structured(prompt, GoldenToolSynthesis)
        return synthesis.response

    async def _generate_direct_response(
        self,
        policy_text: str,
        logic_map_str: str,
        scenario: GoldenScenario,
    ) -> str:
        """Generate a direct response when no tools are needed."""
        prompt = f"""Provide a helpful response based on the rules.

USER REQUEST:
{scenario.description}

CONTEXT:
{scenario.context}

SCENARIO TYPE: {scenario.scenario_type.value.upper()}
TARGET RULES: {', '.join(scenario.target_rule_ids)}

LOGIC MAP:
{logic_map_str}

POLICY GUIDELINES:
{policy_text}

No tools are needed for this request. Provide a direct, helpful response
applying the relevant rules from the Logic Map."""

        # Inject thinking instruction if enabled
        if self.thinking:
            prompt = prompt + self.THINKING_INSTRUCTION

        synthesis = await self.llm.generate_structured(prompt, GoldenToolSynthesis)
        return synthesis.response

    # =========================================================================
    # MULTI-TURN TOOL CALLING WITH RULE TRACKING
    # =========================================================================

    async def _generate_multi_turn(
        self,
        policy_text: str,
        logic_map: LogicMap,
        scenario: GoldenScenario,
        target_turns: int,
    ) -> Trace:
        """
        Generate multi-turn golden tool call trace with cumulative rule tracking.

        Each turn can independently decide if new tool calls are needed.
        Rules applied/excluded are tracked across all turns.

        Args:
            policy_text: The policy/guidelines text
            logic_map: The extracted Logic Map
            scenario: The golden scenario to respond to
            target_turns: Number of conversation turns

        Returns:
            Trace with multi-turn tool calling and cumulative rule metadata
        """
        tools_desc = self._get_tools_description()
        logic_map_str = self._format_logic_map(logic_map)

        # Track cumulative rules across turns
        cumulative_rules_applied: list[str] = []
        cumulative_rules_excluded: list[str] = []

        # Step 1: Generate initial response (Turn 1)
        decision = await self._get_tool_decision(
            policy_text, logic_map_str, scenario, tools_desc
        )
        messages = await self._build_message_sequence(
            policy_text, logic_map_str, scenario, tools_desc, decision
        )

        # Track rules from initial turn
        cumulative_rules_applied.extend(decision.rule_ids_evaluated)

        # Step 2: Generate follow-up turns
        for turn in range(1, target_turns):
            # Generate follow-up question based on conversation so far
            follow_up = await self.follow_up_generator.generate(
                policy_text=policy_text,
                messages=messages,
                turn_index=turn,
            )

            # Add user message with follow-up question
            messages.append(Message(role="user", content=follow_up.question))

            # Get rule-grounded tool decision for this follow-up
            follow_up_decision = await self._get_follow_up_tool_decision(
                policy_text=policy_text,
                logic_map_str=logic_map_str,
                messages=messages,
                follow_up_question=follow_up.question,
                tools_desc=tools_desc,
                cumulative_rules_applied=cumulative_rules_applied,
            )

            # Build response for this turn
            turn_messages, turn_rules_applied, turn_rules_excluded = (
                await self._build_follow_up_message_sequence(
                    policy_text=policy_text,
                    logic_map_str=logic_map_str,
                    messages=messages,
                    follow_up_question=follow_up.question,
                    tools_desc=tools_desc,
                    decision=follow_up_decision,
                    cumulative_rules_applied=cumulative_rules_applied,
                )
            )

            messages.extend(turn_messages)

            # Update cumulative rule tracking
            cumulative_rules_applied.extend(turn_rules_applied)
            cumulative_rules_excluded.extend(turn_rules_excluded)

        # Deduplicate rules
        unique_rules_applied = list(dict.fromkeys(cumulative_rules_applied))
        unique_rules_excluded = list(dict.fromkeys(cumulative_rules_excluded))

        base_scenario = scenario.to_base_scenario()

        return Trace(
            messages=messages,
            scenario=base_scenario,
            rules_applied=unique_rules_applied,
            rules_excluded=unique_rules_excluded,
        )

    def _format_conversation_with_tools(self, messages: list[Message]) -> str:
        """Format conversation including tool calls and results for context."""
        formatted = []
        for msg in messages:
            role = msg.role.upper()

            if msg.role == "assistant" and msg.tool_calls:
                tool_strs = []
                for tc in msg.tool_calls:
                    if hasattr(tc, "function"):
                        tool_strs.append(
                            f" - {tc.function.name}({tc.function.arguments})"
                        )
                    elif isinstance(tc, dict) and "function" in tc:
                        func = tc["function"]
                        tool_strs.append(
                            f" - {func.get('name', 'unknown')}({func.get('arguments', '{}')})"
                        )
                    else:
                        tool_strs.append(f" - {tc}")
                formatted.append("ASSISTANT: [Tool Calls]\n" + "\n".join(tool_strs))
            elif msg.role == "tool":
                formatted.append(f"TOOL RESULT [{msg.tool_call_id}]: {msg.content}")
            else:
                content = msg.content or "[No content]"
                formatted.append(f"{role}: {content}")

        return "\n\n".join(formatted)

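    # Illustrative only: a short tool-using exchange would format roughly as
    # (call ID and values hypothetical):
    #
    #   SYSTEM: You are a helpful customer support agent. ...
    #
    #   USER: Where is my order?
    #
    #   ASSISTANT: [Tool Calls]
    #    - lookup_order({"order_id": "A-123"})
    #
    #   TOOL RESULT [call_1a2b3c4d5e6f]: {"status": "shipped"}
    #
    #   ASSISTANT: Your order shipped yesterday.
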
    async def _get_follow_up_tool_decision(
        self,
        policy_text: str,
        logic_map_str: str,
        messages: list[Message],
        follow_up_question: str,
        tools_desc: str,
        cumulative_rules_applied: list[str],
    ) -> GoldenMultiTurnToolDecision:
        """Get rule-grounded tool decision for a follow-up question."""
        conversation_history = self._format_conversation_with_tools(messages)

        prompt = GOLDEN_MULTI_TURN_TOOL_DECISION_PROMPT.format(
            tools_desc=tools_desc,
            logic_map_str=logic_map_str,
            policy_text=policy_text,
            conversation_history=conversation_history,
            cumulative_rules_applied=", ".join(cumulative_rules_applied) or "None yet",
            follow_up_question=follow_up_question,
        )

        return await self.llm.generate_structured(prompt, GoldenMultiTurnToolDecision)

    async def _build_follow_up_message_sequence(
        self,
        policy_text: str,
        logic_map_str: str,
        messages: list[Message],
        follow_up_question: str,
        tools_desc: str,
        decision: GoldenMultiTurnToolDecision,
        cumulative_rules_applied: list[str],
    ) -> tuple[list[Message], list[str], list[str]]:
        """
        Build message sequence for a follow-up turn with rule tracking.

        Returns:
            Tuple of (new_messages, rules_applied_this_turn, rules_excluded_this_turn)
        """
        new_messages = []
        rules_applied: list[str] = []
        rules_excluded: list[str] = []

        if decision.needs_tool and decision.tool_calls:
            # Assistant message with new tool_calls
            tool_calls = []
            for tc in decision.tool_calls:
                call_id = self._generate_call_id()
                tool_calls.append(
                    ToolCall(
                        id=call_id,
                        type="function",
                        function=ToolFunction(
                            name=tc.name,
                            arguments=tc.arguments,
                        ),
                    )
                )

            new_messages.append(
                Message(role="assistant", content=None, tool_calls=tool_calls)
            )

            # Tool response messages
            tool_results = []
            for tc in tool_calls:
                result = await self._simulate_tool_call(tc)
                tool_results.append(result)
                new_messages.append(
                    Message(role="tool", content=result, tool_call_id=tc.id)
                )

            # Final assistant message with rule-grounded synthesis
            response, rules_applied, rules_excluded = (
                await self._synthesize_follow_up_response(
                    policy_text=policy_text,
                    logic_map_str=logic_map_str,
                    messages=messages,
                    follow_up_question=follow_up_question,
                    tool_calls=tool_calls,
                    tool_results=tool_results,
                    cumulative_rules_applied=cumulative_rules_applied,
                )
            )
            new_messages.append(Message(role="assistant", content=response))

        else:
            # Direct response without new tools
            if decision.direct_response:
                response = decision.direct_response
                rules_applied = decision.rules_applied_this_turn
                rules_excluded = decision.rules_excluded_this_turn
            else:
                response, rules_applied, rules_excluded = (
                    await self._synthesize_follow_up_response(
                        policy_text=policy_text,
                        logic_map_str=logic_map_str,
                        messages=messages,
                        follow_up_question=follow_up_question,
                        tool_calls=[],
                        tool_results=[],
                        cumulative_rules_applied=cumulative_rules_applied,
                    )
                )
            new_messages.append(Message(role="assistant", content=response))

        return new_messages, rules_applied, rules_excluded

    async def _synthesize_follow_up_response(
        self,
        policy_text: str,
        logic_map_str: str,
        messages: list[Message],
        follow_up_question: str,
        tool_calls: list[ToolCall],
        tool_results: list[str],
        cumulative_rules_applied: list[str],
    ) -> tuple[str, list[str], list[str]]:
        """
        Synthesize response for a follow-up turn with rule tracking.

        Returns:
            Tuple of (response, rules_applied_this_turn, rules_excluded_this_turn)
        """
        conversation_history = self._format_conversation_with_tools(messages)

        # Format new tool results if any
        if tool_calls and tool_results:
            new_tool_results = []
            for tc, result in zip(tool_calls, tool_results):
                new_tool_results.append(f"Tool: {tc.function.name}")
                new_tool_results.append(f"Arguments: {tc.function.arguments}")
                new_tool_results.append(f"Result: {result}")
                new_tool_results.append("")
            new_results_str = "\n".join(new_tool_results)
        else:
            new_results_str = "None (using existing information from conversation)"

        prompt = GOLDEN_MULTI_TURN_TOOL_SYNTHESIS_PROMPT.format(
            logic_map_str=logic_map_str,
            conversation_history=conversation_history,
            follow_up_question=follow_up_question,
            new_tool_results=new_results_str,
            cumulative_rules_applied=", ".join(cumulative_rules_applied) or "None yet",
            policy_text=policy_text,
        )

        # Inject thinking instruction if enabled
        if self.thinking:
            prompt = prompt + self.THINKING_INSTRUCTION

        synthesis = await self.llm.generate_structured(
            prompt, GoldenMultiTurnToolSynthesis
        )
        return (
            synthesis.response,
            synthesis.rules_applied_this_turn,
            synthesis.rules_excluded_this_turn,
        )

    async def generate(
        self,
        policy_text: str,
        logic_map: LogicMap,
        scenarios: list[GoldenScenario],
        target_turns: int = 1,
    ) -> list[Trace]:
        """
        Generate traces for multiple scenarios.

        Args:
            policy_text: The policy document text
            logic_map: The extracted Logic Map
            scenarios: List of golden scenarios
            target_turns: Number of conversation turns

        Returns:
            List of traces with tool calling format
        """
        tasks = [
            self.generate_single(policy_text, logic_map, s, target_turns)
            for s in scenarios
        ]
        return await asyncio.gather(*tasks)


__all__ = [
    "GoldenToolCallResponseGenerator",
    "GoldenToolDecision",
    "GoldenToolCallRequest",
    "GoldenToolSynthesis",
    "GoldenMultiTurnToolDecision",
    "GoldenMultiTurnToolSynthesis",
]
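For readers evaluating this module, a minimal driver might look like the sketch below. It is not part of the released wheel: it assumes only the constructors shown in the class docstring (LLM, OpenAI.GPT_4O_MINI), and the tools, logic_map, scenarios, and policy_text inputs are hypothetical objects produced by earlier pipeline stages (logic extraction and golden scenario generation).

    import asyncio

    from synkro.llm.client import LLM
    from synkro.models import OpenAI
    from synkro.generation.golden_tool_responses import GoldenToolCallResponseGenerator

    async def main(tools, logic_map, scenarios, policy_text):
        # tools / logic_map / scenarios come from earlier pipeline stages
        generator = GoldenToolCallResponseGenerator(
            tools=tools,
            llm=LLM(model=OpenAI.GPT_4O_MINI),
            thinking=False,  # set True to request <think> reasoning blocks
        )
        # target_turns=2 exercises the multi-turn path with one follow-up
        return await generator.generate(
            policy_text, logic_map, scenarios, target_turns=2
        )

    # asyncio.run(main(...)) with concrete inputs

Note that generate() fans out one generate_single() task per scenario via asyncio.gather, so all scenario traces are produced concurrently.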