synkro 0.4.36__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synkro might be problematic. Click here for more details.

Files changed (81) hide show
  1. synkro/__init__.py +331 -0
  2. synkro/advanced.py +184 -0
  3. synkro/cli.py +156 -0
  4. synkro/core/__init__.py +7 -0
  5. synkro/core/checkpoint.py +250 -0
  6. synkro/core/dataset.py +432 -0
  7. synkro/core/policy.py +337 -0
  8. synkro/errors.py +178 -0
  9. synkro/examples/__init__.py +148 -0
  10. synkro/factory.py +291 -0
  11. synkro/formatters/__init__.py +18 -0
  12. synkro/formatters/chatml.py +121 -0
  13. synkro/formatters/langfuse.py +98 -0
  14. synkro/formatters/langsmith.py +98 -0
  15. synkro/formatters/qa.py +112 -0
  16. synkro/formatters/sft.py +90 -0
  17. synkro/formatters/tool_call.py +127 -0
  18. synkro/generation/__init__.py +9 -0
  19. synkro/generation/follow_ups.py +134 -0
  20. synkro/generation/generator.py +314 -0
  21. synkro/generation/golden_responses.py +269 -0
  22. synkro/generation/golden_scenarios.py +333 -0
  23. synkro/generation/golden_tool_responses.py +791 -0
  24. synkro/generation/logic_extractor.py +126 -0
  25. synkro/generation/multiturn_responses.py +177 -0
  26. synkro/generation/planner.py +131 -0
  27. synkro/generation/responses.py +189 -0
  28. synkro/generation/scenarios.py +90 -0
  29. synkro/generation/tool_responses.py +625 -0
  30. synkro/generation/tool_simulator.py +114 -0
  31. synkro/interactive/__init__.py +16 -0
  32. synkro/interactive/hitl_session.py +205 -0
  33. synkro/interactive/intent_classifier.py +94 -0
  34. synkro/interactive/logic_map_editor.py +176 -0
  35. synkro/interactive/rich_ui.py +459 -0
  36. synkro/interactive/scenario_editor.py +198 -0
  37. synkro/llm/__init__.py +7 -0
  38. synkro/llm/client.py +309 -0
  39. synkro/llm/rate_limits.py +99 -0
  40. synkro/models/__init__.py +50 -0
  41. synkro/models/anthropic.py +26 -0
  42. synkro/models/google.py +19 -0
  43. synkro/models/local.py +104 -0
  44. synkro/models/openai.py +31 -0
  45. synkro/modes/__init__.py +13 -0
  46. synkro/modes/config.py +66 -0
  47. synkro/modes/conversation.py +35 -0
  48. synkro/modes/tool_call.py +18 -0
  49. synkro/parsers.py +442 -0
  50. synkro/pipeline/__init__.py +20 -0
  51. synkro/pipeline/phases.py +592 -0
  52. synkro/pipeline/runner.py +769 -0
  53. synkro/pipelines.py +136 -0
  54. synkro/prompts/__init__.py +57 -0
  55. synkro/prompts/base.py +167 -0
  56. synkro/prompts/golden_templates.py +533 -0
  57. synkro/prompts/interactive_templates.py +198 -0
  58. synkro/prompts/multiturn_templates.py +156 -0
  59. synkro/prompts/templates.py +281 -0
  60. synkro/prompts/tool_templates.py +318 -0
  61. synkro/quality/__init__.py +14 -0
  62. synkro/quality/golden_refiner.py +163 -0
  63. synkro/quality/grader.py +153 -0
  64. synkro/quality/multiturn_grader.py +150 -0
  65. synkro/quality/refiner.py +137 -0
  66. synkro/quality/tool_grader.py +126 -0
  67. synkro/quality/tool_refiner.py +128 -0
  68. synkro/quality/verifier.py +228 -0
  69. synkro/reporting.py +464 -0
  70. synkro/schemas.py +521 -0
  71. synkro/types/__init__.py +43 -0
  72. synkro/types/core.py +153 -0
  73. synkro/types/dataset_type.py +33 -0
  74. synkro/types/logic_map.py +348 -0
  75. synkro/types/tool.py +94 -0
  76. synkro-0.4.36.data/data/examples/__init__.py +148 -0
  77. synkro-0.4.36.dist-info/METADATA +507 -0
  78. synkro-0.4.36.dist-info/RECORD +81 -0
  79. synkro-0.4.36.dist-info/WHEEL +4 -0
  80. synkro-0.4.36.dist-info/entry_points.txt +2 -0
  81. synkro-0.4.36.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,791 @@
1
+ """Golden Tool Response Generator - The Thinker for Tool Calls.
2
+
3
+ Generates tool call traces with grounded reasoning and rule citations.
4
+ This is Stage 3 of the Golden Trace pipeline for TOOL_CALL datasets.
5
+ """
6
+
7
+ import json
8
+ import uuid
9
+ import asyncio
10
+ from typing import TYPE_CHECKING
11
+
12
+ from pydantic import BaseModel, Field
13
+
14
+ from synkro.llm.client import LLM
15
+ from synkro.models import Model, OpenAI
16
+ from synkro.types.core import Trace, Message, Scenario
17
+ from synkro.types.tool import ToolDefinition, ToolCall, ToolFunction
18
+ from synkro.types.logic_map import LogicMap, GoldenScenario
19
+ from synkro.prompts.golden_templates import GOLDEN_TOOL_TRACE_PROMPT
20
+ from synkro.prompts.tool_templates import (
21
+ GOLDEN_MULTI_TURN_TOOL_DECISION_PROMPT,
22
+ GOLDEN_MULTI_TURN_TOOL_SYNTHESIS_PROMPT,
23
+ )
24
+
25
+ if TYPE_CHECKING:
26
+ from synkro.generation.tool_simulator import ToolSimulator
27
+ from synkro.generation.follow_ups import FollowUpGenerator
28
+
29
+
30
+ # =============================================================================
31
+ # Pydantic models for structured JSON output
32
+ # =============================================================================
33
+
34
class GoldenToolCallRequest(BaseModel):
    """A tool call request with rule citation.

    One tool invocation the LLM wants to make, plus the Logic Map rule
    that justifies it (used later for verification of the trace).
    """

    # NOTE: Field descriptions are emitted into the structured-output schema
    # sent to the LLM, so their wording is runtime behavior.
    name: str = Field(description="Name of the tool to call")
    arguments: str = Field(description="Arguments as JSON string")
    rule_id: str = Field(description="Rule ID that requires this tool call")
    reasoning: str = Field(description="Why this tool is needed for the rule")
41
+
42
+
43
class GoldenToolDecision(BaseModel):
    """Structured output for tool calling decision with rule grounding.

    First-turn decision returned by the LLM: whether tools are needed,
    which rules were evaluated, and either the tool calls to make or a
    direct textual reply.
    """

    # NOTE: Field descriptions are part of the structured-output schema
    # shown to the LLM; wording affects generation behavior.
    needs_tool: bool = Field(description="Whether a tool call is needed")
    reasoning: str = Field(description="Rule-based explanation of decision")
    rule_ids_evaluated: list[str] = Field(
        default_factory=list,
        description="Rule IDs that were evaluated"
    )
    tool_calls: list[GoldenToolCallRequest] = Field(
        default_factory=list,
        description="Tool calls with rule citations"
    )
    direct_response: str | None = Field(
        default=None,
        description="Direct response if no tool needed"
    )
60
+
61
+
62
class GoldenToolSynthesis(BaseModel):
    """Structured output for synthesizing tool results.

    Used for both the post-tool synthesis step and the no-tool direct
    response; tracks which rules were applied or explicitly excluded.
    """

    # NOTE: Field descriptions feed the structured-output schema.
    response: str = Field(description="Natural response incorporating tool results")
    rules_applied: list[str] = Field(
        default_factory=list,
        description="Rule IDs applied in the response"
    )
    rules_excluded: list[str] = Field(
        default_factory=list,
        description="Rule IDs explicitly excluded"
    )
74
+
75
+
76
class GoldenMultiTurnToolDecision(BaseModel):
    """Tool decision for a follow-up turn with rule grounding.

    Extends the single-turn decision shape with per-turn rule tracking so
    multi-turn generation can accumulate applied/excluded rules.
    """

    # NOTE: Field descriptions feed the structured-output schema.
    needs_tool: bool = Field(description="Whether a tool call is needed")
    reasoning: str = Field(description="Rule-based explanation of decision")
    rule_ids_evaluated: list[str] = Field(
        default_factory=list,
        description="Rule IDs evaluated for this turn"
    )
    tool_calls: list[GoldenToolCallRequest] = Field(
        default_factory=list,
        description="Tool calls with rule citations"
    )
    direct_response: str | None = Field(
        default=None,
        description="Direct response if no tool needed"
    )
    rules_applied_this_turn: list[str] = Field(
        default_factory=list,
        description="Rules applied in this turn's response"
    )
    rules_excluded_this_turn: list[str] = Field(
        default_factory=list,
        description="Rules excluded in this turn"
    )
101
+
102
+
103
class GoldenMultiTurnToolSynthesis(BaseModel):
    """Structured output for synthesizing follow-up responses with rule tracking."""

    # NOTE: Field descriptions feed the structured-output schema.
    response: str = Field(description="Natural response for follow-up")
    rules_applied_this_turn: list[str] = Field(
        default_factory=list,
        description="Rule IDs applied in this turn"
    )
    rules_excluded_this_turn: list[str] = Field(
        default_factory=list,
        description="Rule IDs excluded in this turn"
    )
115
+
116
+
117
+ # =============================================================================
118
+ # Golden Tool Call Response Generator
119
+ # =============================================================================
120
+
121
class GoldenToolCallResponseGenerator:
    """
    The Thinker for Tool Calls - Generates tool traces with grounded reasoning.

    Produces tool call traces with:
    - Rule citations for tool selection decisions
    - Explicit reasoning linking rules to tool usage
    - DAG-compliant evaluation order
    - Verification-ready metadata

    Examples:
        >>> generator = GoldenToolCallResponseGenerator(
        ...     tools=[web_search_tool],
        ...     llm=LLM(model=OpenAI.GPT_4O_MINI),
        ...     simulator=tool_simulator,
        ... )
        >>> trace = await generator.generate_single(
        ...     policy_text="...",
        ...     logic_map=logic_map,
        ...     scenario=scenario,
        ... )
    """

    # Instruction to inject when thinking mode is enabled.
    # NOTE: this literal is appended verbatim to LLM prompts at runtime
    # (see the synthesis methods), so its exact wording is behavior —
    # do not reword casually.
    THINKING_INSTRUCTION = """
THINKING MODE:
Your assistant response MUST include reasoning wrapped in <think> and </think> tags.
Place your step-by-step reasoning inside the think tags BEFORE your actual response.

Format:
<think>
[Your reasoning about which rules apply, tool usage decisions, etc.]
</think>

[Your actual response to the user]
"""
157
+
158
+ def __init__(
159
+ self,
160
+ tools: list[ToolDefinition],
161
+ llm: LLM | None = None,
162
+ simulator: "ToolSimulator | None" = None,
163
+ model: Model = OpenAI.GPT_4O_MINI,
164
+ thinking: bool = False,
165
+ ):
166
+ """
167
+ Initialize the Golden Tool Call Response Generator.
168
+
169
+ Args:
170
+ tools: List of available tool definitions
171
+ llm: LLM client to use (creates one if not provided)
172
+ simulator: Tool simulator for generating tool responses
173
+ model: Model to use if creating LLM
174
+ thinking: Enable thinking mode with <think> tags in responses
175
+ """
176
+ self.tools = tools
177
+ self.tools_by_name = {t.name: t for t in tools}
178
+ self.llm = llm or LLM(model=model, temperature=0.7)
179
+ self.simulator = simulator
180
+ self.thinking = thinking
181
+ self._follow_up_gen: "FollowUpGenerator | None" = None
182
+
183
+ @property
184
+ def follow_up_generator(self) -> "FollowUpGenerator":
185
+ """Lazy initialization of follow-up generator for multi-turn."""
186
+ if self._follow_up_gen is None:
187
+ from synkro.generation.follow_ups import FollowUpGenerator
188
+ self._follow_up_gen = FollowUpGenerator(llm=self.llm)
189
+ return self._follow_up_gen
190
+
191
+ def _get_tools_description(self) -> str:
192
+ """Get formatted description of all tools."""
193
+ descriptions = []
194
+ for tool in self.tools:
195
+ descriptions.append(tool.to_system_prompt())
196
+ return "\n\n".join(descriptions)
197
+
198
+ def _generate_call_id(self) -> str:
199
+ """Generate a unique tool call ID."""
200
+ return f"call_{uuid.uuid4().hex[:12]}"
201
+
202
+ def _format_logic_map(self, logic_map: LogicMap) -> str:
203
+ """Format Logic Map for prompt inclusion."""
204
+ lines = []
205
+ lines.append("RULES:")
206
+ for rule in logic_map.rules:
207
+ deps = f" [depends on: {', '.join(rule.dependencies)}]" if rule.dependencies else ""
208
+ lines.append(
209
+ f" {rule.rule_id} ({rule.category.value}): {rule.text}{deps}"
210
+ )
211
+ lines.append(f" IF: {rule.condition}")
212
+ lines.append(f" THEN: {rule.action}")
213
+ return "\n".join(lines)
214
+
215
+ async def generate_single(
216
+ self,
217
+ policy_text: str,
218
+ logic_map: LogicMap,
219
+ scenario: GoldenScenario,
220
+ target_turns: int = 1,
221
+ ) -> Trace:
222
+ """
223
+ Generate a single tool call trace with grounded reasoning.
224
+
225
+ Args:
226
+ policy_text: The policy document text
227
+ logic_map: The extracted Logic Map (DAG of rules)
228
+ scenario: The golden scenario to respond to
229
+ target_turns: Number of conversation turns (1 for single-turn,
230
+ >1 for multi-turn with follow-up questions)
231
+
232
+ Returns:
233
+ Trace with proper tool calling format and rule citations
234
+ """
235
+ if target_turns > 1:
236
+ return await self._generate_multi_turn(
237
+ policy_text, logic_map, scenario, target_turns
238
+ )
239
+
240
+ # Single-turn generation
241
+ tools_desc = self._get_tools_description()
242
+ logic_map_str = self._format_logic_map(logic_map)
243
+
244
+ # Step 1: Get LLM decision on tool usage with rule grounding
245
+ decision = await self._get_tool_decision(
246
+ policy_text, logic_map_str, scenario, tools_desc
247
+ )
248
+
249
+ # Step 2: Build the message sequence
250
+ messages = await self._build_message_sequence(
251
+ policy_text, logic_map_str, scenario, tools_desc, decision
252
+ )
253
+
254
+ # Convert GoldenScenario to base Scenario
255
+ base_scenario = scenario.to_base_scenario()
256
+
257
+ return Trace(messages=messages, scenario=base_scenario)
258
+
259
    async def _get_tool_decision(
        self,
        policy_text: str,
        logic_map_str: str,
        scenario: GoldenScenario,
        tools_desc: str,
    ) -> GoldenToolDecision:
        """Get the LLM's rule-grounded decision on tool usage.

        Args:
            policy_text: Raw policy document text.
            logic_map_str: Rules pre-formatted by ``_format_logic_map``.
            scenario: The golden scenario under consideration.
            tools_desc: Tool descriptions from ``_get_tools_description``.

        Returns:
            Structured decision: whether tools are needed, the calls to
            make (with rule citations), or a direct response.
        """
        # The prompt literal below is runtime behavior; it interpolates the
        # tool list, formatted rules, raw policy, and scenario fields.
        prompt = f"""You are a customer support agent deciding whether to use tools.
Your decisions must be GROUNDED in the Logic Map rules.

AVAILABLE TOOLS:
{tools_desc}

LOGIC MAP (Rules to Apply):
{logic_map_str}

POLICY GUIDELINES:
{policy_text}

SCENARIO:
Type: {scenario.scenario_type.value.upper()}
Request: {scenario.description}
Context: {scenario.context}
Target Rules: {', '.join(scenario.target_rule_ids)}

YOUR TASK:
1. Evaluate which rules from the Logic Map apply to this scenario
2. Determine if any rule requires information that a tool can provide
3. If tools are needed, specify which rule requires each tool call
4. If no tools needed, explain based on which rules why direct response is sufficient

TOOL CALLING RULES:
- Only call a tool if a SPECIFIC RULE requires information the tool can provide
- Cite the Rule ID that necessitates each tool call
- If the scenario is IRRELEVANT type, no tools should be needed
- If information is already in the context, don't call a tool for it"""

        # Structured generation parses the reply into GoldenToolDecision.
        return await self.llm.generate_structured(prompt, GoldenToolDecision)
298
+
299
+ async def _build_message_sequence(
300
+ self,
301
+ policy_text: str,
302
+ logic_map_str: str,
303
+ scenario: GoldenScenario,
304
+ tools_desc: str,
305
+ decision: GoldenToolDecision,
306
+ ) -> list[Message]:
307
+ """Build the full message sequence based on the tool decision."""
308
+ messages = []
309
+
310
+ # System message with tool descriptions
311
+ system_content = f"""You are a helpful customer support agent. You have access to the following tools:
312
+
313
+ {tools_desc}
314
+
315
+ Follow the policy guidelines to assist customers effectively."""
316
+
317
+ messages.append(Message(role="system", content=system_content))
318
+
319
+ # User message
320
+ messages.append(Message(role="user", content=scenario.description))
321
+
322
+ if decision.needs_tool and decision.tool_calls:
323
+ # Assistant message with tool_calls
324
+ tool_calls = []
325
+ for tc in decision.tool_calls:
326
+ call_id = self._generate_call_id()
327
+ tool_calls.append(ToolCall(
328
+ id=call_id,
329
+ type="function",
330
+ function=ToolFunction(
331
+ name=tc.name,
332
+ arguments=tc.arguments
333
+ )
334
+ ))
335
+
336
+ messages.append(Message(
337
+ role="assistant",
338
+ content=None,
339
+ tool_calls=tool_calls
340
+ ))
341
+
342
+ # Tool response messages
343
+ tool_results = []
344
+ for tc in tool_calls:
345
+ result = await self._simulate_tool_call(tc)
346
+ tool_results.append(result)
347
+
348
+ messages.append(Message(
349
+ role="tool",
350
+ content=result,
351
+ tool_call_id=tc.id
352
+ ))
353
+
354
+ # Final assistant message synthesizing results
355
+ final_response = await self._synthesize_response(
356
+ scenario, tool_calls, tool_results, decision, policy_text, logic_map_str
357
+ )
358
+ messages.append(Message(role="assistant", content=final_response))
359
+
360
+ else:
361
+ # Direct response without tools
362
+ response = decision.direct_response or await self._generate_direct_response(
363
+ policy_text, logic_map_str, scenario
364
+ )
365
+ messages.append(Message(role="assistant", content=response))
366
+
367
+ return messages
368
+
369
+ async def _simulate_tool_call(self, tool_call: ToolCall) -> str:
370
+ """Simulate a tool response."""
371
+ if self.simulator:
372
+ return await self.simulator.simulate(tool_call)
373
+
374
+ # Fallback: generate a mock response based on tool definition
375
+ tool_name = tool_call.function.name
376
+ if tool_name in self.tools_by_name:
377
+ tool = self.tools_by_name[tool_name]
378
+ if tool.mock_responses:
379
+ import random
380
+ return random.choice(tool.mock_responses)
381
+
382
+ # Default mock response
383
+ args = json.loads(tool_call.function.arguments)
384
+ return json.dumps({
385
+ "status": "success",
386
+ "result": f"Simulated response for {tool_name}",
387
+ "query": args
388
+ })
389
+
390
    async def _synthesize_response(
        self,
        scenario: GoldenScenario,
        tool_calls: list[ToolCall],
        tool_results: list[str],
        decision: GoldenToolDecision,
        policy_text: str,
        logic_map_str: str,
    ) -> str:
        """Synthesize a natural response from tool results with rule grounding.

        Args:
            scenario: The golden scenario being answered.
            tool_calls: The calls that were made, in order.
            tool_results: Simulated outputs, parallel to ``tool_calls``.
            decision: The original tool decision (for rules evaluated).
            policy_text: Raw policy text (unused in the prompt body here,
                kept for signature symmetry with related methods).
            logic_map_str: Rules pre-formatted by ``_format_logic_map``.

        Returns:
            The synthesized assistant reply text.
        """
        # Build context of tool calls and results, one block per call.
        tools_context = []
        for tc, result in zip(tool_calls, tool_results):
            tools_context.append(f"Tool: {tc.function.name}")
            tools_context.append(f"Arguments: {tc.function.arguments}")
            tools_context.append(f"Result: {result}")
            tools_context.append("")

        # chr(10) is a newline: f-string expressions could not contain
        # backslashes before Python 3.12.
        prompt = f"""Based on the tool results and rules, provide a helpful response.

USER REQUEST:
{scenario.description}

SCENARIO TYPE: {scenario.scenario_type.value.upper()}
TARGET RULES: {', '.join(scenario.target_rule_ids)}

TOOL RESULTS:
{chr(10).join(tools_context)}

LOGIC MAP:
{logic_map_str}

RULES EVALUATED: {', '.join(decision.rule_ids_evaluated)}

Synthesize the tool results into a natural, helpful response.
- Apply the relevant rules from the Logic Map
- Incorporate the information from the tool results
- Don't expose raw JSON or technical details
- Be conversational and helpful"""

        # Inject thinking instruction if enabled
        if self.thinking:
            prompt = prompt + self.THINKING_INSTRUCTION

        # Only the response text is used; rules_applied/excluded are dropped
        # on this single-turn path.
        synthesis = await self.llm.generate_structured(prompt, GoldenToolSynthesis)
        return synthesis.response
436
+
437
    async def _generate_direct_response(
        self,
        policy_text: str,
        logic_map_str: str,
        scenario: GoldenScenario,
    ) -> str:
        """Generate a direct response when no tools are needed.

        Args:
            policy_text: Raw policy document text.
            logic_map_str: Rules pre-formatted by ``_format_logic_map``.
            scenario: The golden scenario being answered.

        Returns:
            The assistant reply text (no tool calls involved).
        """
        prompt = f"""Provide a helpful response based on the rules.

USER REQUEST:
{scenario.description}

CONTEXT:
{scenario.context}

SCENARIO TYPE: {scenario.scenario_type.value.upper()}
TARGET RULES: {', '.join(scenario.target_rule_ids)}

LOGIC MAP:
{logic_map_str}

POLICY GUIDELINES:
{policy_text}

No tools are needed for this request. Provide a direct, helpful response
applying the relevant rules from the Logic Map."""

        # Inject thinking instruction if enabled
        if self.thinking:
            prompt = prompt + self.THINKING_INSTRUCTION

        # Reuses GoldenToolSynthesis; only the response text is consumed here.
        synthesis = await self.llm.generate_structured(prompt, GoldenToolSynthesis)
        return synthesis.response
470
+
471
+ # =========================================================================
472
+ # MULTI-TURN TOOL CALLING WITH RULE TRACKING
473
+ # =========================================================================
474
+
475
    async def _generate_multi_turn(
        self,
        policy_text: str,
        logic_map: LogicMap,
        scenario: GoldenScenario,
        target_turns: int,
    ) -> Trace:
        """
        Generate multi-turn golden tool call trace with cumulative rule tracking.

        Each turn can independently decide if new tool calls are needed.
        Rules applied/excluded are tracked across all turns.

        Args:
            policy_text: The policy/guidelines text
            logic_map: The extracted Logic Map
            scenario: The golden scenario to respond to
            target_turns: Number of conversation turns

        Returns:
            Trace with multi-turn tool calling and cumulative rule metadata
        """
        tools_desc = self._get_tools_description()
        logic_map_str = self._format_logic_map(logic_map)

        # Track cumulative rules across turns
        cumulative_rules_applied: list[str] = []
        cumulative_rules_excluded: list[str] = []

        # Step 1: Generate initial response (Turn 1)
        decision = await self._get_tool_decision(
            policy_text, logic_map_str, scenario, tools_desc
        )
        messages = await self._build_message_sequence(
            policy_text, logic_map_str, scenario, tools_desc, decision
        )

        # Track rules from initial turn.
        # NOTE(review): this records rule_ids_EVALUATED (not rules applied)
        # into cumulative_rules_applied — confirm that conflation is intended;
        # follow-up turns record rules_applied_this_turn instead.
        cumulative_rules_applied.extend(decision.rule_ids_evaluated)

        # Step 2: Generate follow-up turns (turn 0 was the initial exchange)
        for turn in range(1, target_turns):
            # Generate follow-up question based on conversation so far
            follow_up = await self.follow_up_generator.generate(
                policy_text=policy_text,
                messages=messages,
                turn_index=turn,
            )

            # Add user message with follow-up question
            messages.append(Message(role="user", content=follow_up.question))

            # Get rule-grounded tool decision for this follow-up.
            # Note: messages already includes the new user turn here.
            follow_up_decision = await self._get_follow_up_tool_decision(
                policy_text=policy_text,
                logic_map_str=logic_map_str,
                messages=messages,
                follow_up_question=follow_up.question,
                tools_desc=tools_desc,
                cumulative_rules_applied=cumulative_rules_applied,
            )

            # Build response for this turn
            turn_messages, turn_rules_applied, turn_rules_excluded = (
                await self._build_follow_up_message_sequence(
                    policy_text=policy_text,
                    logic_map_str=logic_map_str,
                    messages=messages,
                    follow_up_question=follow_up.question,
                    tools_desc=tools_desc,
                    decision=follow_up_decision,
                    cumulative_rules_applied=cumulative_rules_applied,
                )
            )

            messages.extend(turn_messages)

            # Update cumulative rule tracking
            cumulative_rules_applied.extend(turn_rules_applied)
            cumulative_rules_excluded.extend(turn_rules_excluded)

        # Deduplicate rules while preserving first-seen order
        unique_rules_applied = list(dict.fromkeys(cumulative_rules_applied))
        unique_rules_excluded = list(dict.fromkeys(cumulative_rules_excluded))

        base_scenario = scenario.to_base_scenario()

        return Trace(
            messages=messages,
            scenario=base_scenario,
            rules_applied=unique_rules_applied,
            rules_excluded=unique_rules_excluded,
        )
568
+
569
+ def _format_conversation_with_tools(self, messages: list[Message]) -> str:
570
+ """Format conversation including tool calls and results for context."""
571
+ formatted = []
572
+ for msg in messages:
573
+ role = msg.role.upper()
574
+
575
+ if msg.role == "assistant" and msg.tool_calls:
576
+ tool_strs = []
577
+ for tc in msg.tool_calls:
578
+ if hasattr(tc, "function"):
579
+ tool_strs.append(
580
+ f" - {tc.function.name}({tc.function.arguments})"
581
+ )
582
+ elif isinstance(tc, dict) and "function" in tc:
583
+ func = tc["function"]
584
+ tool_strs.append(
585
+ f" - {func.get('name', 'unknown')}({func.get('arguments', '{}')})"
586
+ )
587
+ else:
588
+ tool_strs.append(f" - {tc}")
589
+ formatted.append(f"ASSISTANT: [Tool Calls]\n" + "\n".join(tool_strs))
590
+ elif msg.role == "tool":
591
+ formatted.append(f"TOOL RESULT [{msg.tool_call_id}]: {msg.content}")
592
+ else:
593
+ content = msg.content or "[No content]"
594
+ formatted.append(f"{role}: {content}")
595
+
596
+ return "\n\n".join(formatted)
597
+
598
+ async def _get_follow_up_tool_decision(
599
+ self,
600
+ policy_text: str,
601
+ logic_map_str: str,
602
+ messages: list[Message],
603
+ follow_up_question: str,
604
+ tools_desc: str,
605
+ cumulative_rules_applied: list[str],
606
+ ) -> GoldenMultiTurnToolDecision:
607
+ """Get rule-grounded tool decision for a follow-up question."""
608
+ conversation_history = self._format_conversation_with_tools(messages)
609
+
610
+ prompt = GOLDEN_MULTI_TURN_TOOL_DECISION_PROMPT.format(
611
+ tools_desc=tools_desc,
612
+ logic_map_str=logic_map_str,
613
+ policy_text=policy_text,
614
+ conversation_history=conversation_history,
615
+ cumulative_rules_applied=", ".join(cumulative_rules_applied) or "None yet",
616
+ follow_up_question=follow_up_question,
617
+ )
618
+
619
+ return await self.llm.generate_structured(prompt, GoldenMultiTurnToolDecision)
620
+
621
    async def _build_follow_up_message_sequence(
        self,
        policy_text: str,
        logic_map_str: str,
        messages: list[Message],
        follow_up_question: str,
        tools_desc: str,
        decision: GoldenMultiTurnToolDecision,
        cumulative_rules_applied: list[str],
    ) -> tuple[list[Message], list[str], list[str]]:
        """
        Build message sequence for a follow-up turn with rule tracking.

        Args:
            policy_text: Raw policy document text.
            logic_map_str: Rules pre-formatted by ``_format_logic_map``.
            messages: Conversation so far (already includes the follow-up
                user turn; this method only produces the reply messages).
            follow_up_question: The generated follow-up question text.
            tools_desc: Tool descriptions (unused here; kept for symmetry).
            decision: The per-turn tool decision to realize.
            cumulative_rules_applied: Rules applied in earlier turns, passed
                through to the synthesis prompt for context.

        Returns:
            Tuple of (new_messages, rules_applied_this_turn, rules_excluded_this_turn)
        """
        new_messages = []
        rules_applied: list[str] = []
        rules_excluded: list[str] = []

        if decision.needs_tool and decision.tool_calls:
            # Assistant message with new tool_calls
            tool_calls = []
            for tc in decision.tool_calls:
                call_id = self._generate_call_id()
                tool_calls.append(
                    ToolCall(
                        id=call_id,
                        type="function",
                        function=ToolFunction(
                            name=tc.name,
                            arguments=tc.arguments,
                        ),
                    )
                )

            new_messages.append(
                Message(role="assistant", content=None, tool_calls=tool_calls)
            )

            # Tool response messages, one per call, in call order
            tool_results = []
            for tc in tool_calls:
                result = await self._simulate_tool_call(tc)
                tool_results.append(result)
                new_messages.append(
                    Message(role="tool", content=result, tool_call_id=tc.id)
                )

            # Final assistant message with rule-grounded synthesis
            response, rules_applied, rules_excluded = (
                await self._synthesize_follow_up_response(
                    policy_text=policy_text,
                    logic_map_str=logic_map_str,
                    messages=messages,
                    follow_up_question=follow_up_question,
                    tool_calls=tool_calls,
                    tool_results=tool_results,
                    cumulative_rules_applied=cumulative_rules_applied,
                )
            )
            new_messages.append(Message(role="assistant", content=response))

        else:
            # Direct response without new tools. Prefer the decision's own
            # reply (and its rule lists); otherwise synthesize one.
            if decision.direct_response:
                response = decision.direct_response
                rules_applied = decision.rules_applied_this_turn
                rules_excluded = decision.rules_excluded_this_turn
            else:
                response, rules_applied, rules_excluded = (
                    await self._synthesize_follow_up_response(
                        policy_text=policy_text,
                        logic_map_str=logic_map_str,
                        messages=messages,
                        follow_up_question=follow_up_question,
                        tool_calls=[],
                        tool_results=[],
                        cumulative_rules_applied=cumulative_rules_applied,
                    )
                )
            new_messages.append(Message(role="assistant", content=response))

        return new_messages, rules_applied, rules_excluded
705
+
706
    async def _synthesize_follow_up_response(
        self,
        policy_text: str,
        logic_map_str: str,
        messages: list[Message],
        follow_up_question: str,
        tool_calls: list[ToolCall],
        tool_results: list[str],
        cumulative_rules_applied: list[str],
    ) -> tuple[str, list[str], list[str]]:
        """
        Synthesize response for a follow-up turn with rule tracking.

        Args:
            policy_text: Raw policy document text.
            logic_map_str: Rules pre-formatted by ``_format_logic_map``.
            messages: Conversation so far, for history context.
            follow_up_question: The follow-up question being answered.
            tool_calls: New tool calls for this turn (may be empty).
            tool_results: Outputs parallel to ``tool_calls``.
            cumulative_rules_applied: Rules applied in earlier turns.

        Returns:
            Tuple of (response, rules_applied_this_turn, rules_excluded_this_turn)
        """
        conversation_history = self._format_conversation_with_tools(messages)

        # Format new tool results if any; otherwise tell the LLM explicitly
        # that no new results exist so it relies on prior conversation.
        if tool_calls and tool_results:
            new_tool_results = []
            for tc, result in zip(tool_calls, tool_results):
                new_tool_results.append(f"Tool: {tc.function.name}")
                new_tool_results.append(f"Arguments: {tc.function.arguments}")
                new_tool_results.append(f"Result: {result}")
                new_tool_results.append("")
            new_results_str = "\n".join(new_tool_results)
        else:
            new_results_str = "None (using existing information from conversation)"

        prompt = GOLDEN_MULTI_TURN_TOOL_SYNTHESIS_PROMPT.format(
            logic_map_str=logic_map_str,
            conversation_history=conversation_history,
            follow_up_question=follow_up_question,
            new_tool_results=new_results_str,
            cumulative_rules_applied=", ".join(cumulative_rules_applied) or "None yet",
            policy_text=policy_text,
        )

        # Inject thinking instruction if enabled
        if self.thinking:
            prompt = prompt + self.THINKING_INSTRUCTION

        synthesis = await self.llm.generate_structured(
            prompt, GoldenMultiTurnToolSynthesis
        )
        return (
            synthesis.response,
            synthesis.rules_applied_this_turn,
            synthesis.rules_excluded_this_turn,
        )
757
+
758
+ async def generate(
759
+ self,
760
+ policy_text: str,
761
+ logic_map: LogicMap,
762
+ scenarios: list[GoldenScenario],
763
+ target_turns: int = 1,
764
+ ) -> list[Trace]:
765
+ """
766
+ Generate traces for multiple scenarios.
767
+
768
+ Args:
769
+ policy_text: The policy document text
770
+ logic_map: The extracted Logic Map
771
+ scenarios: List of golden scenarios
772
+ target_turns: Number of conversation turns
773
+
774
+ Returns:
775
+ List of traces with tool calling format
776
+ """
777
+ tasks = [
778
+ self.generate_single(policy_text, logic_map, s, target_turns)
779
+ for s in scenarios
780
+ ]
781
+ return await asyncio.gather(*tasks)
782
+
783
+
784
# Public API of this module.
__all__ = [
    "GoldenToolCallResponseGenerator",
    "GoldenToolDecision",
    "GoldenToolCallRequest",
    "GoldenToolSynthesis",
    "GoldenMultiTurnToolDecision",
    "GoldenMultiTurnToolSynthesis",
]