synkro 0.4.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synkro/__init__.py +165 -0
- synkro/cli.py +120 -0
- synkro/core/__init__.py +7 -0
- synkro/core/dataset.py +233 -0
- synkro/core/policy.py +337 -0
- synkro/errors.py +178 -0
- synkro/examples/__init__.py +148 -0
- synkro/factory.py +160 -0
- synkro/formatters/__init__.py +12 -0
- synkro/formatters/qa.py +85 -0
- synkro/formatters/sft.py +90 -0
- synkro/formatters/tool_call.py +127 -0
- synkro/generation/__init__.py +9 -0
- synkro/generation/generator.py +163 -0
- synkro/generation/planner.py +87 -0
- synkro/generation/responses.py +160 -0
- synkro/generation/scenarios.py +90 -0
- synkro/generation/tool_responses.py +370 -0
- synkro/generation/tool_simulator.py +114 -0
- synkro/llm/__init__.py +7 -0
- synkro/llm/client.py +235 -0
- synkro/llm/rate_limits.py +95 -0
- synkro/models/__init__.py +43 -0
- synkro/models/anthropic.py +26 -0
- synkro/models/google.py +19 -0
- synkro/models/openai.py +31 -0
- synkro/modes/__init__.py +15 -0
- synkro/modes/config.py +66 -0
- synkro/modes/qa.py +18 -0
- synkro/modes/sft.py +18 -0
- synkro/modes/tool_call.py +18 -0
- synkro/parsers.py +442 -0
- synkro/pipeline/__init__.py +20 -0
- synkro/pipeline/phases.py +237 -0
- synkro/pipeline/runner.py +198 -0
- synkro/pipelines.py +105 -0
- synkro/prompts/__init__.py +44 -0
- synkro/prompts/base.py +167 -0
- synkro/prompts/qa_templates.py +97 -0
- synkro/prompts/templates.py +281 -0
- synkro/prompts/tool_templates.py +201 -0
- synkro/quality/__init__.py +14 -0
- synkro/quality/grader.py +130 -0
- synkro/quality/refiner.py +137 -0
- synkro/quality/tool_grader.py +126 -0
- synkro/quality/tool_refiner.py +128 -0
- synkro/reporting.py +213 -0
- synkro/schemas.py +325 -0
- synkro/types/__init__.py +41 -0
- synkro/types/core.py +113 -0
- synkro/types/dataset_type.py +30 -0
- synkro/types/tool.py +94 -0
- synkro-0.4.5.data/data/examples/__init__.py +148 -0
- synkro-0.4.5.dist-info/METADATA +221 -0
- synkro-0.4.5.dist-info/RECORD +58 -0
- synkro-0.4.5.dist-info/WHEEL +4 -0
- synkro-0.4.5.dist-info/entry_points.txt +2 -0
- synkro-0.4.5.dist-info/licenses/LICENSE +21 -0
synkro/generation/planner.py
@@ -0,0 +1,87 @@
"""Planning for trace generation across categories."""

from synkro.llm.client import LLM
from synkro.models import Model, OpenAI
from synkro.types.core import Plan, Category
from synkro.prompts.templates import POLICY_PLANNING_PROMPT
from synkro.schemas import PolicyPlan


class Planner:
    """
    Plans how to distribute trace generation across categories.

    The planner analyzes the policy and creates an optimal distribution
    of scenarios across different categories to ensure comprehensive
    coverage.

    Examples:
        >>> planner = Planner()
        >>> plan = await planner.plan(policy.text, target_traces=100)
        >>> for cat in plan.categories:
        ...     print(f"{cat.name}: {cat.count} traces")
    """

    def __init__(self, llm: LLM | None = None, model: Model = OpenAI.GPT_4O):
        """
        Initialize the planner.

        Args:
            llm: LLM client to use (creates one if not provided)
            model: Model to use if creating LLM
        """
        self.llm = llm or LLM(model=model)

    async def plan(self, policy_text: str, target_traces: int) -> Plan:
        """
        Create a generation plan for the policy.

        Analyzes the policy and determines optimal category distribution.

        Args:
            policy_text: The policy text to analyze
            target_traces: Target number of traces to generate

        Returns:
            Plan object with categories and reasoning
        """
        prompt = f"""{POLICY_PLANNING_PROMPT}

POLICY:
{policy_text}

TARGET TRACES: {target_traces}

Analyze the policy and create a plan with categories for generating training data."""

        try:
            # Use structured output for reliable planning
            parsed = await self.llm.generate_structured(prompt, PolicyPlan)

            # Convert to typed objects
            categories = [
                Category(
                    name=c.name,
                    description=c.description,
                    count=c.traces,
                )
                for c in parsed.categories
            ]

            return Plan(
                categories=categories,
                reasoning=parsed.reasoning,
            )
        except Exception:
            # Fallback plan
            third = target_traces // 3
            remainder = target_traces - (third * 3)
            return Plan(
                categories=[
                    Category(name="Happy Path", description="Clear success cases", count=third),
                    Category(name="Edge Cases", description="Ambiguous situations", count=third),
                    Category(name="Violations", description="Clear failure cases", count=third + remainder),
                ],
                reasoning="Default plan - unable to parse LLM response",
            )
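A minimal usage sketch for the planner above (not part of the package: it assumes an OpenAI API key is configured for the default GPT-4o client, and `policy.md` is a hypothetical input file):

import asyncio

from synkro.generation.planner import Planner

async def main() -> None:
    policy_text = open("policy.md").read()  # hypothetical input file
    plan = await Planner().plan(policy_text, target_traces=100)
    print(plan.reasoning)
    for cat in plan.categories:
        # Counts always sum to target_traces: the fallback path folds the
        # integer-division remainder into the last category.
        print(f"{cat.name}: {cat.count} traces")

asyncio.run(main())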
synkro/generation/responses.py
@@ -0,0 +1,160 @@
"""Response generation for scenarios."""

from synkro.llm.client import LLM
from synkro.models import Model, OpenAI
from synkro.types.core import Scenario, Trace, Message
from synkro.prompts.templates import BATCHED_RESPONSE_PROMPT, SYSTEM_PROMPT
from synkro.schemas import SingleResponse
from synkro.parsers import parse_batched_responses, extract_content


class ResponseGenerator:
    """
    Generates expert responses for scenarios.

    Creates comprehensive, policy-grounded responses that demonstrate
    deep domain understanding.

    Examples:
        >>> gen = ResponseGenerator()
        >>> traces = await gen.generate(policy.text, scenarios)
    """

    def __init__(self, llm: LLM | None = None, model: Model = OpenAI.GPT_4O_MINI):
        """
        Initialize the response generator.

        Args:
            llm: LLM client to use (creates one if not provided)
            model: Model to use if creating LLM
        """
        self.llm = llm or LLM(model=model)

    async def generate(
        self,
        policy_text: str,
        scenarios: list[Scenario],
    ) -> list[Trace]:
        """
        Generate responses for scenarios.

        Args:
            policy_text: The policy text
            scenarios: List of scenarios to respond to

        Returns:
            List of traces with generated responses
        """
        traces = []

        # Generate responses one at a time for better quality
        for scenario in scenarios:
            trace = await self._generate_single(policy_text, scenario)
            traces.append(trace)

        return traces

    async def _generate_single(
        self,
        policy_text: str,
        scenario: Scenario,
    ) -> Trace:
        """Generate a single trace for one scenario."""
        prompt = f"""You are a domain expert generating a training example.

Given the scenario and policy below, create a complete training example.

The assistant response must:
- Start with <reasoning> tags showing your thought process
- Cite specific policy sections that apply
- Give specific, actionable recommendations
- Address all aspects of the scenario
- Acknowledge edge cases and complications

SCENARIO:
{scenario.description}

CONTEXT:
{scenario.context}

POLICY:
{policy_text}

Generate exactly 3 messages: system, user, and assistant."""

        # Use structured output for reliable JSON
        parsed = await self.llm.generate_structured(prompt, SingleResponse)
        messages = [
            Message(role=m.role, content=m.content) for m in parsed.messages
        ]

        return Trace(messages=messages, scenario=scenario)

    async def generate_batch(
        self,
        policy_text: str,
        scenarios: list[Scenario],
        batch_size: int = 10,
    ) -> list[Trace]:
        """
        Generate responses in batches.

        More efficient than single generation for large numbers of scenarios.

        Args:
            policy_text: The policy text
            scenarios: List of scenarios to respond to
            batch_size: Number of scenarios per batch

        Returns:
            List of traces with generated responses
        """
        traces = []

        for i in range(0, len(scenarios), batch_size):
            batch = scenarios[i : i + batch_size]
            batch_traces = await self._generate_batch(policy_text, batch)
            traces.extend(batch_traces)

        return traces

    async def _generate_batch(
        self,
        policy_text: str,
        scenarios: list[Scenario],
    ) -> list[Trace]:
        """Generate traces for a batch of scenarios."""
        scenarios_text = "\n\n".join(
            f"SCENARIO {i}:\n{s.description}\n\nCONTEXT:\n{s.context}"
            for i, s in enumerate(scenarios)
        )

        prompt = f"""{BATCHED_RESPONSE_PROMPT}

SYSTEM PROMPT TO USE:
{SYSTEM_PROMPT}

POLICY:
{policy_text}

SCENARIOS:
{scenarios_text}"""

        response = await self.llm.generate(prompt)
        from synkro.schemas import ScenarioOutput

        scenario_outputs = [
            ScenarioOutput(scenario=s.description, context=s.context) for s in scenarios
        ]
        parsed = parse_batched_responses(response, len(scenarios), scenario_outputs)

        traces = []
        for i, p in enumerate(parsed):
            scenario = scenarios[min(p["index"], len(scenarios) - 1)]
            messages = [
                Message(role=m.role, content=m.content) for m in p["messages"]
            ]
            traces.append(Trace(messages=messages, scenario=scenario))

        return traces
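The batching in `generate_batch` above is plain list slicing; a standalone sketch of how the scenario list is chunked (no LLM calls involved, list of ints stands in for Scenario objects):

# Only the slicing behavior is being illustrated here.
scenarios = list(range(25))
batch_size = 10
batches = [scenarios[i : i + batch_size] for i in range(0, len(scenarios), batch_size)]
print([len(b) for b in batches])  # [10, 10, 5] -- the final batch may be short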
synkro/generation/scenarios.py
@@ -0,0 +1,90 @@
"""Scenario generation from policy documents."""

from synkro.llm.client import LLM
from synkro.models import Model, OpenAI
from synkro.types.core import Scenario, Category
from synkro.prompts.templates import SCENARIO_GENERATOR_PROMPT, CATEGORY_SCENARIO_PROMPT
from synkro.schemas import ScenariosArray


class ScenarioGenerator:
    """
    Generates realistic scenarios from policy documents.

    Creates diverse scenarios that test different aspects of policy
    understanding and compliance.

    Examples:
        >>> gen = ScenarioGenerator()
        >>> scenarios = await gen.generate(policy.text, count=50)
        >>> for s in scenarios:
        ...     print(s.description)
    """

    def __init__(self, llm: LLM | None = None, model: Model = OpenAI.GPT_4O_MINI):
        """
        Initialize the scenario generator.

        Args:
            llm: LLM client to use (creates one if not provided)
            model: Model to use if creating LLM
        """
        self.llm = llm or LLM(model=model)
        self.prompt_template = SCENARIO_GENERATOR_PROMPT

    async def generate(
        self,
        policy_text: str,
        count: int,
        category: Category | None = None,
    ) -> list[Scenario]:
        """
        Generate scenarios from the policy.

        Args:
            policy_text: The policy text
            count: Number of scenarios to generate
            category: Optional category to focus on

        Returns:
            List of generated scenarios
        """
        if category:
            prompt = self._build_category_prompt(policy_text, count, category)
        else:
            prompt = self._build_general_prompt(policy_text, count)

        # Use structured output for reliable scenario generation
        parsed = await self.llm.generate_structured(prompt, ScenariosArray)
        return [
            Scenario(
                description=s.scenario,
                context=s.context,
                category=category.name if category else None,
            )
            for s in parsed.scenarios[:count]
        ]

    def _build_general_prompt(self, policy_text: str, count: int) -> str:
        """Build prompt for general scenario generation."""
        return f"""{self.prompt_template}

POLICY:
{policy_text}

Generate exactly {count} diverse scenarios."""

    def _build_category_prompt(
        self, policy_text: str, count: int, category: Category
    ) -> str:
        """Build prompt for category-specific scenario generation."""
        return f"""{CATEGORY_SCENARIO_PROMPT}

Category: {category.name}
Description: {category.description}

POLICY:
{policy_text}

Generate exactly {count} scenarios for the "{category.name}" category."""
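Combining the planner and scenario generator, per-category generation might be wired together as in this sketch (an assumption about intended usage, not code from the package; it requires a configured OpenAI key):

import asyncio

from synkro.generation.planner import Planner
from synkro.generation.scenarios import ScenarioGenerator

async def build_scenarios(policy_text: str):
    plan = await Planner().plan(policy_text, target_traces=30)
    gen = ScenarioGenerator()
    scenarios = []
    for cat in plan.categories:
        # generate() truncates to `count`, so each category yields at most cat.count
        scenarios += await gen.generate(policy_text, count=cat.count, category=cat)
    return scenarios

scenarios = asyncio.run(build_scenarios(open("policy.md").read()))  # hypothetical file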
synkro/generation/tool_responses.py
@@ -0,0 +1,370 @@
"""Tool call response generation with JSON mode for structured outputs."""

import json
import uuid
from typing import TYPE_CHECKING

from pydantic import BaseModel, Field

from synkro.llm.client import LLM
from synkro.models import Model, OpenAI
from synkro.types.core import Scenario, Trace, Message
from synkro.types.tool import ToolCall, ToolFunction, ToolDefinition

if TYPE_CHECKING:
    from synkro.generation.tool_simulator import ToolSimulator


# =============================================================================
# Pydantic models for structured JSON output
# =============================================================================

class ToolCallRequest(BaseModel):
    """A single tool call request from the LLM."""

    name: str = Field(description="Name of the tool to call")
    arguments: str = Field(description="Arguments as a JSON string, e.g. '{\"query\": \"test\"}'")

    def get_arguments_dict(self) -> dict:
        """Parse arguments JSON string to dict."""
        return json.loads(self.arguments)


class ToolCallDecision(BaseModel):
    """
    Structured output for the LLM's tool calling decision.

    The LLM outputs this to indicate whether tools are needed
    and which ones to call.
    """

    needs_tool: bool = Field(
        description="Whether a tool call is needed to answer the user's request"
    )
    reasoning: str = Field(
        description="Brief explanation of why tool is/isn't needed"
    )
    tool_calls: list[ToolCallRequest] = Field(
        default_factory=list,
        description="List of tool calls to make (empty if needs_tool is False)"
    )
    direct_response: str | None = Field(
        default=None,
        description="Direct response if no tool is needed"
    )


class FinalSynthesis(BaseModel):
    """Structured output for synthesizing tool results into a response."""

    response: str = Field(
        description="Natural response incorporating the tool results"
    )


# =============================================================================
# Tool Call Response Generator
# =============================================================================

class ToolCallResponseGenerator:
    """
    Generates tool call training traces using JSON mode for structured outputs.

    Produces traces in OpenAI function calling format:
    - system message with tool descriptions
    - user message with request
    - assistant message with tool_calls (or direct response)
    - tool response messages
    - final assistant message synthesizing results

    Example:
        >>> gen = ToolCallResponseGenerator(
        ...     tools=[web_search_tool, db_tool],
        ...     llm=LLM(model=OpenAI.GPT_4O),
        ...     simulator=tool_simulator,
        ... )
        >>> trace = await gen.generate_single(policy_text, scenario)
    """

    def __init__(
        self,
        tools: list[ToolDefinition],
        llm: LLM | None = None,
        simulator: "ToolSimulator | None" = None,
        model: Model = OpenAI.GPT_4O_MINI,
    ):
        """
        Initialize the tool call response generator.

        Args:
            tools: List of available tool definitions
            llm: LLM client to use (creates one if not provided)
            simulator: Tool simulator for generating tool responses
            model: Model to use if creating LLM
        """
        self.tools = tools
        self.tools_by_name = {t.name: t for t in tools}
        self.llm = llm or LLM(model=model)
        self.simulator = simulator

    def _get_tools_description(self) -> str:
        """Get formatted description of all tools for system prompt."""
        descriptions = []
        for tool in self.tools:
            descriptions.append(tool.to_system_prompt())
        return "\n\n".join(descriptions)

    def _get_tools_json_schema(self) -> str:
        """Get JSON schema representation of tools."""
        tools_json = [tool.to_openai_format() for tool in self.tools]
        return json.dumps(tools_json, indent=2)

    def _generate_call_id(self) -> str:
        """Generate a unique tool call ID."""
        return f"call_{uuid.uuid4().hex[:12]}"

    async def generate_single(
        self,
        policy_text: str,
        scenario: Scenario,
    ) -> Trace:
        """
        Generate a single tool call trace.

        Args:
            policy_text: The policy/guidelines text
            scenario: The scenario to respond to

        Returns:
            Trace with proper tool calling format
        """
        tools_desc = self._get_tools_description()

        # Step 1: Get LLM decision on tool usage
        decision = await self._get_tool_decision(policy_text, scenario, tools_desc)

        # Step 2: Build the message sequence
        messages = await self._build_message_sequence(
            policy_text, scenario, tools_desc, decision
        )

        return Trace(messages=messages, scenario=scenario)

    async def _get_tool_decision(
        self,
        policy_text: str,
        scenario: Scenario,
        tools_desc: str,
    ) -> ToolCallDecision:
        """
        Get the LLM's decision on whether to use tools.

        Uses JSON mode to force structured output.
        """
        prompt = f"""You are a customer support agent deciding whether to use tools.

AVAILABLE TOOLS:
{tools_desc}

TOOL USAGE GUIDELINES:
{policy_text}

USER REQUEST:
{scenario.description}

CONTEXT:
{scenario.context}

Analyze this request and decide:
1. Does this require calling a tool, or can you answer directly?
2. If tools are needed, which ones and with what arguments?
3. If no tools needed, provide the direct response.

Important rules:
- Only call tools when necessary (don't call for information you already know)
- Use correct tool names and parameter types
- If multiple tools are needed, list them all
- Provide clear reasoning for your decision"""

        return await self.llm.generate_structured(prompt, ToolCallDecision)

    async def _build_message_sequence(
        self,
        policy_text: str,
        scenario: Scenario,
        tools_desc: str,
        decision: ToolCallDecision,
    ) -> list[Message]:
        """Build the full message sequence based on the tool decision."""
        messages = []

        # System message with tool descriptions
        system_content = f"""You are a helpful customer support agent. You have access to the following tools:

{tools_desc}

Follow the tool usage guidelines provided to assist customers effectively."""

        messages.append(Message(role="system", content=system_content))

        # User message
        messages.append(Message(role="user", content=scenario.description))

        if decision.needs_tool and decision.tool_calls:
            # Assistant message with tool_calls
            tool_calls = []
            for tc in decision.tool_calls:
                call_id = self._generate_call_id()
                tool_calls.append(ToolCall(
                    id=call_id,
                    type="function",
                    function=ToolFunction(
                        name=tc.name,
                        arguments=tc.arguments  # Already a JSON string
                    )
                ))

            messages.append(Message(
                role="assistant",
                content=None,
                tool_calls=tool_calls
            ))

            # Tool response messages
            tool_results = []
            for tc in tool_calls:
                result = await self._simulate_tool_call(tc)
                tool_results.append(result)

                messages.append(Message(
                    role="tool",
                    content=result,
                    tool_call_id=tc.id
                ))

            # Final assistant message synthesizing results
            final_response = await self._synthesize_response(
                scenario.description, tool_calls, tool_results, policy_text
            )
            messages.append(Message(role="assistant", content=final_response))

        else:
            # Direct response without tools
            response = decision.direct_response or await self._generate_direct_response(
                policy_text, scenario, tools_desc
            )
            messages.append(Message(role="assistant", content=response))

        return messages

    async def _simulate_tool_call(self, tool_call: ToolCall) -> str:
        """Simulate a tool response."""
        if self.simulator:
            return await self.simulator.simulate(tool_call)

        # Fallback: generate a mock response based on tool definition
        tool_name = tool_call.function.name
        if tool_name in self.tools_by_name:
            tool = self.tools_by_name[tool_name]
            if tool.mock_responses:
                # Use a mock response
                import random
                return random.choice(tool.mock_responses)

        # Default mock response
        args = json.loads(tool_call.function.arguments)
        return json.dumps({
            "status": "success",
            "result": f"Simulated response for {tool_name}",
            "query": args
        })

    async def _synthesize_response(
        self,
        user_request: str,
        tool_calls: list[ToolCall],
        tool_results: list[str],
        policy_text: str,
    ) -> str:
        """Synthesize a natural response from tool results."""
        # Build context of tool calls and results
        tools_context = []
        for tc, result in zip(tool_calls, tool_results):
            tools_context.append(f"Tool: {tc.function.name}")
            tools_context.append(f"Arguments: {tc.function.arguments}")
            tools_context.append(f"Result: {result}")
            tools_context.append("")

        prompt = f"""Based on the tool results, provide a helpful response to the user.

USER REQUEST:
{user_request}

TOOL RESULTS:
{chr(10).join(tools_context)}

GUIDELINES:
{policy_text}

Synthesize the tool results into a natural, helpful response.
- Incorporate the information from the tool results
- Don't expose raw JSON or technical details
- Be conversational and helpful
- If a tool returned an error, acknowledge it and offer alternatives"""

        synthesis = await self.llm.generate_structured(prompt, FinalSynthesis)
        return synthesis.response

    async def _generate_direct_response(
        self,
        policy_text: str,
        scenario: Scenario,
        tools_desc: str,
    ) -> str:
        """Generate a direct response when no tools are needed."""
        prompt = f"""Provide a helpful response to the user's request.

USER REQUEST:
{scenario.description}

CONTEXT:
{scenario.context}

GUIDELINES:
{policy_text}

Note: No tools are needed for this request. Provide a direct, helpful response
based on your knowledge and the guidelines."""

        synthesis = await self.llm.generate_structured(prompt, FinalSynthesis)
        return synthesis.response

    async def generate(
        self,
        policy_text: str,
        scenarios: list[Scenario],
    ) -> list[Trace]:
        """
        Generate traces for multiple scenarios.

        Args:
            policy_text: The policy/guidelines text
            scenarios: List of scenarios to respond to

        Returns:
            List of traces with tool calling format
        """
        traces = []
        for scenario in scenarios:
            trace = await self.generate_single(policy_text, scenario)
            traces.append(trace)
        return traces


__all__ = [
    "ToolCallResponseGenerator",
    "ToolCallDecision",
    "ToolCallRequest",
    "FinalSynthesis",
]
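For orientation, a hand-written example of the message sequence `_build_message_sequence` emits for a tool-using scenario. The tool name, arguments, and results below are illustrative, not output from the library:

example_trace = [
    {"role": "system", "content": "You are a helpful customer support agent. ..."},
    {"role": "user", "content": "Where is order #1234?"},
    # content is None on the tool-calling turn; arguments stay a JSON string
    {"role": "assistant", "content": None, "tool_calls": [{
        "id": "call_ab12cd34ef56",  # shape matches _generate_call_id()
        "type": "function",
        "function": {"name": "lookup_order", "arguments": "{\"order_id\": \"1234\"}"},  # hypothetical tool
    }]},
    # one tool message per call, linked back via tool_call_id
    {"role": "tool", "tool_call_id": "call_ab12cd34ef56",
     "content": "{\"status\": \"shipped\", \"eta_days\": 2}"},
    # final assistant turn comes from _synthesize_response()
    {"role": "assistant", "content": "Your order has shipped and should arrive in about 2 days."},
]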