synkro 0.4.36__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synkro might be problematic. Click here for more details.
- synkro/__init__.py +331 -0
- synkro/advanced.py +184 -0
- synkro/cli.py +156 -0
- synkro/core/__init__.py +7 -0
- synkro/core/checkpoint.py +250 -0
- synkro/core/dataset.py +432 -0
- synkro/core/policy.py +337 -0
- synkro/errors.py +178 -0
- synkro/examples/__init__.py +148 -0
- synkro/factory.py +291 -0
- synkro/formatters/__init__.py +18 -0
- synkro/formatters/chatml.py +121 -0
- synkro/formatters/langfuse.py +98 -0
- synkro/formatters/langsmith.py +98 -0
- synkro/formatters/qa.py +112 -0
- synkro/formatters/sft.py +90 -0
- synkro/formatters/tool_call.py +127 -0
- synkro/generation/__init__.py +9 -0
- synkro/generation/follow_ups.py +134 -0
- synkro/generation/generator.py +314 -0
- synkro/generation/golden_responses.py +269 -0
- synkro/generation/golden_scenarios.py +333 -0
- synkro/generation/golden_tool_responses.py +791 -0
- synkro/generation/logic_extractor.py +126 -0
- synkro/generation/multiturn_responses.py +177 -0
- synkro/generation/planner.py +131 -0
- synkro/generation/responses.py +189 -0
- synkro/generation/scenarios.py +90 -0
- synkro/generation/tool_responses.py +625 -0
- synkro/generation/tool_simulator.py +114 -0
- synkro/interactive/__init__.py +16 -0
- synkro/interactive/hitl_session.py +205 -0
- synkro/interactive/intent_classifier.py +94 -0
- synkro/interactive/logic_map_editor.py +176 -0
- synkro/interactive/rich_ui.py +459 -0
- synkro/interactive/scenario_editor.py +198 -0
- synkro/llm/__init__.py +7 -0
- synkro/llm/client.py +309 -0
- synkro/llm/rate_limits.py +99 -0
- synkro/models/__init__.py +50 -0
- synkro/models/anthropic.py +26 -0
- synkro/models/google.py +19 -0
- synkro/models/local.py +104 -0
- synkro/models/openai.py +31 -0
- synkro/modes/__init__.py +13 -0
- synkro/modes/config.py +66 -0
- synkro/modes/conversation.py +35 -0
- synkro/modes/tool_call.py +18 -0
- synkro/parsers.py +442 -0
- synkro/pipeline/__init__.py +20 -0
- synkro/pipeline/phases.py +592 -0
- synkro/pipeline/runner.py +769 -0
- synkro/pipelines.py +136 -0
- synkro/prompts/__init__.py +57 -0
- synkro/prompts/base.py +167 -0
- synkro/prompts/golden_templates.py +533 -0
- synkro/prompts/interactive_templates.py +198 -0
- synkro/prompts/multiturn_templates.py +156 -0
- synkro/prompts/templates.py +281 -0
- synkro/prompts/tool_templates.py +318 -0
- synkro/quality/__init__.py +14 -0
- synkro/quality/golden_refiner.py +163 -0
- synkro/quality/grader.py +153 -0
- synkro/quality/multiturn_grader.py +150 -0
- synkro/quality/refiner.py +137 -0
- synkro/quality/tool_grader.py +126 -0
- synkro/quality/tool_refiner.py +128 -0
- synkro/quality/verifier.py +228 -0
- synkro/reporting.py +464 -0
- synkro/schemas.py +521 -0
- synkro/types/__init__.py +43 -0
- synkro/types/core.py +153 -0
- synkro/types/dataset_type.py +33 -0
- synkro/types/logic_map.py +348 -0
- synkro/types/tool.py +94 -0
- synkro-0.4.36.data/data/examples/__init__.py +148 -0
- synkro-0.4.36.dist-info/METADATA +507 -0
- synkro-0.4.36.dist-info/RECORD +81 -0
- synkro-0.4.36.dist-info/WHEEL +4 -0
- synkro-0.4.36.dist-info/entry_points.txt +2 -0
- synkro-0.4.36.dist-info/licenses/LICENSE +21 -0
synkro/factory.py
ADDED
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
"""Component factory for dependency injection.
|
|
2
|
+
|
|
3
|
+
This module provides a factory for creating pipeline components,
|
|
4
|
+
enabling testability and flexible configuration.
|
|
5
|
+
|
|
6
|
+
Supports both legacy components and Golden Trace components:
|
|
7
|
+
- Logic Extractor (The Cartographer)
|
|
8
|
+
- Golden Scenario Generator (The Adversary)
|
|
9
|
+
- Golden Response Generator (The Thinker)
|
|
10
|
+
- Trace Verifier (The Auditor)
|
|
11
|
+
- Golden Refiner
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from typing import TYPE_CHECKING
|
|
15
|
+
|
|
16
|
+
from synkro.llm.client import LLM
|
|
17
|
+
from synkro.modes.config import ModeConfig
|
|
18
|
+
from synkro.generation.planner import Planner
|
|
19
|
+
from synkro.generation.scenarios import ScenarioGenerator
|
|
20
|
+
from synkro.generation.responses import ResponseGenerator
|
|
21
|
+
from synkro.generation.follow_ups import FollowUpGenerator
|
|
22
|
+
from synkro.generation.multiturn_responses import MultiTurnResponseGenerator
|
|
23
|
+
from synkro.quality.grader import Grader
|
|
24
|
+
from synkro.quality.refiner import Refiner
|
|
25
|
+
from synkro.quality.multiturn_grader import MultiTurnGrader
|
|
26
|
+
|
|
27
|
+
if TYPE_CHECKING:
|
|
28
|
+
from synkro.types.tool import ToolDefinition
|
|
29
|
+
from synkro.generation.tool_simulator import ToolSimulator
|
|
30
|
+
from synkro.generation.tool_responses import ToolCallResponseGenerator
|
|
31
|
+
from synkro.quality.tool_grader import ToolCallGrader
|
|
32
|
+
from synkro.quality.tool_refiner import ToolCallRefiner
|
|
33
|
+
from synkro.generation.logic_extractor import LogicExtractor
|
|
34
|
+
from synkro.generation.golden_scenarios import GoldenScenarioGenerator
|
|
35
|
+
from synkro.generation.golden_responses import GoldenResponseGenerator
|
|
36
|
+
from synkro.generation.golden_tool_responses import GoldenToolCallResponseGenerator
|
|
37
|
+
from synkro.quality.verifier import TraceVerifier
|
|
38
|
+
from synkro.quality.golden_refiner import GoldenRefiner
|
|
39
|
+
from synkro.interactive.logic_map_editor import LogicMapEditor
|
|
40
|
+
from synkro.interactive.scenario_editor import ScenarioEditor
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class ComponentFactory:
    """
    Factory for creating pipeline components with shared LLM clients.

    Centralizes component construction so every phase of the pipeline is
    configured consistently (same LLM clients, same mode config, same tool
    set), and so tests can substitute a factory with stubbed clients.

    Two families of components are produced:
    - Legacy components (planner, scenario/response generators, grader, refiner)
    - Golden Trace components (logic extractor, golden generators, verifier,
      golden refiner, interactive editors)

    Heavy or optional components are imported lazily inside their factory
    methods to keep module import cheap and avoid circular imports.

    Examples:
        >>> factory = ComponentFactory(gen_llm, grade_llm, mode_config)
        >>> planner = factory.create_planner()
        >>> grader = factory.create_grader()

        >>> # With tools for tool_call dataset type
        >>> factory = ComponentFactory(gen_llm, grade_llm, mode_config, tools=[...])
        >>> simulator = factory.create_tool_simulator()
    """

    def __init__(
        self,
        generation_llm: LLM,
        grading_llm: LLM,
        mode_config: ModeConfig,
        tools: list["ToolDefinition"] | None = None,
        thinking: bool = False,
    ):
        """
        Initialize the factory.

        Args:
            generation_llm: LLM client for generation tasks (scenarios, responses, refinement)
            grading_llm: LLM client for grading and planning (typically a stronger model)
            mode_config: Configuration for the dataset type (prompt templates, etc.)
            tools: Optional list of tool definitions for the tool_call dataset type
            thinking: Enable thinking mode with <think> tags in responses
        """
        self.generation_llm = generation_llm
        self.grading_llm = grading_llm
        self.mode_config = mode_config
        # Normalize None to an empty list so `has_tools` and iteration are safe.
        self.tools = tools or []
        self.thinking = thinking

    def create_planner(self) -> Planner:
        """Create a Planner instance (uses the stronger grading LLM)."""
        return Planner(llm=self.grading_llm)

    def create_scenario_generator(self) -> ScenarioGenerator:
        """Create a ScenarioGenerator with mode-specific prompts."""
        gen = ScenarioGenerator(llm=self.generation_llm)
        gen.prompt_template = self.mode_config.scenario_prompt
        return gen

    def create_response_generator(self) -> ResponseGenerator:
        """Create a ResponseGenerator with mode-specific prompts."""
        gen = ResponseGenerator(llm=self.generation_llm)
        gen.prompt_template = self.mode_config.response_prompt
        return gen

    def create_grader(self) -> "Grader | ToolCallGrader":
        """
        Create a Grader with mode-specific prompts.

        Auto-selects ToolCallGrader when tools are configured; the tool-aware
        grader receives the tool definitions instead of a prompt template.
        """
        if self.has_tools:
            from synkro.quality.tool_grader import ToolCallGrader
            return ToolCallGrader(llm=self.grading_llm, tools=self.tools)

        grader = Grader(llm=self.grading_llm)
        grader.prompt_template = self.mode_config.grade_prompt
        return grader

    def create_refiner(self) -> "Refiner | ToolCallRefiner":
        """
        Create a Refiner with mode-specific prompts.

        Auto-selects ToolCallRefiner when tools are configured.
        This ensures tool_calls format is preserved during refinement.
        """
        if self.has_tools:
            from synkro.quality.tool_refiner import ToolCallRefiner
            simulator = self.create_tool_simulator()
            return ToolCallRefiner(
                llm=self.generation_llm,
                tools=self.tools,
                simulator=simulator,
            )

        refiner = Refiner(llm=self.generation_llm)
        refiner.prompt_template = self.mode_config.refine_prompt
        return refiner

    def create_tool_simulator(self) -> "ToolSimulator":
        """
        Create a ToolSimulator instance for the tool_call dataset type.

        Raises:
            ValueError: If no tools are configured.
        """
        from synkro.generation.tool_simulator import ToolSimulator

        if not self.tools:
            raise ValueError("Cannot create ToolSimulator without tools")

        return ToolSimulator(tools=self.tools, llm=self.generation_llm)

    def create_tool_call_response_generator(self) -> "ToolCallResponseGenerator":
        """
        Create a ToolCallResponseGenerator for generating proper tool call traces.

        This generator uses JSON mode to produce structured tool calls in
        OpenAI function calling format.

        Raises:
            ValueError: If no tools are configured.
        """
        from synkro.generation.tool_responses import ToolCallResponseGenerator

        if not self.tools:
            raise ValueError("Cannot create ToolCallResponseGenerator without tools")

        # The generator needs a simulator to produce tool role responses.
        simulator = self.create_tool_simulator()

        return ToolCallResponseGenerator(
            tools=self.tools,
            llm=self.generation_llm,
            simulator=simulator,
        )

    def get_tools_description(self) -> str:
        """Return a formatted description of all available tools, one per block."""
        if not self.tools:
            return "No tools available"

        # Each tool renders its own prompt snippet; join with blank lines.
        return "\n\n".join(tool.to_system_prompt() for tool in self.tools)

    @property
    def has_tools(self) -> bool:
        """Whether any tools are configured."""
        return bool(self.tools)

    def create_follow_up_generator(self) -> FollowUpGenerator:
        """Create a FollowUpGenerator for multi-turn conversations."""
        return FollowUpGenerator(llm=self.generation_llm)

    def create_multi_turn_response_generator(self) -> MultiTurnResponseGenerator:
        """Create a MultiTurnResponseGenerator for multi-turn trace generation."""
        return MultiTurnResponseGenerator(llm=self.generation_llm)

    def create_multi_turn_grader(self) -> MultiTurnGrader:
        """Create a MultiTurnGrader for per-turn and overall conversation grading."""
        return MultiTurnGrader(llm=self.grading_llm)

    # =========================================================================
    # GOLDEN TRACE COMPONENTS
    # =========================================================================

    def create_logic_extractor(self) -> "LogicExtractor":
        """
        Create a LogicExtractor (The Cartographer).

        Uses the grading LLM (stronger model) for accurate rule extraction.
        """
        from synkro.generation.logic_extractor import LogicExtractor
        return LogicExtractor(llm=self.grading_llm)

    def create_golden_scenario_generator(self) -> "GoldenScenarioGenerator":
        """
        Create a GoldenScenarioGenerator (The Adversary).

        Generates typed scenarios (positive, negative, edge_case, irrelevant)
        with rule targeting.
        """
        from synkro.generation.golden_scenarios import GoldenScenarioGenerator
        return GoldenScenarioGenerator(llm=self.generation_llm)

    def create_golden_response_generator(self) -> "GoldenResponseGenerator":
        """
        Create a GoldenResponseGenerator (The Thinker).

        Generates traces with grounded Chain-of-Thought reasoning
        and rule citations.
        """
        from synkro.generation.golden_responses import GoldenResponseGenerator
        return GoldenResponseGenerator(llm=self.generation_llm, thinking=self.thinking)

    def create_golden_tool_call_generator(self) -> "GoldenToolCallResponseGenerator":
        """
        Create a GoldenToolCallResponseGenerator (The Thinker for Tools).

        Generates tool call traces with rule citations for tool selection
        decisions.

        Raises:
            ValueError: If no tools are configured.
        """
        from synkro.generation.golden_tool_responses import GoldenToolCallResponseGenerator

        if not self.tools:
            raise ValueError("Cannot create GoldenToolCallResponseGenerator without tools")

        simulator = self.create_tool_simulator()
        return GoldenToolCallResponseGenerator(
            tools=self.tools,
            llm=self.generation_llm,
            simulator=simulator,
            thinking=self.thinking,
        )

    def create_verifier(self) -> "TraceVerifier":
        """
        Create a TraceVerifier (The Auditor).

        Verifies traces against the Logic Map to ensure:
        - No skipped rules
        - No hallucinated rules
        - No contradictions
        - DAG compliance

        Uses the grading LLM (stronger model) for accurate verification.
        """
        from synkro.quality.verifier import TraceVerifier
        return TraceVerifier(llm=self.grading_llm)

    def create_golden_refiner(self) -> "GoldenRefiner":
        """
        Create a GoldenRefiner.

        Refines traces that fail verification, using Logic Map context
        to fix skipped rules, hallucinations, and contradictions.
        """
        from synkro.quality.golden_refiner import GoldenRefiner
        return GoldenRefiner(llm=self.generation_llm)

    def create_logic_map_editor(self) -> "LogicMapEditor":
        """
        Create a LogicMapEditor for Human-in-the-Loop sessions.

        The editor uses the grading LLM (stronger model) to interpret
        natural language feedback and refine Logic Maps.
        """
        from synkro.interactive.logic_map_editor import LogicMapEditor
        return LogicMapEditor(llm=self.grading_llm)

    def create_scenario_editor(self) -> "ScenarioEditor":
        """
        Create a ScenarioEditor for Human-in-the-Loop scenario editing.

        The editor uses the grading LLM (stronger model) to interpret
        natural language feedback and refine scenarios.
        """
        from synkro.interactive.scenario_editor import ScenarioEditor
        return ScenarioEditor(llm=self.grading_llm)
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
__all__ = ["ComponentFactory"]
|
|
291
|
+
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"""Output formatters for different training data formats."""
|
|
2
|
+
|
|
3
|
+
from synkro.formatters.sft import SFTFormatter
|
|
4
|
+
from synkro.formatters.tool_call import ToolCallFormatter
|
|
5
|
+
from synkro.formatters.chatml import ChatMLFormatter
|
|
6
|
+
from synkro.formatters.qa import QAFormatter
|
|
7
|
+
from synkro.formatters.langsmith import LangSmithFormatter
|
|
8
|
+
from synkro.formatters.langfuse import LangfuseFormatter
|
|
9
|
+
|
|
10
|
+
# Public API: the formatter classes re-exported by this package.
__all__ = [
    "SFTFormatter",
    "ToolCallFormatter",
    "ChatMLFormatter",
    "QAFormatter",
    "LangSmithFormatter",
    "LangfuseFormatter",
]
|
|
18
|
+
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
"""ChatML formatter with XML tags for tool calling."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import TYPE_CHECKING
|
|
6
|
+
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
from synkro.types.core import Trace
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ChatMLFormatter:
|
|
12
|
+
"""
|
|
13
|
+
Format traces as ChatML with XML tags for tool calls.
|
|
14
|
+
|
|
15
|
+
Uses <tool_call> and <tool_response> XML tags for tool interactions,
|
|
16
|
+
compatible with Hermes/Mistral style fine-tuning.
|
|
17
|
+
|
|
18
|
+
Example output:
|
|
19
|
+
{
|
|
20
|
+
"messages": [
|
|
21
|
+
{"role": "system", "content": "You have access to tools."},
|
|
22
|
+
{"role": "user", "content": "What's the weather in NYC?"},
|
|
23
|
+
{"role": "assistant", "content": "<tool_call>\\n{\"name\": \"get_weather\", \"arguments\": {\"city\": \"NYC\"}}\\n</tool_call>"},
|
|
24
|
+
{"role": "tool", "content": "<tool_response>\\n{\"temp\": \"72F\"}\\n</tool_response>"},
|
|
25
|
+
{"role": "assistant", "content": "The weather in NYC is 72°F."}
|
|
26
|
+
]
|
|
27
|
+
}
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
def __init__(self, include_metadata: bool = False):
|
|
31
|
+
"""
|
|
32
|
+
Initialize the ChatMLFormatter.
|
|
33
|
+
|
|
34
|
+
Args:
|
|
35
|
+
include_metadata: If True, include trace metadata in output
|
|
36
|
+
"""
|
|
37
|
+
self.include_metadata = include_metadata
|
|
38
|
+
|
|
39
|
+
def format(self, traces: list["Trace"]) -> list[dict]:
|
|
40
|
+
"""
|
|
41
|
+
Format traces as ChatML with XML tags.
|
|
42
|
+
|
|
43
|
+
Args:
|
|
44
|
+
traces: List of traces to format
|
|
45
|
+
|
|
46
|
+
Returns:
|
|
47
|
+
List of formatted examples
|
|
48
|
+
"""
|
|
49
|
+
examples = []
|
|
50
|
+
|
|
51
|
+
for trace in traces:
|
|
52
|
+
messages = []
|
|
53
|
+
|
|
54
|
+
for m in trace.messages:
|
|
55
|
+
# Handle assistant messages with tool calls
|
|
56
|
+
if m.role == "assistant" and m.tool_calls:
|
|
57
|
+
# Convert tool calls to XML format
|
|
58
|
+
tool_call_xmls = []
|
|
59
|
+
for tc in m.tool_calls:
|
|
60
|
+
tool_call_json = json.dumps({
|
|
61
|
+
"name": tc.function.name,
|
|
62
|
+
"arguments": json.loads(tc.function.arguments)
|
|
63
|
+
})
|
|
64
|
+
tool_call_xmls.append(f"<tool_call>\n{tool_call_json}\n</tool_call>")
|
|
65
|
+
|
|
66
|
+
content = "\n".join(tool_call_xmls)
|
|
67
|
+
messages.append({"role": "assistant", "content": content})
|
|
68
|
+
|
|
69
|
+
# Handle tool responses
|
|
70
|
+
elif m.role == "tool":
|
|
71
|
+
content = f"<tool_response>\n{m.content}\n</tool_response>"
|
|
72
|
+
messages.append({"role": "tool", "content": content})
|
|
73
|
+
|
|
74
|
+
# Regular messages (system, user, assistant without tools)
|
|
75
|
+
else:
|
|
76
|
+
messages.append({
|
|
77
|
+
"role": m.role,
|
|
78
|
+
"content": m.content or ""
|
|
79
|
+
})
|
|
80
|
+
|
|
81
|
+
example = {"messages": messages}
|
|
82
|
+
|
|
83
|
+
if self.include_metadata:
|
|
84
|
+
example["metadata"] = {
|
|
85
|
+
"scenario": trace.scenario.description,
|
|
86
|
+
"category": trace.scenario.category,
|
|
87
|
+
"grade": trace.grade.model_dump() if trace.grade else None,
|
|
88
|
+
"has_tool_calls": trace.has_tool_calls,
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
examples.append(example)
|
|
92
|
+
|
|
93
|
+
return examples
|
|
94
|
+
|
|
95
|
+
def save(self, traces: list["Trace"], path: str | Path) -> None:
|
|
96
|
+
"""
|
|
97
|
+
Save formatted traces to a JSONL file.
|
|
98
|
+
|
|
99
|
+
Args:
|
|
100
|
+
traces: List of traces to save
|
|
101
|
+
path: Output file path
|
|
102
|
+
"""
|
|
103
|
+
path = Path(path)
|
|
104
|
+
examples = self.format(traces)
|
|
105
|
+
|
|
106
|
+
with open(path, "w") as f:
|
|
107
|
+
for example in examples:
|
|
108
|
+
f.write(json.dumps(example) + "\n")
|
|
109
|
+
|
|
110
|
+
def to_jsonl(self, traces: list["Trace"]) -> str:
|
|
111
|
+
"""
|
|
112
|
+
Convert traces to JSONL string.
|
|
113
|
+
|
|
114
|
+
Args:
|
|
115
|
+
traces: List of traces to convert
|
|
116
|
+
|
|
117
|
+
Returns:
|
|
118
|
+
JSONL formatted string
|
|
119
|
+
"""
|
|
120
|
+
examples = self.format(traces)
|
|
121
|
+
return "\n".join(json.dumps(e) for e in examples)
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"""Langfuse formatter for evaluation datasets."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import TYPE_CHECKING
|
|
6
|
+
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
from synkro.types.core import Trace
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class LangfuseFormatter:
|
|
12
|
+
"""
|
|
13
|
+
Format traces for Langfuse datasets.
|
|
14
|
+
|
|
15
|
+
Langfuse format uses input/expectedOutput structure:
|
|
16
|
+
- input: any JSON object with input data
|
|
17
|
+
- expectedOutput: any JSON object with expected output
|
|
18
|
+
- metadata: optional key-value pairs
|
|
19
|
+
|
|
20
|
+
Example output:
|
|
21
|
+
{
|
|
22
|
+
"input": {
|
|
23
|
+
"question": "Can I submit a $200 expense without a receipt?",
|
|
24
|
+
"context": "Expense: $200, No receipt"
|
|
25
|
+
},
|
|
26
|
+
"expectedOutput": {
|
|
27
|
+
"answer": "All expenses require receipts...",
|
|
28
|
+
"expected_outcome": "Deny - missing receipt"
|
|
29
|
+
},
|
|
30
|
+
"metadata": {
|
|
31
|
+
"ground_truth_rules": ["R003"],
|
|
32
|
+
"difficulty": "negative",
|
|
33
|
+
"category": "Receipt Requirements"
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
def format(self, traces: list["Trace"]) -> list[dict]:
|
|
39
|
+
"""
|
|
40
|
+
Format traces as Langfuse dataset items.
|
|
41
|
+
|
|
42
|
+
Args:
|
|
43
|
+
traces: List of traces to format
|
|
44
|
+
|
|
45
|
+
Returns:
|
|
46
|
+
List of Langfuse-compatible dataset items
|
|
47
|
+
"""
|
|
48
|
+
examples = []
|
|
49
|
+
|
|
50
|
+
for trace in traces:
|
|
51
|
+
example = {
|
|
52
|
+
"input": {
|
|
53
|
+
"question": trace.user_message,
|
|
54
|
+
"context": trace.scenario.context or "",
|
|
55
|
+
},
|
|
56
|
+
"expectedOutput": {
|
|
57
|
+
"answer": trace.assistant_message,
|
|
58
|
+
"expected_outcome": trace.scenario.expected_outcome or "",
|
|
59
|
+
},
|
|
60
|
+
"metadata": {
|
|
61
|
+
"ground_truth_rules": trace.scenario.target_rule_ids or [],
|
|
62
|
+
"difficulty": trace.scenario.scenario_type or "unknown",
|
|
63
|
+
"category": trace.scenario.category or "",
|
|
64
|
+
"passed": trace.grade.passed if trace.grade else None,
|
|
65
|
+
},
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
examples.append(example)
|
|
69
|
+
|
|
70
|
+
return examples
|
|
71
|
+
|
|
72
|
+
def save(self, traces: list["Trace"], path: str | Path) -> None:
|
|
73
|
+
"""
|
|
74
|
+
Save formatted traces to a JSONL file.
|
|
75
|
+
|
|
76
|
+
Args:
|
|
77
|
+
traces: List of traces to save
|
|
78
|
+
path: Output file path
|
|
79
|
+
"""
|
|
80
|
+
path = Path(path)
|
|
81
|
+
examples = self.format(traces)
|
|
82
|
+
|
|
83
|
+
with open(path, "w") as f:
|
|
84
|
+
for example in examples:
|
|
85
|
+
f.write(json.dumps(example) + "\n")
|
|
86
|
+
|
|
87
|
+
def to_jsonl(self, traces: list["Trace"]) -> str:
|
|
88
|
+
"""
|
|
89
|
+
Convert traces to JSONL string.
|
|
90
|
+
|
|
91
|
+
Args:
|
|
92
|
+
traces: List of traces to convert
|
|
93
|
+
|
|
94
|
+
Returns:
|
|
95
|
+
JSONL formatted string
|
|
96
|
+
"""
|
|
97
|
+
examples = self.format(traces)
|
|
98
|
+
return "\n".join(json.dumps(e) for e in examples)
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"""LangSmith formatter for evaluation datasets."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import TYPE_CHECKING
|
|
6
|
+
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
from synkro.types.core import Trace
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class LangSmithFormatter:
|
|
12
|
+
"""
|
|
13
|
+
Format traces for LangSmith datasets.
|
|
14
|
+
|
|
15
|
+
LangSmith format uses nested inputs/outputs structure:
|
|
16
|
+
- inputs: dict of input fields
|
|
17
|
+
- outputs: dict of expected output fields
|
|
18
|
+
- metadata: optional additional info
|
|
19
|
+
|
|
20
|
+
Example output:
|
|
21
|
+
{
|
|
22
|
+
"inputs": {
|
|
23
|
+
"question": "Can I submit a $200 expense without a receipt?",
|
|
24
|
+
"context": "Expense: $200, No receipt"
|
|
25
|
+
},
|
|
26
|
+
"outputs": {
|
|
27
|
+
"answer": "All expenses require receipts..."
|
|
28
|
+
},
|
|
29
|
+
"metadata": {
|
|
30
|
+
"expected_outcome": "Deny - missing receipt",
|
|
31
|
+
"ground_truth_rules": ["R003"],
|
|
32
|
+
"difficulty": "negative",
|
|
33
|
+
"category": "Receipt Requirements"
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
def format(self, traces: list["Trace"]) -> list[dict]:
|
|
39
|
+
"""
|
|
40
|
+
Format traces as LangSmith dataset examples.
|
|
41
|
+
|
|
42
|
+
Args:
|
|
43
|
+
traces: List of traces to format
|
|
44
|
+
|
|
45
|
+
Returns:
|
|
46
|
+
List of LangSmith-compatible examples
|
|
47
|
+
"""
|
|
48
|
+
examples = []
|
|
49
|
+
|
|
50
|
+
for trace in traces:
|
|
51
|
+
example = {
|
|
52
|
+
"inputs": {
|
|
53
|
+
"question": trace.user_message,
|
|
54
|
+
"context": trace.scenario.context or "",
|
|
55
|
+
},
|
|
56
|
+
"outputs": {
|
|
57
|
+
"answer": trace.assistant_message,
|
|
58
|
+
},
|
|
59
|
+
"metadata": {
|
|
60
|
+
"expected_outcome": trace.scenario.expected_outcome or "",
|
|
61
|
+
"ground_truth_rules": trace.scenario.target_rule_ids or [],
|
|
62
|
+
"difficulty": trace.scenario.scenario_type or "unknown",
|
|
63
|
+
"category": trace.scenario.category or "",
|
|
64
|
+
"passed": trace.grade.passed if trace.grade else None,
|
|
65
|
+
},
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
examples.append(example)
|
|
69
|
+
|
|
70
|
+
return examples
|
|
71
|
+
|
|
72
|
+
def save(self, traces: list["Trace"], path: str | Path) -> None:
|
|
73
|
+
"""
|
|
74
|
+
Save formatted traces to a JSONL file.
|
|
75
|
+
|
|
76
|
+
Args:
|
|
77
|
+
traces: List of traces to save
|
|
78
|
+
path: Output file path
|
|
79
|
+
"""
|
|
80
|
+
path = Path(path)
|
|
81
|
+
examples = self.format(traces)
|
|
82
|
+
|
|
83
|
+
with open(path, "w") as f:
|
|
84
|
+
for example in examples:
|
|
85
|
+
f.write(json.dumps(example) + "\n")
|
|
86
|
+
|
|
87
|
+
def to_jsonl(self, traces: list["Trace"]) -> str:
|
|
88
|
+
"""
|
|
89
|
+
Convert traces to JSONL string.
|
|
90
|
+
|
|
91
|
+
Args:
|
|
92
|
+
traces: List of traces to convert
|
|
93
|
+
|
|
94
|
+
Returns:
|
|
95
|
+
JSONL formatted string
|
|
96
|
+
"""
|
|
97
|
+
examples = self.format(traces)
|
|
98
|
+
return "\n".join(json.dumps(e) for e in examples)
|