synkro 0.4.36__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synkro might be problematic. Click here for more details.

Files changed (81)
  1. synkro/__init__.py +331 -0
  2. synkro/advanced.py +184 -0
  3. synkro/cli.py +156 -0
  4. synkro/core/__init__.py +7 -0
  5. synkro/core/checkpoint.py +250 -0
  6. synkro/core/dataset.py +432 -0
  7. synkro/core/policy.py +337 -0
  8. synkro/errors.py +178 -0
  9. synkro/examples/__init__.py +148 -0
  10. synkro/factory.py +291 -0
  11. synkro/formatters/__init__.py +18 -0
  12. synkro/formatters/chatml.py +121 -0
  13. synkro/formatters/langfuse.py +98 -0
  14. synkro/formatters/langsmith.py +98 -0
  15. synkro/formatters/qa.py +112 -0
  16. synkro/formatters/sft.py +90 -0
  17. synkro/formatters/tool_call.py +127 -0
  18. synkro/generation/__init__.py +9 -0
  19. synkro/generation/follow_ups.py +134 -0
  20. synkro/generation/generator.py +314 -0
  21. synkro/generation/golden_responses.py +269 -0
  22. synkro/generation/golden_scenarios.py +333 -0
  23. synkro/generation/golden_tool_responses.py +791 -0
  24. synkro/generation/logic_extractor.py +126 -0
  25. synkro/generation/multiturn_responses.py +177 -0
  26. synkro/generation/planner.py +131 -0
  27. synkro/generation/responses.py +189 -0
  28. synkro/generation/scenarios.py +90 -0
  29. synkro/generation/tool_responses.py +625 -0
  30. synkro/generation/tool_simulator.py +114 -0
  31. synkro/interactive/__init__.py +16 -0
  32. synkro/interactive/hitl_session.py +205 -0
  33. synkro/interactive/intent_classifier.py +94 -0
  34. synkro/interactive/logic_map_editor.py +176 -0
  35. synkro/interactive/rich_ui.py +459 -0
  36. synkro/interactive/scenario_editor.py +198 -0
  37. synkro/llm/__init__.py +7 -0
  38. synkro/llm/client.py +309 -0
  39. synkro/llm/rate_limits.py +99 -0
  40. synkro/models/__init__.py +50 -0
  41. synkro/models/anthropic.py +26 -0
  42. synkro/models/google.py +19 -0
  43. synkro/models/local.py +104 -0
  44. synkro/models/openai.py +31 -0
  45. synkro/modes/__init__.py +13 -0
  46. synkro/modes/config.py +66 -0
  47. synkro/modes/conversation.py +35 -0
  48. synkro/modes/tool_call.py +18 -0
  49. synkro/parsers.py +442 -0
  50. synkro/pipeline/__init__.py +20 -0
  51. synkro/pipeline/phases.py +592 -0
  52. synkro/pipeline/runner.py +769 -0
  53. synkro/pipelines.py +136 -0
  54. synkro/prompts/__init__.py +57 -0
  55. synkro/prompts/base.py +167 -0
  56. synkro/prompts/golden_templates.py +533 -0
  57. synkro/prompts/interactive_templates.py +198 -0
  58. synkro/prompts/multiturn_templates.py +156 -0
  59. synkro/prompts/templates.py +281 -0
  60. synkro/prompts/tool_templates.py +318 -0
  61. synkro/quality/__init__.py +14 -0
  62. synkro/quality/golden_refiner.py +163 -0
  63. synkro/quality/grader.py +153 -0
  64. synkro/quality/multiturn_grader.py +150 -0
  65. synkro/quality/refiner.py +137 -0
  66. synkro/quality/tool_grader.py +126 -0
  67. synkro/quality/tool_refiner.py +128 -0
  68. synkro/quality/verifier.py +228 -0
  69. synkro/reporting.py +464 -0
  70. synkro/schemas.py +521 -0
  71. synkro/types/__init__.py +43 -0
  72. synkro/types/core.py +153 -0
  73. synkro/types/dataset_type.py +33 -0
  74. synkro/types/logic_map.py +348 -0
  75. synkro/types/tool.py +94 -0
  76. synkro-0.4.36.data/data/examples/__init__.py +148 -0
  77. synkro-0.4.36.dist-info/METADATA +507 -0
  78. synkro-0.4.36.dist-info/RECORD +81 -0
  79. synkro-0.4.36.dist-info/WHEEL +4 -0
  80. synkro-0.4.36.dist-info/entry_points.txt +2 -0
  81. synkro-0.4.36.dist-info/licenses/LICENSE +21 -0
synkro/factory.py ADDED
@@ -0,0 +1,291 @@
1
+ """Component factory for dependency injection.
2
+
3
+ This module provides a factory for creating pipeline components,
4
+ enabling testability and flexible configuration.
5
+
6
+ Supports both legacy components and Golden Trace components:
7
+ - Logic Extractor (The Cartographer)
8
+ - Golden Scenario Generator (The Adversary)
9
+ - Golden Response Generator (The Thinker)
10
+ - Trace Verifier (The Auditor)
11
+ - Golden Refiner
12
+ """
13
+
14
+ from typing import TYPE_CHECKING
15
+
16
+ from synkro.llm.client import LLM
17
+ from synkro.modes.config import ModeConfig
18
+ from synkro.generation.planner import Planner
19
+ from synkro.generation.scenarios import ScenarioGenerator
20
+ from synkro.generation.responses import ResponseGenerator
21
+ from synkro.generation.follow_ups import FollowUpGenerator
22
+ from synkro.generation.multiturn_responses import MultiTurnResponseGenerator
23
+ from synkro.quality.grader import Grader
24
+ from synkro.quality.refiner import Refiner
25
+ from synkro.quality.multiturn_grader import MultiTurnGrader
26
+
27
+ if TYPE_CHECKING:
28
+ from synkro.types.tool import ToolDefinition
29
+ from synkro.generation.tool_simulator import ToolSimulator
30
+ from synkro.generation.tool_responses import ToolCallResponseGenerator
31
+ from synkro.quality.tool_grader import ToolCallGrader
32
+ from synkro.quality.tool_refiner import ToolCallRefiner
33
+ from synkro.generation.logic_extractor import LogicExtractor
34
+ from synkro.generation.golden_scenarios import GoldenScenarioGenerator
35
+ from synkro.generation.golden_responses import GoldenResponseGenerator
36
+ from synkro.generation.golden_tool_responses import GoldenToolCallResponseGenerator
37
+ from synkro.quality.verifier import TraceVerifier
38
+ from synkro.quality.golden_refiner import GoldenRefiner
39
+ from synkro.interactive.logic_map_editor import LogicMapEditor
40
+ from synkro.interactive.scenario_editor import ScenarioEditor
41
+
42
+
43
+ class ComponentFactory:
44
+ """
45
+ Factory for creating pipeline components with shared LLM clients.
46
+
47
+ This centralizes component creation and ensures consistent configuration
48
+ across the pipeline.
49
+
50
+ Examples:
51
+ >>> factory = ComponentFactory(gen_llm, grade_llm, mode_config)
52
+ >>> planner = factory.create_planner()
53
+ >>> grader = factory.create_grader()
54
+
55
+ >>> # With tools for tool_call dataset type
56
+ >>> factory = ComponentFactory(gen_llm, grade_llm, mode_config, tools=[...])
57
+ >>> simulator = factory.create_tool_simulator()
58
+ """
59
+
60
+ def __init__(
61
+ self,
62
+ generation_llm: LLM,
63
+ grading_llm: LLM,
64
+ mode_config: ModeConfig,
65
+ tools: list["ToolDefinition"] | None = None,
66
+ thinking: bool = False,
67
+ ):
68
+ """
69
+ Initialize the factory.
70
+
71
+ Args:
72
+ generation_llm: LLM client for generation tasks (scenarios, responses, refinement)
73
+ grading_llm: LLM client for grading and planning (typically stronger model)
74
+ mode_config: Configuration for the dataset type (prompts, etc.)
75
+ tools: Optional list of tool definitions for tool_call dataset type
76
+ thinking: Enable thinking mode with <think> tags in responses
77
+ """
78
+ self.generation_llm = generation_llm
79
+ self.grading_llm = grading_llm
80
+ self.mode_config = mode_config
81
+ self.tools = tools or []
82
+ self.thinking = thinking
83
+
84
+ def create_planner(self) -> Planner:
85
+ """Create a Planner instance."""
86
+ return Planner(llm=self.grading_llm)
87
+
88
+ def create_scenario_generator(self) -> ScenarioGenerator:
89
+ """Create a ScenarioGenerator with mode-specific prompts."""
90
+ gen = ScenarioGenerator(llm=self.generation_llm)
91
+ gen.prompt_template = self.mode_config.scenario_prompt
92
+ return gen
93
+
94
+ def create_response_generator(self) -> ResponseGenerator:
95
+ """Create a ResponseGenerator with mode-specific prompts."""
96
+ gen = ResponseGenerator(llm=self.generation_llm)
97
+ gen.prompt_template = self.mode_config.response_prompt
98
+ return gen
99
+
100
+ def create_grader(self) -> "Grader | ToolCallGrader":
101
+ """
102
+ Create a Grader with mode-specific prompts.
103
+
104
+ Auto-selects ToolCallGrader when tools are configured.
105
+ """
106
+ if self.has_tools:
107
+ from synkro.quality.tool_grader import ToolCallGrader
108
+ return ToolCallGrader(llm=self.grading_llm, tools=self.tools)
109
+
110
+ grader = Grader(llm=self.grading_llm)
111
+ grader.prompt_template = self.mode_config.grade_prompt
112
+ return grader
113
+
114
+ def create_refiner(self) -> "Refiner | ToolCallRefiner":
115
+ """
116
+ Create a Refiner with mode-specific prompts.
117
+
118
+ Auto-selects ToolCallRefiner when tools are configured.
119
+ This ensures tool_calls format is preserved during refinement.
120
+ """
121
+ if self.has_tools:
122
+ from synkro.quality.tool_refiner import ToolCallRefiner
123
+ simulator = self.create_tool_simulator()
124
+ return ToolCallRefiner(
125
+ llm=self.generation_llm,
126
+ tools=self.tools,
127
+ simulator=simulator,
128
+ )
129
+
130
+ refiner = Refiner(llm=self.generation_llm)
131
+ refiner.prompt_template = self.mode_config.refine_prompt
132
+ return refiner
133
+
134
+ def create_tool_simulator(self) -> "ToolSimulator":
135
+ """Create a ToolSimulator instance for tool_call dataset type."""
136
+ from synkro.generation.tool_simulator import ToolSimulator
137
+
138
+ if not self.tools:
139
+ raise ValueError("Cannot create ToolSimulator without tools")
140
+
141
+ return ToolSimulator(tools=self.tools, llm=self.generation_llm)
142
+
143
+ def create_tool_call_response_generator(self) -> "ToolCallResponseGenerator":
144
+ """
145
+ Create a ToolCallResponseGenerator for generating proper tool call traces.
146
+
147
+ This generator uses JSON mode to produce structured tool calls in
148
+ OpenAI function calling format.
149
+ """
150
+ from synkro.generation.tool_responses import ToolCallResponseGenerator
151
+
152
+ if not self.tools:
153
+ raise ValueError("Cannot create ToolCallResponseGenerator without tools")
154
+
155
+ # Create simulator for generating tool responses
156
+ simulator = self.create_tool_simulator()
157
+
158
+ return ToolCallResponseGenerator(
159
+ tools=self.tools,
160
+ llm=self.generation_llm,
161
+ simulator=simulator,
162
+ )
163
+
164
+ def get_tools_description(self) -> str:
165
+ """Get formatted description of all available tools."""
166
+ if not self.tools:
167
+ return "No tools available"
168
+
169
+ descriptions = []
170
+ for tool in self.tools:
171
+ descriptions.append(tool.to_system_prompt())
172
+ return "\n\n".join(descriptions)
173
+
174
+ @property
175
+ def has_tools(self) -> bool:
176
+ """Check if tools are configured."""
177
+ return bool(self.tools)
178
+
179
+ def create_follow_up_generator(self) -> FollowUpGenerator:
180
+ """Create a FollowUpGenerator for multi-turn conversations."""
181
+ return FollowUpGenerator(llm=self.generation_llm)
182
+
183
+ def create_multi_turn_response_generator(self) -> MultiTurnResponseGenerator:
184
+ """Create a MultiTurnResponseGenerator for multi-turn trace generation."""
185
+ return MultiTurnResponseGenerator(llm=self.generation_llm)
186
+
187
+ def create_multi_turn_grader(self) -> MultiTurnGrader:
188
+ """Create a MultiTurnGrader for per-turn and overall conversation grading."""
189
+ return MultiTurnGrader(llm=self.grading_llm)
190
+
191
+ # =========================================================================
192
+ # GOLDEN TRACE COMPONENTS
193
+ # =========================================================================
194
+
195
+ def create_logic_extractor(self) -> "LogicExtractor":
196
+ """
197
+ Create a LogicExtractor (The Cartographer).
198
+
199
+ Uses the grading LLM (stronger model) for accurate rule extraction.
200
+ """
201
+ from synkro.generation.logic_extractor import LogicExtractor
202
+ return LogicExtractor(llm=self.grading_llm)
203
+
204
+ def create_golden_scenario_generator(self) -> "GoldenScenarioGenerator":
205
+ """
206
+ Create a GoldenScenarioGenerator (The Adversary).
207
+
208
+ Generates typed scenarios (positive, negative, edge_case, irrelevant)
209
+ with rule targeting.
210
+ """
211
+ from synkro.generation.golden_scenarios import GoldenScenarioGenerator
212
+ return GoldenScenarioGenerator(llm=self.generation_llm)
213
+
214
+ def create_golden_response_generator(self) -> "GoldenResponseGenerator":
215
+ """
216
+ Create a GoldenResponseGenerator (The Thinker).
217
+
218
+ Generates traces with grounded Chain-of-Thought reasoning
219
+ and rule citations.
220
+ """
221
+ from synkro.generation.golden_responses import GoldenResponseGenerator
222
+ return GoldenResponseGenerator(llm=self.generation_llm, thinking=self.thinking)
223
+
224
+ def create_golden_tool_call_generator(self) -> "GoldenToolCallResponseGenerator":
225
+ """
226
+ Create a GoldenToolCallResponseGenerator (The Thinker for Tools).
227
+
228
+ Generates tool call traces with rule citations for tool selection
229
+ decisions. Requires tools to be configured.
230
+ """
231
+ from synkro.generation.golden_tool_responses import GoldenToolCallResponseGenerator
232
+
233
+ if not self.tools:
234
+ raise ValueError("Cannot create GoldenToolCallResponseGenerator without tools")
235
+
236
+ simulator = self.create_tool_simulator()
237
+ return GoldenToolCallResponseGenerator(
238
+ tools=self.tools,
239
+ llm=self.generation_llm,
240
+ simulator=simulator,
241
+ thinking=self.thinking,
242
+ )
243
+
244
+ def create_verifier(self) -> "TraceVerifier":
245
+ """
246
+ Create a TraceVerifier (The Auditor).
247
+
248
+ Verifies traces against the Logic Map to ensure:
249
+ - No skipped rules
250
+ - No hallucinated rules
251
+ - No contradictions
252
+ - DAG compliance
253
+
254
+ Uses the grading LLM (stronger model) for accurate verification.
255
+ """
256
+ from synkro.quality.verifier import TraceVerifier
257
+ return TraceVerifier(llm=self.grading_llm)
258
+
259
+ def create_golden_refiner(self) -> "GoldenRefiner":
260
+ """
261
+ Create a GoldenRefiner.
262
+
263
+ Refines traces that fail verification, using Logic Map context
264
+ to fix skipped rules, hallucinations, and contradictions.
265
+ """
266
+ from synkro.quality.golden_refiner import GoldenRefiner
267
+ return GoldenRefiner(llm=self.generation_llm)
268
+
269
+ def create_logic_map_editor(self) -> "LogicMapEditor":
270
+ """
271
+ Create a LogicMapEditor for Human-in-the-Loop sessions.
272
+
273
+ The editor uses the grading LLM (stronger model) to interpret
274
+ natural language feedback and refine Logic Maps.
275
+ """
276
+ from synkro.interactive.logic_map_editor import LogicMapEditor
277
+ return LogicMapEditor(llm=self.grading_llm)
278
+
279
+ def create_scenario_editor(self) -> "ScenarioEditor":
280
+ """
281
+ Create a ScenarioEditor for Human-in-the-Loop scenario editing.
282
+
283
+ The editor uses the grading LLM (stronger model) to interpret
284
+ natural language feedback and refine scenarios.
285
+ """
286
+ from synkro.interactive.scenario_editor import ScenarioEditor
287
+ return ScenarioEditor(llm=self.grading_llm)
288
+
289
+
290
+ __all__ = ["ComponentFactory"]
291
+
@@ -0,0 +1,18 @@
1
+ """Output formatters for different training data formats."""
2
+
3
+ from synkro.formatters.sft import SFTFormatter
4
+ from synkro.formatters.tool_call import ToolCallFormatter
5
+ from synkro.formatters.chatml import ChatMLFormatter
6
+ from synkro.formatters.qa import QAFormatter
7
+ from synkro.formatters.langsmith import LangSmithFormatter
8
+ from synkro.formatters.langfuse import LangfuseFormatter
9
+
10
+ __all__ = [
11
+ "SFTFormatter",
12
+ "ToolCallFormatter",
13
+ "ChatMLFormatter",
14
+ "QAFormatter",
15
+ "LangSmithFormatter",
16
+ "LangfuseFormatter",
17
+ ]
18
+
@@ -0,0 +1,121 @@
1
+ """ChatML formatter with XML tags for tool calling."""
2
+
3
+ import json
4
+ from pathlib import Path
5
+ from typing import TYPE_CHECKING
6
+
7
+ if TYPE_CHECKING:
8
+ from synkro.types.core import Trace
9
+
10
+
11
+ class ChatMLFormatter:
12
+ """
13
+ Format traces as ChatML with XML tags for tool calls.
14
+
15
+ Uses <tool_call> and <tool_response> XML tags for tool interactions,
16
+ compatible with Hermes/Mistral style fine-tuning.
17
+
18
+ Example output:
19
+ {
20
+ "messages": [
21
+ {"role": "system", "content": "You have access to tools."},
22
+ {"role": "user", "content": "What's the weather in NYC?"},
23
+ {"role": "assistant", "content": "<tool_call>\\n{\"name\": \"get_weather\", \"arguments\": {\"city\": \"NYC\"}}\\n</tool_call>"},
24
+ {"role": "tool", "content": "<tool_response>\\n{\"temp\": \"72F\"}\\n</tool_response>"},
25
+ {"role": "assistant", "content": "The weather in NYC is 72°F."}
26
+ ]
27
+ }
28
+ """
29
+
30
+ def __init__(self, include_metadata: bool = False):
31
+ """
32
+ Initialize the ChatMLFormatter.
33
+
34
+ Args:
35
+ include_metadata: If True, include trace metadata in output
36
+ """
37
+ self.include_metadata = include_metadata
38
+
39
+ def format(self, traces: list["Trace"]) -> list[dict]:
40
+ """
41
+ Format traces as ChatML with XML tags.
42
+
43
+ Args:
44
+ traces: List of traces to format
45
+
46
+ Returns:
47
+ List of formatted examples
48
+ """
49
+ examples = []
50
+
51
+ for trace in traces:
52
+ messages = []
53
+
54
+ for m in trace.messages:
55
+ # Handle assistant messages with tool calls
56
+ if m.role == "assistant" and m.tool_calls:
57
+ # Convert tool calls to XML format
58
+ tool_call_xmls = []
59
+ for tc in m.tool_calls:
60
+ tool_call_json = json.dumps({
61
+ "name": tc.function.name,
62
+ "arguments": json.loads(tc.function.arguments)
63
+ })
64
+ tool_call_xmls.append(f"<tool_call>\n{tool_call_json}\n</tool_call>")
65
+
66
+ content = "\n".join(tool_call_xmls)
67
+ messages.append({"role": "assistant", "content": content})
68
+
69
+ # Handle tool responses
70
+ elif m.role == "tool":
71
+ content = f"<tool_response>\n{m.content}\n</tool_response>"
72
+ messages.append({"role": "tool", "content": content})
73
+
74
+ # Regular messages (system, user, assistant without tools)
75
+ else:
76
+ messages.append({
77
+ "role": m.role,
78
+ "content": m.content or ""
79
+ })
80
+
81
+ example = {"messages": messages}
82
+
83
+ if self.include_metadata:
84
+ example["metadata"] = {
85
+ "scenario": trace.scenario.description,
86
+ "category": trace.scenario.category,
87
+ "grade": trace.grade.model_dump() if trace.grade else None,
88
+ "has_tool_calls": trace.has_tool_calls,
89
+ }
90
+
91
+ examples.append(example)
92
+
93
+ return examples
94
+
95
+ def save(self, traces: list["Trace"], path: str | Path) -> None:
96
+ """
97
+ Save formatted traces to a JSONL file.
98
+
99
+ Args:
100
+ traces: List of traces to save
101
+ path: Output file path
102
+ """
103
+ path = Path(path)
104
+ examples = self.format(traces)
105
+
106
+ with open(path, "w") as f:
107
+ for example in examples:
108
+ f.write(json.dumps(example) + "\n")
109
+
110
+ def to_jsonl(self, traces: list["Trace"]) -> str:
111
+ """
112
+ Convert traces to JSONL string.
113
+
114
+ Args:
115
+ traces: List of traces to convert
116
+
117
+ Returns:
118
+ JSONL formatted string
119
+ """
120
+ examples = self.format(traces)
121
+ return "\n".join(json.dumps(e) for e in examples)
@@ -0,0 +1,98 @@
1
+ """Langfuse formatter for evaluation datasets."""
2
+
3
+ import json
4
+ from pathlib import Path
5
+ from typing import TYPE_CHECKING
6
+
7
+ if TYPE_CHECKING:
8
+ from synkro.types.core import Trace
9
+
10
+
11
+ class LangfuseFormatter:
12
+ """
13
+ Format traces for Langfuse datasets.
14
+
15
+ Langfuse format uses input/expectedOutput structure:
16
+ - input: any JSON object with input data
17
+ - expectedOutput: any JSON object with expected output
18
+ - metadata: optional key-value pairs
19
+
20
+ Example output:
21
+ {
22
+ "input": {
23
+ "question": "Can I submit a $200 expense without a receipt?",
24
+ "context": "Expense: $200, No receipt"
25
+ },
26
+ "expectedOutput": {
27
+ "answer": "All expenses require receipts...",
28
+ "expected_outcome": "Deny - missing receipt"
29
+ },
30
+ "metadata": {
31
+ "ground_truth_rules": ["R003"],
32
+ "difficulty": "negative",
33
+ "category": "Receipt Requirements"
34
+ }
35
+ }
36
+ """
37
+
38
+ def format(self, traces: list["Trace"]) -> list[dict]:
39
+ """
40
+ Format traces as Langfuse dataset items.
41
+
42
+ Args:
43
+ traces: List of traces to format
44
+
45
+ Returns:
46
+ List of Langfuse-compatible dataset items
47
+ """
48
+ examples = []
49
+
50
+ for trace in traces:
51
+ example = {
52
+ "input": {
53
+ "question": trace.user_message,
54
+ "context": trace.scenario.context or "",
55
+ },
56
+ "expectedOutput": {
57
+ "answer": trace.assistant_message,
58
+ "expected_outcome": trace.scenario.expected_outcome or "",
59
+ },
60
+ "metadata": {
61
+ "ground_truth_rules": trace.scenario.target_rule_ids or [],
62
+ "difficulty": trace.scenario.scenario_type or "unknown",
63
+ "category": trace.scenario.category or "",
64
+ "passed": trace.grade.passed if trace.grade else None,
65
+ },
66
+ }
67
+
68
+ examples.append(example)
69
+
70
+ return examples
71
+
72
+ def save(self, traces: list["Trace"], path: str | Path) -> None:
73
+ """
74
+ Save formatted traces to a JSONL file.
75
+
76
+ Args:
77
+ traces: List of traces to save
78
+ path: Output file path
79
+ """
80
+ path = Path(path)
81
+ examples = self.format(traces)
82
+
83
+ with open(path, "w") as f:
84
+ for example in examples:
85
+ f.write(json.dumps(example) + "\n")
86
+
87
+ def to_jsonl(self, traces: list["Trace"]) -> str:
88
+ """
89
+ Convert traces to JSONL string.
90
+
91
+ Args:
92
+ traces: List of traces to convert
93
+
94
+ Returns:
95
+ JSONL formatted string
96
+ """
97
+ examples = self.format(traces)
98
+ return "\n".join(json.dumps(e) for e in examples)
@@ -0,0 +1,98 @@
1
+ """LangSmith formatter for evaluation datasets."""
2
+
3
+ import json
4
+ from pathlib import Path
5
+ from typing import TYPE_CHECKING
6
+
7
+ if TYPE_CHECKING:
8
+ from synkro.types.core import Trace
9
+
10
+
11
+ class LangSmithFormatter:
12
+ """
13
+ Format traces for LangSmith datasets.
14
+
15
+ LangSmith format uses nested inputs/outputs structure:
16
+ - inputs: dict of input fields
17
+ - outputs: dict of expected output fields
18
+ - metadata: optional additional info
19
+
20
+ Example output:
21
+ {
22
+ "inputs": {
23
+ "question": "Can I submit a $200 expense without a receipt?",
24
+ "context": "Expense: $200, No receipt"
25
+ },
26
+ "outputs": {
27
+ "answer": "All expenses require receipts..."
28
+ },
29
+ "metadata": {
30
+ "expected_outcome": "Deny - missing receipt",
31
+ "ground_truth_rules": ["R003"],
32
+ "difficulty": "negative",
33
+ "category": "Receipt Requirements"
34
+ }
35
+ }
36
+ """
37
+
38
+ def format(self, traces: list["Trace"]) -> list[dict]:
39
+ """
40
+ Format traces as LangSmith dataset examples.
41
+
42
+ Args:
43
+ traces: List of traces to format
44
+
45
+ Returns:
46
+ List of LangSmith-compatible examples
47
+ """
48
+ examples = []
49
+
50
+ for trace in traces:
51
+ example = {
52
+ "inputs": {
53
+ "question": trace.user_message,
54
+ "context": trace.scenario.context or "",
55
+ },
56
+ "outputs": {
57
+ "answer": trace.assistant_message,
58
+ },
59
+ "metadata": {
60
+ "expected_outcome": trace.scenario.expected_outcome or "",
61
+ "ground_truth_rules": trace.scenario.target_rule_ids or [],
62
+ "difficulty": trace.scenario.scenario_type or "unknown",
63
+ "category": trace.scenario.category or "",
64
+ "passed": trace.grade.passed if trace.grade else None,
65
+ },
66
+ }
67
+
68
+ examples.append(example)
69
+
70
+ return examples
71
+
72
+ def save(self, traces: list["Trace"], path: str | Path) -> None:
73
+ """
74
+ Save formatted traces to a JSONL file.
75
+
76
+ Args:
77
+ traces: List of traces to save
78
+ path: Output file path
79
+ """
80
+ path = Path(path)
81
+ examples = self.format(traces)
82
+
83
+ with open(path, "w") as f:
84
+ for example in examples:
85
+ f.write(json.dumps(example) + "\n")
86
+
87
+ def to_jsonl(self, traces: list["Trace"]) -> str:
88
+ """
89
+ Convert traces to JSONL string.
90
+
91
+ Args:
92
+ traces: List of traces to convert
93
+
94
+ Returns:
95
+ JSONL formatted string
96
+ """
97
+ examples = self.format(traces)
98
+ return "\n".join(json.dumps(e) for e in examples)