synkro 0.4.36__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synkro might be problematic. Click here for more details.
- synkro/__init__.py +331 -0
- synkro/advanced.py +184 -0
- synkro/cli.py +156 -0
- synkro/core/__init__.py +7 -0
- synkro/core/checkpoint.py +250 -0
- synkro/core/dataset.py +432 -0
- synkro/core/policy.py +337 -0
- synkro/errors.py +178 -0
- synkro/examples/__init__.py +148 -0
- synkro/factory.py +291 -0
- synkro/formatters/__init__.py +18 -0
- synkro/formatters/chatml.py +121 -0
- synkro/formatters/langfuse.py +98 -0
- synkro/formatters/langsmith.py +98 -0
- synkro/formatters/qa.py +112 -0
- synkro/formatters/sft.py +90 -0
- synkro/formatters/tool_call.py +127 -0
- synkro/generation/__init__.py +9 -0
- synkro/generation/follow_ups.py +134 -0
- synkro/generation/generator.py +314 -0
- synkro/generation/golden_responses.py +269 -0
- synkro/generation/golden_scenarios.py +333 -0
- synkro/generation/golden_tool_responses.py +791 -0
- synkro/generation/logic_extractor.py +126 -0
- synkro/generation/multiturn_responses.py +177 -0
- synkro/generation/planner.py +131 -0
- synkro/generation/responses.py +189 -0
- synkro/generation/scenarios.py +90 -0
- synkro/generation/tool_responses.py +625 -0
- synkro/generation/tool_simulator.py +114 -0
- synkro/interactive/__init__.py +16 -0
- synkro/interactive/hitl_session.py +205 -0
- synkro/interactive/intent_classifier.py +94 -0
- synkro/interactive/logic_map_editor.py +176 -0
- synkro/interactive/rich_ui.py +459 -0
- synkro/interactive/scenario_editor.py +198 -0
- synkro/llm/__init__.py +7 -0
- synkro/llm/client.py +309 -0
- synkro/llm/rate_limits.py +99 -0
- synkro/models/__init__.py +50 -0
- synkro/models/anthropic.py +26 -0
- synkro/models/google.py +19 -0
- synkro/models/local.py +104 -0
- synkro/models/openai.py +31 -0
- synkro/modes/__init__.py +13 -0
- synkro/modes/config.py +66 -0
- synkro/modes/conversation.py +35 -0
- synkro/modes/tool_call.py +18 -0
- synkro/parsers.py +442 -0
- synkro/pipeline/__init__.py +20 -0
- synkro/pipeline/phases.py +592 -0
- synkro/pipeline/runner.py +769 -0
- synkro/pipelines.py +136 -0
- synkro/prompts/__init__.py +57 -0
- synkro/prompts/base.py +167 -0
- synkro/prompts/golden_templates.py +533 -0
- synkro/prompts/interactive_templates.py +198 -0
- synkro/prompts/multiturn_templates.py +156 -0
- synkro/prompts/templates.py +281 -0
- synkro/prompts/tool_templates.py +318 -0
- synkro/quality/__init__.py +14 -0
- synkro/quality/golden_refiner.py +163 -0
- synkro/quality/grader.py +153 -0
- synkro/quality/multiturn_grader.py +150 -0
- synkro/quality/refiner.py +137 -0
- synkro/quality/tool_grader.py +126 -0
- synkro/quality/tool_refiner.py +128 -0
- synkro/quality/verifier.py +228 -0
- synkro/reporting.py +464 -0
- synkro/schemas.py +521 -0
- synkro/types/__init__.py +43 -0
- synkro/types/core.py +153 -0
- synkro/types/dataset_type.py +33 -0
- synkro/types/logic_map.py +348 -0
- synkro/types/tool.py +94 -0
- synkro-0.4.36.data/data/examples/__init__.py +148 -0
- synkro-0.4.36.dist-info/METADATA +507 -0
- synkro-0.4.36.dist-info/RECORD +81 -0
- synkro-0.4.36.dist-info/WHEEL +4 -0
- synkro-0.4.36.dist-info/entry_points.txt +2 -0
- synkro-0.4.36.dist-info/licenses/LICENSE +21 -0
synkro/__init__.py
ADDED
|
@@ -0,0 +1,331 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Synkro - Generate high-quality training datasets from any document.
|
|
3
|
+
|
|
4
|
+
Quick Start:
|
|
5
|
+
>>> import synkro
|
|
6
|
+
>>> dataset = synkro.generate("Your policy text...")
|
|
7
|
+
>>> dataset.save("training.jsonl")
|
|
8
|
+
|
|
9
|
+
Pipeline Usage (more control):
|
|
10
|
+
>>> from synkro import create_pipeline, DatasetType
|
|
11
|
+
>>> pipeline = create_pipeline(dataset_type=DatasetType.CONVERSATION)
|
|
12
|
+
>>> dataset = pipeline.generate("policy text", traces=50)
|
|
13
|
+
|
|
14
|
+
Access Logic Map (for inspection):
|
|
15
|
+
>>> result = pipeline.generate("policy text", return_logic_map=True)
|
|
16
|
+
>>> print(result.logic_map.rules) # See extracted rules
|
|
17
|
+
>>> dataset = result.dataset
|
|
18
|
+
|
|
19
|
+
Silent Mode:
|
|
20
|
+
>>> from synkro import SilentReporter, create_pipeline
|
|
21
|
+
>>> pipeline = create_pipeline(reporter=SilentReporter())
|
|
22
|
+
|
|
23
|
+
Progress Callbacks:
|
|
24
|
+
>>> from synkro import CallbackReporter, create_pipeline
|
|
25
|
+
>>> reporter = CallbackReporter(
|
|
26
|
+
... on_progress=lambda event, data: print(f"{event}: {data}")
|
|
27
|
+
... )
|
|
28
|
+
>>> pipeline = create_pipeline(reporter=reporter)
|
|
29
|
+
|
|
30
|
+
Tool Call Dataset:
|
|
31
|
+
>>> from synkro import create_pipeline, ToolDefinition, DatasetType
|
|
32
|
+
>>> tools = [ToolDefinition(name="search", description="...", parameters={})]
|
|
33
|
+
>>> pipeline = create_pipeline(dataset_type=DatasetType.TOOL_CALL, tools=tools)
|
|
34
|
+
|
|
35
|
+
Eval Dataset Generation:
|
|
36
|
+
>>> import synkro
|
|
37
|
+
>>> result = synkro.generate_scenarios("Your policy...", count=100)
|
|
38
|
+
>>> for scenario in result.scenarios:
|
|
39
|
+
... response = my_model(scenario.user_message)
|
|
40
|
+
... grade = synkro.grade(response, scenario, policy)
|
|
41
|
+
|
|
42
|
+
Advanced Usage (power users):
|
|
43
|
+
>>> from synkro.advanced import LogicExtractor, TraceVerifier, LogicMap
|
|
44
|
+
>>> # Full access to Golden Trace internals
|
|
45
|
+
"""
|
|
46
|
+
|
|
47
|
+
# Dynamic version from package metadata
|
|
48
|
+
try:
|
|
49
|
+
from importlib.metadata import version as _get_version
|
|
50
|
+
__version__ = _get_version("synkro")
|
|
51
|
+
except Exception:
|
|
52
|
+
__version__ = "0.4.22" # Fallback
|
|
53
|
+
|
|
54
|
+
# =============================================================================
|
|
55
|
+
# PRIMARY API - What most developers need
|
|
56
|
+
# =============================================================================
|
|
57
|
+
|
|
58
|
+
from synkro.pipelines import create_pipeline
|
|
59
|
+
from synkro.models import OpenAI, Anthropic, Google, Local, LocalModel
|
|
60
|
+
from synkro.llm import LLM
|
|
61
|
+
from synkro.types import DatasetType
|
|
62
|
+
from synkro.core.policy import Policy
|
|
63
|
+
from synkro.core.dataset import Dataset
|
|
64
|
+
from synkro.reporting import SilentReporter, RichReporter, CallbackReporter
|
|
65
|
+
|
|
66
|
+
# Tool types (needed for TOOL_CALL dataset type)
|
|
67
|
+
from synkro.types import ToolDefinition
|
|
68
|
+
|
|
69
|
+
# =============================================================================
|
|
70
|
+
# SECONDARY API - Less commonly needed
|
|
71
|
+
# =============================================================================
|
|
72
|
+
|
|
73
|
+
from synkro.types import Message, Scenario, EvalScenario, Trace, GradeResult, Plan, Category
|
|
74
|
+
from synkro.types import ToolCall, ToolFunction, ToolResult
|
|
75
|
+
from synkro.reporting import ProgressReporter
|
|
76
|
+
|
|
77
|
+
# GenerationResult for return_logic_map=True
|
|
78
|
+
from synkro.pipeline.runner import GenerationResult, ScenariosResult
|
|
79
|
+
|
|
80
|
+
__all__ = [
|
|
81
|
+
# Primary API
|
|
82
|
+
"create_pipeline",
|
|
83
|
+
"generate",
|
|
84
|
+
"generate_scenarios",
|
|
85
|
+
"grade",
|
|
86
|
+
"DatasetType",
|
|
87
|
+
"Policy",
|
|
88
|
+
"Dataset",
|
|
89
|
+
"ToolDefinition",
|
|
90
|
+
# Reporters
|
|
91
|
+
"SilentReporter",
|
|
92
|
+
"RichReporter",
|
|
93
|
+
"CallbackReporter",
|
|
94
|
+
"ProgressReporter",
|
|
95
|
+
# Models
|
|
96
|
+
"OpenAI",
|
|
97
|
+
"Anthropic",
|
|
98
|
+
"Google",
|
|
99
|
+
"Local",
|
|
100
|
+
"LocalModel",
|
|
101
|
+
"LLM",
|
|
102
|
+
# Result types
|
|
103
|
+
"GenerationResult",
|
|
104
|
+
"ScenariosResult",
|
|
105
|
+
# Data types (less common)
|
|
106
|
+
"Trace",
|
|
107
|
+
"Scenario",
|
|
108
|
+
"EvalScenario",
|
|
109
|
+
"Message",
|
|
110
|
+
"GradeResult",
|
|
111
|
+
"Plan",
|
|
112
|
+
"Category",
|
|
113
|
+
"ToolCall",
|
|
114
|
+
"ToolFunction",
|
|
115
|
+
"ToolResult",
|
|
116
|
+
]
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
# Note: For advanced usage (LogicMap, TraceVerifier, etc.), use:
|
|
120
|
+
# from synkro.advanced import ...
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def generate(
    policy: str | Policy,
    traces: int = 20,
    turns: int | str = "auto",
    dataset_type: DatasetType = DatasetType.CONVERSATION,
    generation_model: OpenAI | Anthropic | Google | LocalModel | str = OpenAI.GPT_5_MINI,
    grading_model: OpenAI | Anthropic | Google | LocalModel | str = OpenAI.GPT_52,
    max_iterations: int = 3,
    skip_grading: bool = False,
    reporter: ProgressReporter | None = None,
    return_logic_map: bool = False,
    enable_hitl: bool = True,
    base_url: str | None = None,
    temperature: float = 0.7,
) -> Dataset | GenerationResult:
    """
    Generate training traces from a policy document.

    This is a convenience function. For more control, use create_pipeline().

    Args:
        policy: Policy text or Policy object
        traces: Number of traces to generate (default: 20)
        turns: Conversation turns per trace. Use int for fixed turns, or "auto"
            for policy complexity-driven turns (Simple=1-2, Conditional=3, Complex=5+)
        dataset_type: Type of dataset - CONVERSATION (default), INSTRUCTION, or TOOL_CALL
        generation_model: Model for generating (default: gpt-5-mini)
        grading_model: Model for grading (default: gpt-5.2)
        max_iterations: Max refinement iterations per trace (default: 3)
        skip_grading: Skip grading phase for faster generation (default: False)
        reporter: Progress reporter (default: RichReporter for console output)
        return_logic_map: If True, return GenerationResult with Logic Map access
        enable_hitl: Enable Human-in-the-Loop Logic Map editing (default: True)
        base_url: Optional API base URL for local LLM providers (Ollama, vLLM, etc.)
        temperature: Sampling temperature for generation (0.0-2.0, default: 0.7).
            Lower values (0.1-0.3) produce more deterministic outputs for eval datasets.
            Higher values (0.7-1.0) produce more diverse outputs for training data.

    Returns:
        Dataset (default) or GenerationResult if return_logic_map=True

    Example:
        >>> import synkro
        >>> dataset = synkro.generate("All expenses over $50 require approval")
        >>> dataset.save("training.jsonl")

        >>> # Access Logic Map
        >>> result = synkro.generate(policy, return_logic_map=True)
        >>> print(result.logic_map.rules)
        >>> dataset = result.dataset

        >>> # Multi-turn with fixed 3 turns
        >>> dataset = synkro.generate(policy, turns=3)

        >>> # Interactive Logic Map editing
        >>> dataset = synkro.generate(policy, enable_hitl=True)

        >>> # Silent mode
        >>> from synkro import SilentReporter
        >>> dataset = synkro.generate(policy, reporter=SilentReporter())
    """
    # Imported lazily so `import synkro` stays fast.
    from synkro.generation.generator import Generator

    # Accept raw text for convenience; wrap it in a Policy object.
    if isinstance(policy, str):
        policy = Policy(text=policy)

    generator = Generator(
        dataset_type=dataset_type,
        generation_model=generation_model,
        grading_model=grading_model,
        max_iterations=max_iterations,
        skip_grading=skip_grading,
        reporter=reporter,
        turns=turns,
        enable_hitl=enable_hitl,
        base_url=base_url,
        temperature=temperature,
    )

    return generator.generate(policy, traces=traces, return_logic_map=return_logic_map)
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def generate_scenarios(
    policy: str | Policy,
    count: int = 100,
    generation_model: OpenAI | Anthropic | Google | LocalModel | str = OpenAI.GPT_4O_MINI,
    temperature: float = 0.8,
    reporter: ProgressReporter | None = None,
    enable_hitl: bool = False,
    base_url: str | None = None,
) -> ScenariosResult:
    """
    Generate eval scenarios from a policy without synthetic responses.

    This is the eval-focused API. It generates diverse test scenarios with
    ground truth labels (expected outcomes, target rules) but does NOT generate
    synthetic responses. Use synkro.grade() to evaluate your own model's outputs.

    Args:
        policy: Policy text or Policy object
        count: Number of scenarios to generate (default: 100)
        generation_model: Model for generation (default: gpt-4o-mini)
        temperature: Sampling temperature (default: 0.8 for scenario diversity)
        reporter: Progress reporter (default: RichReporter for console output)
        enable_hitl: Enable Human-in-the-Loop editing (default: False)
        base_url: Optional API base URL for local LLM providers

    Returns:
        ScenariosResult with scenarios, logic_map, and distribution

    Example:
        >>> import synkro
        >>> result = synkro.generate_scenarios("Your policy...", count=100)
        >>>
        >>> for scenario in result.scenarios:
        ...     # Run YOUR model
        ...     response = my_model(scenario.user_message)
        ...
        ...     # Grade the response
        ...     grade = synkro.grade(response, scenario, policy)
        ...     print(f"Passed: {grade.passed}")
    """
    # Deferred import keeps the top-level package import lightweight.
    from synkro.generation.generator import Generator

    policy_obj = Policy(text=policy) if isinstance(policy, str) else policy

    scenario_generator = Generator(
        dataset_type=DatasetType.CONVERSATION,  # Type doesn't matter for scenarios-only
        generation_model=generation_model,
        grading_model=generation_model,  # Not used but required
        skip_grading=True,
        reporter=reporter,
        enable_hitl=enable_hitl,
        base_url=base_url,
        temperature=temperature,
    )

    return scenario_generator.generate_scenarios(policy_obj, count=count)
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
def grade(
    response: str,
    scenario: EvalScenario,
    policy: str | Policy,
    model: OpenAI | Anthropic | Google | LocalModel | str = OpenAI.GPT_4O,
    base_url: str | None = None,
) -> GradeResult:
    """
    Grade an external model's response against a scenario and policy.

    Use this to evaluate your own model's outputs against scenarios
    generated by synkro.generate_scenarios().

    Args:
        response: The response from the model being evaluated
        scenario: The eval scenario with expected_outcome and target_rules
        policy: The policy document for grading context
        model: LLM to use for grading (default: gpt-4o, stronger = better)
        base_url: Optional API base URL for local LLM providers

    Returns:
        GradeResult with passed, feedback, and issues

    Example:
        >>> result = synkro.generate_scenarios(policy, count=100)
        >>> for scenario in result.scenarios:
        ...     response = my_model(scenario.user_message)
        ...     grade = synkro.grade(response, scenario, policy)
        ...     if not grade.passed:
        ...         print(f"Failed: {grade.feedback}")
    """
    import asyncio
    from synkro.llm.client import LLM
    from synkro.quality.grader import Grader
    from synkro.types.core import Trace, Message, Scenario as BaseScenario

    # Accept either raw text or a Policy object.
    if isinstance(policy, str):
        policy_text = policy
    else:
        policy_text = policy.text

    # Create grader with specified model; low temperature keeps grading consistent.
    grading_llm = LLM(model=model, base_url=base_url, temperature=0.1)
    grader = Grader(llm=grading_llm)

    # Rebuild a Trace from the eval scenario plus the external response, so the
    # standard Grader can score it exactly like an internally generated trace.
    base_scenario = BaseScenario(
        description=scenario.user_message,
        context=scenario.context,
        category=scenario.category,
        scenario_type=scenario.scenario_type,
        target_rule_ids=scenario.target_rule_ids,
        expected_outcome=scenario.expected_outcome,
    )

    trace = Trace(
        messages=[
            Message(role="user", content=scenario.user_message),
            Message(role="assistant", content=response),
        ],
        scenario=base_scenario,
    )

    # Grader.grade is async; asyncio.run drives the coroutine to completion
    # (no wrapper function needed).
    return asyncio.run(grader.grade(trace, policy_text))
|
synkro/advanced.py
ADDED
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
"""Advanced components for power users.
|
|
2
|
+
|
|
3
|
+
This module exposes internal components for developers who need fine-grained
|
|
4
|
+
control over the generation pipeline.
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
from synkro.advanced import (
|
|
8
|
+
# Golden Trace components
|
|
9
|
+
LogicExtractor,
|
|
10
|
+
GoldenScenarioGenerator,
|
|
11
|
+
GoldenResponseGenerator,
|
|
12
|
+
TraceVerifier,
|
|
13
|
+
GoldenRefiner,
|
|
14
|
+
|
|
15
|
+
# Types
|
|
16
|
+
LogicMap,
|
|
17
|
+
Rule,
|
|
18
|
+
GoldenScenario,
|
|
19
|
+
VerificationResult,
|
|
20
|
+
GenerationResult,
|
|
21
|
+
|
|
22
|
+
# Pipeline internals
|
|
23
|
+
GenerationPipeline,
|
|
24
|
+
ComponentFactory,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
Examples:
|
|
28
|
+
>>> # Extract Logic Map manually
|
|
29
|
+
>>> from synkro.advanced import LogicExtractor, LLM
|
|
30
|
+
>>> extractor = LogicExtractor(llm=LLM(model="gpt-4o"))
|
|
31
|
+
>>> logic_map = await extractor.extract(policy_text)
|
|
32
|
+
>>> print(logic_map.rules)
|
|
33
|
+
|
|
34
|
+
>>> # Verify a trace against Logic Map
|
|
35
|
+
>>> from synkro.advanced import TraceVerifier
|
|
36
|
+
>>> verifier = TraceVerifier()
|
|
37
|
+
>>> result = await verifier.verify(trace, logic_map, scenario)
|
|
38
|
+
>>> if not result.passed:
|
|
39
|
+
... print(f"Failed: {result.issues}")
|
|
40
|
+
"""
|
|
41
|
+
|
|
42
|
+
# Golden Trace components (The 4 Stages)
|
|
43
|
+
from synkro.generation.logic_extractor import LogicExtractor
|
|
44
|
+
from synkro.generation.golden_scenarios import GoldenScenarioGenerator
|
|
45
|
+
from synkro.generation.golden_responses import GoldenResponseGenerator
|
|
46
|
+
from synkro.generation.golden_tool_responses import GoldenToolCallResponseGenerator
|
|
47
|
+
from synkro.quality.verifier import TraceVerifier
|
|
48
|
+
from synkro.quality.golden_refiner import GoldenRefiner
|
|
49
|
+
|
|
50
|
+
# Logic Map types
|
|
51
|
+
from synkro.types.logic_map import (
|
|
52
|
+
LogicMap,
|
|
53
|
+
Rule,
|
|
54
|
+
RuleCategory,
|
|
55
|
+
GoldenScenario,
|
|
56
|
+
ScenarioType,
|
|
57
|
+
ReasoningStep,
|
|
58
|
+
VerificationResult,
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
# Pipeline internals
|
|
62
|
+
from synkro.pipeline.runner import GenerationPipeline, GenerationResult
|
|
63
|
+
from synkro.factory import ComponentFactory
|
|
64
|
+
|
|
65
|
+
# Pipeline phases
|
|
66
|
+
from synkro.pipeline.phases import (
|
|
67
|
+
PlanPhase,
|
|
68
|
+
LogicExtractionPhase,
|
|
69
|
+
GoldenScenarioPhase,
|
|
70
|
+
GoldenTracePhase,
|
|
71
|
+
GoldenToolCallPhase,
|
|
72
|
+
VerificationPhase,
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
# Low-level generators
|
|
76
|
+
from synkro.generation.generator import Generator
|
|
77
|
+
from synkro.generation.scenarios import ScenarioGenerator
|
|
78
|
+
from synkro.generation.responses import ResponseGenerator
|
|
79
|
+
from synkro.generation.planner import Planner
|
|
80
|
+
from synkro.generation.follow_ups import FollowUpGenerator
|
|
81
|
+
from synkro.generation.multiturn_responses import MultiTurnResponseGenerator
|
|
82
|
+
|
|
83
|
+
# Quality components
|
|
84
|
+
from synkro.quality.grader import Grader
|
|
85
|
+
from synkro.quality.refiner import Refiner
|
|
86
|
+
from synkro.quality.tool_grader import ToolCallGrader
|
|
87
|
+
from synkro.quality.tool_refiner import ToolCallRefiner
|
|
88
|
+
from synkro.quality.multiturn_grader import MultiTurnGrader
|
|
89
|
+
|
|
90
|
+
# LLM client
|
|
91
|
+
from synkro.llm.client import LLM
|
|
92
|
+
|
|
93
|
+
# Prompts (for customization)
|
|
94
|
+
from synkro.prompts import SystemPrompt, ScenarioPrompt, ResponsePrompt, GradePrompt
|
|
95
|
+
from synkro.prompts.golden_templates import (
|
|
96
|
+
LOGIC_EXTRACTION_PROMPT,
|
|
97
|
+
GOLDEN_SCENARIO_PROMPT,
|
|
98
|
+
GOLDEN_TRACE_PROMPT,
|
|
99
|
+
VERIFICATION_PROMPT,
|
|
100
|
+
GOLDEN_REFINE_PROMPT,
|
|
101
|
+
GOLDEN_TOOL_TRACE_PROMPT,
|
|
102
|
+
)
|
|
103
|
+
|
|
104
|
+
# Formatters
|
|
105
|
+
from synkro.formatters.sft import SFTFormatter
|
|
106
|
+
from synkro.formatters.tool_call import ToolCallFormatter
|
|
107
|
+
|
|
108
|
+
# Schemas (for structured output)
|
|
109
|
+
from synkro.schemas import (
|
|
110
|
+
RuleExtraction,
|
|
111
|
+
LogicMapOutput,
|
|
112
|
+
GoldenScenarioOutput,
|
|
113
|
+
GoldenScenariosArray,
|
|
114
|
+
ReasoningStepOutput,
|
|
115
|
+
GoldenTraceOutput,
|
|
116
|
+
VerificationOutput,
|
|
117
|
+
)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
__all__ = [
|
|
121
|
+
# Golden Trace components
|
|
122
|
+
"LogicExtractor",
|
|
123
|
+
"GoldenScenarioGenerator",
|
|
124
|
+
"GoldenResponseGenerator",
|
|
125
|
+
"GoldenToolCallResponseGenerator",
|
|
126
|
+
"TraceVerifier",
|
|
127
|
+
"GoldenRefiner",
|
|
128
|
+
# Logic Map types
|
|
129
|
+
"LogicMap",
|
|
130
|
+
"Rule",
|
|
131
|
+
"RuleCategory",
|
|
132
|
+
"GoldenScenario",
|
|
133
|
+
"ScenarioType",
|
|
134
|
+
"ReasoningStep",
|
|
135
|
+
"VerificationResult",
|
|
136
|
+
# Pipeline
|
|
137
|
+
"GenerationPipeline",
|
|
138
|
+
"GenerationResult",
|
|
139
|
+
"ComponentFactory",
|
|
140
|
+
# Phases
|
|
141
|
+
"PlanPhase",
|
|
142
|
+
"LogicExtractionPhase",
|
|
143
|
+
"GoldenScenarioPhase",
|
|
144
|
+
"GoldenTracePhase",
|
|
145
|
+
"GoldenToolCallPhase",
|
|
146
|
+
"VerificationPhase",
|
|
147
|
+
# Generators
|
|
148
|
+
"Generator",
|
|
149
|
+
"ScenarioGenerator",
|
|
150
|
+
"ResponseGenerator",
|
|
151
|
+
"Planner",
|
|
152
|
+
"FollowUpGenerator",
|
|
153
|
+
"MultiTurnResponseGenerator",
|
|
154
|
+
# Quality
|
|
155
|
+
"Grader",
|
|
156
|
+
"Refiner",
|
|
157
|
+
"ToolCallGrader",
|
|
158
|
+
"ToolCallRefiner",
|
|
159
|
+
"MultiTurnGrader",
|
|
160
|
+
# LLM
|
|
161
|
+
"LLM",
|
|
162
|
+
# Prompts
|
|
163
|
+
"SystemPrompt",
|
|
164
|
+
"ScenarioPrompt",
|
|
165
|
+
"ResponsePrompt",
|
|
166
|
+
"GradePrompt",
|
|
167
|
+
"LOGIC_EXTRACTION_PROMPT",
|
|
168
|
+
"GOLDEN_SCENARIO_PROMPT",
|
|
169
|
+
"GOLDEN_TRACE_PROMPT",
|
|
170
|
+
"VERIFICATION_PROMPT",
|
|
171
|
+
"GOLDEN_REFINE_PROMPT",
|
|
172
|
+
"GOLDEN_TOOL_TRACE_PROMPT",
|
|
173
|
+
# Formatters
|
|
174
|
+
"SFTFormatter",
|
|
175
|
+
"ToolCallFormatter",
|
|
176
|
+
# Schemas
|
|
177
|
+
"RuleExtraction",
|
|
178
|
+
"LogicMapOutput",
|
|
179
|
+
"GoldenScenarioOutput",
|
|
180
|
+
"GoldenScenariosArray",
|
|
181
|
+
"ReasoningStepOutput",
|
|
182
|
+
"GoldenTraceOutput",
|
|
183
|
+
"VerificationOutput",
|
|
184
|
+
]
|
synkro/cli.py
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
"""Synkro CLI - Generate training data from the command line."""
|
|
2
|
+
|
|
3
|
+
import typer
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
# Root Typer application: `synkro` with no arguments prints the help screen.
app = typer.Typer(
    name="synkro",
    help="Generate training datasets from documents.",
    no_args_is_help=True,
)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@app.command()
def generate(
    source: str = typer.Argument(
        ...,
        help="Policy text, file path (.pdf, .docx, .txt, .md), folder path, or URL",
    ),
    output: Optional[Path] = typer.Option(
        None,
        "--output", "-o",
        help="Output file path (auto-generated if not specified)",
    ),
    traces: int = typer.Option(
        20,
        "--traces", "-n",
        help="Number of traces to generate",
    ),
    format: str = typer.Option(
        "sft",
        "--format", "-f",
        help="Output format: sft or qa",
    ),
    model: str = typer.Option(
        "gpt-4o-mini",
        "--model", "-m",
        help="Model for generation (e.g., gpt-4o-mini, claude-3-5-sonnet, gemini-2.5-flash, llama3.1)",
    ),
    provider: Optional[str] = typer.Option(
        None,
        "--provider", "-p",
        help="LLM provider for local models (ollama, vllm)",
    ),
    endpoint: Optional[str] = typer.Option(
        None,
        "--endpoint", "-e",
        help="API endpoint URL (e.g., http://localhost:11434)",
    ),
    interactive: bool = typer.Option(
        True,
        "--interactive/--no-interactive", "-i/-I",
        help="Enable interactive Logic Map editing before generation (enabled by default)",
    ),
):
    """
    Generate training data from a policy document.

    Examples:

        synkro generate policy.pdf

        synkro generate policies/  # Load all files from folder

        synkro generate "All expenses over $50 need approval" --traces 50

        synkro generate handbook.docx -o training.jsonl -n 100

        synkro generate policy.pdf --interactive  # Review and edit Logic Map
    """
    import synkro
    from synkro import Policy

    # Resolve the source argument: existing path beats URL beats raw text.
    candidate = Path(source)
    if candidate.exists():
        policy = Policy.from_file(candidate)
    elif source.startswith(("http://", "https://")):
        policy = Policy.from_url(source)
    else:
        policy = Policy(text=source)

    # Local-provider handling: qualify the model id and pick a default endpoint.
    resolved_endpoint = endpoint
    model_id = model
    if provider:
        if "/" not in model:
            model_id = f"{provider}/{model}"
        if not endpoint:
            provider_defaults = {
                "ollama": "http://localhost:11434",
                "vllm": "http://localhost:8000",
            }
            resolved_endpoint = provider_defaults.get(provider)

    # Run generation with the resolved settings.
    dataset = synkro.generate(
        policy,
        traces=traces,
        generation_model=model_id,
        enable_hitl=interactive,
        base_url=resolved_endpoint,
    )

    # Persist to the requested path, or let the dataset pick a filename.
    if output:
        dataset.save(output, format=format)
    else:
        dataset.save(format=format)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
@app.command()
def demo():
    """
    Run a quick demo with a built-in example policy.
    """
    import synkro
    from synkro.examples import EXPENSE_POLICY
    from rich.console import Console

    ui = Console()
    ui.print("\n[cyan]Running demo with built-in expense policy...[/cyan]\n")

    # Small trace count keeps the demo quick.
    result = synkro.generate(EXPENSE_POLICY, traces=5)
    result.save("demo_output.jsonl")

    ui.print("\n[green]Demo complete![/green]")
    ui.print("[dim]Check demo_output.jsonl for the generated training data.[/dim]\n")
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
@app.command()
def version():
    """Show version information."""
    import synkro
    from rich.console import Console

    # Report the package version resolved at import time.
    Console().print(f"[cyan]synkro[/cyan] v{synkro.__version__}")
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def main():
    """Entry point for the CLI."""
    # Delegates to the Typer application defined at module level.
    app()
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
if __name__ == "__main__":
|
|
155
|
+
main()
|
|
156
|
+
|