synkro 0.4.36__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synkro might be problematic. Click here for more details.

Files changed (81)
  1. synkro/__init__.py +331 -0
  2. synkro/advanced.py +184 -0
  3. synkro/cli.py +156 -0
  4. synkro/core/__init__.py +7 -0
  5. synkro/core/checkpoint.py +250 -0
  6. synkro/core/dataset.py +432 -0
  7. synkro/core/policy.py +337 -0
  8. synkro/errors.py +178 -0
  9. synkro/examples/__init__.py +148 -0
  10. synkro/factory.py +291 -0
  11. synkro/formatters/__init__.py +18 -0
  12. synkro/formatters/chatml.py +121 -0
  13. synkro/formatters/langfuse.py +98 -0
  14. synkro/formatters/langsmith.py +98 -0
  15. synkro/formatters/qa.py +112 -0
  16. synkro/formatters/sft.py +90 -0
  17. synkro/formatters/tool_call.py +127 -0
  18. synkro/generation/__init__.py +9 -0
  19. synkro/generation/follow_ups.py +134 -0
  20. synkro/generation/generator.py +314 -0
  21. synkro/generation/golden_responses.py +269 -0
  22. synkro/generation/golden_scenarios.py +333 -0
  23. synkro/generation/golden_tool_responses.py +791 -0
  24. synkro/generation/logic_extractor.py +126 -0
  25. synkro/generation/multiturn_responses.py +177 -0
  26. synkro/generation/planner.py +131 -0
  27. synkro/generation/responses.py +189 -0
  28. synkro/generation/scenarios.py +90 -0
  29. synkro/generation/tool_responses.py +625 -0
  30. synkro/generation/tool_simulator.py +114 -0
  31. synkro/interactive/__init__.py +16 -0
  32. synkro/interactive/hitl_session.py +205 -0
  33. synkro/interactive/intent_classifier.py +94 -0
  34. synkro/interactive/logic_map_editor.py +176 -0
  35. synkro/interactive/rich_ui.py +459 -0
  36. synkro/interactive/scenario_editor.py +198 -0
  37. synkro/llm/__init__.py +7 -0
  38. synkro/llm/client.py +309 -0
  39. synkro/llm/rate_limits.py +99 -0
  40. synkro/models/__init__.py +50 -0
  41. synkro/models/anthropic.py +26 -0
  42. synkro/models/google.py +19 -0
  43. synkro/models/local.py +104 -0
  44. synkro/models/openai.py +31 -0
  45. synkro/modes/__init__.py +13 -0
  46. synkro/modes/config.py +66 -0
  47. synkro/modes/conversation.py +35 -0
  48. synkro/modes/tool_call.py +18 -0
  49. synkro/parsers.py +442 -0
  50. synkro/pipeline/__init__.py +20 -0
  51. synkro/pipeline/phases.py +592 -0
  52. synkro/pipeline/runner.py +769 -0
  53. synkro/pipelines.py +136 -0
  54. synkro/prompts/__init__.py +57 -0
  55. synkro/prompts/base.py +167 -0
  56. synkro/prompts/golden_templates.py +533 -0
  57. synkro/prompts/interactive_templates.py +198 -0
  58. synkro/prompts/multiturn_templates.py +156 -0
  59. synkro/prompts/templates.py +281 -0
  60. synkro/prompts/tool_templates.py +318 -0
  61. synkro/quality/__init__.py +14 -0
  62. synkro/quality/golden_refiner.py +163 -0
  63. synkro/quality/grader.py +153 -0
  64. synkro/quality/multiturn_grader.py +150 -0
  65. synkro/quality/refiner.py +137 -0
  66. synkro/quality/tool_grader.py +126 -0
  67. synkro/quality/tool_refiner.py +128 -0
  68. synkro/quality/verifier.py +228 -0
  69. synkro/reporting.py +464 -0
  70. synkro/schemas.py +521 -0
  71. synkro/types/__init__.py +43 -0
  72. synkro/types/core.py +153 -0
  73. synkro/types/dataset_type.py +33 -0
  74. synkro/types/logic_map.py +348 -0
  75. synkro/types/tool.py +94 -0
  76. synkro-0.4.36.data/data/examples/__init__.py +148 -0
  77. synkro-0.4.36.dist-info/METADATA +507 -0
  78. synkro-0.4.36.dist-info/RECORD +81 -0
  79. synkro-0.4.36.dist-info/WHEEL +4 -0
  80. synkro-0.4.36.dist-info/entry_points.txt +2 -0
  81. synkro-0.4.36.dist-info/licenses/LICENSE +21 -0
synkro/__init__.py ADDED
@@ -0,0 +1,331 @@
1
+ """
2
+ Synkro - Generate high-quality training datasets from any document.
3
+
4
+ Quick Start:
5
+ >>> import synkro
6
+ >>> dataset = synkro.generate("Your policy text...")
7
+ >>> dataset.save("training.jsonl")
8
+
9
+ Pipeline Usage (more control):
10
+ >>> from synkro import create_pipeline, DatasetType
11
+ >>> pipeline = create_pipeline(dataset_type=DatasetType.CONVERSATION)
12
+ >>> dataset = pipeline.generate("policy text", traces=50)
13
+
14
+ Access Logic Map (for inspection):
15
+ >>> result = pipeline.generate("policy text", return_logic_map=True)
16
+ >>> print(result.logic_map.rules) # See extracted rules
17
+ >>> dataset = result.dataset
18
+
19
+ Silent Mode:
20
+ >>> from synkro import SilentReporter, create_pipeline
21
+ >>> pipeline = create_pipeline(reporter=SilentReporter())
22
+
23
+ Progress Callbacks:
24
+ >>> from synkro import CallbackReporter, create_pipeline
25
+ >>> reporter = CallbackReporter(
26
+ ... on_progress=lambda event, data: print(f"{event}: {data}")
27
+ ... )
28
+ >>> pipeline = create_pipeline(reporter=reporter)
29
+
30
+ Tool Call Dataset:
31
+ >>> from synkro import create_pipeline, ToolDefinition, DatasetType
32
+ >>> tools = [ToolDefinition(name="search", description="...", parameters={})]
33
+ >>> pipeline = create_pipeline(dataset_type=DatasetType.TOOL_CALL, tools=tools)
34
+
35
+ Eval Dataset Generation:
36
+ >>> import synkro
37
+ >>> result = synkro.generate_scenarios("Your policy...", count=100)
38
+ >>> for scenario in result.scenarios:
39
+ ... response = my_model(scenario.user_message)
40
+ ... grade = synkro.grade(response, scenario, policy)
41
+
42
+ Advanced Usage (power users):
43
+ >>> from synkro.advanced import LogicExtractor, TraceVerifier, LogicMap
44
+ >>> # Full access to Golden Trace internals
45
+ """
46
+
47
# Dynamic version from package metadata; the hard-coded fallback is used when
# metadata is unavailable (e.g. running from a source checkout).
try:
    from importlib.metadata import version as _get_version
    __version__ = _get_version("synkro")
except Exception:
    __version__ = "0.4.36"  # Fallback: keep in sync with the released version
53
+
54
+ # =============================================================================
55
+ # PRIMARY API - What most developers need
56
+ # =============================================================================
57
+
58
+ from synkro.pipelines import create_pipeline
59
+ from synkro.models import OpenAI, Anthropic, Google, Local, LocalModel
60
+ from synkro.llm import LLM
61
+ from synkro.types import DatasetType
62
+ from synkro.core.policy import Policy
63
+ from synkro.core.dataset import Dataset
64
+ from synkro.reporting import SilentReporter, RichReporter, CallbackReporter
65
+
66
+ # Tool types (needed for TOOL_CALL dataset type)
67
+ from synkro.types import ToolDefinition
68
+
69
+ # =============================================================================
70
+ # SECONDARY API - Less commonly needed
71
+ # =============================================================================
72
+
73
+ from synkro.types import Message, Scenario, EvalScenario, Trace, GradeResult, Plan, Category
74
+ from synkro.types import ToolCall, ToolFunction, ToolResult
75
+ from synkro.reporting import ProgressReporter
76
+
77
+ # GenerationResult for return_logic_map=True
78
+ from synkro.pipeline.runner import GenerationResult, ScenariosResult
79
+
80
# Public API surface of the top-level ``synkro`` package.
# ``generate``, ``generate_scenarios`` and ``grade`` are defined later in this module.
__all__ = [
    # Primary API
    "create_pipeline",
    "generate",
    "generate_scenarios",
    "grade",
    "DatasetType",
    "Policy",
    "Dataset",
    "ToolDefinition",
    # Reporters
    "SilentReporter",
    "RichReporter",
    "CallbackReporter",
    "ProgressReporter",
    # Models
    "OpenAI",
    "Anthropic",
    "Google",
    "Local",
    "LocalModel",
    "LLM",
    # Result types
    "GenerationResult",
    "ScenariosResult",
    # Data types (less common)
    "Trace",
    "Scenario",
    "EvalScenario",
    "Message",
    "GradeResult",
    "Plan",
    "Category",
    "ToolCall",
    "ToolFunction",
    "ToolResult",
]
117
+
118
+
119
+ # Note: For advanced usage (LogicMap, TraceVerifier, etc.), use:
120
+ # from synkro.advanced import ...
121
+
122
+
123
def generate(
    policy: str | Policy,
    traces: int = 20,
    turns: int | str = "auto",
    dataset_type: DatasetType = DatasetType.CONVERSATION,
    generation_model: OpenAI | Anthropic | Google | LocalModel | str = OpenAI.GPT_5_MINI,
    grading_model: OpenAI | Anthropic | Google | LocalModel | str = OpenAI.GPT_52,
    max_iterations: int = 3,
    skip_grading: bool = False,
    reporter: ProgressReporter | None = None,
    return_logic_map: bool = False,
    enable_hitl: bool = True,
    base_url: str | None = None,
    temperature: float = 0.7,
) -> Dataset | GenerationResult:
    """
    Generate training traces from a policy document.

    This is a convenience function. For more control, use create_pipeline().

    Args:
        policy: Policy text or Policy object
        traces: Number of traces to generate (default: 20)
        turns: Conversation turns per trace. Use int for fixed turns, or "auto"
            for policy complexity-driven turns (Simple=1-2, Conditional=3, Complex=5+)
        dataset_type: Type of dataset - CONVERSATION (default), INSTRUCTION, or TOOL_CALL
        generation_model: Model for generating (default: gpt-5-mini)
        grading_model: Model for grading (default: gpt-5.2)
        max_iterations: Max refinement iterations per trace (default: 3)
        skip_grading: Skip grading phase for faster generation (default: False)
        reporter: Progress reporter (default: RichReporter for console output)
        return_logic_map: If True, return GenerationResult with Logic Map access
        enable_hitl: Enable Human-in-the-Loop Logic Map editing (default: True)
        base_url: Optional API base URL for local LLM providers (Ollama, vLLM, etc.)
        temperature: Sampling temperature for generation (0.0-2.0, default: 0.7).
            Lower values (0.1-0.3) produce more deterministic outputs for eval datasets.
            Higher values (0.7-1.0) produce more diverse outputs for training data.

    Returns:
        Dataset (default) or GenerationResult if return_logic_map=True

    Example:
        >>> import synkro
        >>> dataset = synkro.generate("All expenses over $50 require approval")
        >>> dataset.save("training.jsonl")

        >>> # Access Logic Map
        >>> result = synkro.generate(policy, return_logic_map=True)
        >>> print(result.logic_map.rules)
        >>> dataset = result.dataset

        >>> # Multi-turn with fixed 3 turns
        >>> dataset = synkro.generate(policy, turns=3)

        >>> # Disable interactive Logic Map editing (enabled by default)
        >>> dataset = synkro.generate(policy, enable_hitl=False)

        >>> # Silent mode
        >>> from synkro import SilentReporter
        >>> dataset = synkro.generate(policy, reporter=SilentReporter())
    """
    # Imported lazily so that `import synkro` stays fast.
    from synkro.generation.generator import Generator

    # Accept raw text for convenience; normalize to a Policy object.
    if isinstance(policy, str):
        policy = Policy(text=policy)

    generator = Generator(
        dataset_type=dataset_type,
        generation_model=generation_model,
        grading_model=grading_model,
        max_iterations=max_iterations,
        skip_grading=skip_grading,
        reporter=reporter,
        turns=turns,
        enable_hitl=enable_hitl,
        base_url=base_url,
        temperature=temperature,
    )

    return generator.generate(policy, traces=traces, return_logic_map=return_logic_map)
203
+
204
+
205
def generate_scenarios(
    policy: str | Policy,
    count: int = 100,
    generation_model: OpenAI | Anthropic | Google | LocalModel | str = OpenAI.GPT_4O_MINI,
    temperature: float = 0.8,
    reporter: ProgressReporter | None = None,
    enable_hitl: bool = False,
    base_url: str | None = None,
) -> ScenariosResult:
    """
    Generate eval scenarios from a policy without synthetic responses.

    Eval-focused entry point: produces diverse test scenarios carrying ground
    truth labels (expected outcomes, target rules) but no synthetic answers.
    Pair with synkro.grade() to score your own model's outputs.

    Args:
        policy: Policy text or Policy object
        count: Number of scenarios to generate (default: 100)
        generation_model: Model for generation (default: gpt-4o-mini)
        temperature: Sampling temperature (default: 0.8 for scenario diversity)
        reporter: Progress reporter (default: RichReporter for console output)
        enable_hitl: Enable Human-in-the-Loop editing (default: False)
        base_url: Optional API base URL for local LLM providers

    Returns:
        ScenariosResult with scenarios, logic_map, and distribution

    Example:
        >>> import synkro
        >>> result = synkro.generate_scenarios("Your policy...", count=100)
        >>> for scenario in result.scenarios:
        ...     response = my_model(scenario.user_message)
        ...     grade = synkro.grade(response, scenario, policy)
        ...     print(f"Passed: {grade.passed}")
    """
    # Lazy import keeps top-level package import cheap.
    from synkro.generation.generator import Generator

    policy_obj = Policy(text=policy) if isinstance(policy, str) else policy

    scenario_generator = Generator(
        # Dataset type is irrelevant when only scenarios are produced.
        dataset_type=DatasetType.CONVERSATION,
        generation_model=generation_model,
        # Grading never runs (skip_grading=True) but Generator requires a model.
        grading_model=generation_model,
        skip_grading=True,
        reporter=reporter,
        enable_hitl=enable_hitl,
        base_url=base_url,
        temperature=temperature,
    )

    return scenario_generator.generate_scenarios(policy_obj, count=count)
262
+
263
+
264
def grade(
    response: str,
    scenario: EvalScenario,
    policy: str | Policy,
    model: OpenAI | Anthropic | Google | LocalModel | str = OpenAI.GPT_4O,
    base_url: str | None = None,
) -> GradeResult:
    """
    Grade an external model's response against a scenario and policy.

    Use this to evaluate your own model's outputs against scenarios
    generated by synkro.generate_scenarios().

    Args:
        response: The response from the model being evaluated
        scenario: The eval scenario with expected_outcome and target_rules
        policy: The policy document for grading context
        model: LLM to use for grading (default: gpt-4o, stronger = better)
        base_url: Optional API base URL for local LLM providers

    Returns:
        GradeResult with passed, feedback, and issues

    Example:
        >>> result = synkro.generate_scenarios(policy, count=100)
        >>> for scenario in result.scenarios:
        ...     response = my_model(scenario.user_message)
        ...     grade = synkro.grade(response, scenario, policy)
        ...     if not grade.passed:
        ...         print(f"Failed: {grade.feedback}")
    """
    import asyncio

    from synkro.llm.client import LLM
    from synkro.quality.grader import Grader
    from synkro.types.core import Message, Scenario as BaseScenario, Trace

    # Accept either raw text or a Policy object.
    policy_text = policy if isinstance(policy, str) else policy.text

    # Low temperature keeps grading as deterministic as the model allows.
    grading_llm = LLM(model=model, base_url=base_url, temperature=0.1)
    grader = Grader(llm=grading_llm)

    # Rebuild the internal Scenario/Trace pair the Grader expects from the
    # eval scenario plus the external model's response.
    base_scenario = BaseScenario(
        description=scenario.user_message,
        context=scenario.context,
        category=scenario.category,
        scenario_type=scenario.scenario_type,
        target_rule_ids=scenario.target_rule_ids,
        expected_outcome=scenario.expected_outcome,
    )

    trace = Trace(
        messages=[
            Message(role="user", content=scenario.user_message),
            Message(role="assistant", content=response),
        ],
        scenario=base_scenario,
    )

    # Grader.grade is a coroutine; drive it to completion on a fresh event
    # loop. (Like the original, this cannot be called from inside a running
    # event loop -- asyncio.run would raise RuntimeError there.)
    return asyncio.run(grader.grade(trace, policy_text))
synkro/advanced.py ADDED
@@ -0,0 +1,184 @@
1
+ """Advanced components for power users.
2
+
3
+ This module exposes internal components for developers who need fine-grained
4
+ control over the generation pipeline.
5
+
6
+ Usage:
7
+ from synkro.advanced import (
8
+ # Golden Trace components
9
+ LogicExtractor,
10
+ GoldenScenarioGenerator,
11
+ GoldenResponseGenerator,
12
+ TraceVerifier,
13
+ GoldenRefiner,
14
+
15
+ # Types
16
+ LogicMap,
17
+ Rule,
18
+ GoldenScenario,
19
+ VerificationResult,
20
+ GenerationResult,
21
+
22
+ # Pipeline internals
23
+ GenerationPipeline,
24
+ ComponentFactory,
25
+ )
26
+
27
+ Examples:
28
+ >>> # Extract Logic Map manually
29
+ >>> from synkro.advanced import LogicExtractor, LLM
30
+ >>> extractor = LogicExtractor(llm=LLM(model="gpt-4o"))
31
+ >>> logic_map = await extractor.extract(policy_text)
32
+ >>> print(logic_map.rules)
33
+
34
+ >>> # Verify a trace against Logic Map
35
+ >>> from synkro.advanced import TraceVerifier
36
+ >>> verifier = TraceVerifier()
37
+ >>> result = await verifier.verify(trace, logic_map, scenario)
38
+ >>> if not result.passed:
39
+ ... print(f"Failed: {result.issues}")
40
+ """
41
+
42
+ # Golden Trace components (The 4 Stages)
43
+ from synkro.generation.logic_extractor import LogicExtractor
44
+ from synkro.generation.golden_scenarios import GoldenScenarioGenerator
45
+ from synkro.generation.golden_responses import GoldenResponseGenerator
46
+ from synkro.generation.golden_tool_responses import GoldenToolCallResponseGenerator
47
+ from synkro.quality.verifier import TraceVerifier
48
+ from synkro.quality.golden_refiner import GoldenRefiner
49
+
50
+ # Logic Map types
51
+ from synkro.types.logic_map import (
52
+ LogicMap,
53
+ Rule,
54
+ RuleCategory,
55
+ GoldenScenario,
56
+ ScenarioType,
57
+ ReasoningStep,
58
+ VerificationResult,
59
+ )
60
+
61
+ # Pipeline internals
62
+ from synkro.pipeline.runner import GenerationPipeline, GenerationResult
63
+ from synkro.factory import ComponentFactory
64
+
65
+ # Pipeline phases
66
+ from synkro.pipeline.phases import (
67
+ PlanPhase,
68
+ LogicExtractionPhase,
69
+ GoldenScenarioPhase,
70
+ GoldenTracePhase,
71
+ GoldenToolCallPhase,
72
+ VerificationPhase,
73
+ )
74
+
75
+ # Low-level generators
76
+ from synkro.generation.generator import Generator
77
+ from synkro.generation.scenarios import ScenarioGenerator
78
+ from synkro.generation.responses import ResponseGenerator
79
+ from synkro.generation.planner import Planner
80
+ from synkro.generation.follow_ups import FollowUpGenerator
81
+ from synkro.generation.multiturn_responses import MultiTurnResponseGenerator
82
+
83
+ # Quality components
84
+ from synkro.quality.grader import Grader
85
+ from synkro.quality.refiner import Refiner
86
+ from synkro.quality.tool_grader import ToolCallGrader
87
+ from synkro.quality.tool_refiner import ToolCallRefiner
88
+ from synkro.quality.multiturn_grader import MultiTurnGrader
89
+
90
+ # LLM client
91
+ from synkro.llm.client import LLM
92
+
93
+ # Prompts (for customization)
94
+ from synkro.prompts import SystemPrompt, ScenarioPrompt, ResponsePrompt, GradePrompt
95
+ from synkro.prompts.golden_templates import (
96
+ LOGIC_EXTRACTION_PROMPT,
97
+ GOLDEN_SCENARIO_PROMPT,
98
+ GOLDEN_TRACE_PROMPT,
99
+ VERIFICATION_PROMPT,
100
+ GOLDEN_REFINE_PROMPT,
101
+ GOLDEN_TOOL_TRACE_PROMPT,
102
+ )
103
+
104
+ # Formatters
105
+ from synkro.formatters.sft import SFTFormatter
106
+ from synkro.formatters.tool_call import ToolCallFormatter
107
+
108
+ # Schemas (for structured output)
109
+ from synkro.schemas import (
110
+ RuleExtraction,
111
+ LogicMapOutput,
112
+ GoldenScenarioOutput,
113
+ GoldenScenariosArray,
114
+ ReasoningStepOutput,
115
+ GoldenTraceOutput,
116
+ VerificationOutput,
117
+ )
118
+
119
+
120
+ __all__ = [
121
+ # Golden Trace components
122
+ "LogicExtractor",
123
+ "GoldenScenarioGenerator",
124
+ "GoldenResponseGenerator",
125
+ "GoldenToolCallResponseGenerator",
126
+ "TraceVerifier",
127
+ "GoldenRefiner",
128
+ # Logic Map types
129
+ "LogicMap",
130
+ "Rule",
131
+ "RuleCategory",
132
+ "GoldenScenario",
133
+ "ScenarioType",
134
+ "ReasoningStep",
135
+ "VerificationResult",
136
+ # Pipeline
137
+ "GenerationPipeline",
138
+ "GenerationResult",
139
+ "ComponentFactory",
140
+ # Phases
141
+ "PlanPhase",
142
+ "LogicExtractionPhase",
143
+ "GoldenScenarioPhase",
144
+ "GoldenTracePhase",
145
+ "GoldenToolCallPhase",
146
+ "VerificationPhase",
147
+ # Generators
148
+ "Generator",
149
+ "ScenarioGenerator",
150
+ "ResponseGenerator",
151
+ "Planner",
152
+ "FollowUpGenerator",
153
+ "MultiTurnResponseGenerator",
154
+ # Quality
155
+ "Grader",
156
+ "Refiner",
157
+ "ToolCallGrader",
158
+ "ToolCallRefiner",
159
+ "MultiTurnGrader",
160
+ # LLM
161
+ "LLM",
162
+ # Prompts
163
+ "SystemPrompt",
164
+ "ScenarioPrompt",
165
+ "ResponsePrompt",
166
+ "GradePrompt",
167
+ "LOGIC_EXTRACTION_PROMPT",
168
+ "GOLDEN_SCENARIO_PROMPT",
169
+ "GOLDEN_TRACE_PROMPT",
170
+ "VERIFICATION_PROMPT",
171
+ "GOLDEN_REFINE_PROMPT",
172
+ "GOLDEN_TOOL_TRACE_PROMPT",
173
+ # Formatters
174
+ "SFTFormatter",
175
+ "ToolCallFormatter",
176
+ # Schemas
177
+ "RuleExtraction",
178
+ "LogicMapOutput",
179
+ "GoldenScenarioOutput",
180
+ "GoldenScenariosArray",
181
+ "ReasoningStepOutput",
182
+ "GoldenTraceOutput",
183
+ "VerificationOutput",
184
+ ]
synkro/cli.py ADDED
@@ -0,0 +1,156 @@
1
+ """Synkro CLI - Generate training data from the command line."""
2
+
3
+ import typer
4
+ from pathlib import Path
5
+ from typing import Optional
6
+
7
+ app = typer.Typer(
8
+ name="synkro",
9
+ help="Generate training datasets from documents.",
10
+ no_args_is_help=True,
11
+ )
12
+
13
+
14
@app.command()
def generate(
    source: str = typer.Argument(
        ...,
        help="Policy text, file path (.pdf, .docx, .txt, .md), folder path, or URL",
    ),
    output: Optional[Path] = typer.Option(
        None,
        "--output", "-o",
        help="Output file path (auto-generated if not specified)",
    ),
    traces: int = typer.Option(
        20,
        "--traces", "-n",
        help="Number of traces to generate",
    ),
    format: str = typer.Option(
        "sft",
        "--format", "-f",
        help="Output format: sft or qa",
    ),
    model: str = typer.Option(
        "gpt-4o-mini",
        "--model", "-m",
        help="Model for generation (e.g., gpt-4o-mini, claude-3-5-sonnet, gemini-2.5-flash, llama3.1)",
    ),
    provider: Optional[str] = typer.Option(
        None,
        "--provider", "-p",
        help="LLM provider for local models (ollama, vllm)",
    ),
    endpoint: Optional[str] = typer.Option(
        None,
        "--endpoint", "-e",
        help="API endpoint URL (e.g., http://localhost:11434)",
    ),
    interactive: bool = typer.Option(
        True,
        "--interactive/--no-interactive", "-i/-I",
        help="Enable interactive Logic Map editing before generation (enabled by default)",
    ),
):
    """
    Generate training data from a policy document.

    Examples:

        synkro generate policy.pdf

        synkro generate policies/  # Load all files from folder

        synkro generate "All expenses over $50 need approval" --traces 50

        synkro generate handbook.docx -o training.jsonl -n 100

        synkro generate policy.pdf --interactive  # Review and edit Logic Map
    """
    import synkro
    from synkro import Policy

    # Classify the source: URL, existing file/folder, or raw policy text.
    if source.startswith(("http://", "https://")):
        policy = Policy.from_url(source)
    else:
        try:
            source_is_path = Path(source).exists()
        except (OSError, ValueError):
            # Raw policy text can be too long for the OS (or contain NUL
            # bytes), which makes the filesystem probe raise instead of
            # returning False; treat such input as text, not a crash.
            source_is_path = False
        if source_is_path:
            policy = Policy.from_file(Path(source))
        else:
            policy = Policy(text=source)

    # Handle local LLM provider configuration.
    base_url = endpoint
    effective_model = model

    if provider:
        # Format model string for LiteLLM if a provider was specified.
        if "/" not in model:
            effective_model = f"{provider}/{model}"

        # Use the provider's default endpoint if none was given explicitly.
        if not endpoint:
            defaults = {
                "ollama": "http://localhost:11434",
                "vllm": "http://localhost:8000",
            }
            base_url = defaults.get(provider)

    # Generate the dataset.
    dataset = synkro.generate(
        policy,
        traces=traces,
        generation_model=effective_model,
        enable_hitl=interactive,
        base_url=base_url,
    )

    # Save to the requested path, or let Dataset pick an auto-generated name.
    if output:
        dataset.save(output, format=format)
    else:
        dataset.save(format=format)
118
+
119
+
120
@app.command()
def demo():
    """
    Run a quick demo with a built-in example policy.
    """
    import synkro
    from synkro.examples import EXPENSE_POLICY
    from rich.console import Console

    ui = Console()
    ui.print("\n[cyan]Running demo with built-in expense policy...[/cyan]\n")

    # Small trace count keeps the demo fast while still producing real output.
    demo_dataset = synkro.generate(EXPENSE_POLICY, traces=5)
    demo_dataset.save("demo_output.jsonl")

    ui.print("\n[green]Demo complete![/green]")
    ui.print("[dim]Check demo_output.jsonl for the generated training data.[/dim]\n")
137
+
138
+
139
@app.command()
def version():
    """Show version information."""
    import synkro
    from rich.console import Console

    # Version is resolved dynamically in synkro/__init__.py.
    Console().print(f"[cyan]synkro[/cyan] v{synkro.__version__}")
147
+
148
+
149
def main():
    """Entry point for the CLI."""
    # Delegate to the module-level Typer application, which handles
    # argument parsing and command dispatch.
    app()


if __name__ == "__main__":
    main()
156
+
@@ -0,0 +1,7 @@
1
+ """Core classes for policy and dataset management."""
2
+
3
+ from synkro.core.policy import Policy
4
+ from synkro.core.dataset import Dataset
5
+
6
+ __all__ = ["Policy", "Dataset"]
7
+