synkro 0.4.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. synkro/__init__.py +179 -0
  2. synkro/advanced.py +186 -0
  3. synkro/cli.py +128 -0
  4. synkro/core/__init__.py +7 -0
  5. synkro/core/checkpoint.py +250 -0
  6. synkro/core/dataset.py +402 -0
  7. synkro/core/policy.py +337 -0
  8. synkro/errors.py +178 -0
  9. synkro/examples/__init__.py +148 -0
  10. synkro/factory.py +276 -0
  11. synkro/formatters/__init__.py +12 -0
  12. synkro/formatters/qa.py +98 -0
  13. synkro/formatters/sft.py +90 -0
  14. synkro/formatters/tool_call.py +127 -0
  15. synkro/generation/__init__.py +9 -0
  16. synkro/generation/follow_ups.py +134 -0
  17. synkro/generation/generator.py +220 -0
  18. synkro/generation/golden_responses.py +244 -0
  19. synkro/generation/golden_scenarios.py +276 -0
  20. synkro/generation/golden_tool_responses.py +416 -0
  21. synkro/generation/logic_extractor.py +126 -0
  22. synkro/generation/multiturn_responses.py +177 -0
  23. synkro/generation/planner.py +131 -0
  24. synkro/generation/responses.py +189 -0
  25. synkro/generation/scenarios.py +90 -0
  26. synkro/generation/tool_responses.py +376 -0
  27. synkro/generation/tool_simulator.py +114 -0
  28. synkro/interactive/__init__.py +12 -0
  29. synkro/interactive/hitl_session.py +77 -0
  30. synkro/interactive/logic_map_editor.py +173 -0
  31. synkro/interactive/rich_ui.py +205 -0
  32. synkro/llm/__init__.py +7 -0
  33. synkro/llm/client.py +235 -0
  34. synkro/llm/rate_limits.py +95 -0
  35. synkro/models/__init__.py +43 -0
  36. synkro/models/anthropic.py +26 -0
  37. synkro/models/google.py +19 -0
  38. synkro/models/openai.py +31 -0
  39. synkro/modes/__init__.py +15 -0
  40. synkro/modes/config.py +66 -0
  41. synkro/modes/qa.py +18 -0
  42. synkro/modes/sft.py +18 -0
  43. synkro/modes/tool_call.py +18 -0
  44. synkro/parsers.py +442 -0
  45. synkro/pipeline/__init__.py +20 -0
  46. synkro/pipeline/phases.py +592 -0
  47. synkro/pipeline/runner.py +424 -0
  48. synkro/pipelines.py +123 -0
  49. synkro/prompts/__init__.py +57 -0
  50. synkro/prompts/base.py +167 -0
  51. synkro/prompts/golden_templates.py +474 -0
  52. synkro/prompts/interactive_templates.py +65 -0
  53. synkro/prompts/multiturn_templates.py +156 -0
  54. synkro/prompts/qa_templates.py +97 -0
  55. synkro/prompts/templates.py +281 -0
  56. synkro/prompts/tool_templates.py +201 -0
  57. synkro/quality/__init__.py +14 -0
  58. synkro/quality/golden_refiner.py +163 -0
  59. synkro/quality/grader.py +153 -0
  60. synkro/quality/multiturn_grader.py +150 -0
  61. synkro/quality/refiner.py +137 -0
  62. synkro/quality/tool_grader.py +126 -0
  63. synkro/quality/tool_refiner.py +128 -0
  64. synkro/quality/verifier.py +228 -0
  65. synkro/reporting.py +537 -0
  66. synkro/schemas.py +472 -0
  67. synkro/types/__init__.py +41 -0
  68. synkro/types/core.py +126 -0
  69. synkro/types/dataset_type.py +30 -0
  70. synkro/types/logic_map.py +345 -0
  71. synkro/types/tool.py +94 -0
  72. synkro-0.4.12.data/data/examples/__init__.py +148 -0
  73. synkro-0.4.12.dist-info/METADATA +258 -0
  74. synkro-0.4.12.dist-info/RECORD +77 -0
  75. synkro-0.4.12.dist-info/WHEEL +4 -0
  76. synkro-0.4.12.dist-info/entry_points.txt +2 -0
  77. synkro-0.4.12.dist-info/licenses/LICENSE +21 -0
synkro/__init__.py ADDED
@@ -0,0 +1,179 @@
1
+ """
2
+ Synkro - Generate high-quality training datasets from any document.
3
+
4
+ Quick Start:
5
+ >>> import synkro
6
+ >>> dataset = synkro.generate("Your policy text...")
7
+ >>> dataset.save("training.jsonl")
8
+
9
+ Pipeline Usage (more control):
10
+ >>> from synkro import create_pipeline, DatasetType
11
+ >>> pipeline = create_pipeline(dataset_type=DatasetType.SFT)
12
+ >>> dataset = pipeline.generate("policy text", traces=50)
13
+
14
+ Access Logic Map (for inspection):
15
+ >>> result = pipeline.generate("policy text", return_logic_map=True)
16
+ >>> print(result.logic_map.rules) # See extracted rules
17
+ >>> dataset = result.dataset
18
+
19
+ Silent Mode:
20
+ >>> from synkro import SilentReporter, create_pipeline
21
+ >>> pipeline = create_pipeline(reporter=SilentReporter())
22
+
23
+ Progress Callbacks:
24
+ >>> from synkro import CallbackReporter, create_pipeline
25
+ >>> reporter = CallbackReporter(
26
+ ... on_progress=lambda event, data: print(f"{event}: {data}")
27
+ ... )
28
+ >>> pipeline = create_pipeline(reporter=reporter)
29
+
30
+ Tool Call Dataset:
31
+ >>> from synkro import create_pipeline, ToolDefinition, DatasetType
32
+ >>> tools = [ToolDefinition(name="search", description="...", parameters={})]
33
+ >>> pipeline = create_pipeline(dataset_type=DatasetType.TOOL_CALL, tools=tools)
34
+
35
+ Advanced Usage (power users):
36
+ >>> from synkro.advanced import LogicExtractor, TraceVerifier, LogicMap
37
+ >>> # Full access to Golden Trace internals
38
+ """
39
+
# Resolve the version from the installed distribution's metadata so that
# ``synkro.__version__`` always matches what is actually installed.
try:
    from importlib.metadata import version as _get_version

    __version__ = _get_version("synkro")
except Exception:
    # Deliberate best-effort fallback for when package metadata cannot be read
    # (e.g. the code is run without being installed). The literal must track
    # the released version; it previously lagged behind at "0.4.6".
    __version__ = "0.4.12"
46
+
47
+ # =============================================================================
48
+ # PRIMARY API - What most developers need
49
+ # =============================================================================
50
+
51
+ from synkro.pipelines import create_pipeline
52
+ from synkro.models import OpenAI, Anthropic, Google
53
+ from synkro.types import DatasetType
54
+ from synkro.core.policy import Policy
55
+ from synkro.core.dataset import Dataset
56
+ from synkro.reporting import SilentReporter, RichReporter, CallbackReporter
57
+
58
+ # Tool types (needed for TOOL_CALL dataset type)
59
+ from synkro.types import ToolDefinition
60
+
61
+ # =============================================================================
62
+ # SECONDARY API - Less commonly needed
63
+ # =============================================================================
64
+
65
+ from synkro.types import Message, Scenario, Trace, GradeResult, Plan, Category
66
+ from synkro.types import ToolCall, ToolFunction, ToolResult
67
+ from synkro.reporting import ProgressReporter
68
+
69
+ # GenerationResult for return_logic_map=True
70
+ from synkro.pipeline.runner import GenerationResult
71
+
72
+ __all__ = [
73
+ # Primary API
74
+ "create_pipeline",
75
+ "generate",
76
+ "DatasetType",
77
+ "Policy",
78
+ "Dataset",
79
+ "ToolDefinition",
80
+ # Reporters
81
+ "SilentReporter",
82
+ "RichReporter",
83
+ "CallbackReporter",
84
+ "ProgressReporter",
85
+ # Models
86
+ "OpenAI",
87
+ "Anthropic",
88
+ "Google",
89
+ # Result types
90
+ "GenerationResult",
91
+ # Data types (less common)
92
+ "Trace",
93
+ "Scenario",
94
+ "Message",
95
+ "GradeResult",
96
+ "Plan",
97
+ "Category",
98
+ "ToolCall",
99
+ "ToolFunction",
100
+ "ToolResult",
101
+ ]
102
+
103
+
104
+ # Note: For advanced usage (LogicMap, TraceVerifier, etc.), use:
105
+ # from synkro.advanced import ...
106
+
107
+
def generate(
    policy: str | Policy,
    traces: int = 20,
    turns: int | str = "auto",
    dataset_type: DatasetType = DatasetType.SFT,
    generation_model: OpenAI | Anthropic | Google | str = OpenAI.GPT_5_MINI,
    grading_model: OpenAI | Anthropic | Google | str = OpenAI.GPT_52,
    max_iterations: int = 3,
    skip_grading: bool = False,
    reporter: ProgressReporter | None = None,
    return_logic_map: bool = False,
    enable_hitl: bool = True,
) -> Dataset | GenerationResult:
    """
    Generate training traces from a policy document.

    This is a convenience function that builds a ``Generator`` from the given
    options and runs it once. For more control, use create_pipeline().

    Args:
        policy: Policy text or Policy object. A plain string is wrapped in
            ``Policy(text=...)`` before generation.
        traces: Number of traces to generate (default: 20)
        turns: Conversation turns per trace. Use int for fixed turns, or "auto"
            for policy complexity-driven turns (Simple=1-2, Conditional=3, Complex=5+)
        dataset_type: Type of dataset - SFT (default) or QA. This function has
            no ``tools`` parameter; for TOOL_CALL datasets use
            create_pipeline(dataset_type=DatasetType.TOOL_CALL, tools=...).
        generation_model: Model for generating (default: gpt-5-mini)
        grading_model: Model for grading (default: gpt-5.2)
        max_iterations: Max refinement iterations per trace (default: 3)
        skip_grading: Skip grading phase for faster generation (default: False)
        reporter: Progress reporter. ``None`` (the default) is forwarded as-is
            and defers to Generator's own default (presumably RichReporter
            console output - confirm in Generator).
        return_logic_map: If True, return GenerationResult with Logic Map access
        enable_hitl: Enable Human-in-the-Loop Logic Map editing (default: True).
            Pass False for unattended / non-interactive runs.

    Returns:
        Dataset (default) or GenerationResult if return_logic_map=True

    Example:
        >>> import synkro
        >>> dataset = synkro.generate("All expenses over $50 require approval")
        >>> dataset.save("training.jsonl")

        >>> # Access Logic Map
        >>> result = synkro.generate(policy, return_logic_map=True)
        >>> print(result.logic_map.rules)
        >>> dataset = result.dataset

        >>> # Multi-turn with fixed 3 turns
        >>> dataset = synkro.generate(policy, turns=3)

        >>> # Skip interactive Logic Map editing (it is on by default)
        >>> dataset = synkro.generate(policy, enable_hitl=False)

        >>> # Silent mode
        >>> from synkro import SilentReporter
        >>> dataset = synkro.generate(policy, reporter=SilentReporter())
    """
    # Imported lazily inside the function, as in the original module, rather
    # than at package import time.
    from synkro.generation.generator import Generator

    # Accept raw text for convenience; everything downstream works on Policy.
    if isinstance(policy, str):
        policy = Policy(text=policy)

    generator = Generator(
        dataset_type=dataset_type,
        generation_model=generation_model,
        grading_model=grading_model,
        max_iterations=max_iterations,
        skip_grading=skip_grading,
        reporter=reporter,
        turns=turns,
        enable_hitl=enable_hitl,
    )

    return generator.generate(policy, traces=traces, return_logic_map=return_logic_map)
synkro/advanced.py ADDED
@@ -0,0 +1,186 @@
1
+ """Advanced components for power users.
2
+
3
+ This module exposes internal components for developers who need fine-grained
4
+ control over the generation pipeline.
5
+
6
+ Usage:
7
+ from synkro.advanced import (
8
+ # Golden Trace components
9
+ LogicExtractor,
10
+ GoldenScenarioGenerator,
11
+ GoldenResponseGenerator,
12
+ TraceVerifier,
13
+ GoldenRefiner,
14
+
15
+ # Types
16
+ LogicMap,
17
+ Rule,
18
+ GoldenScenario,
19
+ VerificationResult,
20
+ GenerationResult,
21
+
22
+ # Pipeline internals
23
+ GenerationPipeline,
24
+ ComponentFactory,
25
+ )
26
+
27
+ Examples:
28
+ >>> # Extract Logic Map manually
29
+ >>> from synkro.advanced import LogicExtractor, LLM
30
+ >>> extractor = LogicExtractor(llm=LLM(model="gpt-4o"))
31
+ >>> logic_map = await extractor.extract(policy_text)
32
+ >>> print(logic_map.rules)
33
+
34
+ >>> # Verify a trace against Logic Map
35
+ >>> from synkro.advanced import TraceVerifier
36
+ >>> verifier = TraceVerifier()
37
+ >>> result = await verifier.verify(trace, logic_map, scenario)
38
+ >>> if not result.passed:
39
+ ... print(f"Failed: {result.issues}")
40
+ """
41
+
42
+ # Golden Trace components (The 4 Stages)
43
+ from synkro.generation.logic_extractor import LogicExtractor
44
+ from synkro.generation.golden_scenarios import GoldenScenarioGenerator
45
+ from synkro.generation.golden_responses import GoldenResponseGenerator
46
+ from synkro.generation.golden_tool_responses import GoldenToolCallResponseGenerator
47
+ from synkro.quality.verifier import TraceVerifier
48
+ from synkro.quality.golden_refiner import GoldenRefiner
49
+
50
+ # Logic Map types
51
+ from synkro.types.logic_map import (
52
+ LogicMap,
53
+ Rule,
54
+ RuleCategory,
55
+ GoldenScenario,
56
+ ScenarioType,
57
+ ReasoningStep,
58
+ VerificationResult,
59
+ )
60
+
61
+ # Pipeline internals
62
+ from synkro.pipeline.runner import GenerationPipeline, GenerationResult
63
+ from synkro.factory import ComponentFactory
64
+
65
+ # Pipeline phases
66
+ from synkro.pipeline.phases import (
67
+ PlanPhase,
68
+ LogicExtractionPhase,
69
+ GoldenScenarioPhase,
70
+ GoldenTracePhase,
71
+ GoldenToolCallPhase,
72
+ VerificationPhase,
73
+ )
74
+
75
+ # Low-level generators
76
+ from synkro.generation.generator import Generator
77
+ from synkro.generation.scenarios import ScenarioGenerator
78
+ from synkro.generation.responses import ResponseGenerator
79
+ from synkro.generation.planner import Planner
80
+ from synkro.generation.follow_ups import FollowUpGenerator
81
+ from synkro.generation.multiturn_responses import MultiTurnResponseGenerator
82
+
83
+ # Quality components
84
+ from synkro.quality.grader import Grader
85
+ from synkro.quality.refiner import Refiner
86
+ from synkro.quality.tool_grader import ToolCallGrader
87
+ from synkro.quality.tool_refiner import ToolCallRefiner
88
+ from synkro.quality.multiturn_grader import MultiTurnGrader
89
+
90
+ # LLM client
91
+ from synkro.llm.client import LLM
92
+
93
+ # Prompts (for customization)
94
+ from synkro.prompts import SystemPrompt, ScenarioPrompt, ResponsePrompt, GradePrompt
95
+ from synkro.prompts.golden_templates import (
96
+ LOGIC_EXTRACTION_PROMPT,
97
+ GOLDEN_SCENARIO_PROMPT,
98
+ GOLDEN_TRACE_PROMPT,
99
+ VERIFICATION_PROMPT,
100
+ GOLDEN_REFINE_PROMPT,
101
+ GOLDEN_TOOL_TRACE_PROMPT,
102
+ )
103
+
104
+ # Formatters
105
+ from synkro.formatters.sft import SFTFormatter
106
+ from synkro.formatters.qa import QAFormatter
107
+ from synkro.formatters.tool_call import ToolCallFormatter
108
+
109
+ # Schemas (for structured output)
110
+ from synkro.schemas import (
111
+ RuleExtraction,
112
+ LogicMapOutput,
113
+ GoldenScenarioOutput,
114
+ GoldenScenariosArray,
115
+ ReasoningStepOutput,
116
+ GoldenTraceOutput,
117
+ VerificationOutput,
118
+ )
119
+
120
+
121
+ __all__ = [
122
+ # Golden Trace components
123
+ "LogicExtractor",
124
+ "GoldenScenarioGenerator",
125
+ "GoldenResponseGenerator",
126
+ "GoldenToolCallResponseGenerator",
127
+ "TraceVerifier",
128
+ "GoldenRefiner",
129
+ # Logic Map types
130
+ "LogicMap",
131
+ "Rule",
132
+ "RuleCategory",
133
+ "GoldenScenario",
134
+ "ScenarioType",
135
+ "ReasoningStep",
136
+ "VerificationResult",
137
+ # Pipeline
138
+ "GenerationPipeline",
139
+ "GenerationResult",
140
+ "ComponentFactory",
141
+ # Phases
142
+ "PlanPhase",
143
+ "LogicExtractionPhase",
144
+ "GoldenScenarioPhase",
145
+ "GoldenTracePhase",
146
+ "GoldenToolCallPhase",
147
+ "VerificationPhase",
148
+ # Generators
149
+ "Generator",
150
+ "ScenarioGenerator",
151
+ "ResponseGenerator",
152
+ "Planner",
153
+ "FollowUpGenerator",
154
+ "MultiTurnResponseGenerator",
155
+ # Quality
156
+ "Grader",
157
+ "Refiner",
158
+ "ToolCallGrader",
159
+ "ToolCallRefiner",
160
+ "MultiTurnGrader",
161
+ # LLM
162
+ "LLM",
163
+ # Prompts
164
+ "SystemPrompt",
165
+ "ScenarioPrompt",
166
+ "ResponsePrompt",
167
+ "GradePrompt",
168
+ "LOGIC_EXTRACTION_PROMPT",
169
+ "GOLDEN_SCENARIO_PROMPT",
170
+ "GOLDEN_TRACE_PROMPT",
171
+ "VERIFICATION_PROMPT",
172
+ "GOLDEN_REFINE_PROMPT",
173
+ "GOLDEN_TOOL_TRACE_PROMPT",
174
+ # Formatters
175
+ "SFTFormatter",
176
+ "QAFormatter",
177
+ "ToolCallFormatter",
178
+ # Schemas
179
+ "RuleExtraction",
180
+ "LogicMapOutput",
181
+ "GoldenScenarioOutput",
182
+ "GoldenScenariosArray",
183
+ "ReasoningStepOutput",
184
+ "GoldenTraceOutput",
185
+ "VerificationOutput",
186
+ ]
synkro/cli.py ADDED
@@ -0,0 +1,128 @@
1
+ """Synkro CLI - Generate training data from the command line."""
2
+
3
+ import typer
4
+ from pathlib import Path
5
+ from typing import Optional
6
+
# Root Typer application; the functions decorated with @app.command() below
# register themselves on it. no_args_is_help=True is passed so that a bare
# `synkro` invocation shows help.
app = typer.Typer(
    name="synkro",
    help="Generate training datasets from documents.",
    no_args_is_help=True,
)
12
+
13
+
@app.command()
def generate(
    source: str = typer.Argument(
        ...,
        help="Policy text, file path (.pdf, .docx, .txt, .md), folder path, or URL",
    ),
    output: Optional[Path] = typer.Option(
        None,
        "--output", "-o",
        help="Output file path (auto-generated if not specified)",
    ),
    traces: int = typer.Option(
        20,
        "--traces", "-n",
        help="Number of traces to generate",
    ),
    format: str = typer.Option(
        "sft",
        "--format", "-f",
        help="Output format: sft or qa",
    ),
    model: str = typer.Option(
        "gpt-4o-mini",
        "--model", "-m",
        help="Model for generation (e.g., gpt-4o-mini, claude-3-5-sonnet, gemini-2.5-flash)",
    ),
    interactive: bool = typer.Option(
        True,
        "--interactive/--no-interactive", "-i/-I",
        help="Enable interactive Logic Map editing before generation (enabled by default)",
    ),
):
    """
    Generate training data from a policy document.

    Examples:

        synkro generate policy.pdf

        synkro generate policies/ # Load all files from folder

        synkro generate "All expenses over $50 need approval" --traces 50

        synkro generate handbook.docx -o training.jsonl -n 100

        synkro generate policy.pdf --interactive # Review and edit Logic Map
    """
    # NOTE: the docstring above doubles as the command's --help text, so
    # parameter documentation lives in the typer.Option/Argument help strings.
    import synkro
    from synkro import Policy

    # Classify the source: existing local path, URL, or raw policy text.
    source_path = Path(source)
    try:
        is_local_path = source_path.exists()
    except OSError:
        # Raw policy text may be longer than the OS allows for a file name;
        # on some Python versions Path.exists() re-raises that OSError instead
        # of returning False. Such input cannot be a path, so treat it as text.
        is_local_path = False

    if is_local_path:
        # Existing local path (file or folder, per the argument help text);
        # loading is delegated to Policy.from_file.
        policy = Policy.from_file(source_path)
    elif source.startswith(("http://", "https://")):
        policy = Policy.from_url(source)
    else:
        policy = Policy(text=source)

    dataset = synkro.generate(
        policy,
        traces=traces,
        generation_model=model,
        enable_hitl=interactive,
    )

    # Without --output, save() is called with no path (the option's help text
    # describes the path as auto-generated in that case).
    if output:
        dataset.save(output, format=format)
    else:
        dataset.save(format=format)
90
+
91
+
@app.command()
def demo():
    """
    Run a quick demo with a built-in example policy.
    """
    import synkro
    from synkro.examples import EXPENSE_POLICY
    from rich.console import Console

    out = Console()
    out.print("\n[cyan]Running demo with built-in expense policy...[/cyan]\n")

    # Generate a small batch from the bundled policy and write it to the
    # working directory in a single chained step.
    synkro.generate(EXPENSE_POLICY, traces=5).save("demo_output.jsonl")

    for message in (
        "\n[green]Demo complete![/green]",
        "[dim]Check demo_output.jsonl for the generated training data.[/dim]\n",
    ):
        out.print(message)
109
+
110
+
@app.command()
def version():
    """Show version information."""
    import synkro
    from rich.console import Console

    # Build the banner first, then hand it to a throwaway Rich console.
    banner = f"[cyan]synkro[/cyan] v{synkro.__version__}"
    Console().print(banner)
119
+
120
+
def main():
    """Run the Typer application; used as the CLI entry point."""
    app()


# Allow running this module directly as a script.
if __name__ == "__main__":
    main()
128
+
synkro/core/__init__.py ADDED
@@ -0,0 +1,7 @@
1
+ """Core classes for policy and dataset management."""
2
+
3
+ from synkro.core.policy import Policy
4
+ from synkro.core.dataset import Dataset
5
+
6
+ __all__ = ["Policy", "Dataset"]
7
+