synkro 0.4.36__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synkro might be problematic. Click here for more details.

Files changed (81)
  1. synkro/__init__.py +331 -0
  2. synkro/advanced.py +184 -0
  3. synkro/cli.py +156 -0
  4. synkro/core/__init__.py +7 -0
  5. synkro/core/checkpoint.py +250 -0
  6. synkro/core/dataset.py +432 -0
  7. synkro/core/policy.py +337 -0
  8. synkro/errors.py +178 -0
  9. synkro/examples/__init__.py +148 -0
  10. synkro/factory.py +291 -0
  11. synkro/formatters/__init__.py +18 -0
  12. synkro/formatters/chatml.py +121 -0
  13. synkro/formatters/langfuse.py +98 -0
  14. synkro/formatters/langsmith.py +98 -0
  15. synkro/formatters/qa.py +112 -0
  16. synkro/formatters/sft.py +90 -0
  17. synkro/formatters/tool_call.py +127 -0
  18. synkro/generation/__init__.py +9 -0
  19. synkro/generation/follow_ups.py +134 -0
  20. synkro/generation/generator.py +314 -0
  21. synkro/generation/golden_responses.py +269 -0
  22. synkro/generation/golden_scenarios.py +333 -0
  23. synkro/generation/golden_tool_responses.py +791 -0
  24. synkro/generation/logic_extractor.py +126 -0
  25. synkro/generation/multiturn_responses.py +177 -0
  26. synkro/generation/planner.py +131 -0
  27. synkro/generation/responses.py +189 -0
  28. synkro/generation/scenarios.py +90 -0
  29. synkro/generation/tool_responses.py +625 -0
  30. synkro/generation/tool_simulator.py +114 -0
  31. synkro/interactive/__init__.py +16 -0
  32. synkro/interactive/hitl_session.py +205 -0
  33. synkro/interactive/intent_classifier.py +94 -0
  34. synkro/interactive/logic_map_editor.py +176 -0
  35. synkro/interactive/rich_ui.py +459 -0
  36. synkro/interactive/scenario_editor.py +198 -0
  37. synkro/llm/__init__.py +7 -0
  38. synkro/llm/client.py +309 -0
  39. synkro/llm/rate_limits.py +99 -0
  40. synkro/models/__init__.py +50 -0
  41. synkro/models/anthropic.py +26 -0
  42. synkro/models/google.py +19 -0
  43. synkro/models/local.py +104 -0
  44. synkro/models/openai.py +31 -0
  45. synkro/modes/__init__.py +13 -0
  46. synkro/modes/config.py +66 -0
  47. synkro/modes/conversation.py +35 -0
  48. synkro/modes/tool_call.py +18 -0
  49. synkro/parsers.py +442 -0
  50. synkro/pipeline/__init__.py +20 -0
  51. synkro/pipeline/phases.py +592 -0
  52. synkro/pipeline/runner.py +769 -0
  53. synkro/pipelines.py +136 -0
  54. synkro/prompts/__init__.py +57 -0
  55. synkro/prompts/base.py +167 -0
  56. synkro/prompts/golden_templates.py +533 -0
  57. synkro/prompts/interactive_templates.py +198 -0
  58. synkro/prompts/multiturn_templates.py +156 -0
  59. synkro/prompts/templates.py +281 -0
  60. synkro/prompts/tool_templates.py +318 -0
  61. synkro/quality/__init__.py +14 -0
  62. synkro/quality/golden_refiner.py +163 -0
  63. synkro/quality/grader.py +153 -0
  64. synkro/quality/multiturn_grader.py +150 -0
  65. synkro/quality/refiner.py +137 -0
  66. synkro/quality/tool_grader.py +126 -0
  67. synkro/quality/tool_refiner.py +128 -0
  68. synkro/quality/verifier.py +228 -0
  69. synkro/reporting.py +464 -0
  70. synkro/schemas.py +521 -0
  71. synkro/types/__init__.py +43 -0
  72. synkro/types/core.py +153 -0
  73. synkro/types/dataset_type.py +33 -0
  74. synkro/types/logic_map.py +348 -0
  75. synkro/types/tool.py +94 -0
  76. synkro-0.4.36.data/data/examples/__init__.py +148 -0
  77. synkro-0.4.36.dist-info/METADATA +507 -0
  78. synkro-0.4.36.dist-info/RECORD +81 -0
  79. synkro-0.4.36.dist-info/WHEEL +4 -0
  80. synkro-0.4.36.dist-info/entry_points.txt +2 -0
  81. synkro-0.4.36.dist-info/licenses/LICENSE +21 -0
synkro/pipelines.py ADDED
@@ -0,0 +1,136 @@
1
+ """Pipeline creation utilities.
2
+
3
+ Usage:
4
+ from synkro.pipelines import create_pipeline
5
+ from synkro.models.openai import OpenAI
6
+ from synkro.types import DatasetType
7
+
8
+ pipeline = create_pipeline(
9
+ model=OpenAI.GPT_5_MINI,
10
+ dataset_type=DatasetType.CONVERSATION,
11
+ )
12
+ dataset = pipeline.generate("policy text", traces=50)
13
+
14
+ # Tool calling pipeline
15
+ from synkro import ToolDefinition
16
+
17
+ web_search = ToolDefinition(
18
+ name="web_search",
19
+ description="Search the web",
20
+ parameters={"type": "object", "properties": {"query": {"type": "string"}}}
21
+ )
22
+
23
+ pipeline = create_pipeline(
24
+ dataset_type=DatasetType.TOOL_CALL,
25
+ tools=[web_search],
26
+ )
27
+ dataset = pipeline.generate("Search guidelines", traces=50)
28
+ """
29
+
30
+ from typing import TYPE_CHECKING
31
+
32
+ from synkro.generation.generator import Generator
33
+ from synkro.types import DatasetType
34
+ from synkro.models import Model, OpenAI
35
+ from synkro.reporting import ProgressReporter
36
+
37
+ if TYPE_CHECKING:
38
+ from synkro.types.tool import ToolDefinition
39
+
40
+
41
def create_pipeline(
    model: Model = OpenAI.GPT_5_MINI,
    dataset_type: DatasetType = DatasetType.CONVERSATION,
    grading_model: Model = OpenAI.GPT_52,
    max_iterations: int = 3,
    skip_grading: bool = False,
    reporter: ProgressReporter | None = None,
    tools: list["ToolDefinition"] | None = None,
    turns: int | str = "auto",
    checkpoint_dir: str | None = None,
    enable_hitl: bool = True,
    base_url: str | None = None,
    thinking: bool = False,
    temperature: float = 0.7,
) -> Generator:
    """
    Create a pipeline for generating training datasets.

    Args:
        model: Model enum for generation (default: OpenAI.GPT_5_MINI)
        dataset_type: Type of dataset - CONVERSATION, INSTRUCTION, EVALUATION, or TOOL_CALL (default: CONVERSATION)
        grading_model: Model enum for grading (default: OpenAI.GPT_52)
        max_iterations: Max refinement iterations per trace (default: 3)
        skip_grading: Skip grading phase for faster generation (default: False)
        reporter: Progress reporter (default: RichReporter for console output)
        tools: List of ToolDefinition for TOOL_CALL dataset type
        turns: Conversation turns per trace. Use int for fixed turns, or "auto"
               for policy complexity-driven turns (Simple=1-2, Conditional=3, Complex=5+)
        checkpoint_dir: Directory for checkpoints. Enables resumable generation.
        enable_hitl: Enable Human-in-the-Loop Logic Map editing (default: True)
        base_url: Optional API base URL for local LLM providers (Ollama, vLLM, etc.)
        thinking: Enable thinking mode with <think> tags in responses (default: False).
                  When enabled, assistant responses will include reasoning wrapped in
                  <think>...</think> tags, compatible with Qwen3 and DeepSeek-R1 formats.
        temperature: Sampling temperature for generation (0.0-2.0, default: 0.7).
                     Lower values (0.1-0.3) produce more deterministic outputs for eval datasets.
                     Higher values (0.7-1.0) produce more diverse outputs for training data.

    Returns:
        Generator instance ready to use

    Example:
        >>> from synkro.pipelines import create_pipeline
        >>> from synkro.models.openai import OpenAI
        >>> from synkro.types import DatasetType
        >>>
        >>> pipeline = create_pipeline(
        ...     model=OpenAI.GPT_5_MINI,
        ...     dataset_type=DatasetType.CONVERSATION,
        ... )
        >>> dataset = pipeline.generate("policy text", traces=50)
        >>> dataset.save("training.jsonl")

        >>> # Multi-turn with fixed 3 turns
        >>> pipeline = create_pipeline(turns=3)
        >>> dataset = pipeline.generate("policy text", traces=50)

        >>> # Silent mode for embedding
        >>> from synkro.reporting import SilentReporter
        >>> pipeline = create_pipeline(reporter=SilentReporter())

        >>> # Interactive Logic Map editing
        >>> pipeline = create_pipeline(enable_hitl=True)
        >>> dataset = pipeline.generate("policy text", traces=50)

        >>> # Tool calling dataset
        >>> from synkro import ToolDefinition
        >>> search_tool = ToolDefinition(
        ...     name="web_search",
        ...     description="Search the web for information",
        ...     parameters={"type": "object", "properties": {"query": {"type": "string"}}}
        ... )
        >>> pipeline = create_pipeline(
        ...     dataset_type=DatasetType.TOOL_CALL,
        ...     tools=[search_tool],
        ... )
        >>> dataset = pipeline.generate("Search guidelines", traces=50)
    """
    # Thin factory: all behavior lives in Generator; this function only maps
    # the public keyword surface onto the Generator constructor.
    return Generator(
        dataset_type=dataset_type,
        generation_model=model,
        grading_model=grading_model,
        max_iterations=max_iterations,
        skip_grading=skip_grading,
        reporter=reporter,
        tools=tools,
        turns=turns,
        checkpoint_dir=checkpoint_dir,
        enable_hitl=enable_hitl,
        base_url=base_url,
        thinking=thinking,
        temperature=temperature,
    )


__all__ = ["create_pipeline"]
@@ -0,0 +1,57 @@
1
+ """Prompt templates and customizable prompt classes for Synkro."""
2
+
3
+ from synkro.prompts.base import (
4
+ SystemPrompt,
5
+ ScenarioPrompt,
6
+ ResponsePrompt,
7
+ GradePrompt,
8
+ RefinePrompt,
9
+ PlanPrompt,
10
+ )
11
+ from synkro.prompts.templates import (
12
+ SYSTEM_PROMPT,
13
+ SCENARIO_GENERATOR_PROMPT,
14
+ CATEGORY_SCENARIO_PROMPT,
15
+ POLICY_PLANNING_PROMPT,
16
+ POLICY_COMPLEXITY_PROMPT,
17
+ BATCHED_RESPONSE_PROMPT,
18
+ BATCHED_GRADER_PROMPT,
19
+ BATCHED_REFINER_PROMPT,
20
+ SINGLE_RESPONSE_PROMPT,
21
+ SINGLE_GRADE_PROMPT,
22
+ )
23
+ from synkro.prompts.multiturn_templates import (
24
+ FOLLOW_UP_GENERATION_PROMPT,
25
+ MULTI_TURN_RESPONSE_PROMPT,
26
+ MULTI_TURN_INITIAL_PROMPT,
27
+ MULTI_TURN_GRADE_PROMPT,
28
+ MULTI_TURN_REFINE_PROMPT,
29
+ )
30
+
31
# Public API of synkro.prompts: the customizable prompt classes plus the raw
# string templates they default to (single-turn and multi-turn variants).
__all__ = [
    # Prompt classes
    "SystemPrompt",
    "ScenarioPrompt",
    "ResponsePrompt",
    "GradePrompt",
    "RefinePrompt",
    "PlanPrompt",
    # Raw templates
    "SYSTEM_PROMPT",
    "SCENARIO_GENERATOR_PROMPT",
    "CATEGORY_SCENARIO_PROMPT",
    "POLICY_PLANNING_PROMPT",
    "POLICY_COMPLEXITY_PROMPT",
    "BATCHED_RESPONSE_PROMPT",
    "BATCHED_GRADER_PROMPT",
    "BATCHED_REFINER_PROMPT",
    "SINGLE_RESPONSE_PROMPT",
    "SINGLE_GRADE_PROMPT",
    # Multi-turn templates
    "FOLLOW_UP_GENERATION_PROMPT",
    "MULTI_TURN_RESPONSE_PROMPT",
    "MULTI_TURN_INITIAL_PROMPT",
    "MULTI_TURN_GRADE_PROMPT",
    "MULTI_TURN_REFINE_PROMPT",
]
57
+
synkro/prompts/base.py ADDED
@@ -0,0 +1,167 @@
1
+ """Customizable prompt classes for building your own generation pipelines."""
2
+
3
+ from pydantic import BaseModel, Field
4
+ from synkro.prompts.templates import (
5
+ SYSTEM_PROMPT,
6
+ SCENARIO_GENERATOR_PROMPT,
7
+ BATCHED_RESPONSE_PROMPT,
8
+ BATCHED_GRADER_PROMPT,
9
+ BATCHED_REFINER_PROMPT,
10
+ POLICY_PLANNING_PROMPT,
11
+ )
12
+
13
+
14
class SystemPrompt(BaseModel):
    """The system prompt that defines the expert's role and behavior."""

    template: str = Field(default=SYSTEM_PROMPT)

    def render(self, **kwargs) -> str:
        """Render the prompt, substituting any provided template variables.

        With no keyword arguments the template is returned verbatim.
        """
        if not kwargs:
            return self.template
        return self.template.format(**kwargs)
22
+
23
+
24
class ScenarioPrompt(BaseModel):
    """Prompt for generating scenarios from policy documents."""

    template: str = Field(default=SCENARIO_GENERATOR_PROMPT)

    def render(self, policy: str, count: int, category: str | None = None) -> str:
        """
        Render the scenario generation prompt.

        Args:
            policy: The policy text
            count: Number of scenarios to generate
            category: Optional category to focus scenarios on
        """
        # Assemble the prompt from parts; the category focus line is appended
        # only when a (truthy) category was supplied.
        pieces = [
            f"{self.template}\n\nPOLICY:\n{policy}\n\nGenerate exactly {count} scenarios."
        ]
        if category:
            pieces.append(f"\n\nFocus on scenarios related to: {category}")
        return "".join(pieces)
42
+
43
+
44
class ResponsePrompt(BaseModel):
    """Prompt for generating responses to scenarios."""

    template: str = Field(default=BATCHED_RESPONSE_PROMPT)
    system_prompt: str = Field(default=SYSTEM_PROMPT)

    def render(self, scenarios: list[dict], policy: str) -> str:
        """
        Render the response generation prompt.

        Args:
            scenarios: List of scenario dicts with 'description' and 'context'
            policy: The policy text for grounding responses
        """
        # One numbered section per scenario, separated by blank lines.
        sections = []
        for idx, scenario in enumerate(scenarios):
            sections.append(
                f"SCENARIO {idx}:\n{scenario['description']}\n\nCONTEXT:\n{scenario['context']}"
            )
        scenarios_block = "\n\n".join(sections)

        return f"""{self.template}

SYSTEM PROMPT TO USE:
{self.system_prompt}

POLICY:
{policy}

SCENARIOS:
{scenarios_block}"""
73
+
74
+
75
class GradePrompt(BaseModel):
    """Prompt for grading response quality."""

    template: str = Field(default=BATCHED_GRADER_PROMPT)

    def render(self, responses: list[dict], policy: str) -> str:
        """
        Render the grading prompt.

        Args:
            responses: List of response dicts with messages
            policy: The policy text to grade against
        """

        def _assistant_text(r: dict) -> str:
            # Prefer an explicit 'assistant_message'. Otherwise fall back to the
            # content of the last message. The previous one-liner evaluated the
            # fallback eagerly and indexed r['messages'][-1] directly, which
            # raised IndexError when 'messages' was present but empty.
            if "assistant_message" in r:
                return r["assistant_message"]
            messages = r.get("messages") or [{}]
            return messages[-1].get("content", "")

        responses_text = "\n\n".join(
            f"RESPONSE {i}:\n{_assistant_text(r)}" for i, r in enumerate(responses)
        )

        return f"""{self.template}

POLICY:
{policy}

RESPONSES TO GRADE:
{responses_text}"""
100
+
101
+
102
class RefinePrompt(BaseModel):
    """Prompt for refining failed responses."""

    # Base refiner instructions; policy and per-item feedback are appended in render().
    template: str = Field(default=BATCHED_REFINER_PROMPT)
    # System prompt the refined responses should be written under.
    system_prompt: str = Field(default=SYSTEM_PROMPT)

    def render(self, failed_items: list[dict], policy: str) -> str:
        """
        Render the refinement prompt.

        Args:
            failed_items: List of dicts with 'scenario', 'response', and 'feedback'
            policy: The policy text
        """
        # Each failed item becomes a numbered section pairing the original
        # response with the grader's structured feedback. Optional feedback
        # keys default to empty lists/strings rather than raising KeyError.
        # NOTE: the unindented f-string bodies are deliberate — their layout
        # is the literal prompt text sent to the model.
        items_text = "\n\n".join(
            f"""SCENARIO {i}:
{item['scenario']}

ORIGINAL RESPONSE:
{item['response']}

GRADER FEEDBACK:
- Policy Violations: {item.get('policy_violations', [])}
- Missing Citations: {item.get('missing_citations', [])}
- Incomplete Reasoning: {item.get('incomplete_reasoning', [])}
- Vague Recommendations: {item.get('vague_recommendations', [])}
- Summary: {item.get('feedback', '')}"""
            for i, item in enumerate(failed_items)
        )

        return f"""{self.template}

SYSTEM PROMPT TO USE:
{self.system_prompt}

POLICY:
{policy}

ITEMS TO REFINE:
{items_text}"""
142
+
143
+
144
class PlanPrompt(BaseModel):
    """Prompt for planning generation categories."""

    template: str = Field(default=POLICY_PLANNING_PROMPT)

    def render(self, policy: str, target_traces: int) -> str:
        """
        Render the planning prompt.

        Args:
            policy: The policy text to analyze
            target_traces: Target number of traces to generate
        """
        # Built via explicit concatenation; the result is identical to the
        # former triple-quoted f-string, including blank separator lines.
        return (
            f"{self.template}\n"
            "\n"
            "POLICY/DOMAIN SPECIFICATION:\n"
            f"{policy}\n"
            "\n"
            f"TARGET TRACES: {target_traces}\n"
            "\n"
            "Respond with a JSON object containing:\n"
            '- "categories": array of category objects with "name", "description", and "traces"\n'
            '- "reasoning": explanation of your analysis and category choices'
        )