synkro-0.4.5-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.
Files changed (58)
  1. synkro/__init__.py +165 -0
  2. synkro/cli.py +120 -0
  3. synkro/core/__init__.py +7 -0
  4. synkro/core/dataset.py +233 -0
  5. synkro/core/policy.py +337 -0
  6. synkro/errors.py +178 -0
  7. synkro/examples/__init__.py +148 -0
  8. synkro/factory.py +160 -0
  9. synkro/formatters/__init__.py +12 -0
  10. synkro/formatters/qa.py +85 -0
  11. synkro/formatters/sft.py +90 -0
  12. synkro/formatters/tool_call.py +127 -0
  13. synkro/generation/__init__.py +9 -0
  14. synkro/generation/generator.py +163 -0
  15. synkro/generation/planner.py +87 -0
  16. synkro/generation/responses.py +160 -0
  17. synkro/generation/scenarios.py +90 -0
  18. synkro/generation/tool_responses.py +370 -0
  19. synkro/generation/tool_simulator.py +114 -0
  20. synkro/llm/__init__.py +7 -0
  21. synkro/llm/client.py +235 -0
  22. synkro/llm/rate_limits.py +95 -0
  23. synkro/models/__init__.py +43 -0
  24. synkro/models/anthropic.py +26 -0
  25. synkro/models/google.py +19 -0
  26. synkro/models/openai.py +31 -0
  27. synkro/modes/__init__.py +15 -0
  28. synkro/modes/config.py +66 -0
  29. synkro/modes/qa.py +18 -0
  30. synkro/modes/sft.py +18 -0
  31. synkro/modes/tool_call.py +18 -0
  32. synkro/parsers.py +442 -0
  33. synkro/pipeline/__init__.py +20 -0
  34. synkro/pipeline/phases.py +237 -0
  35. synkro/pipeline/runner.py +198 -0
  36. synkro/pipelines.py +105 -0
  37. synkro/prompts/__init__.py +44 -0
  38. synkro/prompts/base.py +167 -0
  39. synkro/prompts/qa_templates.py +97 -0
  40. synkro/prompts/templates.py +281 -0
  41. synkro/prompts/tool_templates.py +201 -0
  42. synkro/quality/__init__.py +14 -0
  43. synkro/quality/grader.py +130 -0
  44. synkro/quality/refiner.py +137 -0
  45. synkro/quality/tool_grader.py +126 -0
  46. synkro/quality/tool_refiner.py +128 -0
  47. synkro/reporting.py +213 -0
  48. synkro/schemas.py +325 -0
  49. synkro/types/__init__.py +41 -0
  50. synkro/types/core.py +113 -0
  51. synkro/types/dataset_type.py +30 -0
  52. synkro/types/tool.py +94 -0
  53. synkro-0.4.5.data/data/examples/__init__.py +148 -0
  54. synkro-0.4.5.dist-info/METADATA +221 -0
  55. synkro-0.4.5.dist-info/RECORD +58 -0
  56. synkro-0.4.5.dist-info/WHEEL +4 -0
  57. synkro-0.4.5.dist-info/entry_points.txt +2 -0
  58. synkro-0.4.5.dist-info/licenses/LICENSE +21 -0
synkro/generation/tool_simulator.py ADDED
@@ -0,0 +1,114 @@
+ """Tool response simulator for training data generation."""
+
+ import json
+ import uuid
+ from typing import TYPE_CHECKING
+
+ from synkro.prompts.tool_templates import TOOL_SIMULATION_PROMPT
+
+ if TYPE_CHECKING:
+     from synkro.llm.client import LLM
+     from synkro.types.tool import ToolDefinition, ToolCall
+
+
+ class ToolSimulator:
+     """
+     Simulates tool responses for training data generation.
+
+     Uses an LLM to generate realistic, contextual tool responses
+     based on tool definitions and call arguments.
+
+     Example:
+         >>> from synkro.types.tool import ToolDefinition, ToolCall, ToolFunction
+         >>> simulator = ToolSimulator(tools=[web_search_tool], llm=llm)
+         >>> call = ToolCall(
+         ...     id="call_1",
+         ...     function=ToolFunction(name="web_search", arguments='{"query": "weather NYC"}')
+         ... )
+         >>> response = await simulator.simulate(call)
+         >>> print(response)
+         "NYC: 72°F, sunny with a high of 75°F expected"
+     """
+
+     def __init__(self, tools: list["ToolDefinition"], llm: "LLM"):
+         """
+         Initialize the simulator.
+
+         Args:
+             tools: List of available tool definitions
+             llm: LLM client for generating responses
+         """
+         self.tools = {t.name: t for t in tools}
+         self.llm = llm
+
+     async def simulate(self, tool_call: "ToolCall") -> str:
+         """
+         Simulate a tool response for the given call.
+
+         Args:
+             tool_call: The tool call to simulate
+
+         Returns:
+             Simulated tool response content
+         """
+         tool_name = tool_call.function.name
+
+         if tool_name not in self.tools:
+             return json.dumps({"error": f"Unknown tool: {tool_name}"})
+
+         tool = self.tools[tool_name]
+
+         # Format mock responses for the prompt
+         mock_responses = "\n".join(
+             f"- {r}" for r in tool.mock_responses
+         ) if tool.mock_responses else "No example responses provided"
+
+         prompt = TOOL_SIMULATION_PROMPT.format(
+             TOOL_NAME=tool.name,
+             TOOL_DESCRIPTION=tool.description,
+             TOOL_PARAMETERS=json.dumps(tool.parameters, indent=2),
+             ARGUMENTS=tool_call.function.arguments,
+             MOCK_RESPONSES=mock_responses,
+         )
+
+         response = await self.llm.generate(prompt)
+         return response.strip()
+
+     async def simulate_batch(self, tool_calls: list["ToolCall"]) -> list[str]:
+         """
+         Simulate responses for multiple tool calls.
+
+         Args:
+             tool_calls: List of tool calls to simulate
+
+         Returns:
+             List of simulated responses in order
+         """
+         import asyncio
+         return await asyncio.gather(*[self.simulate(tc) for tc in tool_calls])
+
+     def generate_call_id(self) -> str:
+         """Generate a unique tool call ID."""
+         return f"call_{uuid.uuid4().hex[:12]}"
+
+     def get_tools_description(self) -> str:
+         """
+         Get a formatted description of all available tools.
+
+         Returns:
+             Formatted string describing all tools
+         """
+         descriptions = []
+         for tool in self.tools.values():
+             descriptions.append(tool.to_system_prompt())
+         return "\n\n".join(descriptions)
+
+     def get_tools_json(self) -> list[dict]:
+         """
+         Get tools in OpenAI function format.
+
+         Returns:
+             List of tool definitions in OpenAI format
+         """
+         return [tool.to_openai_format() for tool in self.tools.values()]
+
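A minimal usage sketch for the ToolSimulator above. The ToolDefinition constructor arguments are assumptions inferred from the attributes the simulator reads (name, description, parameters, mock_responses); the web_search tool itself is hypothetical and not part of this release.

    import asyncio

    from synkro.llm.client import LLM
    from synkro.types.tool import ToolDefinition, ToolCall, ToolFunction
    from synkro.generation.tool_simulator import ToolSimulator

    async def main() -> None:
        # Hypothetical tool; field names mirror those ToolSimulator accesses
        web_search_tool = ToolDefinition(
            name="web_search",
            description="Search the web for current information",
            parameters={"type": "object", "properties": {"query": {"type": "string"}}},
            mock_responses=["NYC: 72°F, sunny"],
        )

        simulator = ToolSimulator(tools=[web_search_tool], llm=LLM())
        call = ToolCall(
            id=simulator.generate_call_id(),
            function=ToolFunction(name="web_search", arguments='{"query": "weather NYC"}'),
        )
        print(await simulator.simulate(call))           # one simulated tool output
        print(await simulator.simulate_batch([call]))   # list of outputs, same order

    asyncio.run(main())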
synkro/llm/__init__.py ADDED
@@ -0,0 +1,7 @@
+ """LLM client wrapper for multiple providers via LiteLLM."""
+
+ from synkro.llm.client import LLM
+ from synkro.llm.rate_limits import auto_workers, get_provider
+
+ __all__ = ["LLM", "auto_workers", "get_provider"]
+
synkro/llm/client.py ADDED
@@ -0,0 +1,235 @@
+ """Type-safe LLM wrapper using LiteLLM."""
+
+ from typing import TypeVar, Type, overload
+
+ import litellm
+ from litellm import acompletion, supports_response_schema
+ from pydantic import BaseModel
+
+ # Configure litellm
+ litellm.suppress_debug_info = True
+ litellm.enable_json_schema_validation = True
+
+ from synkro.models import OpenAI, Model, get_model_string
+
+
+ T = TypeVar("T", bound=BaseModel)
+
+
+ class LLM:
+     """
+     Type-safe LLM wrapper using LiteLLM for universal provider support.
+
+     Supports structured outputs via native JSON mode for reliable responses.
+
+     Supported providers: OpenAI, Anthropic, Google (Gemini)
+
+     Examples:
+         >>> from synkro import LLM, OpenAI, Anthropic, Google
+
+         # Use OpenAI
+         >>> llm = LLM(model=OpenAI.GPT_4O_MINI)
+         >>> response = await llm.generate("Hello!")
+
+         # Use Anthropic
+         >>> llm = LLM(model=Anthropic.CLAUDE_35_SONNET)
+
+         # Use Google Gemini
+         >>> llm = LLM(model=Google.GEMINI_25_FLASH)
+
+         # Structured output
+         >>> class Output(BaseModel):
+         ...     answer: str
+         ...     confidence: float
+         >>> result = await llm.generate_structured("What is 2+2?", Output)
+         >>> result.answer
+         '4'
+     """
+
+     def __init__(
+         self,
+         model: Model = OpenAI.GPT_4O_MINI,
+         temperature: float = 0.7,
+         max_tokens: int | None = None,
+         api_key: str | None = None,
+     ):
+         """
+         Initialize the LLM client.
+
+         Args:
+             model: Model to use (enum or string)
+             temperature: Sampling temperature (0.0-2.0)
+             max_tokens: Maximum tokens to generate (default: None = model's max)
+             api_key: Optional API key override
+         """
+         self.model = get_model_string(model)
+         self.temperature = temperature
+         self.max_tokens = max_tokens
+         self._api_key = api_key
+
+     async def generate(self, prompt: str, system: str | None = None) -> str:
+         """
+         Generate a text response.
+
+         Args:
+             prompt: The user prompt
+             system: Optional system prompt
+
+         Returns:
+             Generated text response
+         """
+         messages = []
+         if system:
+             messages.append({"role": "system", "content": system})
+         messages.append({"role": "user", "content": prompt})
+
+         kwargs = {
+             "model": self.model,
+             "messages": messages,
+             "temperature": self.temperature,
+             "api_key": self._api_key,
+         }
+         if self.max_tokens is not None:
+             kwargs["max_tokens"] = self.max_tokens
+
+         response = await acompletion(**kwargs)
+         return response.choices[0].message.content
+
+     async def generate_batch(
+         self, prompts: list[str], system: str | None = None
+     ) -> list[str]:
+         """
+         Generate responses for multiple prompts in parallel.
+
+         Args:
+             prompts: List of user prompts
+             system: Optional system prompt for all
+
+         Returns:
+             List of generated responses
+         """
+         import asyncio
+
+         tasks = [self.generate(p, system) for p in prompts]
+         return await asyncio.gather(*tasks)
+
+     @overload
+     async def generate_structured(
+         self,
+         prompt: str,
+         response_model: Type[T],
+         system: str | None = None,
+     ) -> T: ...
+
+     @overload
+     async def generate_structured(
+         self,
+         prompt: str,
+         response_model: Type[list[T]],
+         system: str | None = None,
+     ) -> list[T]: ...
+
+     async def generate_structured(
+         self,
+         prompt: str,
+         response_model: Type[T] | Type[list[T]],
+         system: str | None = None,
+     ) -> T | list[T]:
+         """
+         Generate a structured response matching a Pydantic model.
+
+         Uses LiteLLM's native JSON mode with response_format for
+         reliable structured outputs.
+
+         Args:
+             prompt: The user prompt
+             response_model: Pydantic model class for the response
+             system: Optional system prompt
+
+         Returns:
+             Parsed response matching the model
+
+         Example:
+             >>> class Analysis(BaseModel):
+             ...     sentiment: str
+             ...     score: float
+             >>> result = await llm.generate_structured(
+             ...     "Analyze: I love this product!",
+             ...     Analysis
+             ... )
+             >>> result.sentiment
+             'positive'
+         """
+         # Check if model supports structured outputs
+         if not supports_response_schema(model=self.model, custom_llm_provider=None):
+             raise ValueError(
+                 f"Model '{self.model}' does not support structured outputs (response_format). "
+                 f"Use a model that supports JSON schema like GPT-4o, Gemini 1.5+, or Claude 3.5+."
+             )
+
+         messages = []
+         if system:
+             messages.append({"role": "system", "content": system})
+         messages.append({"role": "user", "content": prompt})
+
+         # Use LiteLLM's native response_format with Pydantic model
+         kwargs = {
+             "model": self.model,
+             "messages": messages,
+             "response_format": response_model,
+             "temperature": self.temperature,
+             "api_key": self._api_key,
+         }
+         if self.max_tokens is not None:
+             kwargs["max_tokens"] = self.max_tokens
+
+         response = await acompletion(**kwargs)
+         return response_model.model_validate_json(response.choices[0].message.content)
+
+     async def generate_chat(
+         self, messages: list[dict], response_model: Type[T] | None = None
+     ) -> str | T:
+         """
+         Generate a response for a full conversation.
+
+         Args:
+             messages: List of message dicts with 'role' and 'content'
+             response_model: Optional Pydantic model for structured output
+
+         Returns:
+             Generated response (string or structured)
+         """
+         if response_model:
+             # Check if model supports structured outputs
+             if not supports_response_schema(model=self.model, custom_llm_provider=None):
+                 raise ValueError(
+                     f"Model '{self.model}' does not support structured outputs (response_format). "
+                     f"Use a model that supports JSON schema like GPT-4o, Gemini 1.5+, or Claude 3.5+."
+                 )
+
+             # Use LiteLLM's native response_format with Pydantic model
+             kwargs = {
+                 "model": self.model,
+                 "messages": messages,
+                 "response_format": response_model,
+                 "temperature": self.temperature,
+                 "api_key": self._api_key,
+             }
+             if self.max_tokens is not None:
+                 kwargs["max_tokens"] = self.max_tokens
+
+             response = await acompletion(**kwargs)
+             return response_model.model_validate_json(response.choices[0].message.content)
+
+         kwargs = {
+             "model": self.model,
+             "messages": messages,
+             "temperature": self.temperature,
+             "api_key": self._api_key,
+         }
+         if self.max_tokens is not None:
+             kwargs["max_tokens"] = self.max_tokens
+
+         response = await acompletion(**kwargs)
+         return response.choices[0].message.content
+
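A short sketch of the text and structured-output paths of the LLM wrapper above, assuming a provider API key (e.g. OPENAI_API_KEY) is available in the environment. The Sentiment model is illustrative and not part of the package.

    import asyncio

    from pydantic import BaseModel

    from synkro.llm.client import LLM
    from synkro.models import OpenAI

    class Sentiment(BaseModel):
        sentiment: str
        score: float

    async def main() -> None:
        llm = LLM(model=OpenAI.GPT_4O_MINI, temperature=0.0)

        # Plain text generation
        text = await llm.generate("Say hello in one word.")

        # Structured generation: supports_response_schema() is checked first, the
        # Pydantic class is passed as response_format, and the returned JSON is
        # parsed with model_validate_json().
        result = await llm.generate_structured("Analyze: I love this product!", Sentiment)
        print(text, result.sentiment, result.score)

    asyncio.run(main())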
synkro/llm/rate_limits.py ADDED
@@ -0,0 +1,95 @@
+ """Automatic worker scaling based on provider rate limits."""
+
+ # Known rate limits per provider (requests per minute)
+ PROVIDER_RATE_LIMITS = {
+     "openai": 60,     # Tier 1 default, scales with tier
+     "anthropic": 60,  # Standard limit
+     "google": 60,     # Gemini API
+     "gemini": 60,     # Gemini API (alternative prefix)
+ }
+
+ # Target 80% of rate limit to avoid hitting caps
+ UTILIZATION_TARGET = 0.8
+
+ # Default workers per provider (pre-computed for convenience)
+ DEFAULT_WORKERS = {
+     "openai": 15,     # ~60 RPM / 3 calls = 20, use 15 to be safe
+     "anthropic": 10,  # ~60 RPM, more conservative
+     "google": 15,     # Gemini
+     "gemini": 15,     # Gemini
+ }
+
+
+ def get_provider(model: str) -> str:
+     """
+     Extract provider name from model string.
+
+     Args:
+         model: Model string like "gpt-4o" or "ollama/llama3.1:8b"
+
+     Returns:
+         Provider name
+     """
+     # Check for explicit prefix
+     if "/" in model:
+         return model.split("/")[0]
+
+     # Infer from model name
+     if model.startswith("gpt") or model.startswith("o1"):
+         return "openai"
+     if model.startswith("claude"):
+         return "anthropic"
+     if model.startswith("gemini"):
+         return "google"
+
+     return "openai"  # Default
+
+
+ def auto_workers(model: str) -> int:
+     """
+     Determine optimal worker count based on the model's provider.
+
+     This calculates a safe default that won't hit rate limits,
+     accounting for the fact that each trace needs ~3 LLM calls
+     (generate, grade, maybe refine).
+
+     Args:
+         model: Model string
+
+     Returns:
+         Recommended worker count
+
+     Example:
+         >>> auto_workers("gpt-4o")
+         16
+         >>> auto_workers("gemini/gemini-2.5-flash")
+         16
+     """
+     provider = get_provider(model)
+     rpm = PROVIDER_RATE_LIMITS.get(provider, 60)
+
+     # Workers = RPM * utilization / avg_calls_per_trace
+     # Each trace needs ~3 calls (generate, grade, maybe refine)
+     avg_calls_per_trace = 3
+
+     workers = int((rpm * UTILIZATION_TARGET) / avg_calls_per_trace)
+
+     # Clamp to reasonable bounds
+     return max(5, min(workers, 100))
+
+
+ def get_default_workers(model: str) -> int:
+     """
+     Quick lookup for worker count.
+
+     Uses pre-computed defaults for common providers.
+
+     Args:
+         model: Model string
+
+     Returns:
+         Default worker count for the provider
+     """
+     provider = get_provider(model)
+     return DEFAULT_WORKERS.get(provider, 10)
+
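The arithmetic above is worth spelling out: with the shipped table every provider resolves to 60 RPM, so auto_workers() returns int(60 * 0.8 / 3) = 16 regardless of provider, clamped to the [5, 100] range. A quick sketch using only functions defined in this module:

    from synkro.llm.rate_limits import auto_workers, get_provider, get_default_workers

    # "gemini/gemini-2.5-flash" has an explicit prefix; "gpt-4o" is inferred by name
    assert get_provider("gemini/gemini-2.5-flash") == "gemini"
    assert get_provider("gpt-4o") == "openai"

    # 60 RPM * 0.8 utilization / 3 calls per trace = 16 workers
    print(auto_workers("gpt-4o"))                             # 16
    print(get_default_workers("claude-3-5-haiku-20241022"))   # 10, from DEFAULT_WORKERS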
synkro/models/__init__.py ADDED
@@ -0,0 +1,43 @@
+ """Model enums for supported LLM providers.
+
+ Supported providers:
+ - OpenAI (GPT-4o, GPT-4o-mini)
+ - Anthropic (Claude 3.5 Sonnet/Haiku)
+ - Google (Gemini 2.5 Flash/Pro)
+
+ Usage:
+     # Per-provider import (recommended)
+     from synkro.models.openai import OpenAI
+     from synkro.models.anthropic import Anthropic
+     from synkro.models.google import Google
+
+     # Convenience import (all at once)
+     from synkro.models import OpenAI, Anthropic, Google
+ """
+
+ from enum import Enum
+ from typing import Union
+
+ from synkro.models.openai import OpenAI
+ from synkro.models.anthropic import Anthropic
+ from synkro.models.google import Google
+
+ # Union type for any model
+ Model = Union[OpenAI, Anthropic, Google, str]
+
+
+ def get_model_string(model: Model) -> str:
+     """Convert a model enum or string to its string value."""
+     if isinstance(model, Enum):
+         return model.value
+     return model
+
+
+ __all__ = [
+     "OpenAI",
+     "Anthropic",
+     "Google",
+     "Model",
+     "get_model_string",
+ ]
+
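A brief sketch of how the Model union and get_model_string() behave: enum members collapse to their string values, and raw strings pass through unchanged (which is how non-enum models like local Ollama strings stay usable).

    from synkro.models import OpenAI, Google, get_model_string

    print(get_model_string(OpenAI.GPT_4O_MINI))      # "gpt-4o-mini"
    print(get_model_string(Google.GEMINI_25_FLASH))  # "gemini/gemini-2.5-flash"
    print(get_model_string("ollama/llama3.1:8b"))    # passed through unchanged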
synkro/models/anthropic.py ADDED
@@ -0,0 +1,26 @@
+ """Anthropic Claude models."""
+
+ from enum import Enum
+
+
+ class Anthropic(str, Enum):
+     """Anthropic Claude models."""
+
+     # Claude 4.5 (latest)
+     CLAUDE_45_OPUS = "claude-opus-4-5-20250601"
+     """Premium: State-of-the-art for coding and autonomous agents"""
+
+     CLAUDE_45_SONNET = "claude-sonnet-4-5-20250601"
+     """Standard: Default model for most users, faster and more context-aware"""
+
+     CLAUDE_45_HAIKU = "claude-haiku-4-5-20250601"
+     """Light: High-speed, cost-effective, matches Claude 3 Opus intelligence"""
+
+     # Claude 4 (previous gen)
+     CLAUDE_4_SONNET = "claude-sonnet-4-20250514"
+     CLAUDE_4_OPUS = "claude-opus-4-20250514"
+
+     # Claude 3.5 (legacy)
+     CLAUDE_35_SONNET = "claude-3-5-sonnet-20241022"
+     CLAUDE_35_HAIKU = "claude-3-5-haiku-20241022"
+
synkro/models/google.py ADDED
@@ -0,0 +1,19 @@
+ """Google Gemini models.
+
+ Updated based on: https://ai.google.dev/gemini-api/docs/models#model-versions
+ """
+
+ from enum import Enum
+
+
+ class Google(str, Enum):
+     """Google Gemini models."""
+
+     GEMINI_3_PRO = "gemini/gemini-3-pro"
+     GEMINI_3_FLASH = "gemini/gemini-3-flash"
+
+     GEMINI_25_FLASH = "gemini/gemini-2.5-flash"
+     GEMINI_25_PRO = "gemini/gemini-2.5-pro"
+
+     GEMINI_2_FLASH = "gemini/gemini-2.0-flash"
+     GEMINI_2_FLASH_LITE = "gemini/gemini-2.0-flash-lite"
synkro/models/openai.py ADDED
@@ -0,0 +1,31 @@
+ """OpenAI models."""
+
+ from enum import Enum
+
+
+ class OpenAI(str, Enum):
+     """OpenAI models."""
+
+     # GPT-5 series (latest)
+     GPT_52 = "gpt-5.2"
+     """Flagship: High-speed, human-like dialogue, agentic tool-calling"""
+
+     GPT_5_MINI = "gpt-5-mini"
+     """Mid-tier: Balanced cost and intelligence, primary workhorse"""
+
+     GPT_5_NANO = "gpt-5-nano"
+     """Edge: Extremely low latency, high-volume basic tasks"""
+
+     # GPT-4 series (legacy)
+     GPT_41 = "gpt-4.1"
+     """Legacy flagship: Smartest non-reasoning model from previous gen"""
+
+     GPT_4O = "gpt-4o"
+     GPT_4O_MINI = "gpt-4o-mini"
+
+     # Reasoning models
+     O3 = "o3"
+     O3_MINI = "o3-mini"
+     O1 = "o1"
+     O1_MINI = "o1-mini"
+
synkro/modes/__init__.py ADDED
@@ -0,0 +1,15 @@
+ """Mode configurations for different dataset types."""
+
+ from synkro.modes.config import ModeConfig, get_mode_config
+ from synkro.modes.qa import QA_CONFIG
+ from synkro.modes.sft import SFT_CONFIG
+ from synkro.modes.tool_call import TOOL_CALL_CONFIG
+
+ __all__ = [
+     "ModeConfig",
+     "get_mode_config",
+     "QA_CONFIG",
+     "SFT_CONFIG",
+     "TOOL_CALL_CONFIG",
+ ]
+
synkro/modes/config.py ADDED
@@ -0,0 +1,66 @@
+ """Mode configuration that bundles prompts, schema, and formatter per dataset type."""
+
+ from dataclasses import dataclass
+ from typing import TYPE_CHECKING, Type
+
+ if TYPE_CHECKING:
+     from synkro.types.dataset_type import DatasetType
+
+
+ @dataclass
+ class ModeConfig:
+     """
+     Configuration bundle for a dataset type.
+
+     Defines all the prompts, schemas, and formatters needed
+     for generating a specific type of dataset.
+     """
+
+     # Prompts
+     scenario_prompt: str
+     """Prompt for generating scenarios/questions"""
+
+     response_prompt: str
+     """Prompt for generating responses/answers"""
+
+     grade_prompt: str
+     """Prompt for grading quality"""
+
+     refine_prompt: str
+     """Prompt for refining failed responses"""
+
+     # Output configuration
+     output_description: str
+     """Human-readable description of output format"""
+
+
+ def get_mode_config(dataset_type: "DatasetType") -> ModeConfig:
+     """
+     Get the mode configuration for a dataset type.
+
+     Args:
+         dataset_type: The type of dataset to generate
+
+     Returns:
+         ModeConfig with appropriate prompts and settings
+
+     Example:
+         >>> from synkro import DatasetType
+         >>> config = get_mode_config(DatasetType.QA)
+     """
+     from synkro.types.dataset_type import DatasetType
+     from synkro.modes.qa import QA_CONFIG
+     from synkro.modes.sft import SFT_CONFIG
+     from synkro.modes.tool_call import TOOL_CALL_CONFIG
+
+     configs = {
+         DatasetType.QA: QA_CONFIG,
+         DatasetType.SFT: SFT_CONFIG,
+         DatasetType.TOOL_CALL: TOOL_CALL_CONFIG,
+     }
+
+     if dataset_type not in configs:
+         raise ValueError(f"Unknown dataset type: {dataset_type}")
+
+     return configs[dataset_type]
+
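A minimal sketch of the lookup above, assuming DatasetType exposes the QA, SFT, and TOOL_CALL members referenced in the mapping (the enum itself is defined in synkro/types/dataset_type.py, not shown in this hunk):

    from synkro.types.dataset_type import DatasetType
    from synkro.modes.config import get_mode_config

    # Each dataset type resolves to a ModeConfig bundling its four prompts
    config = get_mode_config(DatasetType.QA)
    print(config.output_description)    # "Question-Answer pairs: {question, answer, context}"
    print(config.scenario_prompt[:80])  # first part of the QA scenario prompt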
synkro/modes/qa.py ADDED
@@ -0,0 +1,18 @@
+ """QA mode configuration."""
+
+ from synkro.modes.config import ModeConfig
+ from synkro.prompts.qa_templates import (
+     QA_SCENARIO_PROMPT,
+     QA_RESPONSE_PROMPT,
+     QA_GRADE_PROMPT,
+     QA_REFINE_PROMPT,
+ )
+
+ QA_CONFIG = ModeConfig(
+     scenario_prompt=QA_SCENARIO_PROMPT,
+     response_prompt=QA_RESPONSE_PROMPT,
+     grade_prompt=QA_GRADE_PROMPT,
+     refine_prompt=QA_REFINE_PROMPT,
+     output_description="Question-Answer pairs: {question, answer, context}",
+ )
+
synkro/modes/sft.py ADDED
@@ -0,0 +1,18 @@
+ """SFT mode configuration."""
+
+ from synkro.modes.config import ModeConfig
+ from synkro.prompts.templates import (
+     SCENARIO_GENERATOR_PROMPT,
+     SINGLE_RESPONSE_PROMPT,
+     SINGLE_GRADE_PROMPT,
+     BATCHED_REFINER_PROMPT,
+ )
+
+ SFT_CONFIG = ModeConfig(
+     scenario_prompt=SCENARIO_GENERATOR_PROMPT,
+     response_prompt=SINGLE_RESPONSE_PROMPT,
+     grade_prompt=SINGLE_GRADE_PROMPT,
+     refine_prompt=BATCHED_REFINER_PROMPT,
+     output_description="Chat messages: {messages: [system, user, assistant]}",
+ )
+