synkro 0.4.36__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synkro might be problematic.
- synkro/__init__.py +331 -0
- synkro/advanced.py +184 -0
- synkro/cli.py +156 -0
- synkro/core/__init__.py +7 -0
- synkro/core/checkpoint.py +250 -0
- synkro/core/dataset.py +432 -0
- synkro/core/policy.py +337 -0
- synkro/errors.py +178 -0
- synkro/examples/__init__.py +148 -0
- synkro/factory.py +291 -0
- synkro/formatters/__init__.py +18 -0
- synkro/formatters/chatml.py +121 -0
- synkro/formatters/langfuse.py +98 -0
- synkro/formatters/langsmith.py +98 -0
- synkro/formatters/qa.py +112 -0
- synkro/formatters/sft.py +90 -0
- synkro/formatters/tool_call.py +127 -0
- synkro/generation/__init__.py +9 -0
- synkro/generation/follow_ups.py +134 -0
- synkro/generation/generator.py +314 -0
- synkro/generation/golden_responses.py +269 -0
- synkro/generation/golden_scenarios.py +333 -0
- synkro/generation/golden_tool_responses.py +791 -0
- synkro/generation/logic_extractor.py +126 -0
- synkro/generation/multiturn_responses.py +177 -0
- synkro/generation/planner.py +131 -0
- synkro/generation/responses.py +189 -0
- synkro/generation/scenarios.py +90 -0
- synkro/generation/tool_responses.py +625 -0
- synkro/generation/tool_simulator.py +114 -0
- synkro/interactive/__init__.py +16 -0
- synkro/interactive/hitl_session.py +205 -0
- synkro/interactive/intent_classifier.py +94 -0
- synkro/interactive/logic_map_editor.py +176 -0
- synkro/interactive/rich_ui.py +459 -0
- synkro/interactive/scenario_editor.py +198 -0
- synkro/llm/__init__.py +7 -0
- synkro/llm/client.py +309 -0
- synkro/llm/rate_limits.py +99 -0
- synkro/models/__init__.py +50 -0
- synkro/models/anthropic.py +26 -0
- synkro/models/google.py +19 -0
- synkro/models/local.py +104 -0
- synkro/models/openai.py +31 -0
- synkro/modes/__init__.py +13 -0
- synkro/modes/config.py +66 -0
- synkro/modes/conversation.py +35 -0
- synkro/modes/tool_call.py +18 -0
- synkro/parsers.py +442 -0
- synkro/pipeline/__init__.py +20 -0
- synkro/pipeline/phases.py +592 -0
- synkro/pipeline/runner.py +769 -0
- synkro/pipelines.py +136 -0
- synkro/prompts/__init__.py +57 -0
- synkro/prompts/base.py +167 -0
- synkro/prompts/golden_templates.py +533 -0
- synkro/prompts/interactive_templates.py +198 -0
- synkro/prompts/multiturn_templates.py +156 -0
- synkro/prompts/templates.py +281 -0
- synkro/prompts/tool_templates.py +318 -0
- synkro/quality/__init__.py +14 -0
- synkro/quality/golden_refiner.py +163 -0
- synkro/quality/grader.py +153 -0
- synkro/quality/multiturn_grader.py +150 -0
- synkro/quality/refiner.py +137 -0
- synkro/quality/tool_grader.py +126 -0
- synkro/quality/tool_refiner.py +128 -0
- synkro/quality/verifier.py +228 -0
- synkro/reporting.py +464 -0
- synkro/schemas.py +521 -0
- synkro/types/__init__.py +43 -0
- synkro/types/core.py +153 -0
- synkro/types/dataset_type.py +33 -0
- synkro/types/logic_map.py +348 -0
- synkro/types/tool.py +94 -0
- synkro-0.4.36.data/data/examples/__init__.py +148 -0
- synkro-0.4.36.dist-info/METADATA +507 -0
- synkro-0.4.36.dist-info/RECORD +81 -0
- synkro-0.4.36.dist-info/WHEEL +4 -0
- synkro-0.4.36.dist-info/entry_points.txt +2 -0
- synkro-0.4.36.dist-info/licenses/LICENSE +21 -0
synkro/llm/client.py
ADDED
@@ -0,0 +1,309 @@
"""Type-safe LLM wrapper using LiteLLM."""

import warnings
from typing import TypeVar, Type, overload

import litellm
from litellm import acompletion, supports_response_schema, completion_cost
from pydantic import BaseModel

# Configure litellm
litellm.suppress_debug_info = True
litellm.enable_json_schema_validation = True
litellm.drop_params = True  # Drop unsupported params (e.g., temperature for gpt-5)

# Suppress Pydantic serialization warnings from litellm response types
warnings.filterwarnings(
    "ignore",
    message=".*Pydantic serializer warnings.*",
    category=UserWarning,
)
warnings.filterwarnings(
    "ignore",
    message=".*Expected `Message`.*",
    category=UserWarning,
)
warnings.filterwarnings(
    "ignore",
    message=".*Expected `StreamingChoices`.*",
    category=UserWarning,
)

from synkro.models import OpenAI, Model, get_model_string, LocalModel


T = TypeVar("T", bound=BaseModel)


class LLM:
    """
    Type-safe LLM wrapper using LiteLLM for universal provider support.

    Supports structured outputs via native JSON mode for reliable responses.

    Supported providers: OpenAI, Anthropic, Google (Gemini), Local (Ollama, vLLM)

    Examples:
        >>> from synkro import LLM, OpenAI, Anthropic, Google, Local

        # Use OpenAI
        >>> llm = LLM(model=OpenAI.GPT_4O_MINI)
        >>> response = await llm.generate("Hello!")

        # Use Anthropic
        >>> llm = LLM(model=Anthropic.CLAUDE_35_SONNET)

        # Use Google Gemini
        >>> llm = LLM(model=Google.GEMINI_25_FLASH)

        # Use local Ollama
        >>> llm = LLM(model=Local.OLLAMA("llama3.1"))

        # Use local vLLM
        >>> llm = LLM(model=Local.VLLM("mistral"))

        # Structured output
        >>> class Output(BaseModel):
        ...     answer: str
        ...     confidence: float
        >>> result = await llm.generate_structured("What is 2+2?", Output)
        >>> result.answer
        '4'
    """

    def __init__(
        self,
        model: Model = OpenAI.GPT_4O_MINI,
        temperature: float = 0.7,
        max_tokens: int | None = None,
        api_key: str | None = None,
        base_url: str | None = None,
    ):
        """
        Initialize the LLM client.

        Args:
            model: Model to use (enum, LocalModel, or string)
            temperature: Sampling temperature (0.0-2.0)
            max_tokens: Maximum tokens to generate (default: None = model's max)
            api_key: Optional API key override
            base_url: Optional API base URL (auto-set when using Local models)
        """
        # Handle LocalModel - extract endpoint automatically
        if isinstance(model, LocalModel):
            self.model = f"{model.provider}/{model.model}"
            self._base_url = model.endpoint
        else:
            self.model = get_model_string(model)
            self._base_url = base_url

        self.temperature = temperature
        self.max_tokens = max_tokens
        self._api_key = api_key

        # Cost and usage tracking
        self._total_cost = 0.0
        self._call_count = 0

    async def generate(self, prompt: str, system: str | None = None) -> str:
        """
        Generate a text response.

        Args:
            prompt: The user prompt
            system: Optional system prompt

        Returns:
            Generated text response
        """
        messages = []
        if system:
            messages.append({"role": "system", "content": system})
        messages.append({"role": "user", "content": prompt})

        kwargs = {
            "model": self.model,
            "messages": messages,
            "temperature": self.temperature,
            "api_key": self._api_key,
        }
        if self.max_tokens is not None:
            kwargs["max_tokens"] = self.max_tokens
        if self._base_url:
            kwargs["api_base"] = self._base_url

        response = await acompletion(**kwargs)
        self._track_cost(response)
        return response.choices[0].message.content

    async def generate_batch(
        self, prompts: list[str], system: str | None = None
    ) -> list[str]:
        """
        Generate responses for multiple prompts in parallel.

        Args:
            prompts: List of user prompts
            system: Optional system prompt for all

        Returns:
            List of generated responses
        """
        import asyncio

        tasks = [self.generate(p, system) for p in prompts]
        return await asyncio.gather(*tasks)

    @overload
    async def generate_structured(
        self,
        prompt: str,
        response_model: Type[T],
        system: str | None = None,
    ) -> T: ...

    @overload
    async def generate_structured(
        self,
        prompt: str,
        response_model: Type[list[T]],
        system: str | None = None,
    ) -> list[T]: ...

    async def generate_structured(
        self,
        prompt: str,
        response_model: Type[T] | Type[list[T]],
        system: str | None = None,
    ) -> T | list[T]:
        """
        Generate a structured response matching a Pydantic model.

        Uses LiteLLM's native JSON mode with response_format for
        reliable structured outputs.

        Args:
            prompt: The user prompt
            response_model: Pydantic model class for the response
            system: Optional system prompt

        Returns:
            Parsed response matching the model

        Example:
            >>> class Analysis(BaseModel):
            ...     sentiment: str
            ...     score: float
            >>> result = await llm.generate_structured(
            ...     "Analyze: I love this product!",
            ...     Analysis
            ... )
            >>> result.sentiment
            'positive'
        """
        # Check if model supports structured outputs
        if not supports_response_schema(model=self.model, custom_llm_provider=None):
            raise ValueError(
                f"Model '{self.model}' does not support structured outputs (response_format). "
                f"Use a model that supports JSON schema like GPT-4o, Gemini 1.5+, or Claude 3.5+."
            )

        messages = []
        if system:
            messages.append({"role": "system", "content": system})
        messages.append({"role": "user", "content": prompt})

        # Use LiteLLM's native response_format with Pydantic model
        kwargs = {
            "model": self.model,
            "messages": messages,
            "response_format": response_model,
            "temperature": self.temperature,
            "api_key": self._api_key,
        }
        if self.max_tokens is not None:
            kwargs["max_tokens"] = self.max_tokens
        if self._base_url:
            kwargs["api_base"] = self._base_url

        response = await acompletion(**kwargs)
        self._track_cost(response)
        return response_model.model_validate_json(response.choices[0].message.content)

    async def generate_chat(
        self, messages: list[dict], response_model: Type[T] | None = None
    ) -> str | T:
        """
        Generate a response for a full conversation.

        Args:
            messages: List of message dicts with 'role' and 'content'
            response_model: Optional Pydantic model for structured output

        Returns:
            Generated response (string or structured)
        """
        if response_model:
            # Check if model supports structured outputs
            if not supports_response_schema(model=self.model, custom_llm_provider=None):
                raise ValueError(
                    f"Model '{self.model}' does not support structured outputs (response_format). "
                    f"Use a model that supports JSON schema like GPT-4o, Gemini 1.5+, or Claude 3.5+."
                )

            # Use LiteLLM's native response_format with Pydantic model
            kwargs = {
                "model": self.model,
                "messages": messages,
                "response_format": response_model,
                "temperature": self.temperature,
                "api_key": self._api_key,
            }
            if self.max_tokens is not None:
                kwargs["max_tokens"] = self.max_tokens
            if self._base_url:
                kwargs["api_base"] = self._base_url

            response = await acompletion(**kwargs)
            self._track_cost(response)
            return response_model.model_validate_json(response.choices[0].message.content)

        kwargs = {
            "model": self.model,
            "messages": messages,
            "temperature": self.temperature,
            "api_key": self._api_key,
        }
        if self.max_tokens is not None:
            kwargs["max_tokens"] = self.max_tokens
        if self._base_url:
            kwargs["api_base"] = self._base_url

        response = await acompletion(**kwargs)
        self._track_cost(response)
        return response.choices[0].message.content

    def _track_cost(self, response) -> None:
        """Track cost and call count from a response."""
        self._call_count += 1
        try:
            cost = completion_cost(completion_response=response)
            self._total_cost += cost
        except Exception:
            # Some models may not have pricing info
            pass

    @property
    def total_cost(self) -> float:
        """Get total cost of all LLM calls made by this client."""
        return self._total_cost

    @property
    def call_count(self) -> int:
        """Get total number of LLM calls made by this client."""
        return self._call_count

    def reset_tracking(self) -> None:
        """Reset cost and call tracking."""
        self._total_cost = 0.0
        self._call_count = 0
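For orientation, a minimal usage sketch for the LLM client above. It follows the import style shown in the class docstring (from synkro import LLM, OpenAI), assumes the relevant provider API key is already set in the environment for LiteLLM to pick up, and uses an illustrative prompt and Pydantic model rather than anything shipped with the package.

import asyncio

from pydantic import BaseModel

from synkro import LLM, OpenAI  # root re-exports assumed per the class docstring


class MathAnswer(BaseModel):
    answer: str
    confidence: float


async def main() -> None:
    llm = LLM(model=OpenAI.GPT_4O_MINI, temperature=0.2)

    # Plain text generation
    text = await llm.generate("Summarize LiteLLM in one sentence.")
    print(text)

    # Structured generation, validated against the Pydantic model
    result = await llm.generate_structured("What is 2+2?", MathAnswer)
    print(result.answer, result.confidence)

    # Built-in cost and call tracking
    print(f"calls={llm.call_count}, cost=${llm.total_cost:.4f}")


asyncio.run(main())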
synkro/llm/rate_limits.py
ADDED
@@ -0,0 +1,99 @@
"""Automatic worker scaling based on provider rate limits."""

# Known rate limits per provider (requests per minute)
PROVIDER_RATE_LIMITS = {
    "openai": 60,  # Tier 1 default, scales with tier
    "anthropic": 60,  # Standard limit
    "google": 60,  # Gemini API
    "gemini": 60,  # Gemini API (alternative prefix)
    "ollama": 1000,  # Local - no real limit
    "vllm": 1000,  # Local - no real limit
}

# Target 80% of rate limit to avoid hitting caps
UTILIZATION_TARGET = 0.8

# Default workers per provider (pre-computed for convenience)
DEFAULT_WORKERS = {
    "openai": 15,  # ~60 RPM / 3 calls = 20, use 15 to be safe
    "anthropic": 10,  # ~60 RPM, more conservative
    "google": 15,  # Gemini
    "gemini": 15,  # Gemini
    "ollama": 50,  # Local - high parallelism
    "vllm": 50,  # Local - high parallelism
}


def get_provider(model: str) -> str:
    """
    Extract provider name from model string.

    Args:
        model: Model string like "gpt-4o" or "ollama/llama3.1:8b"

    Returns:
        Provider name
    """
    # Check for explicit prefix
    if "/" in model:
        return model.split("/")[0]

    # Infer from model name
    if model.startswith("gpt") or model.startswith("o1"):
        return "openai"
    if model.startswith("claude"):
        return "anthropic"
    if model.startswith("gemini"):
        return "google"

    return "openai"  # Default


def auto_workers(model: str) -> int:
    """
    Determine optimal worker count based on model's provider.

    This calculates a safe default that won't hit rate limits,
    accounting for the fact that each trace needs ~3 LLM calls
    (generate, grade, maybe refine).

    Args:
        model: Model string

    Returns:
        Recommended worker count

    Example:
        >>> auto_workers("gpt-4o")
        15
        >>> auto_workers("gemini/gemini-2.5-flash")
        15
    """
    provider = get_provider(model)
    rpm = PROVIDER_RATE_LIMITS.get(provider, 60)

    # Workers = RPM * utilization / avg_calls_per_trace
    # Each trace needs ~3 calls (generate, grade, maybe refine)
    avg_calls_per_trace = 3

    workers = int((rpm * UTILIZATION_TARGET) / avg_calls_per_trace)

    # Clamp to reasonable bounds
    return max(5, min(workers, 100))


def get_default_workers(model: str) -> int:
    """
    Quick lookup for worker count.

    Uses pre-computed defaults for common providers.

    Args:
        model: Model string

    Returns:
        Default worker count for the provider
    """
    provider = get_provider(model)
    return DEFAULT_WORKERS.get(provider, 10)
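A short sketch of how the helpers above compose; the model strings are illustrative, and the expected values follow directly from the constants and clamping logic in this module.

from synkro.llm.rate_limits import auto_workers, get_default_workers, get_provider

# Provider comes from an explicit "provider/" prefix, else is inferred from the name.
print(get_provider("ollama/llama3.1:8b"))           # ollama
print(get_provider("claude-3-5-sonnet-20241022"))   # anthropic

# Local providers use a 1000 RPM placeholder, so int(1000 * 0.8 / 3) = 266
# is clamped to the 100-worker upper bound.
print(auto_workers("ollama/llama3.1:8b"))           # 100

# The pre-computed table is deliberately more conservative for local providers.
print(get_default_workers("ollama/llama3.1:8b"))    # 50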
synkro/models/__init__.py
ADDED
@@ -0,0 +1,50 @@
"""Model enums for supported LLM providers.

Supported providers:
- OpenAI (GPT-4o, GPT-4o-mini)
- Anthropic (Claude 3.5 Sonnet/Haiku)
- Google (Gemini 2.5 Flash/Pro)
- Local (Ollama, vLLM, custom)

Usage:
    # Per-provider import (recommended)
    from synkro.models.openai import OpenAI
    from synkro.models.anthropic import Anthropic
    from synkro.models.google import Google
    from synkro.models.local import Local

    # Convenience import (all at once)
    from synkro.models import OpenAI, Anthropic, Google, Local
"""

from enum import Enum
from typing import Union

from synkro.models.openai import OpenAI
from synkro.models.anthropic import Anthropic
from synkro.models.google import Google
from synkro.models.local import Local, LocalModel

# Union type for any model
Model = Union[OpenAI, Anthropic, Google, LocalModel, str]


def get_model_string(model: Model) -> str:
    """Convert a model enum or string to its string value."""
    if isinstance(model, Enum):
        return model.value
    if isinstance(model, LocalModel):
        return str(model)
    return model


__all__ = [
    "OpenAI",
    "Anthropic",
    "Google",
    "Local",
    "LocalModel",
    "Model",
    "get_model_string",
]
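A small sketch of how the Model union is normalized by get_model_string, using enum members and the Local factory defined elsewhere in this diff.

from synkro.models import Local, OpenAI, get_model_string

# Enums normalize to their string value, LocalModel to "provider/model",
# and plain strings pass through unchanged.
print(get_model_string(OpenAI.GPT_4O_MINI))         # gpt-4o-mini
print(get_model_string(Local.OLLAMA("llama3.1")))   # ollama/llama3.1
print(get_model_string("gpt-4o"))                   # gpt-4o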
synkro/models/anthropic.py
ADDED
@@ -0,0 +1,26 @@
"""Anthropic Claude models."""

from enum import Enum


class Anthropic(str, Enum):
    """Anthropic Claude models."""

    # Claude 4.5 (latest)
    CLAUDE_45_OPUS = "claude-opus-4-5-20250601"
    """Premium: State-of-the-art for coding and autonomous agents"""

    CLAUDE_45_SONNET = "claude-sonnet-4-5-20250601"
    """Standard: Default model for most users, faster and more context-aware"""

    CLAUDE_45_HAIKU = "claude-haiku-4-5-20250601"
    """Light: High-speed, cost-effective, matches Claude 3 Opus intelligence"""

    # Claude 4 (previous gen)
    CLAUDE_4_SONNET = "claude-sonnet-4-20250514"
    CLAUDE_4_OPUS = "claude-opus-4-20250514"

    # Claude 3.5 (legacy)
    CLAUDE_35_SONNET = "claude-3-5-sonnet-20241022"
    CLAUDE_35_HAIKU = "claude-3-5-haiku-20241022"
synkro/models/google.py
ADDED
@@ -0,0 +1,19 @@
"""Google Gemini models.

Updated based on: https://ai.google.dev/gemini-api/docs/models#model-versions
"""

from enum import Enum


class Google(str, Enum):
    """Google Gemini models."""

    GEMINI_3_PRO = "gemini/gemini-3-pro"
    GEMINI_3_FLASH = "gemini/gemini-3-flash"

    GEMINI_25_FLASH = "gemini/gemini-2.5-flash"
    GEMINI_25_PRO = "gemini/gemini-2.5-pro"

    GEMINI_2_FLASH = "gemini/gemini-2.0-flash"
    GEMINI_2_FLASH_LITE = "gemini/gemini-2.0-flash-lite"
synkro/models/local.py
ADDED
@@ -0,0 +1,104 @@
"""Local LLM providers (Ollama, vLLM, etc.)."""

from __future__ import annotations

from dataclasses import dataclass


@dataclass
class LocalModel:
    """Represents a local model configuration.

    Attributes:
        provider: The provider name (ollama, vllm, openai)
        model: The model name
        endpoint: The API endpoint URL
    """

    provider: str
    model: str
    endpoint: str

    def __str__(self) -> str:
        return f"{self.provider}/{self.model}"


class Local:
    """Factory for local LLM configurations.

    Provides a clean API for configuring local LLM providers like Ollama and vLLM.
    Returns LocalModel instances that the LLM client can use to configure the
    connection automatically.

    Examples:
        >>> from synkro import LLM, Local

        # Ollama (default localhost:11434)
        >>> llm = LLM(model=Local.OLLAMA("llama3.1"))

        # vLLM (default localhost:8000)
        >>> llm = LLM(model=Local.VLLM("mistral"))

        # Custom endpoint
        >>> llm = LLM(model=Local.OLLAMA("llama3.1", endpoint="http://server:11434"))

        # Any OpenAI-compatible server
        >>> llm = LLM(model=Local.CUSTOM("my-model", endpoint="http://localhost:8080/v1"))
    """

    DEFAULT_ENDPOINTS = {
        "ollama": "http://localhost:11434",
        "vllm": "http://localhost:8000",
        "openai": "http://localhost:8000/v1",
    }

    @classmethod
    def OLLAMA(cls, model: str, endpoint: str | None = None) -> LocalModel:
        """Create Ollama model config.

        Args:
            model: Model name (e.g., "llama3.1", "mistral", "codellama")
            endpoint: Optional custom endpoint (default: http://localhost:11434)

        Returns:
            LocalModel configured for Ollama
        """
        return LocalModel(
            provider="ollama",
            model=model,
            endpoint=endpoint or cls.DEFAULT_ENDPOINTS["ollama"],
        )

    @classmethod
    def VLLM(cls, model: str, endpoint: str | None = None) -> LocalModel:
        """Create vLLM model config.

        Args:
            model: Model name (e.g., "mistral-7b", "llama-2-13b")
            endpoint: Optional custom endpoint (default: http://localhost:8000)

        Returns:
            LocalModel configured for vLLM
        """
        return LocalModel(
            provider="vllm",
            model=model,
            endpoint=endpoint or cls.DEFAULT_ENDPOINTS["vllm"],
        )

    @classmethod
    def CUSTOM(cls, model: str, endpoint: str) -> LocalModel:
        """Create custom local model config for any OpenAI-compatible server.

        Args:
            model: Model name
            endpoint: API endpoint URL (required)

        Returns:
            LocalModel configured for OpenAI-compatible API
        """
        return LocalModel(
            provider="openai",
            model=model,
            endpoint=endpoint,
        )
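A brief sketch of the Local factory in use, showing how default and custom endpoints land on the returned LocalModel, which the LLM client above splits into a "provider/model" string and an api_base. The hostnames are illustrative.

from synkro.models.local import Local

# Default endpoint is filled in per provider; a custom endpoint overrides it.
m1 = Local.OLLAMA("llama3.1")
print(m1.provider, m1.model, m1.endpoint)   # ollama llama3.1 http://localhost:11434

m2 = Local.VLLM("mistral", endpoint="http://gpu-box:8000")
print(str(m2), m2.endpoint)                 # vllm/mistral http://gpu-box:8000

# Any OpenAI-compatible server is routed via the "openai" provider prefix.
m3 = Local.CUSTOM("my-model", endpoint="http://localhost:8080/v1")
print(str(m3))                              # openai/my-model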
synkro/models/openai.py
ADDED
@@ -0,0 +1,31 @@
"""OpenAI models."""

from enum import Enum


class OpenAI(str, Enum):
    """OpenAI models."""

    # GPT-5 series (latest)
    GPT_52 = "gpt-5.2"
    """Flagship: High-speed, human-like dialogue, agentic tool-calling"""

    GPT_5_MINI = "gpt-5-mini"
    """Mid-tier: Balanced cost and intelligence, primary workhorse"""

    GPT_5_NANO = "gpt-5-nano"
    """Edge: Extremely low latency, high-volume basic tasks"""

    # GPT-4 series (legacy)
    GPT_41 = "gpt-4.1"
    """Legacy flagship: Smartest non-reasoning model from previous gen"""

    GPT_4O = "gpt-4o"
    GPT_4O_MINI = "gpt-4o-mini"

    # Reasoning models
    O3 = "o3"
    O3_MINI = "o3-mini"
    O1 = "o1"
    O1_MINI = "o1-mini"
synkro/modes/__init__.py
ADDED
@@ -0,0 +1,13 @@
"""Mode configurations for different dataset types."""

from synkro.modes.config import ModeConfig, get_mode_config
from synkro.modes.conversation import CONVERSATION_CONFIG, INSTRUCTION_CONFIG
from synkro.modes.tool_call import TOOL_CALL_CONFIG

__all__ = [
    "ModeConfig",
    "get_mode_config",
    "CONVERSATION_CONFIG",
    "INSTRUCTION_CONFIG",
    "TOOL_CALL_CONFIG",
]