synkro 0.4.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synkro/__init__.py +179 -0
- synkro/advanced.py +186 -0
- synkro/cli.py +128 -0
- synkro/core/__init__.py +7 -0
- synkro/core/checkpoint.py +250 -0
- synkro/core/dataset.py +402 -0
- synkro/core/policy.py +337 -0
- synkro/errors.py +178 -0
- synkro/examples/__init__.py +148 -0
- synkro/factory.py +276 -0
- synkro/formatters/__init__.py +12 -0
- synkro/formatters/qa.py +98 -0
- synkro/formatters/sft.py +90 -0
- synkro/formatters/tool_call.py +127 -0
- synkro/generation/__init__.py +9 -0
- synkro/generation/follow_ups.py +134 -0
- synkro/generation/generator.py +220 -0
- synkro/generation/golden_responses.py +244 -0
- synkro/generation/golden_scenarios.py +276 -0
- synkro/generation/golden_tool_responses.py +416 -0
- synkro/generation/logic_extractor.py +126 -0
- synkro/generation/multiturn_responses.py +177 -0
- synkro/generation/planner.py +131 -0
- synkro/generation/responses.py +189 -0
- synkro/generation/scenarios.py +90 -0
- synkro/generation/tool_responses.py +376 -0
- synkro/generation/tool_simulator.py +114 -0
- synkro/interactive/__init__.py +12 -0
- synkro/interactive/hitl_session.py +77 -0
- synkro/interactive/logic_map_editor.py +173 -0
- synkro/interactive/rich_ui.py +205 -0
- synkro/llm/__init__.py +7 -0
- synkro/llm/client.py +235 -0
- synkro/llm/rate_limits.py +95 -0
- synkro/models/__init__.py +43 -0
- synkro/models/anthropic.py +26 -0
- synkro/models/google.py +19 -0
- synkro/models/openai.py +31 -0
- synkro/modes/__init__.py +15 -0
- synkro/modes/config.py +66 -0
- synkro/modes/qa.py +18 -0
- synkro/modes/sft.py +18 -0
- synkro/modes/tool_call.py +18 -0
- synkro/parsers.py +442 -0
- synkro/pipeline/__init__.py +20 -0
- synkro/pipeline/phases.py +592 -0
- synkro/pipeline/runner.py +424 -0
- synkro/pipelines.py +123 -0
- synkro/prompts/__init__.py +57 -0
- synkro/prompts/base.py +167 -0
- synkro/prompts/golden_templates.py +474 -0
- synkro/prompts/interactive_templates.py +65 -0
- synkro/prompts/multiturn_templates.py +156 -0
- synkro/prompts/qa_templates.py +97 -0
- synkro/prompts/templates.py +281 -0
- synkro/prompts/tool_templates.py +201 -0
- synkro/quality/__init__.py +14 -0
- synkro/quality/golden_refiner.py +163 -0
- synkro/quality/grader.py +153 -0
- synkro/quality/multiturn_grader.py +150 -0
- synkro/quality/refiner.py +137 -0
- synkro/quality/tool_grader.py +126 -0
- synkro/quality/tool_refiner.py +128 -0
- synkro/quality/verifier.py +228 -0
- synkro/reporting.py +537 -0
- synkro/schemas.py +472 -0
- synkro/types/__init__.py +41 -0
- synkro/types/core.py +126 -0
- synkro/types/dataset_type.py +30 -0
- synkro/types/logic_map.py +345 -0
- synkro/types/tool.py +94 -0
- synkro-0.4.12.data/data/examples/__init__.py +148 -0
- synkro-0.4.12.dist-info/METADATA +258 -0
- synkro-0.4.12.dist-info/RECORD +77 -0
- synkro-0.4.12.dist-info/WHEEL +4 -0
- synkro-0.4.12.dist-info/entry_points.txt +2 -0
- synkro-0.4.12.dist-info/licenses/LICENSE +21 -0
synkro/llm/rate_limits.py
ADDED
@@ -0,0 +1,95 @@
"""Automatic worker scaling based on provider rate limits."""

# Known rate limits per provider (requests per minute)
PROVIDER_RATE_LIMITS = {
    "openai": 60,  # Tier 1 default, scales with tier
    "anthropic": 60,  # Standard limit
    "google": 60,  # Gemini API
    "gemini": 60,  # Gemini API (alternative prefix)
}

# Target 80% of rate limit to avoid hitting caps
UTILIZATION_TARGET = 0.8

# Default workers per provider (pre-computed for convenience)
DEFAULT_WORKERS = {
    "openai": 15,  # ~60 RPM / 3 calls = 20, use 15 to be safe
    "anthropic": 10,  # ~60 RPM, more conservative
    "google": 15,  # Gemini
    "gemini": 15,  # Gemini
}


def get_provider(model: str) -> str:
    """
    Extract provider name from model string.

    Args:
        model: Model string like "gpt-4o" or "ollama/llama3.1:8b"

    Returns:
        Provider name
    """
    # Check for explicit prefix
    if "/" in model:
        return model.split("/")[0]

    # Infer from model name
    if model.startswith("gpt") or model.startswith("o1"):
        return "openai"
    if model.startswith("claude"):
        return "anthropic"
    if model.startswith("gemini"):
        return "google"

    return "openai"  # Default


def auto_workers(model: str) -> int:
    """
    Determine optimal worker count based on model's provider.

    This calculates a safe default that won't hit rate limits,
    accounting for the fact that each trace needs ~3 LLM calls
    (generate, grade, maybe refine).

    Args:
        model: Model string

    Returns:
        Recommended worker count

    Example:
        >>> auto_workers("gpt-4o")
        16
        >>> auto_workers("gemini/gemini-2.5-flash")
        16
    """
    provider = get_provider(model)
    rpm = PROVIDER_RATE_LIMITS.get(provider, 60)

    # Workers = RPM * utilization / avg_calls_per_trace
    # Each trace needs ~3 calls (generate, grade, maybe refine)
    avg_calls_per_trace = 3

    workers = int((rpm * UTILIZATION_TARGET) / avg_calls_per_trace)

    # Clamp to reasonable bounds
    return max(5, min(workers, 100))


def get_default_workers(model: str) -> int:
    """
    Quick lookup for worker count.

    Uses pre-computed defaults for common providers.

    Args:
        model: Model string

    Returns:
        Default worker count for the provider
    """
    provider = get_provider(model)
    return DEFAULT_WORKERS.get(provider, 10)
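A quick sanity check of the scaling arithmetic above (an illustrative sketch, not part of the package; the expected values follow directly from the constants defined in this file):

from synkro.llm.rate_limits import auto_workers, get_provider

# An explicit "provider/model" prefix wins over name-based inference.
assert get_provider("ollama/llama3.1:8b") == "ollama"

# Unknown providers fall back to 60 RPM:
# int(60 RPM * 0.8 utilization / 3 calls per trace) = 16 workers.
assert auto_workers("ollama/llama3.1:8b") == 16
assert auto_workers("gpt-4o") == 16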
synkro/models/__init__.py
ADDED
@@ -0,0 +1,43 @@
"""Model enums for supported LLM providers.

Supported providers:
- OpenAI (GPT-4o, GPT-4o-mini)
- Anthropic (Claude 3.5 Sonnet/Haiku)
- Google (Gemini 2.5 Flash/Pro)

Usage:
    # Per-provider import (recommended)
    from synkro.models.openai import OpenAI
    from synkro.models.anthropic import Anthropic
    from synkro.models.google import Google

    # Convenience import (all at once)
    from synkro.models import OpenAI, Anthropic, Google
"""

from enum import Enum
from typing import Union

from synkro.models.openai import OpenAI
from synkro.models.anthropic import Anthropic
from synkro.models.google import Google

# Union type for any model
Model = Union[OpenAI, Anthropic, Google, str]


def get_model_string(model: Model) -> str:
    """Convert a model enum or string to its string value."""
    if isinstance(model, Enum):
        return model.value
    return model


__all__ = [
    "OpenAI",
    "Anthropic",
    "Google",
    "Model",
    "get_model_string",
]
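As a brief illustration of the normalization helper (a sketch using only names defined in this diff):

from synkro.models import OpenAI, get_model_string

get_model_string(OpenAI.GPT_4O)         # enum unwraps to "gpt-4o"
get_model_string("ollama/llama3.1:8b")  # plain strings pass through unchanged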
synkro/models/anthropic.py
ADDED
@@ -0,0 +1,26 @@
"""Anthropic Claude models."""

from enum import Enum


class Anthropic(str, Enum):
    """Anthropic Claude models."""

    # Claude 4.5 (latest)
    CLAUDE_45_OPUS = "claude-opus-4-5-20250601"
    """Premium: State-of-the-art for coding and autonomous agents"""

    CLAUDE_45_SONNET = "claude-sonnet-4-5-20250601"
    """Standard: Default model for most users, faster and more context-aware"""

    CLAUDE_45_HAIKU = "claude-haiku-4-5-20250601"
    """Light: High-speed, cost-effective, matches Claude 3 Opus intelligence"""

    # Claude 4 (previous gen)
    CLAUDE_4_SONNET = "claude-sonnet-4-20250514"
    CLAUDE_4_OPUS = "claude-opus-4-20250514"

    # Claude 3.5 (legacy)
    CLAUDE_35_SONNET = "claude-3-5-sonnet-20241022"
    CLAUDE_35_HAIKU = "claude-3-5-haiku-20241022"
synkro/models/google.py
ADDED
@@ -0,0 +1,19 @@
"""Google Gemini models.

Updated based on: https://ai.google.dev/gemini-api/docs/models#model-versions
"""

from enum import Enum


class Google(str, Enum):
    """Google Gemini models."""

    GEMINI_3_PRO = "gemini/gemini-3-pro"
    GEMINI_3_FLASH = "gemini/gemini-3-flash"

    GEMINI_25_FLASH = "gemini/gemini-2.5-flash"
    GEMINI_25_PRO = "gemini/gemini-2.5-pro"

    GEMINI_2_FLASH = "gemini/gemini-2.0-flash"
    GEMINI_2_FLASH_LITE = "gemini/gemini-2.0-flash-lite"
synkro/models/openai.py
ADDED
@@ -0,0 +1,31 @@
"""OpenAI models."""

from enum import Enum


class OpenAI(str, Enum):
    """OpenAI models."""

    # GPT-5 series (latest)
    GPT_52 = "gpt-5.2"
    """Flagship: High-speed, human-like dialogue, agentic tool-calling"""

    GPT_5_MINI = "gpt-5-mini"
    """Mid-tier: Balanced cost and intelligence, primary workhorse"""

    GPT_5_NANO = "gpt-5-nano"
    """Edge: Extremely low latency, high-volume basic tasks"""

    # GPT-4 series (legacy)
    GPT_41 = "gpt-4.1"
    """Legacy flagship: Smartest non-reasoning model from previous gen"""

    GPT_4O = "gpt-4o"
    GPT_4O_MINI = "gpt-4o-mini"

    # Reasoning models
    O3 = "o3"
    O3_MINI = "o3-mini"
    O1 = "o1"
    O1_MINI = "o1-mini"
synkro/modes/__init__.py
ADDED
@@ -0,0 +1,15 @@
"""Mode configurations for different dataset types."""

from synkro.modes.config import ModeConfig, get_mode_config
from synkro.modes.qa import QA_CONFIG
from synkro.modes.sft import SFT_CONFIG
from synkro.modes.tool_call import TOOL_CALL_CONFIG

__all__ = [
    "ModeConfig",
    "get_mode_config",
    "QA_CONFIG",
    "SFT_CONFIG",
    "TOOL_CALL_CONFIG",
]
synkro/modes/config.py
ADDED
@@ -0,0 +1,66 @@
"""Mode configuration that bundles prompts, schema, and formatter per dataset type."""

from dataclasses import dataclass
from typing import TYPE_CHECKING, Type

if TYPE_CHECKING:
    from synkro.types.dataset_type import DatasetType


@dataclass
class ModeConfig:
    """
    Configuration bundle for a dataset type.

    Defines all the prompts, schemas, and formatters needed
    for generating a specific type of dataset.
    """

    # Prompts
    scenario_prompt: str
    """Prompt for generating scenarios/questions"""

    response_prompt: str
    """Prompt for generating responses/answers"""

    grade_prompt: str
    """Prompt for grading quality"""

    refine_prompt: str
    """Prompt for refining failed responses"""

    # Output configuration
    output_description: str
    """Human-readable description of output format"""


def get_mode_config(dataset_type: "DatasetType") -> ModeConfig:
    """
    Get the mode configuration for a dataset type.

    Args:
        dataset_type: The type of dataset to generate

    Returns:
        ModeConfig with appropriate prompts and settings

    Example:
        >>> from synkro import DatasetType
        >>> config = get_mode_config(DatasetType.QA)
    """
    from synkro.types.dataset_type import DatasetType
    from synkro.modes.qa import QA_CONFIG
    from synkro.modes.sft import SFT_CONFIG
    from synkro.modes.tool_call import TOOL_CALL_CONFIG

    configs = {
        DatasetType.QA: QA_CONFIG,
        DatasetType.SFT: SFT_CONFIG,
        DatasetType.TOOL_CALL: TOOL_CALL_CONFIG,
    }

    if dataset_type not in configs:
        raise ValueError(f"Unknown dataset type: {dataset_type}")

    return configs[dataset_type]
synkro/modes/qa.py
ADDED
@@ -0,0 +1,18 @@
"""QA mode configuration."""

from synkro.modes.config import ModeConfig
from synkro.prompts.qa_templates import (
    QA_SCENARIO_PROMPT,
    QA_RESPONSE_PROMPT,
    QA_GRADE_PROMPT,
    QA_REFINE_PROMPT,
)

QA_CONFIG = ModeConfig(
    scenario_prompt=QA_SCENARIO_PROMPT,
    response_prompt=QA_RESPONSE_PROMPT,
    grade_prompt=QA_GRADE_PROMPT,
    refine_prompt=QA_REFINE_PROMPT,
    output_description="Question-Answer pairs: {question, answer, context}",
)
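Tying config.py and qa.py together, a short sketch of the dispatch (illustrative only; uses only names from this diff):

from synkro.types.dataset_type import DatasetType
from synkro.modes.config import get_mode_config

config = get_mode_config(DatasetType.QA)
config.output_description  # "Question-Answer pairs: {question, answer, context}"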
synkro/modes/sft.py
ADDED
@@ -0,0 +1,18 @@
"""SFT mode configuration."""

from synkro.modes.config import ModeConfig
from synkro.prompts.templates import (
    SCENARIO_GENERATOR_PROMPT,
    SINGLE_RESPONSE_PROMPT,
    SINGLE_GRADE_PROMPT,
    BATCHED_REFINER_PROMPT,
)

SFT_CONFIG = ModeConfig(
    scenario_prompt=SCENARIO_GENERATOR_PROMPT,
    response_prompt=SINGLE_RESPONSE_PROMPT,
    grade_prompt=SINGLE_GRADE_PROMPT,
    refine_prompt=BATCHED_REFINER_PROMPT,
    output_description="Chat messages: {messages: [system, user, assistant]}",
)
synkro/modes/tool_call.py
ADDED
@@ -0,0 +1,18 @@
"""Tool Call mode configuration."""

from synkro.modes.config import ModeConfig
from synkro.prompts.tool_templates import (
    TOOL_SCENARIO_PROMPT,
    TOOL_RESPONSE_PROMPT,
    TOOL_GRADE_PROMPT,
    TOOL_REFINE_PROMPT,
)

TOOL_CALL_CONFIG = ModeConfig(
    scenario_prompt=TOOL_SCENARIO_PROMPT,
    response_prompt=TOOL_RESPONSE_PROMPT,
    grade_prompt=TOOL_GRADE_PROMPT,
    refine_prompt=TOOL_REFINE_PROMPT,
    output_description="Tool calling: {messages: [system, user, {tool_calls}, {tool}, assistant]}",
)