synkro 0.4.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. synkro/__init__.py +179 -0
  2. synkro/advanced.py +186 -0
  3. synkro/cli.py +128 -0
  4. synkro/core/__init__.py +7 -0
  5. synkro/core/checkpoint.py +250 -0
  6. synkro/core/dataset.py +402 -0
  7. synkro/core/policy.py +337 -0
  8. synkro/errors.py +178 -0
  9. synkro/examples/__init__.py +148 -0
  10. synkro/factory.py +276 -0
  11. synkro/formatters/__init__.py +12 -0
  12. synkro/formatters/qa.py +98 -0
  13. synkro/formatters/sft.py +90 -0
  14. synkro/formatters/tool_call.py +127 -0
  15. synkro/generation/__init__.py +9 -0
  16. synkro/generation/follow_ups.py +134 -0
  17. synkro/generation/generator.py +220 -0
  18. synkro/generation/golden_responses.py +244 -0
  19. synkro/generation/golden_scenarios.py +276 -0
  20. synkro/generation/golden_tool_responses.py +416 -0
  21. synkro/generation/logic_extractor.py +126 -0
  22. synkro/generation/multiturn_responses.py +177 -0
  23. synkro/generation/planner.py +131 -0
  24. synkro/generation/responses.py +189 -0
  25. synkro/generation/scenarios.py +90 -0
  26. synkro/generation/tool_responses.py +376 -0
  27. synkro/generation/tool_simulator.py +114 -0
  28. synkro/interactive/__init__.py +12 -0
  29. synkro/interactive/hitl_session.py +77 -0
  30. synkro/interactive/logic_map_editor.py +173 -0
  31. synkro/interactive/rich_ui.py +205 -0
  32. synkro/llm/__init__.py +7 -0
  33. synkro/llm/client.py +235 -0
  34. synkro/llm/rate_limits.py +95 -0
  35. synkro/models/__init__.py +43 -0
  36. synkro/models/anthropic.py +26 -0
  37. synkro/models/google.py +19 -0
  38. synkro/models/openai.py +31 -0
  39. synkro/modes/__init__.py +15 -0
  40. synkro/modes/config.py +66 -0
  41. synkro/modes/qa.py +18 -0
  42. synkro/modes/sft.py +18 -0
  43. synkro/modes/tool_call.py +18 -0
  44. synkro/parsers.py +442 -0
  45. synkro/pipeline/__init__.py +20 -0
  46. synkro/pipeline/phases.py +592 -0
  47. synkro/pipeline/runner.py +424 -0
  48. synkro/pipelines.py +123 -0
  49. synkro/prompts/__init__.py +57 -0
  50. synkro/prompts/base.py +167 -0
  51. synkro/prompts/golden_templates.py +474 -0
  52. synkro/prompts/interactive_templates.py +65 -0
  53. synkro/prompts/multiturn_templates.py +156 -0
  54. synkro/prompts/qa_templates.py +97 -0
  55. synkro/prompts/templates.py +281 -0
  56. synkro/prompts/tool_templates.py +201 -0
  57. synkro/quality/__init__.py +14 -0
  58. synkro/quality/golden_refiner.py +163 -0
  59. synkro/quality/grader.py +153 -0
  60. synkro/quality/multiturn_grader.py +150 -0
  61. synkro/quality/refiner.py +137 -0
  62. synkro/quality/tool_grader.py +126 -0
  63. synkro/quality/tool_refiner.py +128 -0
  64. synkro/quality/verifier.py +228 -0
  65. synkro/reporting.py +537 -0
  66. synkro/schemas.py +472 -0
  67. synkro/types/__init__.py +41 -0
  68. synkro/types/core.py +126 -0
  69. synkro/types/dataset_type.py +30 -0
  70. synkro/types/logic_map.py +345 -0
  71. synkro/types/tool.py +94 -0
  72. synkro-0.4.12.data/data/examples/__init__.py +148 -0
  73. synkro-0.4.12.dist-info/METADATA +258 -0
  74. synkro-0.4.12.dist-info/RECORD +77 -0
  75. synkro-0.4.12.dist-info/WHEEL +4 -0
  76. synkro-0.4.12.dist-info/entry_points.txt +2 -0
  77. synkro-0.4.12.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,95 @@
1
"""Automatic worker scaling based on provider rate limits."""

# Known rate limits per provider (requests per minute).
# NOTE(review): these look like conservative entry-tier numbers; actual
# limits vary by account tier — confirm against each provider's dashboard.
PROVIDER_RATE_LIMITS = {
    "openai": 60,  # Tier 1 default, scales with tier
    "anthropic": 60,  # Standard limit
    "google": 60,  # Gemini API
    "gemini": 60,  # Gemini API (alternative prefix)
}

# Target 80% of the rate limit to leave headroom before hitting caps.
UTILIZATION_TARGET = 0.8

# Default workers per provider (pre-computed for convenience); used by
# get_default_workers() as a quick lookup instead of auto_workers() math.
DEFAULT_WORKERS = {
    "openai": 15,  # ~60 RPM / 3 calls = 20, use 15 to be safe
    "anthropic": 10,  # ~60 RPM, more conservative
    "google": 15,  # Gemini
    "gemini": 15,  # Gemini
}
21
+
22
+
23
def get_provider(model: str) -> str:
    """
    Extract the provider name from a model string.

    Args:
        model: Model string like "gpt-4o" or "ollama/llama3.1:8b"

    Returns:
        Provider name
    """
    # An explicit "provider/model" prefix always wins.
    head, slash, _rest = model.partition("/")
    if slash:
        return head

    # Otherwise infer the provider from well-known model-name prefixes.
    known_prefixes = (
        ("gpt", "openai"),
        ("o1", "openai"),
        ("claude", "anthropic"),
        ("gemini", "google"),
    )
    for prefix, provider in known_prefixes:
        if model.startswith(prefix):
            return provider

    # Nothing matched: fall back to OpenAI.
    return "openai"
46
+
47
+
48
def auto_workers(model: str) -> int:
    """
    Determine optimal worker count based on model's provider.

    This calculates a safe default that won't hit rate limits,
    accounting for the fact that each trace needs ~3 LLM calls
    (generate, grade, maybe refine).

    Args:
        model: Model string

    Returns:
        Recommended worker count, clamped to the range [5, 100]

    Example:
        >>> auto_workers("gpt-4o")
        16
        >>> auto_workers("gemini/gemini-2.5-flash")
        16
    """
    provider = get_provider(model)
    # Unknown providers fall back to the common 60 RPM baseline.
    rpm = PROVIDER_RATE_LIMITS.get(provider, 60)

    # Workers = RPM * utilization / avg_calls_per_trace
    # Each trace needs ~3 calls (generate, grade, maybe refine)
    avg_calls_per_trace = 3

    workers = int((rpm * UTILIZATION_TARGET) / avg_calls_per_trace)

    # Clamp to reasonable bounds: at least 5 for throughput, at most 100
    # to avoid overwhelming any provider.
    return max(5, min(workers, 100))
79
+
80
+
81
def get_default_workers(model: str) -> int:
    """
    Quick lookup for worker count.

    Uses pre-computed defaults for common providers.

    Args:
        model: Model string

    Returns:
        Default worker count for the provider (10 when unknown)
    """
    # Resolve the provider, then fall back to a conservative default of 10.
    return DEFAULT_WORKERS.get(get_provider(model), 10)
95
+
@@ -0,0 +1,43 @@
1
+ """Model enums for supported LLM providers.
2
+
3
+ Supported providers:
4
+ - OpenAI (GPT-4o, GPT-4o-mini)
5
+ - Anthropic (Claude 3.5 Sonnet/Haiku)
6
+ - Google (Gemini 2.5 Flash/Pro)
7
+
8
+ Usage:
9
+ # Per-provider import (recommended)
10
+ from synkro.models.openai import OpenAI
11
+ from synkro.models.anthropic import Anthropic
12
+ from synkro.models.google import Google
13
+
14
+ # Convenience import (all at once)
15
+ from synkro.models import OpenAI, Anthropic, Google
16
+ """
17
+
18
+ from enum import Enum
19
+ from typing import Union
20
+
21
+ from synkro.models.openai import OpenAI
22
+ from synkro.models.anthropic import Anthropic
23
+ from synkro.models.google import Google
24
+
25
# Union type for any model: a provider enum member, or a raw model string
# for providers without a dedicated enum (e.g. "ollama/llama3.1:8b").
Model = Union[OpenAI, Anthropic, Google, str]
27
+
28
+
29
def get_model_string(model: Model) -> str:
    """Convert a model enum or string to its string value."""
    # Enum members carry the provider-facing string in .value;
    # plain strings pass through untouched.
    return model.value if isinstance(model, Enum) else model
34
+
35
+
36
# Public API of synkro.models, re-exported for convenience imports.
__all__ = [
    "OpenAI",
    "Anthropic",
    "Google",
    "Model",
    "get_model_string",
]
43
+
@@ -0,0 +1,26 @@
1
+ """Anthropic Claude models."""
2
+
3
+ from enum import Enum
4
+
5
+
6
class Anthropic(str, Enum):
    """Anthropic Claude model identifiers.

    Members are grouped newest-generation-first; each value is the dated
    model string the Anthropic API expects.
    """

    # --- Claude 4.5 (latest) ---
    # Premium: state-of-the-art for coding and autonomous agents.
    CLAUDE_45_OPUS = "claude-opus-4-5-20250601"
    # Standard: default model for most users, faster and more context-aware.
    CLAUDE_45_SONNET = "claude-sonnet-4-5-20250601"
    # Light: high-speed, cost-effective, matches Claude 3 Opus intelligence.
    CLAUDE_45_HAIKU = "claude-haiku-4-5-20250601"

    # --- Claude 4 (previous gen) ---
    CLAUDE_4_SONNET = "claude-sonnet-4-20250514"
    CLAUDE_4_OPUS = "claude-opus-4-20250514"

    # --- Claude 3.5 (legacy) ---
    CLAUDE_35_SONNET = "claude-3-5-sonnet-20241022"
    CLAUDE_35_HAIKU = "claude-3-5-haiku-20241022"
26
+
@@ -0,0 +1,19 @@
1
+ """Google Gemini models.
2
+
3
+ Updated based on: https://ai.google.dev/gemini-api/docs/models#model-versions
4
+ """
5
+
6
+ from enum import Enum
7
+
8
+
9
class Google(str, Enum):
    """Google Gemini model identifiers.

    Values carry the "gemini/" routing prefix expected by the LLM client;
    members are grouped newest-generation-first.
    """

    # Gemini 3 (latest)
    GEMINI_3_PRO = "gemini/gemini-3-pro"
    GEMINI_3_FLASH = "gemini/gemini-3-flash"

    # Gemini 2.5
    GEMINI_25_FLASH = "gemini/gemini-2.5-flash"
    GEMINI_25_PRO = "gemini/gemini-2.5-pro"

    # Gemini 2.0
    GEMINI_2_FLASH = "gemini/gemini-2.0-flash"
    GEMINI_2_FLASH_LITE = "gemini/gemini-2.0-flash-lite"
@@ -0,0 +1,31 @@
1
+ """OpenAI models."""
2
+
3
+ from enum import Enum
4
+
5
+
6
class OpenAI(str, Enum):
    """OpenAI model identifiers.

    Members are grouped by family (GPT-5, GPT-4 legacy, o-series
    reasoning models); each value is the API model name.
    """

    # --- GPT-5 series (latest) ---
    # Flagship: high-speed, human-like dialogue, agentic tool-calling.
    GPT_52 = "gpt-5.2"
    # Mid-tier: balanced cost and intelligence, primary workhorse.
    GPT_5_MINI = "gpt-5-mini"
    # Edge: extremely low latency, high-volume basic tasks.
    GPT_5_NANO = "gpt-5-nano"

    # --- GPT-4 series (legacy) ---
    # Legacy flagship: smartest non-reasoning model from the previous gen.
    GPT_41 = "gpt-4.1"
    GPT_4O = "gpt-4o"
    GPT_4O_MINI = "gpt-4o-mini"

    # --- Reasoning models ---
    O3 = "o3"
    O3_MINI = "o3-mini"
    O1 = "o1"
    O1_MINI = "o1-mini"
31
+
@@ -0,0 +1,15 @@
1
+ """Mode configurations for different dataset types."""
2
+
3
+ from synkro.modes.config import ModeConfig, get_mode_config
4
+ from synkro.modes.qa import QA_CONFIG
5
+ from synkro.modes.sft import SFT_CONFIG
6
+ from synkro.modes.tool_call import TOOL_CALL_CONFIG
7
+
8
# Public API of synkro.modes: the config container, its lookup helper,
# and the three pre-built per-dataset-type configurations.
__all__ = [
    "ModeConfig",
    "get_mode_config",
    "QA_CONFIG",
    "SFT_CONFIG",
    "TOOL_CALL_CONFIG",
]
15
+
synkro/modes/config.py ADDED
@@ -0,0 +1,66 @@
1
+ """Mode configuration that bundles prompts, schema, and formatter per dataset type."""
2
+
3
+ from dataclasses import dataclass
4
+ from typing import TYPE_CHECKING, Type
5
+
6
+ if TYPE_CHECKING:
7
+ from synkro.types.dataset_type import DatasetType
8
+
9
+
10
@dataclass
class ModeConfig:
    """
    Configuration bundle for a dataset type.

    Defines all the prompts, schemas, and formatters needed
    for generating a specific type of dataset.
    """

    # -- Prompts --
    scenario_prompt: str  # prompt for generating scenarios/questions
    response_prompt: str  # prompt for generating responses/answers
    grade_prompt: str  # prompt for grading quality
    refine_prompt: str  # prompt for refining failed responses

    # -- Output configuration --
    output_description: str  # human-readable description of the output format
35
+
36
+
37
def get_mode_config(dataset_type: "DatasetType") -> ModeConfig:
    """
    Get the mode configuration for a dataset type.

    Args:
        dataset_type: The type of dataset to generate

    Returns:
        ModeConfig with appropriate prompts and settings

    Raises:
        ValueError: If no configuration is registered for the dataset type.

    Example:
        >>> from synkro import DatasetType
        >>> config = get_mode_config(DatasetType.QA)
    """
    # Imported lazily to avoid circular imports between modes and types.
    from synkro.types.dataset_type import DatasetType
    from synkro.modes.qa import QA_CONFIG
    from synkro.modes.sft import SFT_CONFIG
    from synkro.modes.tool_call import TOOL_CALL_CONFIG

    registry = {
        DatasetType.QA: QA_CONFIG,
        DatasetType.SFT: SFT_CONFIG,
        DatasetType.TOOL_CALL: TOOL_CALL_CONFIG,
    }

    config = registry.get(dataset_type)
    if config is None:
        raise ValueError(f"Unknown dataset type: {dataset_type}")
    return config
66
+
synkro/modes/qa.py ADDED
@@ -0,0 +1,18 @@
1
+ """QA mode configuration."""
2
+
3
+ from synkro.modes.config import ModeConfig
4
+ from synkro.prompts.qa_templates import (
5
+ QA_SCENARIO_PROMPT,
6
+ QA_RESPONSE_PROMPT,
7
+ QA_GRADE_PROMPT,
8
+ QA_REFINE_PROMPT,
9
+ )
10
+
11
# Prompt bundle used by the pipeline when generating question/answer datasets.
QA_CONFIG = ModeConfig(
    output_description="Question-Answer pairs: {question, answer, context}",
    scenario_prompt=QA_SCENARIO_PROMPT,
    response_prompt=QA_RESPONSE_PROMPT,
    grade_prompt=QA_GRADE_PROMPT,
    refine_prompt=QA_REFINE_PROMPT,
)
18
+
synkro/modes/sft.py ADDED
@@ -0,0 +1,18 @@
1
+ """SFT mode configuration."""
2
+
3
+ from synkro.modes.config import ModeConfig
4
+ from synkro.prompts.templates import (
5
+ SCENARIO_GENERATOR_PROMPT,
6
+ SINGLE_RESPONSE_PROMPT,
7
+ SINGLE_GRADE_PROMPT,
8
+ BATCHED_REFINER_PROMPT,
9
+ )
10
+
11
# Prompt bundle used by the pipeline when generating supervised
# fine-tuning (chat-message) datasets.
SFT_CONFIG = ModeConfig(
    output_description="Chat messages: {messages: [system, user, assistant]}",
    scenario_prompt=SCENARIO_GENERATOR_PROMPT,
    response_prompt=SINGLE_RESPONSE_PROMPT,
    grade_prompt=SINGLE_GRADE_PROMPT,
    refine_prompt=BATCHED_REFINER_PROMPT,
)
18
+
@@ -0,0 +1,18 @@
1
+ """Tool Call mode configuration."""
2
+
3
+ from synkro.modes.config import ModeConfig
4
+ from synkro.prompts.tool_templates import (
5
+ TOOL_SCENARIO_PROMPT,
6
+ TOOL_RESPONSE_PROMPT,
7
+ TOOL_GRADE_PROMPT,
8
+ TOOL_REFINE_PROMPT,
9
+ )
10
+
11
# Prompt bundle used by the pipeline when generating tool-calling datasets.
TOOL_CALL_CONFIG = ModeConfig(
    output_description="Tool calling: {messages: [system, user, {tool_calls}, {tool}, assistant]}",
    scenario_prompt=TOOL_SCENARIO_PROMPT,
    response_prompt=TOOL_RESPONSE_PROMPT,
    grade_prompt=TOOL_GRADE_PROMPT,
    refine_prompt=TOOL_REFINE_PROMPT,
)
18
+