synth-ai 0.4.1__py3-none-any.whl → 0.4.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Note: this version of synth-ai was flagged as potentially problematic by the registry scanner.
- synth_ai/__init__.py +13 -13
- synth_ai/cli/__init__.py +6 -15
- synth_ai/cli/commands/eval/__init__.py +6 -15
- synth_ai/cli/commands/eval/config.py +338 -0
- synth_ai/cli/commands/eval/core.py +236 -1091
- synth_ai/cli/commands/eval/runner.py +704 -0
- synth_ai/cli/commands/eval/validation.py +44 -117
- synth_ai/cli/commands/filter/core.py +7 -7
- synth_ai/cli/commands/filter/validation.py +2 -2
- synth_ai/cli/commands/smoke/core.py +7 -17
- synth_ai/cli/commands/status/__init__.py +1 -64
- synth_ai/cli/commands/status/client.py +50 -151
- synth_ai/cli/commands/status/config.py +3 -83
- synth_ai/cli/commands/status/errors.py +4 -13
- synth_ai/cli/commands/status/subcommands/__init__.py +2 -8
- synth_ai/cli/commands/status/subcommands/config.py +13 -0
- synth_ai/cli/commands/status/subcommands/files.py +18 -63
- synth_ai/cli/commands/status/subcommands/jobs.py +28 -311
- synth_ai/cli/commands/status/subcommands/models.py +18 -62
- synth_ai/cli/commands/status/subcommands/runs.py +16 -63
- synth_ai/cli/commands/status/subcommands/session.py +67 -172
- synth_ai/cli/commands/status/subcommands/summary.py +24 -32
- synth_ai/cli/commands/status/subcommands/utils.py +41 -0
- synth_ai/cli/commands/status/utils.py +16 -107
- synth_ai/cli/commands/train/__init__.py +18 -20
- synth_ai/cli/commands/train/errors.py +3 -3
- synth_ai/cli/commands/train/prompt_learning_validation.py +15 -16
- synth_ai/cli/commands/train/validation.py +7 -7
- synth_ai/cli/commands/train/{judge_schemas.py → verifier_schemas.py} +33 -34
- synth_ai/cli/commands/train/verifier_validation.py +235 -0
- synth_ai/cli/demo_apps/demo_task_apps/math/config.toml +0 -1
- synth_ai/cli/demo_apps/demo_task_apps/math/modal_task_app.py +2 -6
- synth_ai/cli/demo_apps/math/config.toml +0 -1
- synth_ai/cli/demo_apps/math/modal_task_app.py +2 -6
- synth_ai/cli/demo_apps/mipro/task_app.py +25 -47
- synth_ai/cli/lib/apps/task_app.py +12 -13
- synth_ai/cli/lib/task_app_discovery.py +6 -6
- synth_ai/cli/lib/train_cfgs.py +10 -10
- synth_ai/cli/task_apps/__init__.py +11 -0
- synth_ai/cli/task_apps/commands.py +7 -15
- synth_ai/core/env.py +12 -1
- synth_ai/core/errors.py +1 -2
- synth_ai/core/integrations/cloudflare.py +209 -33
- synth_ai/core/tracing_v3/abstractions.py +46 -0
- synth_ai/data/__init__.py +3 -30
- synth_ai/data/enums.py +1 -20
- synth_ai/data/rewards.py +100 -3
- synth_ai/products/graph_evolve/__init__.py +1 -2
- synth_ai/products/graph_evolve/config.py +16 -16
- synth_ai/products/graph_evolve/converters/__init__.py +3 -3
- synth_ai/products/graph_evolve/converters/openai_sft.py +7 -7
- synth_ai/products/graph_evolve/examples/hotpotqa/config.toml +1 -1
- synth_ai/products/graph_gepa/__init__.py +23 -0
- synth_ai/products/graph_gepa/converters/__init__.py +19 -0
- synth_ai/products/graph_gepa/converters/openai_sft.py +29 -0
- synth_ai/sdk/__init__.py +45 -35
- synth_ai/sdk/api/eval/__init__.py +33 -0
- synth_ai/sdk/api/eval/job.py +732 -0
- synth_ai/sdk/api/research_agent/__init__.py +276 -66
- synth_ai/sdk/api/train/builders.py +181 -0
- synth_ai/sdk/api/train/cli.py +41 -33
- synth_ai/sdk/api/train/configs/__init__.py +6 -4
- synth_ai/sdk/api/train/configs/prompt_learning.py +127 -33
- synth_ai/sdk/api/train/configs/rl.py +264 -16
- synth_ai/sdk/api/train/configs/sft.py +165 -1
- synth_ai/sdk/api/train/graph_validators.py +12 -12
- synth_ai/sdk/api/train/graphgen.py +169 -51
- synth_ai/sdk/api/train/graphgen_models.py +95 -45
- synth_ai/sdk/api/train/local_api.py +10 -0
- synth_ai/sdk/api/train/pollers.py +36 -0
- synth_ai/sdk/api/train/prompt_learning.py +390 -60
- synth_ai/sdk/api/train/rl.py +41 -5
- synth_ai/sdk/api/train/sft.py +2 -0
- synth_ai/sdk/api/train/task_app.py +20 -0
- synth_ai/sdk/api/train/validators.py +17 -17
- synth_ai/sdk/graphs/completions.py +239 -33
- synth_ai/sdk/{judging/schemas.py → graphs/verifier_schemas.py} +23 -23
- synth_ai/sdk/learning/__init__.py +35 -5
- synth_ai/sdk/learning/context_learning_client.py +531 -0
- synth_ai/sdk/learning/context_learning_types.py +294 -0
- synth_ai/sdk/learning/prompt_learning_client.py +1 -1
- synth_ai/sdk/learning/prompt_learning_types.py +2 -1
- synth_ai/sdk/learning/rl/__init__.py +0 -4
- synth_ai/sdk/learning/rl/contracts.py +0 -4
- synth_ai/sdk/localapi/__init__.py +40 -0
- synth_ai/sdk/localapi/apps/__init__.py +28 -0
- synth_ai/sdk/localapi/client.py +10 -0
- synth_ai/sdk/localapi/contracts.py +10 -0
- synth_ai/sdk/localapi/helpers.py +519 -0
- synth_ai/sdk/localapi/rollouts.py +93 -0
- synth_ai/sdk/localapi/server.py +29 -0
- synth_ai/sdk/localapi/template.py +49 -0
- synth_ai/sdk/streaming/handlers.py +6 -6
- synth_ai/sdk/streaming/streamer.py +10 -6
- synth_ai/sdk/task/__init__.py +18 -5
- synth_ai/sdk/task/apps/__init__.py +37 -1
- synth_ai/sdk/task/client.py +9 -1
- synth_ai/sdk/task/config.py +6 -11
- synth_ai/sdk/task/contracts.py +137 -95
- synth_ai/sdk/task/in_process.py +32 -22
- synth_ai/sdk/task/in_process_runner.py +9 -4
- synth_ai/sdk/task/rubrics/__init__.py +2 -3
- synth_ai/sdk/task/rubrics/loaders.py +4 -4
- synth_ai/sdk/task/rubrics/strict.py +3 -4
- synth_ai/sdk/task/server.py +76 -16
- synth_ai/sdk/task/trace_correlation_helpers.py +190 -139
- synth_ai/sdk/task/validators.py +34 -49
- synth_ai/sdk/training/__init__.py +7 -16
- synth_ai/sdk/tunnels/__init__.py +118 -0
- synth_ai/sdk/tunnels/cleanup.py +83 -0
- synth_ai/sdk/tunnels/ports.py +120 -0
- synth_ai/sdk/tunnels/tunneled_api.py +363 -0
- {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/METADATA +71 -4
- {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/RECORD +118 -128
- synth_ai/cli/commands/baseline/__init__.py +0 -12
- synth_ai/cli/commands/baseline/core.py +0 -636
- synth_ai/cli/commands/baseline/list.py +0 -94
- synth_ai/cli/commands/eval/errors.py +0 -81
- synth_ai/cli/commands/status/formatters.py +0 -164
- synth_ai/cli/commands/status/subcommands/pricing.py +0 -23
- synth_ai/cli/commands/status/subcommands/usage.py +0 -203
- synth_ai/cli/commands/train/judge_validation.py +0 -305
- synth_ai/cli/usage.py +0 -159
- synth_ai/data/specs.py +0 -36
- synth_ai/sdk/api/research_agent/cli.py +0 -428
- synth_ai/sdk/api/research_agent/config.py +0 -357
- synth_ai/sdk/api/research_agent/job.py +0 -717
- synth_ai/sdk/baseline/__init__.py +0 -25
- synth_ai/sdk/baseline/config.py +0 -209
- synth_ai/sdk/baseline/discovery.py +0 -216
- synth_ai/sdk/baseline/execution.py +0 -154
- synth_ai/sdk/judging/__init__.py +0 -15
- synth_ai/sdk/judging/base.py +0 -24
- synth_ai/sdk/judging/client.py +0 -191
- synth_ai/sdk/judging/types.py +0 -42
- synth_ai/sdk/research_agent/__init__.py +0 -34
- synth_ai/sdk/research_agent/container_builder.py +0 -328
- synth_ai/sdk/research_agent/container_spec.py +0 -198
- synth_ai/sdk/research_agent/defaults.py +0 -34
- synth_ai/sdk/research_agent/results_collector.py +0 -69
- synth_ai/sdk/specs/__init__.py +0 -46
- synth_ai/sdk/specs/dataclasses.py +0 -149
- synth_ai/sdk/specs/loader.py +0 -144
- synth_ai/sdk/specs/serializer.py +0 -199
- synth_ai/sdk/specs/validation.py +0 -250
- synth_ai/sdk/tracing/__init__.py +0 -39
- synth_ai/sdk/usage/__init__.py +0 -37
- synth_ai/sdk/usage/client.py +0 -171
- synth_ai/sdk/usage/models.py +0 -261
- {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/WHEEL +0 -0
- {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/top_level.txt +0 -0
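Among the entries above, note the renames `synth_ai/cli/commands/train/{judge_schemas.py → verifier_schemas.py}` and `synth_ai/sdk/{judging/schemas.py → graphs/verifier_schemas.py}`, together with the removal of the whole `synth_ai/sdk/judging/` package: the "judging" vocabulary is replaced by "verifier" throughout. A minimal sketch of the import change this implies for downstream code; the module paths come from the rename entries above, but individual symbol names are not shown in this listing and would need to be checked against the new module:

# synth-ai 0.4.1 (package removed in 0.4.4):
#   from synth_ai.sdk.judging import schemas as judge_schemas
# synth-ai 0.4.4 (path per the rename entry above):
from synth_ai.sdk.graphs import verifier_schemas

The largest single change, however, is the deletion of `synth_ai/sdk/api/research_agent/job.py` (717 lines removed); its full diff follows.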
--- a/synth_ai/sdk/api/research_agent/job.py
+++ /dev/null
@@ -1,717 +0,0 @@
-"""Research Agent Job SDK.
-
-Provides high-level abstractions for running research agent jobs via the Synth API.
-"""
-
-from __future__ import annotations
-
-import os
-import time
-from dataclasses import dataclass, field
-from pathlib import Path
-from typing import Any, Callable, Dict, Iterator, List, Literal, Optional
-
-import httpx
-
-from .config import (
-    DatasetSource,
-    GEPAConfig,
-    MIPROConfig,
-    ModelProvider,
-    OptimizationTool,
-    PermittedModel,
-    PermittedModelsConfig,
-    ReasoningEffort,
-    ResearchConfig,
-)
-
-# Backend type
-BackendType = Literal["daytona", "modal", "docker"]
-
-
-@dataclass
-class ResearchAgentJobConfig:
-    """Configuration for a research agent job.
-
-    Example:
-        >>> config = ResearchAgentJobConfig(
-        ...     research=ResearchConfig(
-        ...         task_description="Optimize prompt for banking classification",
-        ...         tools=[OptimizationTool.MIPRO],
-        ...         datasets=[DatasetSource(source_type="huggingface", hf_repo_id="PolyAI/banking77")],
-        ...     ),
-        ...     repo_url="https://github.com/my-org/my-pipeline",
-        ...     model="gpt-5.1-codex-mini",
-        ...     max_agent_spend_usd=25.0,
-        ... )
-    """
-
-    # Research config (typed)
-    research: ResearchConfig
-
-    # Repository (optional if inline_files provided)
-    repo_url: str = ""
-    repo_branch: str = "main"
-    repo_commit: Optional[str] = None
-
-    # Inline files - alternative to repo_url
-    # Dict of filepath -> content (e.g., {"pipeline.py": "...", "eval.py": "..."})
-    inline_files: Optional[Dict[str, str]] = None
-
-    # Execution
-    backend: BackendType = "daytona"
-    model: str = "gpt-4o"
-    use_synth_proxy: bool = True
-
-    # Spend limits
-    max_agent_spend_usd: float = 10.0
-    """Maximum spend in USD for agent inference and sandbox time. Default: $10."""
-
-    max_synth_spend_usd: float = 100.0
-    """Maximum spend in USD for Synth API calls (experiments, evals). Default: $100."""
-
-    # Reasoning effort (for models that support it)
-    reasoning_effort: Optional[ReasoningEffort] = None
-    """Reasoning effort level: low, medium, high. Only for supported models (o1, o3, gpt-5 family, synth-*)."""
-
-    # API configuration
-    backend_url: str = ""
-    api_key: str = ""
-
-    # Metadata
-    metadata: Dict[str, Any] = field(default_factory=dict)
-
-    def __post_init__(self) -> None:
-        """Validate and resolve defaults."""
-        if not self.backend_url:
-            self.backend_url = os.environ.get(
-                "SYNTH_BACKEND_URL", "https://api.usesynth.ai"
-            )
-        if not self.api_key:
-            self.api_key = os.environ.get("SYNTH_API_KEY", "")
-        if not self.api_key:
-            raise ValueError(
-                "api_key is required (provide explicitly or set SYNTH_API_KEY env var)"
-            )
-        if not self.repo_url and not self.inline_files:
-            raise ValueError("Either repo_url or inline_files must be provided")
-
-    @classmethod
-    def from_toml(cls, config_path: str | Path) -> ResearchAgentJobConfig:
-        """Load configuration from a TOML file.
-
-        Expected TOML structure:
-            [research_agent]
-            repo_url = "https://github.com/your-org/repo"
-            repo_branch = "main"
-            backend = "daytona"
-            model = "gpt-5.1-codex-mini"
-            max_agent_spend_usd = 25.0
-            max_synth_spend_usd = 150.0
-            reasoning_effort = "medium"
-
-            [research_agent.research]
-            task_description = "Optimize prompt for accuracy"
-            tools = ["mipro"]
-            primary_metric = "accuracy"
-            num_iterations = 10
-
-            [[research_agent.research.datasets]]
-            source_type = "huggingface"
-            hf_repo_id = "PolyAI/banking77"
-
-            [research_agent.research.mipro_config]
-            meta_model = "llama-3.3-70b-versatile"
-            num_trials = 15
-        """
-        import tomllib
-
-        path = Path(config_path)
-        if not path.exists():
-            raise FileNotFoundError(f"Config file not found: {path}")
-
-        with open(path, "rb") as f:
-            data = tomllib.load(f)
-
-        ra_config = data.get("research_agent", {})
-        if not ra_config:
-            raise ValueError("Config must have [research_agent] section")
-
-        # Parse research config
-        research_data = ra_config.get("research", {})
-        if not research_data:
-            raise ValueError("research_agent.research config is required")
-
-        research = _parse_research_config(research_data)
-
-        return cls(
-            research=research,
-            repo_url=ra_config.get("repo_url", ""),
-            repo_branch=ra_config.get("repo_branch", "main"),
-            repo_commit=ra_config.get("repo_commit"),
-            inline_files=ra_config.get("inline_files"),
-            backend=ra_config.get("backend", "daytona"),
-            model=ra_config.get("model", "gpt-4o"),
-            use_synth_proxy=ra_config.get("use_synth_proxy", True),
-            max_agent_spend_usd=ra_config.get("max_agent_spend_usd", 10.0),
-            max_synth_spend_usd=ra_config.get("max_synth_spend_usd", 100.0),
-            reasoning_effort=ra_config.get("reasoning_effort"),
-            backend_url=ra_config.get("backend_url", ""),
-            api_key=ra_config.get("api_key", ""),
-            metadata=ra_config.get("metadata", {}),
-        )
-
-
-def _parse_research_config(data: Dict[str, Any]) -> ResearchConfig:
-    """Parse ResearchConfig from dict (e.g., from TOML)."""
-    # Parse tools
-    tools_raw = data.get("tools", ["mipro"])
-    tools = [
-        OptimizationTool(t) if isinstance(t, str) else t
-        for t in tools_raw
-    ]
-
-    # Parse datasets
-    datasets_raw = data.get("datasets", [])
-    datasets = [_parse_dataset_source(d) for d in datasets_raw]
-
-    # Parse permitted_models
-    permitted_models = None
-    if "permitted_models" in data:
-        permitted_models = _parse_permitted_models(data["permitted_models"])
-
-    # Parse GEPA config
-    gepa_config = None
-    if "gepa_config" in data:
-        gepa_config = _parse_gepa_config(data["gepa_config"])
-
-    # Parse MIPRO config
-    mipro_config = None
-    if "mipro_config" in data:
-        mipro_config = _parse_mipro_config(data["mipro_config"])
-
-    return ResearchConfig(
-        task_description=data.get("task_description", ""),
-        tools=tools,
-        datasets=datasets,
-        primary_metric=data.get("primary_metric", "accuracy"),
-        secondary_metrics=data.get("secondary_metrics", []),
-        num_iterations=data.get("num_iterations", 10),
-        population_size=data.get("population_size", 20),
-        timeout_minutes=data.get("timeout_minutes", 60),
-        max_eval_samples=data.get("max_eval_samples"),
-        permitted_models=permitted_models,
-        gepa_config=gepa_config,
-        mipro_config=mipro_config,
-        initial_prompt=data.get("initial_prompt"),
-        pipeline_entrypoint=data.get("pipeline_entrypoint"),
-    )
-
-
-def _parse_dataset_source(data: Dict[str, Any]) -> DatasetSource:
-    """Parse DatasetSource from dict."""
-    return DatasetSource(
-        source_type=data["source_type"],
-        description=data.get("description"),
-        hf_repo_id=data.get("hf_repo_id"),
-        hf_split=data.get("hf_split", "train"),
-        hf_subset=data.get("hf_subset"),
-        file_ids=data.get("file_ids"),
-        inline_data=data.get("inline_data"),
-    )
-
-
-def _parse_permitted_models(data: Dict[str, Any]) -> PermittedModelsConfig:
-    """Parse PermittedModelsConfig from dict."""
-    models_raw = data.get("models", [])
-    models = [
-        PermittedModel(
-            model=m["model"],
-            provider=ModelProvider(m["provider"]) if isinstance(m["provider"], str) else m["provider"],
-        )
-        for m in models_raw
-    ]
-    return PermittedModelsConfig(
-        models=models,
-        default_temperature=data.get("default_temperature", 0.7),
-        default_max_tokens=data.get("default_max_tokens", 4096),
-    )
-
-
-def _parse_gepa_config(data: Dict[str, Any]) -> GEPAConfig:
-    """Parse GEPAConfig from dict."""
-    mutation_provider = data.get("mutation_provider", "groq")
-    if isinstance(mutation_provider, str):
-        mutation_provider = ModelProvider(mutation_provider)
-
-    return GEPAConfig(
-        mutation_model=data.get("mutation_model", "openai/gpt-oss-120b"),
-        mutation_provider=mutation_provider,
-        mutation_temperature=data.get("mutation_temperature", 0.7),
-        mutation_max_tokens=data.get("mutation_max_tokens", 8192),
-        population_size=data.get("population_size", 20),
-        num_generations=data.get("num_generations", 10),
-        elite_fraction=data.get("elite_fraction", 0.2),
-        proposer_type=data.get("proposer_type", "dspy"),
-        proposer_effort=data.get("proposer_effort", "MEDIUM"),
-        proposer_output_tokens=data.get("proposer_output_tokens", "FAST"),
-        spec_path=data.get("spec_path"),
-        train_size=data.get("train_size"),
-        val_size=data.get("val_size"),
-        reference_size=data.get("reference_size"),
-    )
-
-
-def _parse_mipro_config(data: Dict[str, Any]) -> MIPROConfig:
-    """Parse MIPROConfig from dict."""
-    meta_provider = data.get("meta_provider", "groq")
-    if isinstance(meta_provider, str):
-        meta_provider = ModelProvider(meta_provider)
-
-    return MIPROConfig(
-        meta_model=data.get("meta_model", "llama-3.3-70b-versatile"),
-        meta_provider=meta_provider,
-        meta_temperature=data.get("meta_temperature", 0.7),
-        meta_max_tokens=data.get("meta_max_tokens", 4096),
-        num_candidates=data.get("num_candidates", 20),
-        num_trials=data.get("num_trials", 10),
-        proposer_effort=data.get("proposer_effort", "MEDIUM"),
-        proposer_output_tokens=data.get("proposer_output_tokens", "FAST"),
-        train_size=data.get("train_size"),
-        val_size=data.get("val_size"),
-        reference_size=data.get("reference_size"),
-    )
-
-
-@dataclass
-class PollOutcome:
-    """Result of polling a job."""
-
-    status: str
-    data: Dict[str, Any]
-    is_terminal: bool = False
-    error: Optional[str] = None
-
-
-class ResearchAgentJobPoller:
-    """Poller for research agent jobs."""
-
-    def __init__(self, backend_url: str, api_key: str) -> None:
-        self.backend_url = backend_url.rstrip("/")
-        self.api_key = api_key
-
-    def poll(self, job_id: str) -> PollOutcome:
-        """Poll job status."""
-        url = f"{self.backend_url}/api/research-agent/jobs/{job_id}"
-        headers = {"Authorization": f"Bearer {self.api_key}"}
-
-        try:
-            response = httpx.get(url, headers=headers, timeout=30.0)
-            response.raise_for_status()
-            data = response.json()
-
-            status = data.get("status", "unknown")
-            is_terminal = status in ("succeeded", "failed", "canceled")
-
-            return PollOutcome(
-                status=status,
-                data=data,
-                is_terminal=is_terminal,
-                error=data.get("error"),
-            )
-        except httpx.HTTPStatusError as e:
-            return PollOutcome(
-                status="error",
-                data={},
-                is_terminal=False,
-                error=f"HTTP {e.response.status_code}: {e.response.text[:200]}",
-            )
-        except Exception as e:
-            return PollOutcome(
-                status="error",
-                data={},
-                is_terminal=False,
-                error=str(e),
-            )
-
-    def stream_events(
-        self, job_id: str, since_seq: int = 0
-    ) -> Iterator[Dict[str, Any]]:
-        """Stream events from a job."""
-        url = f"{self.backend_url}/api/research-agent/jobs/{job_id}/events"
-        headers = {"Authorization": f"Bearer {self.api_key}"}
-        params = {"since_seq": since_seq}
-
-        try:
-            response = httpx.get(url, headers=headers, params=params, timeout=30.0)
-            response.raise_for_status()
-            events = response.json()
-            yield from events
-        except Exception:
-            pass
-
-
-class ResearchAgentJob:
-    """High-level SDK class for running research agent jobs.
-
-    Research agent jobs use AI to optimize prompts/pipelines using MIPRO or GEPA algorithms.
-
-    Example:
-        >>> from synth_ai.sdk.api.research_agent import (
-        ...     ResearchAgentJob,
-        ...     ResearchAgentJobConfig,
-        ...     ResearchConfig,
-        ...     DatasetSource,
-        ...     OptimizationTool,
-        ... )
-        >>>
-        >>> # Create typed config
-        >>> research_config = ResearchConfig(
-        ...     task_description="Optimize prompt for banking classification",
-        ...     tools=[OptimizationTool.MIPRO],
-        ...     datasets=[
-        ...         DatasetSource(
-        ...             source_type="huggingface",
-        ...             hf_repo_id="PolyAI/banking77",
-        ...         )
-        ...     ],
-        ... )
-        >>>
-        >>> job_config = ResearchAgentJobConfig(
-        ...     research=research_config,
-        ...     repo_url="https://github.com/my-org/my-pipeline",
-        ...     model="gpt-5.1-codex-mini",
-        ...     max_agent_spend_usd=25.0,
-        ... )
-        >>>
-        >>> job = ResearchAgentJob(config=job_config)
-        >>> job_id = job.submit()
-        >>> result = job.poll_until_complete()
-    """
-
-    def __init__(
-        self,
-        config: ResearchAgentJobConfig,
-        job_id: Optional[str] = None,
-    ) -> None:
-        """Initialize a research agent job.
-
-        Args:
-            config: Job configuration
-            job_id: Existing job ID (if resuming)
-        """
-        self.config = config
-        self._job_id = job_id
-        self._poller = ResearchAgentJobPoller(config.backend_url, config.api_key)
-
-    @classmethod
-    def from_config(
-        cls,
-        config_path: str | Path,
-        backend_url: Optional[str] = None,
-        api_key: Optional[str] = None,
-    ) -> ResearchAgentJob:
-        """Create a research agent job from a TOML config file.
-
-        Args:
-            config_path: Path to TOML config file
-            backend_url: Override backend URL (defaults to env or production)
-            api_key: Override API key (defaults to SYNTH_API_KEY env var)
-
-        Returns:
-            ResearchAgentJob instance configured from the file
-
-        Raises:
-            FileNotFoundError: If config file doesn't exist
-            ValueError: If config is invalid or missing required fields
-        """
-        config = ResearchAgentJobConfig.from_toml(config_path)
-
-        if backend_url:
-            config.backend_url = backend_url
-        if api_key:
-            config.api_key = api_key
-
-        return cls(config=config)
-
-    @classmethod
-    def from_id(
-        cls,
-        job_id: str,
-        backend_url: Optional[str] = None,
-        api_key: Optional[str] = None,
-    ) -> ResearchAgentJob:
-        """Resume a job by ID.
-
-        Args:
-            job_id: Existing job ID
-            backend_url: Backend URL (defaults to env)
-            api_key: API key (defaults to env)
-
-        Returns:
-            ResearchAgentJob instance
-        """
-        # Create minimal config for polling
-        # Use a placeholder ResearchConfig since we're just polling
-        research = ResearchConfig(task_description="_placeholder")
-        config = ResearchAgentJobConfig(
-            research=research,
-            inline_files={"_placeholder": ""},
-            backend_url=backend_url or "",
-            api_key=api_key or "",
-        )
-        return cls(config=config, job_id=job_id)
-
-    @classmethod
-    def from_research_config(
-        cls,
-        research: ResearchConfig,
-        repo_url: str = "",
-        repo_branch: str = "main",
-        repo_commit: Optional[str] = None,
-        inline_files: Optional[Dict[str, str]] = None,
-        model: str = "gpt-4o",
-        backend: BackendType = "daytona",
-        max_agent_spend_usd: float = 10.0,
-        max_synth_spend_usd: float = 100.0,
-        reasoning_effort: Optional[ReasoningEffort] = None,
-        backend_url: Optional[str] = None,
-        api_key: Optional[str] = None,
-        use_synth_proxy: bool = True,
-        metadata: Optional[Dict[str, Any]] = None,
-    ) -> ResearchAgentJob:
-        """Create a job from a ResearchConfig.
-
-        This is a convenience method for creating jobs programmatically.
-
-        Args:
-            research: Research configuration
-            repo_url: Git repository URL
-            repo_branch: Branch to clone
-            repo_commit: Specific commit to checkout
-            inline_files: Files to include in workspace
-            model: Model for the agent to use
-            backend: Container backend (daytona, modal, docker)
-            max_agent_spend_usd: Max spend for agent inference
-            max_synth_spend_usd: Max spend for Synth API calls
-            reasoning_effort: Reasoning effort level (low, medium, high)
-            backend_url: Override backend URL
-            api_key: Override API key
-            use_synth_proxy: Route LLM calls through Synth proxy
-            metadata: Additional metadata
-
-        Returns:
-            ResearchAgentJob instance
-        """
-        config = ResearchAgentJobConfig(
-            research=research,
-            repo_url=repo_url,
-            repo_branch=repo_branch,
-            repo_commit=repo_commit,
-            inline_files=inline_files,
-            backend=backend,
-            model=model,
-            use_synth_proxy=use_synth_proxy,
-            max_agent_spend_usd=max_agent_spend_usd,
-            max_synth_spend_usd=max_synth_spend_usd,
-            reasoning_effort=reasoning_effort,
-            backend_url=backend_url or "",
-            api_key=api_key or "",
-            metadata=metadata or {},
-        )
-        return cls(config=config)
-
-    @property
-    def job_id(self) -> Optional[str]:
-        """Get the job ID."""
-        return self._job_id
-
-    def submit(self) -> str:
-        """Submit the job to the backend.
-
-        Returns:
-            Job ID
-
-        Raises:
-            RuntimeError: If submission fails
-            NotImplementedError: If GEPA is requested (not yet supported)
-        """
-        if self._job_id:
-            raise RuntimeError(f"Job already submitted: {self._job_id}")
-
-        # Check for GEPA - not yet fully supported
-        if OptimizationTool.GEPA in self.config.research.tools:
-            raise NotImplementedError(
-                "GEPA optimization is not yet fully supported in the Research Agent SDK. "
-                "Please use MIPRO for now. GEPA support is coming soon."
-            )
-
-        url = f"{self.config.backend_url.rstrip('/')}/api/research-agent/jobs"
-        headers = {
-            "Authorization": f"Bearer {self.config.api_key}",
-            "Content-Type": "application/json",
-        }
-
-        # Build request payload
-        payload: Dict[str, Any] = {
-            "algorithm": "research",
-            "backend": self.config.backend,
-            "model": self.config.model,
-            "use_synth_proxy": self.config.use_synth_proxy,
-            "max_agent_spend_usd": self.config.max_agent_spend_usd,
-            "max_synth_spend_usd": self.config.max_synth_spend_usd,
-            "metadata": self.config.metadata,
-        }
-
-        # Add reasoning_effort if set
-        if self.config.reasoning_effort:
-            payload["reasoning_effort"] = self.config.reasoning_effort
-
-        # Add repo_url if provided
-        if self.config.repo_url:
-            payload["repo_url"] = self.config.repo_url
-            payload["repo_branch"] = self.config.repo_branch
-            if self.config.repo_commit:
-                payload["repo_commit"] = self.config.repo_commit
-
-        # Add inline_files if provided
-        if self.config.inline_files:
-            payload["inline_files"] = self.config.inline_files
-
-        # Add research config
-        payload["research"] = self.config.research.to_dict()
-
-        try:
-            response = httpx.post(url, json=payload, headers=headers, timeout=60.0)
-            response.raise_for_status()
-            data = response.json()
-            self._job_id = data["job_id"]
-            return self._job_id
-        except httpx.HTTPStatusError as e:
-            raise RuntimeError(
-                f"Failed to submit job: HTTP {e.response.status_code} - {e.response.text[:500]}"
-            ) from e
-        except Exception as e:
-            raise RuntimeError(f"Failed to submit job: {e}") from e
-
-    def get_status(self) -> Dict[str, Any]:
-        """Get current job status.
-
-        Returns:
-            Status dict with keys: status, current_iteration, best_metric_value, etc.
-
-        Raises:
-            RuntimeError: If job not submitted
-        """
-        if not self._job_id:
-            raise RuntimeError("Job not submitted yet")
-
-        outcome = self._poller.poll(self._job_id)
-        if outcome.error:
-            raise RuntimeError(f"Failed to get status: {outcome.error}")
-        return outcome.data
-
-    def get_events(self, since_seq: int = 0) -> List[Dict[str, Any]]:
-        """Get job events.
-
-        Args:
-            since_seq: Return events after this sequence number
-
-        Returns:
-            List of event dicts
-        """
-        if not self._job_id:
-            raise RuntimeError("Job not submitted yet")
-
-        return list(self._poller.stream_events(self._job_id, since_seq))
-
-    def poll_until_complete(
-        self,
-        timeout: float = 3600.0,
-        poll_interval: float = 5.0,
-        on_event: Optional[Callable[[Dict[str, Any]], None]] = None,
-    ) -> Dict[str, Any]:
-        """Poll until job completes.
-
-        Args:
-            timeout: Maximum time to wait (seconds)
-            poll_interval: Time between polls (seconds)
-            on_event: Callback for each new event
-
-        Returns:
-            Final job data
-
-        Raises:
-            TimeoutError: If timeout exceeded
-            RuntimeError: If job fails
-        """
-        if not self._job_id:
-            raise RuntimeError("Job not submitted yet")
-
-        start_time = time.time()
-        last_seq = 0
-
-        while True:
-            elapsed = time.time() - start_time
-            if elapsed > timeout:
-                raise TimeoutError(f"Job {self._job_id} timed out after {timeout}s")
-
-            # Get events if callback provided
-            if on_event:
-                for event in self._poller.stream_events(self._job_id, last_seq):
-                    on_event(event)
-                    last_seq = max(last_seq, event.get("seq", 0))
-
-            # Check status
-            outcome = self._poller.poll(self._job_id)
-
-            if outcome.is_terminal:
-                if outcome.status == "failed":
-                    raise RuntimeError(
-                        f"Job {self._job_id} failed: {outcome.error or 'Unknown error'}"
-                    )
-                return outcome.data
-
-            time.sleep(poll_interval)
-
-    def cancel(self) -> bool:
-        """Cancel the job.
-
-        Returns:
-            True if cancellation was requested
-        """
-        if not self._job_id:
-            raise RuntimeError("Job not submitted yet")
-
-        url = f"{self.config.backend_url.rstrip('/')}/api/research-agent/jobs/{self._job_id}/cancel"
-        headers = {"Authorization": f"Bearer {self.config.api_key}"}
-
-        try:
-            response = httpx.post(url, headers=headers, timeout=30.0)
-            response.raise_for_status()
-            return True
-        except Exception:
-            return False
-
-    def get_results(self) -> Dict[str, Any]:
-        """Get job results (when completed).
-
-        Returns:
-            Results dict with metrics, diff, artifacts, etc.
-        """
-        if not self._job_id:
-            raise RuntimeError("Job not submitted yet")
-
-        url = f"{self.config.backend_url.rstrip('/')}/api/research-agent/jobs/{self._job_id}/results"
-        headers = {"Authorization": f"Bearer {self.config.api_key}"}
-
-        try:
-            response = httpx.get(url, headers=headers, timeout=60.0)
-            response.raise_for_status()
-            return response.json()
-        except httpx.HTTPStatusError as e:
-            raise RuntimeError(
-                f"Failed to get results: HTTP {e.response.status_code}"
-            ) from e
-        except Exception as e:
-            raise RuntimeError(f"Failed to get results: {e}") from e