synth-ai 0.4.1__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic; see the release notes for more details.

Files changed (153)
  1. synth_ai/__init__.py +13 -13
  2. synth_ai/cli/__init__.py +6 -15
  3. synth_ai/cli/commands/eval/__init__.py +6 -15
  4. synth_ai/cli/commands/eval/config.py +338 -0
  5. synth_ai/cli/commands/eval/core.py +236 -1091
  6. synth_ai/cli/commands/eval/runner.py +704 -0
  7. synth_ai/cli/commands/eval/validation.py +44 -117
  8. synth_ai/cli/commands/filter/core.py +7 -7
  9. synth_ai/cli/commands/filter/validation.py +2 -2
  10. synth_ai/cli/commands/smoke/core.py +7 -17
  11. synth_ai/cli/commands/status/__init__.py +1 -64
  12. synth_ai/cli/commands/status/client.py +50 -151
  13. synth_ai/cli/commands/status/config.py +3 -83
  14. synth_ai/cli/commands/status/errors.py +4 -13
  15. synth_ai/cli/commands/status/subcommands/__init__.py +2 -8
  16. synth_ai/cli/commands/status/subcommands/config.py +13 -0
  17. synth_ai/cli/commands/status/subcommands/files.py +18 -63
  18. synth_ai/cli/commands/status/subcommands/jobs.py +28 -311
  19. synth_ai/cli/commands/status/subcommands/models.py +18 -62
  20. synth_ai/cli/commands/status/subcommands/runs.py +16 -63
  21. synth_ai/cli/commands/status/subcommands/session.py +67 -172
  22. synth_ai/cli/commands/status/subcommands/summary.py +24 -32
  23. synth_ai/cli/commands/status/subcommands/utils.py +41 -0
  24. synth_ai/cli/commands/status/utils.py +16 -107
  25. synth_ai/cli/commands/train/__init__.py +18 -20
  26. synth_ai/cli/commands/train/errors.py +3 -3
  27. synth_ai/cli/commands/train/prompt_learning_validation.py +15 -16
  28. synth_ai/cli/commands/train/validation.py +7 -7
  29. synth_ai/cli/commands/train/{judge_schemas.py → verifier_schemas.py} +33 -34
  30. synth_ai/cli/commands/train/verifier_validation.py +235 -0
  31. synth_ai/cli/demo_apps/demo_task_apps/math/config.toml +0 -1
  32. synth_ai/cli/demo_apps/demo_task_apps/math/modal_task_app.py +2 -6
  33. synth_ai/cli/demo_apps/math/config.toml +0 -1
  34. synth_ai/cli/demo_apps/math/modal_task_app.py +2 -6
  35. synth_ai/cli/demo_apps/mipro/task_app.py +25 -47
  36. synth_ai/cli/lib/apps/task_app.py +12 -13
  37. synth_ai/cli/lib/task_app_discovery.py +6 -6
  38. synth_ai/cli/lib/train_cfgs.py +10 -10
  39. synth_ai/cli/task_apps/__init__.py +11 -0
  40. synth_ai/cli/task_apps/commands.py +7 -15
  41. synth_ai/core/env.py +12 -1
  42. synth_ai/core/errors.py +1 -2
  43. synth_ai/core/integrations/cloudflare.py +209 -33
  44. synth_ai/core/tracing_v3/abstractions.py +46 -0
  45. synth_ai/data/__init__.py +3 -30
  46. synth_ai/data/enums.py +1 -20
  47. synth_ai/data/rewards.py +100 -3
  48. synth_ai/products/graph_evolve/__init__.py +1 -2
  49. synth_ai/products/graph_evolve/config.py +16 -16
  50. synth_ai/products/graph_evolve/converters/__init__.py +3 -3
  51. synth_ai/products/graph_evolve/converters/openai_sft.py +7 -7
  52. synth_ai/products/graph_evolve/examples/hotpotqa/config.toml +1 -1
  53. synth_ai/products/graph_gepa/__init__.py +23 -0
  54. synth_ai/products/graph_gepa/converters/__init__.py +19 -0
  55. synth_ai/products/graph_gepa/converters/openai_sft.py +29 -0
  56. synth_ai/sdk/__init__.py +45 -35
  57. synth_ai/sdk/api/eval/__init__.py +33 -0
  58. synth_ai/sdk/api/eval/job.py +732 -0
  59. synth_ai/sdk/api/research_agent/__init__.py +276 -66
  60. synth_ai/sdk/api/train/builders.py +181 -0
  61. synth_ai/sdk/api/train/cli.py +41 -33
  62. synth_ai/sdk/api/train/configs/__init__.py +6 -4
  63. synth_ai/sdk/api/train/configs/prompt_learning.py +127 -33
  64. synth_ai/sdk/api/train/configs/rl.py +264 -16
  65. synth_ai/sdk/api/train/configs/sft.py +165 -1
  66. synth_ai/sdk/api/train/graph_validators.py +12 -12
  67. synth_ai/sdk/api/train/graphgen.py +169 -51
  68. synth_ai/sdk/api/train/graphgen_models.py +95 -45
  69. synth_ai/sdk/api/train/local_api.py +10 -0
  70. synth_ai/sdk/api/train/pollers.py +36 -0
  71. synth_ai/sdk/api/train/prompt_learning.py +390 -60
  72. synth_ai/sdk/api/train/rl.py +41 -5
  73. synth_ai/sdk/api/train/sft.py +2 -0
  74. synth_ai/sdk/api/train/task_app.py +20 -0
  75. synth_ai/sdk/api/train/validators.py +17 -17
  76. synth_ai/sdk/graphs/completions.py +239 -33
  77. synth_ai/sdk/{judging/schemas.py → graphs/verifier_schemas.py} +23 -23
  78. synth_ai/sdk/learning/__init__.py +35 -5
  79. synth_ai/sdk/learning/context_learning_client.py +531 -0
  80. synth_ai/sdk/learning/context_learning_types.py +294 -0
  81. synth_ai/sdk/learning/prompt_learning_client.py +1 -1
  82. synth_ai/sdk/learning/prompt_learning_types.py +2 -1
  83. synth_ai/sdk/learning/rl/__init__.py +0 -4
  84. synth_ai/sdk/learning/rl/contracts.py +0 -4
  85. synth_ai/sdk/localapi/__init__.py +40 -0
  86. synth_ai/sdk/localapi/apps/__init__.py +28 -0
  87. synth_ai/sdk/localapi/client.py +10 -0
  88. synth_ai/sdk/localapi/contracts.py +10 -0
  89. synth_ai/sdk/localapi/helpers.py +519 -0
  90. synth_ai/sdk/localapi/rollouts.py +93 -0
  91. synth_ai/sdk/localapi/server.py +29 -0
  92. synth_ai/sdk/localapi/template.py +49 -0
  93. synth_ai/sdk/streaming/handlers.py +6 -6
  94. synth_ai/sdk/streaming/streamer.py +10 -6
  95. synth_ai/sdk/task/__init__.py +18 -5
  96. synth_ai/sdk/task/apps/__init__.py +37 -1
  97. synth_ai/sdk/task/client.py +9 -1
  98. synth_ai/sdk/task/config.py +6 -11
  99. synth_ai/sdk/task/contracts.py +137 -95
  100. synth_ai/sdk/task/in_process.py +32 -22
  101. synth_ai/sdk/task/in_process_runner.py +9 -4
  102. synth_ai/sdk/task/rubrics/__init__.py +2 -3
  103. synth_ai/sdk/task/rubrics/loaders.py +4 -4
  104. synth_ai/sdk/task/rubrics/strict.py +3 -4
  105. synth_ai/sdk/task/server.py +76 -16
  106. synth_ai/sdk/task/trace_correlation_helpers.py +190 -139
  107. synth_ai/sdk/task/validators.py +34 -49
  108. synth_ai/sdk/training/__init__.py +7 -16
  109. synth_ai/sdk/tunnels/__init__.py +118 -0
  110. synth_ai/sdk/tunnels/cleanup.py +83 -0
  111. synth_ai/sdk/tunnels/ports.py +120 -0
  112. synth_ai/sdk/tunnels/tunneled_api.py +363 -0
  113. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/METADATA +71 -4
  114. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/RECORD +118 -128
  115. synth_ai/cli/commands/baseline/__init__.py +0 -12
  116. synth_ai/cli/commands/baseline/core.py +0 -636
  117. synth_ai/cli/commands/baseline/list.py +0 -94
  118. synth_ai/cli/commands/eval/errors.py +0 -81
  119. synth_ai/cli/commands/status/formatters.py +0 -164
  120. synth_ai/cli/commands/status/subcommands/pricing.py +0 -23
  121. synth_ai/cli/commands/status/subcommands/usage.py +0 -203
  122. synth_ai/cli/commands/train/judge_validation.py +0 -305
  123. synth_ai/cli/usage.py +0 -159
  124. synth_ai/data/specs.py +0 -36
  125. synth_ai/sdk/api/research_agent/cli.py +0 -428
  126. synth_ai/sdk/api/research_agent/config.py +0 -357
  127. synth_ai/sdk/api/research_agent/job.py +0 -717
  128. synth_ai/sdk/baseline/__init__.py +0 -25
  129. synth_ai/sdk/baseline/config.py +0 -209
  130. synth_ai/sdk/baseline/discovery.py +0 -216
  131. synth_ai/sdk/baseline/execution.py +0 -154
  132. synth_ai/sdk/judging/__init__.py +0 -15
  133. synth_ai/sdk/judging/base.py +0 -24
  134. synth_ai/sdk/judging/client.py +0 -191
  135. synth_ai/sdk/judging/types.py +0 -42
  136. synth_ai/sdk/research_agent/__init__.py +0 -34
  137. synth_ai/sdk/research_agent/container_builder.py +0 -328
  138. synth_ai/sdk/research_agent/container_spec.py +0 -198
  139. synth_ai/sdk/research_agent/defaults.py +0 -34
  140. synth_ai/sdk/research_agent/results_collector.py +0 -69
  141. synth_ai/sdk/specs/__init__.py +0 -46
  142. synth_ai/sdk/specs/dataclasses.py +0 -149
  143. synth_ai/sdk/specs/loader.py +0 -144
  144. synth_ai/sdk/specs/serializer.py +0 -199
  145. synth_ai/sdk/specs/validation.py +0 -250
  146. synth_ai/sdk/tracing/__init__.py +0 -39
  147. synth_ai/sdk/usage/__init__.py +0 -37
  148. synth_ai/sdk/usage/client.py +0 -171
  149. synth_ai/sdk/usage/models.py +0 -261
  150. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/WHEEL +0 -0
  151. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/entry_points.txt +0 -0
  152. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/licenses/LICENSE +0 -0
  153. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/top_level.txt +0 -0
@@ -1,717 +0,0 @@
1
- """Research Agent Job SDK.
2
-
3
- Provides high-level abstractions for running research agent jobs via the Synth API.
4
- """
5
-
6
- from __future__ import annotations
7
-
8
- import os
9
- import time
10
- from dataclasses import dataclass, field
11
- from pathlib import Path
12
- from typing import Any, Callable, Dict, Iterator, List, Literal, Optional
13
-
14
- import httpx
15
-
16
- from .config import (
17
- DatasetSource,
18
- GEPAConfig,
19
- MIPROConfig,
20
- ModelProvider,
21
- OptimizationTool,
22
- PermittedModel,
23
- PermittedModelsConfig,
24
- ReasoningEffort,
25
- ResearchConfig,
26
- )
27
-
28
- # Backend type
29
- BackendType = Literal["daytona", "modal", "docker"]
30
-
31
-
32
@dataclass
class ResearchAgentJobConfig:
    """Configuration for a research agent job.

    Holds the typed research spec plus execution, spend-limit, and API
    settings. ``__post_init__`` resolves ``backend_url``/``api_key`` from the
    environment when not given explicitly.

    Example:
        >>> config = ResearchAgentJobConfig(
        ...     research=ResearchConfig(
        ...         task_description="Optimize prompt for banking classification",
        ...         tools=[OptimizationTool.MIPRO],
        ...         datasets=[DatasetSource(source_type="huggingface", hf_repo_id="PolyAI/banking77")],
        ...     ),
        ...     repo_url="https://github.com/my-org/my-pipeline",
        ...     model="gpt-5.1-codex-mini",
        ...     max_agent_spend_usd=25.0,
        ... )
    """

    # Research config (typed)
    research: ResearchConfig

    # Repository (optional if inline_files provided)
    repo_url: str = ""
    repo_branch: str = "main"
    repo_commit: Optional[str] = None

    # Inline files - alternative to repo_url
    # Dict of filepath -> content (e.g., {"pipeline.py": "...", "eval.py": "..."})
    inline_files: Optional[Dict[str, str]] = None

    # Execution
    backend: BackendType = "daytona"
    model: str = "gpt-4o"
    use_synth_proxy: bool = True  # route the agent's LLM calls through the Synth proxy

    # Spend limits
    max_agent_spend_usd: float = 10.0
    """Maximum spend in USD for agent inference and sandbox time. Default: $10."""

    max_synth_spend_usd: float = 100.0
    """Maximum spend in USD for Synth API calls (experiments, evals). Default: $100."""

    # Reasoning effort (for models that support it)
    reasoning_effort: Optional[ReasoningEffort] = None
    """Reasoning effort level: low, medium, high. Only for supported models (o1, o3, gpt-5 family, synth-*)."""

    # API configuration
    backend_url: str = ""
    api_key: str = ""

    # Metadata
    metadata: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self) -> None:
        """Validate and resolve defaults.

        Resolves ``backend_url`` from SYNTH_BACKEND_URL (falling back to the
        production URL) and ``api_key`` from SYNTH_API_KEY when not supplied.

        Raises:
            ValueError: If no API key is available, or if neither ``repo_url``
                nor ``inline_files`` was provided.
        """
        if not self.backend_url:
            self.backend_url = os.environ.get(
                "SYNTH_BACKEND_URL", "https://api.usesynth.ai"
            )
        if not self.api_key:
            self.api_key = os.environ.get("SYNTH_API_KEY", "")
        if not self.api_key:
            raise ValueError(
                "api_key is required (provide explicitly or set SYNTH_API_KEY env var)"
            )
        # A job needs some code source: either a git repo or inline workspace files.
        if not self.repo_url and not self.inline_files:
            raise ValueError("Either repo_url or inline_files must be provided")

    @classmethod
    def from_toml(cls, config_path: str | Path) -> ResearchAgentJobConfig:
        """Load configuration from a TOML file.

        Expected TOML structure:
            [research_agent]
            repo_url = "https://github.com/your-org/repo"
            repo_branch = "main"
            backend = "daytona"
            model = "gpt-5.1-codex-mini"
            max_agent_spend_usd = 25.0
            max_synth_spend_usd = 150.0
            reasoning_effort = "medium"

            [research_agent.research]
            task_description = "Optimize prompt for accuracy"
            tools = ["mipro"]
            primary_metric = "accuracy"
            num_iterations = 10

            [[research_agent.research.datasets]]
            source_type = "huggingface"
            hf_repo_id = "PolyAI/banking77"

            [research_agent.research.mipro_config]
            meta_model = "llama-3.3-70b-versatile"
            num_trials = 15

        Raises:
            FileNotFoundError: If ``config_path`` does not exist.
            ValueError: If the [research_agent] section or its nested
                ``research`` table is missing.
        """
        # tomllib requires Python 3.11+; imported lazily to keep module import cheap.
        import tomllib

        path = Path(config_path)
        if not path.exists():
            raise FileNotFoundError(f"Config file not found: {path}")

        with open(path, "rb") as f:
            data = tomllib.load(f)

        ra_config = data.get("research_agent", {})
        if not ra_config:
            raise ValueError("Config must have [research_agent] section")

        # Parse research config
        research_data = ra_config.get("research", {})
        if not research_data:
            raise ValueError("research_agent.research config is required")

        research = _parse_research_config(research_data)

        # NOTE(review): reasoning_effort is passed through as the raw TOML string;
        # assumes ReasoningEffort accepts plain strings — TODO confirm in .config.
        return cls(
            research=research,
            repo_url=ra_config.get("repo_url", ""),
            repo_branch=ra_config.get("repo_branch", "main"),
            repo_commit=ra_config.get("repo_commit"),
            inline_files=ra_config.get("inline_files"),
            backend=ra_config.get("backend", "daytona"),
            model=ra_config.get("model", "gpt-4o"),
            use_synth_proxy=ra_config.get("use_synth_proxy", True),
            max_agent_spend_usd=ra_config.get("max_agent_spend_usd", 10.0),
            max_synth_spend_usd=ra_config.get("max_synth_spend_usd", 100.0),
            reasoning_effort=ra_config.get("reasoning_effort"),
            backend_url=ra_config.get("backend_url", ""),
            api_key=ra_config.get("api_key", ""),
            metadata=ra_config.get("metadata", {}),
        )
163
-
164
-
165
def _parse_research_config(data: Dict[str, Any]) -> ResearchConfig:
    """Build a :class:`ResearchConfig` from a plain dict (e.g. decoded TOML)."""
    # Coerce tool names to enum members; already-typed entries pass through.
    tools = [
        OptimizationTool(entry) if isinstance(entry, str) else entry
        for entry in data.get("tools", ["mipro"])
    ]

    datasets = [_parse_dataset_source(item) for item in data.get("datasets", [])]

    # Optional sub-sections: parsed only when present, otherwise None.
    permitted = (
        _parse_permitted_models(data["permitted_models"])
        if "permitted_models" in data
        else None
    )
    gepa = _parse_gepa_config(data["gepa_config"]) if "gepa_config" in data else None
    mipro = _parse_mipro_config(data["mipro_config"]) if "mipro_config" in data else None

    return ResearchConfig(
        task_description=data.get("task_description", ""),
        tools=tools,
        datasets=datasets,
        primary_metric=data.get("primary_metric", "accuracy"),
        secondary_metrics=data.get("secondary_metrics", []),
        num_iterations=data.get("num_iterations", 10),
        population_size=data.get("population_size", 20),
        timeout_minutes=data.get("timeout_minutes", 60),
        max_eval_samples=data.get("max_eval_samples"),
        permitted_models=permitted,
        gepa_config=gepa,
        mipro_config=mipro,
        initial_prompt=data.get("initial_prompt"),
        pipeline_entrypoint=data.get("pipeline_entrypoint"),
    )
209
-
210
-
211
def _parse_dataset_source(data: Dict[str, Any]) -> DatasetSource:
    """Convert one dataset entry (dict) into a :class:`DatasetSource`.

    ``source_type`` is required; everything else is optional with the same
    defaults the DatasetSource dataclass uses.
    """
    lookup = data.get
    return DatasetSource(
        source_type=data["source_type"],
        description=lookup("description"),
        hf_repo_id=lookup("hf_repo_id"),
        hf_split=lookup("hf_split", "train"),
        hf_subset=lookup("hf_subset"),
        file_ids=lookup("file_ids"),
        inline_data=lookup("inline_data"),
    )
222
-
223
-
224
def _parse_permitted_models(data: Dict[str, Any]) -> PermittedModelsConfig:
    """Convert the permitted-models section into a :class:`PermittedModelsConfig`."""
    models = []
    for entry in data.get("models", []):
        # Provider may arrive as a raw string (TOML) or an already-typed enum.
        provider = entry["provider"]
        if isinstance(provider, str):
            provider = ModelProvider(provider)
        models.append(PermittedModel(model=entry["model"], provider=provider))

    return PermittedModelsConfig(
        models=models,
        default_temperature=data.get("default_temperature", 0.7),
        default_max_tokens=data.get("default_max_tokens", 4096),
    )
239
-
240
-
241
def _parse_gepa_config(data: Dict[str, Any]) -> GEPAConfig:
    """Convert the ``gepa_config`` section into a :class:`GEPAConfig`."""
    raw_provider = data.get("mutation_provider", "groq")
    # String values (from TOML) are coerced; enum values pass through untouched.
    provider = (
        ModelProvider(raw_provider) if isinstance(raw_provider, str) else raw_provider
    )

    return GEPAConfig(
        mutation_model=data.get("mutation_model", "openai/gpt-oss-120b"),
        mutation_provider=provider,
        mutation_temperature=data.get("mutation_temperature", 0.7),
        mutation_max_tokens=data.get("mutation_max_tokens", 8192),
        population_size=data.get("population_size", 20),
        num_generations=data.get("num_generations", 10),
        elite_fraction=data.get("elite_fraction", 0.2),
        proposer_type=data.get("proposer_type", "dspy"),
        proposer_effort=data.get("proposer_effort", "MEDIUM"),
        proposer_output_tokens=data.get("proposer_output_tokens", "FAST"),
        spec_path=data.get("spec_path"),
        train_size=data.get("train_size"),
        val_size=data.get("val_size"),
        reference_size=data.get("reference_size"),
    )
263
-
264
-
265
def _parse_mipro_config(data: Dict[str, Any]) -> MIPROConfig:
    """Convert the ``mipro_config`` section into a :class:`MIPROConfig`."""
    raw_provider = data.get("meta_provider", "groq")
    # String values (from TOML) are coerced; enum values pass through untouched.
    provider = (
        ModelProvider(raw_provider) if isinstance(raw_provider, str) else raw_provider
    )

    return MIPROConfig(
        meta_model=data.get("meta_model", "llama-3.3-70b-versatile"),
        meta_provider=provider,
        meta_temperature=data.get("meta_temperature", 0.7),
        meta_max_tokens=data.get("meta_max_tokens", 4096),
        num_candidates=data.get("num_candidates", 20),
        num_trials=data.get("num_trials", 10),
        proposer_effort=data.get("proposer_effort", "MEDIUM"),
        proposer_output_tokens=data.get("proposer_output_tokens", "FAST"),
        train_size=data.get("train_size"),
        val_size=data.get("val_size"),
        reference_size=data.get("reference_size"),
    )
284
-
285
-
286
@dataclass
class PollOutcome:
    """Result of polling a job.

    Returned by ResearchAgentJobPoller.poll; network/HTTP failures are
    reported here (status="error") rather than raised.
    """

    status: str  # backend job status, or "error" when the poll itself failed
    data: Dict[str, Any]  # raw response body; empty dict on poll failure
    is_terminal: bool = False  # True for succeeded/failed/canceled
    error: Optional[str] = None  # backend error message, or local poll-failure text
294
-
295
-
296
class ResearchAgentJobPoller:
    """HTTP poller for research agent jobs.

    Wraps the status and events endpoints. :meth:`poll` never raises — any
    failure is reported inside the returned :class:`PollOutcome`.
    """

    def __init__(self, backend_url: str, api_key: str) -> None:
        self.backend_url = backend_url.rstrip("/")
        self.api_key = api_key

    def poll(self, job_id: str) -> PollOutcome:
        """Fetch the current status of ``job_id`` as a :class:`PollOutcome`."""
        url = f"{self.backend_url}/api/research-agent/jobs/{job_id}"
        headers = {"Authorization": f"Bearer {self.api_key}"}

        try:
            resp = httpx.get(url, headers=headers, timeout=30.0)
            resp.raise_for_status()
            payload = resp.json()
            state = payload.get("status", "unknown")
            # Terminal states stop callers' polling loops.
            return PollOutcome(
                status=state,
                data=payload,
                is_terminal=state in ("succeeded", "failed", "canceled"),
                error=payload.get("error"),
            )
        except httpx.HTTPStatusError as exc:
            return PollOutcome(
                status="error",
                data={},
                is_terminal=False,
                error=f"HTTP {exc.response.status_code}: {exc.response.text[:200]}",
            )
        except Exception as exc:
            return PollOutcome(
                status="error",
                data={},
                is_terminal=False,
                error=str(exc),
            )

    def stream_events(
        self, job_id: str, since_seq: int = 0
    ) -> Iterator[Dict[str, Any]]:
        """Yield events for ``job_id`` after sequence number ``since_seq``.

        Best-effort: any network or decode failure yields nothing.
        """
        url = f"{self.backend_url}/api/research-agent/jobs/{job_id}/events"
        headers = {"Authorization": f"Bearer {self.api_key}"}

        try:
            resp = httpx.get(
                url, headers=headers, params={"since_seq": since_seq}, timeout=30.0
            )
            resp.raise_for_status()
            yield from resp.json()
        except Exception:
            pass
352
-
353
-
354
class ResearchAgentJob:
    """High-level SDK class for running research agent jobs.

    Research agent jobs use AI to optimize prompts/pipelines using MIPRO or GEPA algorithms.

    Example:
        >>> from synth_ai.sdk.api.research_agent import (
        ...     ResearchAgentJob,
        ...     ResearchAgentJobConfig,
        ...     ResearchConfig,
        ...     DatasetSource,
        ...     OptimizationTool,
        ... )
        >>>
        >>> # Create typed config
        >>> research_config = ResearchConfig(
        ...     task_description="Optimize prompt for banking classification",
        ...     tools=[OptimizationTool.MIPRO],
        ...     datasets=[
        ...         DatasetSource(
        ...             source_type="huggingface",
        ...             hf_repo_id="PolyAI/banking77",
        ...         )
        ...     ],
        ... )
        >>>
        >>> job_config = ResearchAgentJobConfig(
        ...     research=research_config,
        ...     repo_url="https://github.com/my-org/my-pipeline",
        ...     model="gpt-5.1-codex-mini",
        ...     max_agent_spend_usd=25.0,
        ... )
        >>>
        >>> job = ResearchAgentJob(config=job_config)
        >>> job_id = job.submit()
        >>> result = job.poll_until_complete()
    """

    def __init__(
        self,
        config: ResearchAgentJobConfig,
        job_id: Optional[str] = None,
    ) -> None:
        """Initialize a research agent job.

        Args:
            config: Job configuration
            job_id: Existing job ID (if resuming)
        """
        self.config = config
        self._job_id = job_id
        self._poller = ResearchAgentJobPoller(config.backend_url, config.api_key)

    @classmethod
    def from_config(
        cls,
        config_path: str | Path,
        backend_url: Optional[str] = None,
        api_key: Optional[str] = None,
    ) -> ResearchAgentJob:
        """Create a research agent job from a TOML config file.

        Args:
            config_path: Path to TOML config file
            backend_url: Override backend URL (defaults to env or production)
            api_key: Override API key (defaults to SYNTH_API_KEY env var)

        Returns:
            ResearchAgentJob instance configured from the file

        Raises:
            FileNotFoundError: If config file doesn't exist
            ValueError: If config is invalid or missing required fields
        """
        config = ResearchAgentJobConfig.from_toml(config_path)

        # Explicit arguments win over anything resolved from the TOML/env.
        if backend_url:
            config.backend_url = backend_url
        if api_key:
            config.api_key = api_key

        return cls(config=config)

    @classmethod
    def from_id(
        cls,
        job_id: str,
        backend_url: Optional[str] = None,
        api_key: Optional[str] = None,
    ) -> ResearchAgentJob:
        """Resume a job by ID.

        Args:
            job_id: Existing job ID
            backend_url: Backend URL (defaults to env)
            api_key: API key (defaults to env)

        Returns:
            ResearchAgentJob instance
        """
        # Create minimal config for polling
        # Use a placeholder ResearchConfig since we're just polling;
        # the placeholder inline_files entry satisfies __post_init__ validation.
        research = ResearchConfig(task_description="_placeholder")
        config = ResearchAgentJobConfig(
            research=research,
            inline_files={"_placeholder": ""},
            backend_url=backend_url or "",
            api_key=api_key or "",
        )
        return cls(config=config, job_id=job_id)

    @classmethod
    def from_research_config(
        cls,
        research: ResearchConfig,
        repo_url: str = "",
        repo_branch: str = "main",
        repo_commit: Optional[str] = None,
        inline_files: Optional[Dict[str, str]] = None,
        model: str = "gpt-4o",
        backend: BackendType = "daytona",
        max_agent_spend_usd: float = 10.0,
        max_synth_spend_usd: float = 100.0,
        reasoning_effort: Optional[ReasoningEffort] = None,
        backend_url: Optional[str] = None,
        api_key: Optional[str] = None,
        use_synth_proxy: bool = True,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> ResearchAgentJob:
        """Create a job from a ResearchConfig.

        This is a convenience method for creating jobs programmatically.

        Args:
            research: Research configuration
            repo_url: Git repository URL
            repo_branch: Branch to clone
            repo_commit: Specific commit to checkout
            inline_files: Files to include in workspace
            model: Model for the agent to use
            backend: Container backend (daytona, modal, docker)
            max_agent_spend_usd: Max spend for agent inference
            max_synth_spend_usd: Max spend for Synth API calls
            reasoning_effort: Reasoning effort level (low, medium, high)
            backend_url: Override backend URL
            api_key: Override API key
            use_synth_proxy: Route LLM calls through Synth proxy
            metadata: Additional metadata

        Returns:
            ResearchAgentJob instance
        """
        config = ResearchAgentJobConfig(
            research=research,
            repo_url=repo_url,
            repo_branch=repo_branch,
            repo_commit=repo_commit,
            inline_files=inline_files,
            backend=backend,
            model=model,
            use_synth_proxy=use_synth_proxy,
            max_agent_spend_usd=max_agent_spend_usd,
            max_synth_spend_usd=max_synth_spend_usd,
            reasoning_effort=reasoning_effort,
            backend_url=backend_url or "",
            api_key=api_key or "",
            metadata=metadata or {},
        )
        return cls(config=config)

    @property
    def job_id(self) -> Optional[str]:
        """Get the job ID (None until :meth:`submit` succeeds)."""
        return self._job_id

    def submit(self) -> str:
        """Submit the job to the backend.

        Returns:
            Job ID

        Raises:
            RuntimeError: If submission fails
            NotImplementedError: If GEPA is requested (not yet supported)
        """
        # Guard against double-submission: a job object maps to one backend job.
        if self._job_id:
            raise RuntimeError(f"Job already submitted: {self._job_id}")

        # Check for GEPA - not yet fully supported
        if OptimizationTool.GEPA in self.config.research.tools:
            raise NotImplementedError(
                "GEPA optimization is not yet fully supported in the Research Agent SDK. "
                "Please use MIPRO for now. GEPA support is coming soon."
            )

        url = f"{self.config.backend_url.rstrip('/')}/api/research-agent/jobs"
        headers = {
            "Authorization": f"Bearer {self.config.api_key}",
            "Content-Type": "application/json",
        }

        # Build request payload
        payload: Dict[str, Any] = {
            "algorithm": "research",
            "backend": self.config.backend,
            "model": self.config.model,
            "use_synth_proxy": self.config.use_synth_proxy,
            "max_agent_spend_usd": self.config.max_agent_spend_usd,
            "max_synth_spend_usd": self.config.max_synth_spend_usd,
            "metadata": self.config.metadata,
        }

        # Add reasoning_effort if set
        if self.config.reasoning_effort:
            payload["reasoning_effort"] = self.config.reasoning_effort

        # Add repo_url if provided
        if self.config.repo_url:
            payload["repo_url"] = self.config.repo_url
            payload["repo_branch"] = self.config.repo_branch
            if self.config.repo_commit:
                payload["repo_commit"] = self.config.repo_commit

        # Add inline_files if provided
        if self.config.inline_files:
            payload["inline_files"] = self.config.inline_files

        # Add research config
        payload["research"] = self.config.research.to_dict()

        try:
            response = httpx.post(url, json=payload, headers=headers, timeout=60.0)
            response.raise_for_status()
            data = response.json()
            self._job_id = data["job_id"]
            return self._job_id
        except httpx.HTTPStatusError as e:
            raise RuntimeError(
                f"Failed to submit job: HTTP {e.response.status_code} - {e.response.text[:500]}"
            ) from e
        except Exception as e:
            raise RuntimeError(f"Failed to submit job: {e}") from e

    def get_status(self) -> Dict[str, Any]:
        """Get current job status.

        Returns:
            Status dict with keys: status, current_iteration, best_metric_value, etc.

        Raises:
            RuntimeError: If job not submitted
        """
        if not self._job_id:
            raise RuntimeError("Job not submitted yet")

        outcome = self._poller.poll(self._job_id)
        # The poller never raises; surface its error field as an exception here.
        if outcome.error:
            raise RuntimeError(f"Failed to get status: {outcome.error}")
        return outcome.data

    def get_events(self, since_seq: int = 0) -> List[Dict[str, Any]]:
        """Get job events.

        Args:
            since_seq: Return events after this sequence number

        Returns:
            List of event dicts

        Raises:
            RuntimeError: If job not submitted
        """
        if not self._job_id:
            raise RuntimeError("Job not submitted yet")

        return list(self._poller.stream_events(self._job_id, since_seq))

    def poll_until_complete(
        self,
        timeout: float = 3600.0,
        poll_interval: float = 5.0,
        on_event: Optional[Callable[[Dict[str, Any]], None]] = None,
    ) -> Dict[str, Any]:
        """Poll until job completes.

        Note: "succeeded" and "canceled" both return normally; only "failed"
        raises.

        Args:
            timeout: Maximum time to wait (seconds)
            poll_interval: Time between polls (seconds)
            on_event: Callback for each new event

        Returns:
            Final job data

        Raises:
            TimeoutError: If timeout exceeded
            RuntimeError: If job fails
        """
        if not self._job_id:
            raise RuntimeError("Job not submitted yet")

        start_time = time.time()
        last_seq = 0

        while True:
            elapsed = time.time() - start_time
            if elapsed > timeout:
                raise TimeoutError(f"Job {self._job_id} timed out after {timeout}s")

            # Get events if callback provided
            if on_event:
                for event in self._poller.stream_events(self._job_id, last_seq):
                    on_event(event)
                    # Track the highest sequence seen so events are not re-delivered.
                    last_seq = max(last_seq, event.get("seq", 0))

            # Check status
            outcome = self._poller.poll(self._job_id)

            if outcome.is_terminal:
                if outcome.status == "failed":
                    raise RuntimeError(
                        f"Job {self._job_id} failed: {outcome.error or 'Unknown error'}"
                    )
                return outcome.data

            time.sleep(poll_interval)

    def cancel(self) -> bool:
        """Cancel the job.

        Best-effort: returns False instead of raising when the request fails.

        Returns:
            True if cancellation was requested

        Raises:
            RuntimeError: If job not submitted yet
        """
        if not self._job_id:
            raise RuntimeError("Job not submitted yet")

        url = f"{self.config.backend_url.rstrip('/')}/api/research-agent/jobs/{self._job_id}/cancel"
        headers = {"Authorization": f"Bearer {self.config.api_key}"}

        try:
            response = httpx.post(url, headers=headers, timeout=30.0)
            response.raise_for_status()
            return True
        except Exception:
            return False

    def get_results(self) -> Dict[str, Any]:
        """Get job results (when completed).

        Returns:
            Results dict with metrics, diff, artifacts, etc.

        Raises:
            RuntimeError: If job not submitted, or if the results request fails.
        """
        if not self._job_id:
            raise RuntimeError("Job not submitted yet")

        url = f"{self.config.backend_url.rstrip('/')}/api/research-agent/jobs/{self._job_id}/results"
        headers = {"Authorization": f"Bearer {self.config.api_key}"}

        try:
            response = httpx.get(url, headers=headers, timeout=60.0)
            response.raise_for_status()
            return response.json()
        except httpx.HTTPStatusError as e:
            raise RuntimeError(
                f"Failed to get results: HTTP {e.response.status_code}"
            ) from e
        except Exception as e:
            raise RuntimeError(f"Failed to get results: {e}") from e