synth-ai 0.4.1__py3-none-any.whl → 0.4.4__py3-none-any.whl
This diff compares publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release: this version of synth-ai might be problematic.
- synth_ai/__init__.py +13 -13
- synth_ai/cli/__init__.py +6 -15
- synth_ai/cli/commands/eval/__init__.py +6 -15
- synth_ai/cli/commands/eval/config.py +338 -0
- synth_ai/cli/commands/eval/core.py +236 -1091
- synth_ai/cli/commands/eval/runner.py +704 -0
- synth_ai/cli/commands/eval/validation.py +44 -117
- synth_ai/cli/commands/filter/core.py +7 -7
- synth_ai/cli/commands/filter/validation.py +2 -2
- synth_ai/cli/commands/smoke/core.py +7 -17
- synth_ai/cli/commands/status/__init__.py +1 -64
- synth_ai/cli/commands/status/client.py +50 -151
- synth_ai/cli/commands/status/config.py +3 -83
- synth_ai/cli/commands/status/errors.py +4 -13
- synth_ai/cli/commands/status/subcommands/__init__.py +2 -8
- synth_ai/cli/commands/status/subcommands/config.py +13 -0
- synth_ai/cli/commands/status/subcommands/files.py +18 -63
- synth_ai/cli/commands/status/subcommands/jobs.py +28 -311
- synth_ai/cli/commands/status/subcommands/models.py +18 -62
- synth_ai/cli/commands/status/subcommands/runs.py +16 -63
- synth_ai/cli/commands/status/subcommands/session.py +67 -172
- synth_ai/cli/commands/status/subcommands/summary.py +24 -32
- synth_ai/cli/commands/status/subcommands/utils.py +41 -0
- synth_ai/cli/commands/status/utils.py +16 -107
- synth_ai/cli/commands/train/__init__.py +18 -20
- synth_ai/cli/commands/train/errors.py +3 -3
- synth_ai/cli/commands/train/prompt_learning_validation.py +15 -16
- synth_ai/cli/commands/train/validation.py +7 -7
- synth_ai/cli/commands/train/{judge_schemas.py → verifier_schemas.py} +33 -34
- synth_ai/cli/commands/train/verifier_validation.py +235 -0
- synth_ai/cli/demo_apps/demo_task_apps/math/config.toml +0 -1
- synth_ai/cli/demo_apps/demo_task_apps/math/modal_task_app.py +2 -6
- synth_ai/cli/demo_apps/math/config.toml +0 -1
- synth_ai/cli/demo_apps/math/modal_task_app.py +2 -6
- synth_ai/cli/demo_apps/mipro/task_app.py +25 -47
- synth_ai/cli/lib/apps/task_app.py +12 -13
- synth_ai/cli/lib/task_app_discovery.py +6 -6
- synth_ai/cli/lib/train_cfgs.py +10 -10
- synth_ai/cli/task_apps/__init__.py +11 -0
- synth_ai/cli/task_apps/commands.py +7 -15
- synth_ai/core/env.py +12 -1
- synth_ai/core/errors.py +1 -2
- synth_ai/core/integrations/cloudflare.py +209 -33
- synth_ai/core/tracing_v3/abstractions.py +46 -0
- synth_ai/data/__init__.py +3 -30
- synth_ai/data/enums.py +1 -20
- synth_ai/data/rewards.py +100 -3
- synth_ai/products/graph_evolve/__init__.py +1 -2
- synth_ai/products/graph_evolve/config.py +16 -16
- synth_ai/products/graph_evolve/converters/__init__.py +3 -3
- synth_ai/products/graph_evolve/converters/openai_sft.py +7 -7
- synth_ai/products/graph_evolve/examples/hotpotqa/config.toml +1 -1
- synth_ai/products/graph_gepa/__init__.py +23 -0
- synth_ai/products/graph_gepa/converters/__init__.py +19 -0
- synth_ai/products/graph_gepa/converters/openai_sft.py +29 -0
- synth_ai/sdk/__init__.py +45 -35
- synth_ai/sdk/api/eval/__init__.py +33 -0
- synth_ai/sdk/api/eval/job.py +732 -0
- synth_ai/sdk/api/research_agent/__init__.py +276 -66
- synth_ai/sdk/api/train/builders.py +181 -0
- synth_ai/sdk/api/train/cli.py +41 -33
- synth_ai/sdk/api/train/configs/__init__.py +6 -4
- synth_ai/sdk/api/train/configs/prompt_learning.py +127 -33
- synth_ai/sdk/api/train/configs/rl.py +264 -16
- synth_ai/sdk/api/train/configs/sft.py +165 -1
- synth_ai/sdk/api/train/graph_validators.py +12 -12
- synth_ai/sdk/api/train/graphgen.py +169 -51
- synth_ai/sdk/api/train/graphgen_models.py +95 -45
- synth_ai/sdk/api/train/local_api.py +10 -0
- synth_ai/sdk/api/train/pollers.py +36 -0
- synth_ai/sdk/api/train/prompt_learning.py +390 -60
- synth_ai/sdk/api/train/rl.py +41 -5
- synth_ai/sdk/api/train/sft.py +2 -0
- synth_ai/sdk/api/train/task_app.py +20 -0
- synth_ai/sdk/api/train/validators.py +17 -17
- synth_ai/sdk/graphs/completions.py +239 -33
- synth_ai/sdk/{judging/schemas.py → graphs/verifier_schemas.py} +23 -23
- synth_ai/sdk/learning/__init__.py +35 -5
- synth_ai/sdk/learning/context_learning_client.py +531 -0
- synth_ai/sdk/learning/context_learning_types.py +294 -0
- synth_ai/sdk/learning/prompt_learning_client.py +1 -1
- synth_ai/sdk/learning/prompt_learning_types.py +2 -1
- synth_ai/sdk/learning/rl/__init__.py +0 -4
- synth_ai/sdk/learning/rl/contracts.py +0 -4
- synth_ai/sdk/localapi/__init__.py +40 -0
- synth_ai/sdk/localapi/apps/__init__.py +28 -0
- synth_ai/sdk/localapi/client.py +10 -0
- synth_ai/sdk/localapi/contracts.py +10 -0
- synth_ai/sdk/localapi/helpers.py +519 -0
- synth_ai/sdk/localapi/rollouts.py +93 -0
- synth_ai/sdk/localapi/server.py +29 -0
- synth_ai/sdk/localapi/template.py +49 -0
- synth_ai/sdk/streaming/handlers.py +6 -6
- synth_ai/sdk/streaming/streamer.py +10 -6
- synth_ai/sdk/task/__init__.py +18 -5
- synth_ai/sdk/task/apps/__init__.py +37 -1
- synth_ai/sdk/task/client.py +9 -1
- synth_ai/sdk/task/config.py +6 -11
- synth_ai/sdk/task/contracts.py +137 -95
- synth_ai/sdk/task/in_process.py +32 -22
- synth_ai/sdk/task/in_process_runner.py +9 -4
- synth_ai/sdk/task/rubrics/__init__.py +2 -3
- synth_ai/sdk/task/rubrics/loaders.py +4 -4
- synth_ai/sdk/task/rubrics/strict.py +3 -4
- synth_ai/sdk/task/server.py +76 -16
- synth_ai/sdk/task/trace_correlation_helpers.py +190 -139
- synth_ai/sdk/task/validators.py +34 -49
- synth_ai/sdk/training/__init__.py +7 -16
- synth_ai/sdk/tunnels/__init__.py +118 -0
- synth_ai/sdk/tunnels/cleanup.py +83 -0
- synth_ai/sdk/tunnels/ports.py +120 -0
- synth_ai/sdk/tunnels/tunneled_api.py +363 -0
- {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/METADATA +71 -4
- {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/RECORD +118 -128
- synth_ai/cli/commands/baseline/__init__.py +0 -12
- synth_ai/cli/commands/baseline/core.py +0 -636
- synth_ai/cli/commands/baseline/list.py +0 -94
- synth_ai/cli/commands/eval/errors.py +0 -81
- synth_ai/cli/commands/status/formatters.py +0 -164
- synth_ai/cli/commands/status/subcommands/pricing.py +0 -23
- synth_ai/cli/commands/status/subcommands/usage.py +0 -203
- synth_ai/cli/commands/train/judge_validation.py +0 -305
- synth_ai/cli/usage.py +0 -159
- synth_ai/data/specs.py +0 -36
- synth_ai/sdk/api/research_agent/cli.py +0 -428
- synth_ai/sdk/api/research_agent/config.py +0 -357
- synth_ai/sdk/api/research_agent/job.py +0 -717
- synth_ai/sdk/baseline/__init__.py +0 -25
- synth_ai/sdk/baseline/config.py +0 -209
- synth_ai/sdk/baseline/discovery.py +0 -216
- synth_ai/sdk/baseline/execution.py +0 -154
- synth_ai/sdk/judging/__init__.py +0 -15
- synth_ai/sdk/judging/base.py +0 -24
- synth_ai/sdk/judging/client.py +0 -191
- synth_ai/sdk/judging/types.py +0 -42
- synth_ai/sdk/research_agent/__init__.py +0 -34
- synth_ai/sdk/research_agent/container_builder.py +0 -328
- synth_ai/sdk/research_agent/container_spec.py +0 -198
- synth_ai/sdk/research_agent/defaults.py +0 -34
- synth_ai/sdk/research_agent/results_collector.py +0 -69
- synth_ai/sdk/specs/__init__.py +0 -46
- synth_ai/sdk/specs/dataclasses.py +0 -149
- synth_ai/sdk/specs/loader.py +0 -144
- synth_ai/sdk/specs/serializer.py +0 -199
- synth_ai/sdk/specs/validation.py +0 -250
- synth_ai/sdk/tracing/__init__.py +0 -39
- synth_ai/sdk/usage/__init__.py +0 -37
- synth_ai/sdk/usage/client.py +0 -171
- synth_ai/sdk/usage/models.py +0 -261
- {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/WHEEL +0 -0
- {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/top_level.txt +0 -0
```diff
--- synth_ai/sdk/api/research_agent/config.py
+++ /dev/null
@@ -1,357 +0,0 @@
-"""Typed configuration models for Research Agent jobs.
-
-These models mirror the backend Pydantic models in:
-    backend/app/routes/research_agent/models.py
-
-This provides type safety and IDE autocomplete for SDK users.
-"""
-
-from __future__ import annotations
-
-from dataclasses import dataclass, field
-from enum import Enum
-from typing import Any, Dict, List, Literal, Optional
-
-
-class ModelProvider(str, Enum):
-    """Supported model providers for prompt optimization."""
-
-    OPENAI = "openai"
-    GROQ = "groq"
-    GOOGLE = "google"
-
-
-class OptimizationTool(str, Enum):
-    """Available optimization tools."""
-
-    MIPRO = "mipro"
-    GEPA = "gepa"
-
-
-# Type aliases for Literal types
-ProposerEffort = Literal["LOW_CONTEXT", "LOW", "MEDIUM", "HIGH"]
-ProposerOutputTokens = Literal["RAPID", "FAST", "SLOW"]
-ReasoningEffort = Literal["low", "medium", "high"]
-DatasetSourceType = Literal["huggingface", "upload", "inline"]
-
-
-@dataclass
-class PermittedModel:
-    """A single permitted model configuration."""
-
-    model: str
-    """Model name (e.g., 'gpt-4o-mini', 'llama-3.3-70b-versatile')"""
-
-    provider: ModelProvider
-    """Model provider: openai, groq, or google"""
-
-    def to_dict(self) -> Dict[str, Any]:
-        return {
-            "model": self.model,
-            "provider": self.provider.value if isinstance(self.provider, Enum) else self.provider,
-        }
-
-
-@dataclass
-class PermittedModelsConfig:
-    """Configuration for permitted models in the optimization pipeline.
-
-    The user specifies which models the agent is ALLOWED to use during optimization.
-    The agent decides which models to use for which pipeline stages.
-    """
-
-    models: List[PermittedModel] = field(default_factory=list)
-    """List of models the agent is permitted to use in the pipeline"""
-
-    default_temperature: float = 0.7
-    """Default sampling temperature"""
-
-    default_max_tokens: int = 4096
-    """Default max tokens per response"""
-
-    def to_dict(self) -> Dict[str, Any]:
-        return {
-            "models": [m.to_dict() for m in self.models],
-            "default_temperature": self.default_temperature,
-            "default_max_tokens": self.default_max_tokens,
-        }
-
-
-@dataclass
-class DatasetSource:
-    """Configuration for dataset injection into the sandbox."""
-
-    source_type: DatasetSourceType
-    """Type of dataset source: huggingface, upload, or inline"""
-
-    description: Optional[str] = None
-    """Optional description of the dataset"""
-
-    # For source_type="huggingface"
-    hf_repo_id: Optional[str] = None
-    """HuggingFace dataset repo ID (e.g., 'PolyAI/banking77')"""
-
-    hf_split: str = "train"
-    """Dataset split to use"""
-
-    hf_subset: Optional[str] = None
-    """Dataset subset/config name"""
-
-    # For source_type="upload"
-    file_ids: Optional[List[str]] = None
-    """List of uploaded file IDs"""
-
-    # For source_type="inline"
-    inline_data: Optional[Dict[str, str]] = None
-    """Dict of filename -> content"""
-
-    def to_dict(self) -> Dict[str, Any]:
-        result: Dict[str, Any] = {"source_type": self.source_type}
-        if self.description:
-            result["description"] = self.description
-        if self.source_type == "huggingface":
-            if self.hf_repo_id:
-                result["hf_repo_id"] = self.hf_repo_id
-            result["hf_split"] = self.hf_split
-            if self.hf_subset:
-                result["hf_subset"] = self.hf_subset
-        elif self.source_type == "upload":
-            if self.file_ids:
-                result["file_ids"] = self.file_ids
-        elif self.source_type == "inline":
-            if self.inline_data:
-                result["inline_data"] = self.inline_data
-        return result
-
-
-@dataclass
-class GEPAConfig:
-    """GEPA-specific model configuration.
-
-    GEPA uses a mutation model to generate prompt variations/mutations.
-    """
-
-    # Mutation model (for generating prompt mutations)
-    mutation_model: str = "openai/gpt-oss-120b"
-    """Model for generating prompt mutations"""
-
-    mutation_provider: ModelProvider = ModelProvider.GROQ
-    """Provider for mutation model"""
-
-    mutation_temperature: float = 0.7
-    """Temperature for mutation generation"""
-
-    mutation_max_tokens: int = 8192
-    """Max tokens for mutation responses"""
-
-    # Advanced GEPA settings
-    population_size: int = 20
-    """Population size for genetic algorithm"""
-
-    num_generations: int = 10
-    """Number of generations to evolve"""
-
-    elite_fraction: float = 0.2
-    """Fraction of population to keep as elite"""
-
-    # Proposer settings
-    proposer_type: Literal["dspy", "spec", "synth", "gepa-ai"] = "dspy"
-    """Type of proposer to use"""
-
-    proposer_effort: ProposerEffort = "MEDIUM"
-    """Effort level for proposal generation"""
-
-    proposer_output_tokens: ProposerOutputTokens = "FAST"
-    """Output token budget for proposer"""
-
-    spec_path: Optional[str] = None
-    """Path to spec file (for proposer_type='spec')"""
-
-    # Seed pool sizes (optional - agent decides if not set)
-    train_size: Optional[int] = None
-    """Training set size"""
-
-    val_size: Optional[int] = None
-    """Validation set size"""
-
-    reference_size: Optional[int] = None
-    """Reference set size"""
-
-    def to_dict(self) -> Dict[str, Any]:
-        result: Dict[str, Any] = {
-            "mutation_model": self.mutation_model,
-            "mutation_provider": self.mutation_provider.value if isinstance(self.mutation_provider, Enum) else self.mutation_provider,
-            "mutation_temperature": self.mutation_temperature,
-            "mutation_max_tokens": self.mutation_max_tokens,
-            "population_size": self.population_size,
-            "num_generations": self.num_generations,
-            "elite_fraction": self.elite_fraction,
-            "proposer_type": self.proposer_type,
-            "proposer_effort": self.proposer_effort,
-            "proposer_output_tokens": self.proposer_output_tokens,
-        }
-        if self.spec_path:
-            result["spec_path"] = self.spec_path
-        if self.train_size is not None:
-            result["train_size"] = self.train_size
-        if self.val_size is not None:
-            result["val_size"] = self.val_size
-        if self.reference_size is not None:
-            result["reference_size"] = self.reference_size
-        return result
-
-
-@dataclass
-class MIPROConfig:
-    """MIPRO-specific model configuration.
-
-    MIPRO uses a meta model to generate instruction/prompt proposals.
-    """
-
-    # Meta model (for generating instruction proposals)
-    meta_model: str = "llama-3.3-70b-versatile"
-    """Model for generating instruction proposals"""
-
-    meta_provider: ModelProvider = ModelProvider.GROQ
-    """Provider for meta model"""
-
-    meta_temperature: float = 0.7
-    """Temperature for proposal generation"""
-
-    meta_max_tokens: int = 4096
-    """Max tokens for proposal responses"""
-
-    # Advanced MIPRO settings
-    num_candidates: int = 20
-    """Number of instruction candidates to generate"""
-
-    num_trials: int = 10
-    """Number of optimization trials"""
-
-    # Proposer settings
-    proposer_effort: ProposerEffort = "MEDIUM"
-    """Effort level for proposal generation"""
-
-    proposer_output_tokens: ProposerOutputTokens = "FAST"
-    """Output token budget for proposer"""
-
-    # Seed pool sizes (optional - agent decides if not set)
-    train_size: Optional[int] = None
-    """Training set size"""
-
-    val_size: Optional[int] = None
-    """Validation set size"""
-
-    reference_size: Optional[int] = None
-    """Reference set size"""
-
-    def to_dict(self) -> Dict[str, Any]:
-        result: Dict[str, Any] = {
-            "meta_model": self.meta_model,
-            "meta_provider": self.meta_provider.value if isinstance(self.meta_provider, Enum) else self.meta_provider,
-            "meta_temperature": self.meta_temperature,
-            "meta_max_tokens": self.meta_max_tokens,
-            "num_candidates": self.num_candidates,
-            "num_trials": self.num_trials,
-            "proposer_effort": self.proposer_effort,
-            "proposer_output_tokens": self.proposer_output_tokens,
-        }
-        if self.train_size is not None:
-            result["train_size"] = self.train_size
-        if self.val_size is not None:
-            result["val_size"] = self.val_size
-        if self.reference_size is not None:
-            result["reference_size"] = self.reference_size
-        return result
-
-
-@dataclass
-class ResearchConfig:
-    """Configuration for prompt/pipeline research optimization.
-
-    This is the main configuration for the "research" algorithm, which uses
-    MIPRO or GEPA to optimize prompts/pipelines.
-    """
-
-    task_description: str
-    """What to optimize (e.g., 'Improve accuracy on banking intent classification')"""
-
-    tools: List[OptimizationTool] = field(default_factory=lambda: [OptimizationTool.MIPRO])
-    """Optimization tools to use (mipro, gepa)"""
-
-    # Datasets
-    datasets: List[DatasetSource] = field(default_factory=list)
-    """Datasets for training/evaluation"""
-
-    # Metrics
-    primary_metric: str = "accuracy"
-    """Main metric to optimize"""
-
-    secondary_metrics: List[str] = field(default_factory=list)
-    """Additional metrics to track"""
-
-    # Optimization parameters
-    num_iterations: int = 10
-    """Number of optimization iterations"""
-
-    population_size: int = 20
-    """Population size (GEPA) or candidates (MIPRO)"""
-
-    timeout_minutes: int = 60
-    """Maximum runtime in minutes"""
-
-    max_eval_samples: Optional[int] = None
-    """Max samples to evaluate per iteration"""
-
-    # Model configurations
-    permitted_models: Optional[PermittedModelsConfig] = None
-    """Models the agent is allowed to use in the pipeline"""
-
-    gepa_config: Optional[GEPAConfig] = None
-    """GEPA-specific settings"""
-
-    mipro_config: Optional[MIPROConfig] = None
-    """MIPRO-specific settings"""
-
-    # Initial prompt/pipeline
-    initial_prompt: Optional[str] = None
-    """Initial prompt template to optimize"""
-
-    pipeline_entrypoint: Optional[str] = None
-    """Path to pipeline script (e.g., 'pipeline.py')"""
-
-    def to_dict(self) -> Dict[str, Any]:
-        result: Dict[str, Any] = {
-            "task_description": self.task_description,
-            "tools": [t.value if isinstance(t, Enum) else t for t in self.tools],
-            "primary_metric": self.primary_metric,
-            "num_iterations": self.num_iterations,
-            "population_size": self.population_size,
-            "timeout_minutes": self.timeout_minutes,
-        }
-
-        if self.datasets:
-            result["datasets"] = [d.to_dict() for d in self.datasets]
-
-        if self.secondary_metrics:
-            result["secondary_metrics"] = self.secondary_metrics
-
-        if self.max_eval_samples is not None:
-            result["max_eval_samples"] = self.max_eval_samples
-
-        if self.permitted_models:
-            result["permitted_models"] = self.permitted_models.to_dict()
-
-        if self.gepa_config:
-            result["gepa_config"] = self.gepa_config.to_dict()
-
-        if self.mipro_config:
-            result["mipro_config"] = self.mipro_config.to_dict()
-
-        if self.initial_prompt:
-            result["initial_prompt"] = self.initial_prompt
-
-        if self.pipeline_entrypoint:
-            result["pipeline_entrypoint"] = self.pipeline_entrypoint
-
-        return result
```
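For reference, a minimal sketch of how this removed module composed: the class and field names come from the file above, the literal values are illustrative, and the import path reflects the pre-0.4.4 layout listed earlier in this diff.

```python
# Sketch only: assumes the dataclasses from the removed config.py above are
# in scope. Before 0.4.4 they were importable as:
#   from synth_ai.sdk.api.research_agent.config import (
#       DatasetSource, GEPAConfig, OptimizationTool, ResearchConfig,
#   )

config = ResearchConfig(
    task_description="Improve accuracy on banking intent classification",
    tools=[OptimizationTool.GEPA],
    datasets=[
        DatasetSource(source_type="huggingface", hf_repo_id="PolyAI/banking77"),
    ],
    gepa_config=GEPAConfig(num_generations=5),
)

payload = config.to_dict()  # JSON-serializable dict mirroring the backend models
assert payload["tools"] == ["gepa"]
assert payload["datasets"][0]["hf_split"] == "train"  # default split is always emitted
```

Note that each `to_dict()` emits optional fields only when they were explicitly set, so the serialized request payload stays minimal.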