synth-ai 0.4.1__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai has been flagged as potentially problematic; consult the registry's advisory page for more details.

Files changed (153)
  1. synth_ai/__init__.py +13 -13
  2. synth_ai/cli/__init__.py +6 -15
  3. synth_ai/cli/commands/eval/__init__.py +6 -15
  4. synth_ai/cli/commands/eval/config.py +338 -0
  5. synth_ai/cli/commands/eval/core.py +236 -1091
  6. synth_ai/cli/commands/eval/runner.py +704 -0
  7. synth_ai/cli/commands/eval/validation.py +44 -117
  8. synth_ai/cli/commands/filter/core.py +7 -7
  9. synth_ai/cli/commands/filter/validation.py +2 -2
  10. synth_ai/cli/commands/smoke/core.py +7 -17
  11. synth_ai/cli/commands/status/__init__.py +1 -64
  12. synth_ai/cli/commands/status/client.py +50 -151
  13. synth_ai/cli/commands/status/config.py +3 -83
  14. synth_ai/cli/commands/status/errors.py +4 -13
  15. synth_ai/cli/commands/status/subcommands/__init__.py +2 -8
  16. synth_ai/cli/commands/status/subcommands/config.py +13 -0
  17. synth_ai/cli/commands/status/subcommands/files.py +18 -63
  18. synth_ai/cli/commands/status/subcommands/jobs.py +28 -311
  19. synth_ai/cli/commands/status/subcommands/models.py +18 -62
  20. synth_ai/cli/commands/status/subcommands/runs.py +16 -63
  21. synth_ai/cli/commands/status/subcommands/session.py +67 -172
  22. synth_ai/cli/commands/status/subcommands/summary.py +24 -32
  23. synth_ai/cli/commands/status/subcommands/utils.py +41 -0
  24. synth_ai/cli/commands/status/utils.py +16 -107
  25. synth_ai/cli/commands/train/__init__.py +18 -20
  26. synth_ai/cli/commands/train/errors.py +3 -3
  27. synth_ai/cli/commands/train/prompt_learning_validation.py +15 -16
  28. synth_ai/cli/commands/train/validation.py +7 -7
  29. synth_ai/cli/commands/train/{judge_schemas.py → verifier_schemas.py} +33 -34
  30. synth_ai/cli/commands/train/verifier_validation.py +235 -0
  31. synth_ai/cli/demo_apps/demo_task_apps/math/config.toml +0 -1
  32. synth_ai/cli/demo_apps/demo_task_apps/math/modal_task_app.py +2 -6
  33. synth_ai/cli/demo_apps/math/config.toml +0 -1
  34. synth_ai/cli/demo_apps/math/modal_task_app.py +2 -6
  35. synth_ai/cli/demo_apps/mipro/task_app.py +25 -47
  36. synth_ai/cli/lib/apps/task_app.py +12 -13
  37. synth_ai/cli/lib/task_app_discovery.py +6 -6
  38. synth_ai/cli/lib/train_cfgs.py +10 -10
  39. synth_ai/cli/task_apps/__init__.py +11 -0
  40. synth_ai/cli/task_apps/commands.py +7 -15
  41. synth_ai/core/env.py +12 -1
  42. synth_ai/core/errors.py +1 -2
  43. synth_ai/core/integrations/cloudflare.py +209 -33
  44. synth_ai/core/tracing_v3/abstractions.py +46 -0
  45. synth_ai/data/__init__.py +3 -30
  46. synth_ai/data/enums.py +1 -20
  47. synth_ai/data/rewards.py +100 -3
  48. synth_ai/products/graph_evolve/__init__.py +1 -2
  49. synth_ai/products/graph_evolve/config.py +16 -16
  50. synth_ai/products/graph_evolve/converters/__init__.py +3 -3
  51. synth_ai/products/graph_evolve/converters/openai_sft.py +7 -7
  52. synth_ai/products/graph_evolve/examples/hotpotqa/config.toml +1 -1
  53. synth_ai/products/graph_gepa/__init__.py +23 -0
  54. synth_ai/products/graph_gepa/converters/__init__.py +19 -0
  55. synth_ai/products/graph_gepa/converters/openai_sft.py +29 -0
  56. synth_ai/sdk/__init__.py +45 -35
  57. synth_ai/sdk/api/eval/__init__.py +33 -0
  58. synth_ai/sdk/api/eval/job.py +732 -0
  59. synth_ai/sdk/api/research_agent/__init__.py +276 -66
  60. synth_ai/sdk/api/train/builders.py +181 -0
  61. synth_ai/sdk/api/train/cli.py +41 -33
  62. synth_ai/sdk/api/train/configs/__init__.py +6 -4
  63. synth_ai/sdk/api/train/configs/prompt_learning.py +127 -33
  64. synth_ai/sdk/api/train/configs/rl.py +264 -16
  65. synth_ai/sdk/api/train/configs/sft.py +165 -1
  66. synth_ai/sdk/api/train/graph_validators.py +12 -12
  67. synth_ai/sdk/api/train/graphgen.py +169 -51
  68. synth_ai/sdk/api/train/graphgen_models.py +95 -45
  69. synth_ai/sdk/api/train/local_api.py +10 -0
  70. synth_ai/sdk/api/train/pollers.py +36 -0
  71. synth_ai/sdk/api/train/prompt_learning.py +390 -60
  72. synth_ai/sdk/api/train/rl.py +41 -5
  73. synth_ai/sdk/api/train/sft.py +2 -0
  74. synth_ai/sdk/api/train/task_app.py +20 -0
  75. synth_ai/sdk/api/train/validators.py +17 -17
  76. synth_ai/sdk/graphs/completions.py +239 -33
  77. synth_ai/sdk/{judging/schemas.py → graphs/verifier_schemas.py} +23 -23
  78. synth_ai/sdk/learning/__init__.py +35 -5
  79. synth_ai/sdk/learning/context_learning_client.py +531 -0
  80. synth_ai/sdk/learning/context_learning_types.py +294 -0
  81. synth_ai/sdk/learning/prompt_learning_client.py +1 -1
  82. synth_ai/sdk/learning/prompt_learning_types.py +2 -1
  83. synth_ai/sdk/learning/rl/__init__.py +0 -4
  84. synth_ai/sdk/learning/rl/contracts.py +0 -4
  85. synth_ai/sdk/localapi/__init__.py +40 -0
  86. synth_ai/sdk/localapi/apps/__init__.py +28 -0
  87. synth_ai/sdk/localapi/client.py +10 -0
  88. synth_ai/sdk/localapi/contracts.py +10 -0
  89. synth_ai/sdk/localapi/helpers.py +519 -0
  90. synth_ai/sdk/localapi/rollouts.py +93 -0
  91. synth_ai/sdk/localapi/server.py +29 -0
  92. synth_ai/sdk/localapi/template.py +49 -0
  93. synth_ai/sdk/streaming/handlers.py +6 -6
  94. synth_ai/sdk/streaming/streamer.py +10 -6
  95. synth_ai/sdk/task/__init__.py +18 -5
  96. synth_ai/sdk/task/apps/__init__.py +37 -1
  97. synth_ai/sdk/task/client.py +9 -1
  98. synth_ai/sdk/task/config.py +6 -11
  99. synth_ai/sdk/task/contracts.py +137 -95
  100. synth_ai/sdk/task/in_process.py +32 -22
  101. synth_ai/sdk/task/in_process_runner.py +9 -4
  102. synth_ai/sdk/task/rubrics/__init__.py +2 -3
  103. synth_ai/sdk/task/rubrics/loaders.py +4 -4
  104. synth_ai/sdk/task/rubrics/strict.py +3 -4
  105. synth_ai/sdk/task/server.py +76 -16
  106. synth_ai/sdk/task/trace_correlation_helpers.py +190 -139
  107. synth_ai/sdk/task/validators.py +34 -49
  108. synth_ai/sdk/training/__init__.py +7 -16
  109. synth_ai/sdk/tunnels/__init__.py +118 -0
  110. synth_ai/sdk/tunnels/cleanup.py +83 -0
  111. synth_ai/sdk/tunnels/ports.py +120 -0
  112. synth_ai/sdk/tunnels/tunneled_api.py +363 -0
  113. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/METADATA +71 -4
  114. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/RECORD +118 -128
  115. synth_ai/cli/commands/baseline/__init__.py +0 -12
  116. synth_ai/cli/commands/baseline/core.py +0 -636
  117. synth_ai/cli/commands/baseline/list.py +0 -94
  118. synth_ai/cli/commands/eval/errors.py +0 -81
  119. synth_ai/cli/commands/status/formatters.py +0 -164
  120. synth_ai/cli/commands/status/subcommands/pricing.py +0 -23
  121. synth_ai/cli/commands/status/subcommands/usage.py +0 -203
  122. synth_ai/cli/commands/train/judge_validation.py +0 -305
  123. synth_ai/cli/usage.py +0 -159
  124. synth_ai/data/specs.py +0 -36
  125. synth_ai/sdk/api/research_agent/cli.py +0 -428
  126. synth_ai/sdk/api/research_agent/config.py +0 -357
  127. synth_ai/sdk/api/research_agent/job.py +0 -717
  128. synth_ai/sdk/baseline/__init__.py +0 -25
  129. synth_ai/sdk/baseline/config.py +0 -209
  130. synth_ai/sdk/baseline/discovery.py +0 -216
  131. synth_ai/sdk/baseline/execution.py +0 -154
  132. synth_ai/sdk/judging/__init__.py +0 -15
  133. synth_ai/sdk/judging/base.py +0 -24
  134. synth_ai/sdk/judging/client.py +0 -191
  135. synth_ai/sdk/judging/types.py +0 -42
  136. synth_ai/sdk/research_agent/__init__.py +0 -34
  137. synth_ai/sdk/research_agent/container_builder.py +0 -328
  138. synth_ai/sdk/research_agent/container_spec.py +0 -198
  139. synth_ai/sdk/research_agent/defaults.py +0 -34
  140. synth_ai/sdk/research_agent/results_collector.py +0 -69
  141. synth_ai/sdk/specs/__init__.py +0 -46
  142. synth_ai/sdk/specs/dataclasses.py +0 -149
  143. synth_ai/sdk/specs/loader.py +0 -144
  144. synth_ai/sdk/specs/serializer.py +0 -199
  145. synth_ai/sdk/specs/validation.py +0 -250
  146. synth_ai/sdk/tracing/__init__.py +0 -39
  147. synth_ai/sdk/usage/__init__.py +0 -37
  148. synth_ai/sdk/usage/client.py +0 -171
  149. synth_ai/sdk/usage/models.py +0 -261
  150. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/WHEEL +0 -0
  151. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/entry_points.txt +0 -0
  152. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/licenses/LICENSE +0 -0
  153. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/top_level.txt +0 -0
@@ -1,357 +0,0 @@
1
- """Typed configuration models for Research Agent jobs.
2
-
3
- These models mirror the backend Pydantic models in:
4
- backend/app/routes/research_agent/models.py
5
-
6
- This provides type safety and IDE autocomplete for SDK users.
7
- """
8
-
9
- from __future__ import annotations
10
-
11
- from dataclasses import dataclass, field
12
- from enum import Enum
13
- from typing import Any, Dict, List, Literal, Optional
14
-
15
-
16
- class ModelProvider(str, Enum):
17
- """Supported model providers for prompt optimization."""
18
-
19
- OPENAI = "openai"
20
- GROQ = "groq"
21
- GOOGLE = "google"
22
-
23
-
24
- class OptimizationTool(str, Enum):
25
- """Available optimization tools."""
26
-
27
- MIPRO = "mipro"
28
- GEPA = "gepa"
29
-
30
-
31
- # Type aliases for Literal types
32
- ProposerEffort = Literal["LOW_CONTEXT", "LOW", "MEDIUM", "HIGH"]
33
- ProposerOutputTokens = Literal["RAPID", "FAST", "SLOW"]
34
- ReasoningEffort = Literal["low", "medium", "high"]
35
- DatasetSourceType = Literal["huggingface", "upload", "inline"]
36
-
37
-
38
- @dataclass
39
- class PermittedModel:
40
- """A single permitted model configuration."""
41
-
42
- model: str
43
- """Model name (e.g., 'gpt-4o-mini', 'llama-3.3-70b-versatile')"""
44
-
45
- provider: ModelProvider
46
- """Model provider: openai, groq, or google"""
47
-
48
- def to_dict(self) -> Dict[str, Any]:
49
- return {
50
- "model": self.model,
51
- "provider": self.provider.value if isinstance(self.provider, Enum) else self.provider,
52
- }
53
-
54
-
55
- @dataclass
56
- class PermittedModelsConfig:
57
- """Configuration for permitted models in the optimization pipeline.
58
-
59
- The user specifies which models the agent is ALLOWED to use during optimization.
60
- The agent decides which models to use for which pipeline stages.
61
- """
62
-
63
- models: List[PermittedModel] = field(default_factory=list)
64
- """List of models the agent is permitted to use in the pipeline"""
65
-
66
- default_temperature: float = 0.7
67
- """Default sampling temperature"""
68
-
69
- default_max_tokens: int = 4096
70
- """Default max tokens per response"""
71
-
72
- def to_dict(self) -> Dict[str, Any]:
73
- return {
74
- "models": [m.to_dict() for m in self.models],
75
- "default_temperature": self.default_temperature,
76
- "default_max_tokens": self.default_max_tokens,
77
- }
78
-
79
-
80
- @dataclass
81
- class DatasetSource:
82
- """Configuration for dataset injection into the sandbox."""
83
-
84
- source_type: DatasetSourceType
85
- """Type of dataset source: huggingface, upload, or inline"""
86
-
87
- description: Optional[str] = None
88
- """Optional description of the dataset"""
89
-
90
- # For source_type="huggingface"
91
- hf_repo_id: Optional[str] = None
92
- """HuggingFace dataset repo ID (e.g., 'PolyAI/banking77')"""
93
-
94
- hf_split: str = "train"
95
- """Dataset split to use"""
96
-
97
- hf_subset: Optional[str] = None
98
- """Dataset subset/config name"""
99
-
100
- # For source_type="upload"
101
- file_ids: Optional[List[str]] = None
102
- """List of uploaded file IDs"""
103
-
104
- # For source_type="inline"
105
- inline_data: Optional[Dict[str, str]] = None
106
- """Dict of filename -> content"""
107
-
108
- def to_dict(self) -> Dict[str, Any]:
109
- result: Dict[str, Any] = {"source_type": self.source_type}
110
- if self.description:
111
- result["description"] = self.description
112
- if self.source_type == "huggingface":
113
- if self.hf_repo_id:
114
- result["hf_repo_id"] = self.hf_repo_id
115
- result["hf_split"] = self.hf_split
116
- if self.hf_subset:
117
- result["hf_subset"] = self.hf_subset
118
- elif self.source_type == "upload":
119
- if self.file_ids:
120
- result["file_ids"] = self.file_ids
121
- elif self.source_type == "inline":
122
- if self.inline_data:
123
- result["inline_data"] = self.inline_data
124
- return result
125
-
126
-
127
- @dataclass
128
- class GEPAConfig:
129
- """GEPA-specific model configuration.
130
-
131
- GEPA uses a mutation model to generate prompt variations/mutations.
132
- """
133
-
134
- # Mutation model (for generating prompt mutations)
135
- mutation_model: str = "openai/gpt-oss-120b"
136
- """Model for generating prompt mutations"""
137
-
138
- mutation_provider: ModelProvider = ModelProvider.GROQ
139
- """Provider for mutation model"""
140
-
141
- mutation_temperature: float = 0.7
142
- """Temperature for mutation generation"""
143
-
144
- mutation_max_tokens: int = 8192
145
- """Max tokens for mutation responses"""
146
-
147
- # Advanced GEPA settings
148
- population_size: int = 20
149
- """Population size for genetic algorithm"""
150
-
151
- num_generations: int = 10
152
- """Number of generations to evolve"""
153
-
154
- elite_fraction: float = 0.2
155
- """Fraction of population to keep as elite"""
156
-
157
- # Proposer settings
158
- proposer_type: Literal["dspy", "spec", "synth", "gepa-ai"] = "dspy"
159
- """Type of proposer to use"""
160
-
161
- proposer_effort: ProposerEffort = "MEDIUM"
162
- """Effort level for proposal generation"""
163
-
164
- proposer_output_tokens: ProposerOutputTokens = "FAST"
165
- """Output token budget for proposer"""
166
-
167
- spec_path: Optional[str] = None
168
- """Path to spec file (for proposer_type='spec')"""
169
-
170
- # Seed pool sizes (optional - agent decides if not set)
171
- train_size: Optional[int] = None
172
- """Training set size"""
173
-
174
- val_size: Optional[int] = None
175
- """Validation set size"""
176
-
177
- reference_size: Optional[int] = None
178
- """Reference set size"""
179
-
180
- def to_dict(self) -> Dict[str, Any]:
181
- result: Dict[str, Any] = {
182
- "mutation_model": self.mutation_model,
183
- "mutation_provider": self.mutation_provider.value if isinstance(self.mutation_provider, Enum) else self.mutation_provider,
184
- "mutation_temperature": self.mutation_temperature,
185
- "mutation_max_tokens": self.mutation_max_tokens,
186
- "population_size": self.population_size,
187
- "num_generations": self.num_generations,
188
- "elite_fraction": self.elite_fraction,
189
- "proposer_type": self.proposer_type,
190
- "proposer_effort": self.proposer_effort,
191
- "proposer_output_tokens": self.proposer_output_tokens,
192
- }
193
- if self.spec_path:
194
- result["spec_path"] = self.spec_path
195
- if self.train_size is not None:
196
- result["train_size"] = self.train_size
197
- if self.val_size is not None:
198
- result["val_size"] = self.val_size
199
- if self.reference_size is not None:
200
- result["reference_size"] = self.reference_size
201
- return result
202
-
203
-
204
- @dataclass
205
- class MIPROConfig:
206
- """MIPRO-specific model configuration.
207
-
208
- MIPRO uses a meta model to generate instruction/prompt proposals.
209
- """
210
-
211
- # Meta model (for generating instruction proposals)
212
- meta_model: str = "llama-3.3-70b-versatile"
213
- """Model for generating instruction proposals"""
214
-
215
- meta_provider: ModelProvider = ModelProvider.GROQ
216
- """Provider for meta model"""
217
-
218
- meta_temperature: float = 0.7
219
- """Temperature for proposal generation"""
220
-
221
- meta_max_tokens: int = 4096
222
- """Max tokens for proposal responses"""
223
-
224
- # Advanced MIPRO settings
225
- num_candidates: int = 20
226
- """Number of instruction candidates to generate"""
227
-
228
- num_trials: int = 10
229
- """Number of optimization trials"""
230
-
231
- # Proposer settings
232
- proposer_effort: ProposerEffort = "MEDIUM"
233
- """Effort level for proposal generation"""
234
-
235
- proposer_output_tokens: ProposerOutputTokens = "FAST"
236
- """Output token budget for proposer"""
237
-
238
- # Seed pool sizes (optional - agent decides if not set)
239
- train_size: Optional[int] = None
240
- """Training set size"""
241
-
242
- val_size: Optional[int] = None
243
- """Validation set size"""
244
-
245
- reference_size: Optional[int] = None
246
- """Reference set size"""
247
-
248
- def to_dict(self) -> Dict[str, Any]:
249
- result: Dict[str, Any] = {
250
- "meta_model": self.meta_model,
251
- "meta_provider": self.meta_provider.value if isinstance(self.meta_provider, Enum) else self.meta_provider,
252
- "meta_temperature": self.meta_temperature,
253
- "meta_max_tokens": self.meta_max_tokens,
254
- "num_candidates": self.num_candidates,
255
- "num_trials": self.num_trials,
256
- "proposer_effort": self.proposer_effort,
257
- "proposer_output_tokens": self.proposer_output_tokens,
258
- }
259
- if self.train_size is not None:
260
- result["train_size"] = self.train_size
261
- if self.val_size is not None:
262
- result["val_size"] = self.val_size
263
- if self.reference_size is not None:
264
- result["reference_size"] = self.reference_size
265
- return result
266
-
267
-
268
- @dataclass
269
- class ResearchConfig:
270
- """Configuration for prompt/pipeline research optimization.
271
-
272
- This is the main configuration for the "research" algorithm, which uses
273
- MIPRO or GEPA to optimize prompts/pipelines.
274
- """
275
-
276
- task_description: str
277
- """What to optimize (e.g., 'Improve accuracy on banking intent classification')"""
278
-
279
- tools: List[OptimizationTool] = field(default_factory=lambda: [OptimizationTool.MIPRO])
280
- """Optimization tools to use (mipro, gepa)"""
281
-
282
- # Datasets
283
- datasets: List[DatasetSource] = field(default_factory=list)
284
- """Datasets for training/evaluation"""
285
-
286
- # Metrics
287
- primary_metric: str = "accuracy"
288
- """Main metric to optimize"""
289
-
290
- secondary_metrics: List[str] = field(default_factory=list)
291
- """Additional metrics to track"""
292
-
293
- # Optimization parameters
294
- num_iterations: int = 10
295
- """Number of optimization iterations"""
296
-
297
- population_size: int = 20
298
- """Population size (GEPA) or candidates (MIPRO)"""
299
-
300
- timeout_minutes: int = 60
301
- """Maximum runtime in minutes"""
302
-
303
- max_eval_samples: Optional[int] = None
304
- """Max samples to evaluate per iteration"""
305
-
306
- # Model configurations
307
- permitted_models: Optional[PermittedModelsConfig] = None
308
- """Models the agent is allowed to use in the pipeline"""
309
-
310
- gepa_config: Optional[GEPAConfig] = None
311
- """GEPA-specific settings"""
312
-
313
- mipro_config: Optional[MIPROConfig] = None
314
- """MIPRO-specific settings"""
315
-
316
- # Initial prompt/pipeline
317
- initial_prompt: Optional[str] = None
318
- """Initial prompt template to optimize"""
319
-
320
- pipeline_entrypoint: Optional[str] = None
321
- """Path to pipeline script (e.g., 'pipeline.py')"""
322
-
323
- def to_dict(self) -> Dict[str, Any]:
324
- result: Dict[str, Any] = {
325
- "task_description": self.task_description,
326
- "tools": [t.value if isinstance(t, Enum) else t for t in self.tools],
327
- "primary_metric": self.primary_metric,
328
- "num_iterations": self.num_iterations,
329
- "population_size": self.population_size,
330
- "timeout_minutes": self.timeout_minutes,
331
- }
332
-
333
- if self.datasets:
334
- result["datasets"] = [d.to_dict() for d in self.datasets]
335
-
336
- if self.secondary_metrics:
337
- result["secondary_metrics"] = self.secondary_metrics
338
-
339
- if self.max_eval_samples is not None:
340
- result["max_eval_samples"] = self.max_eval_samples
341
-
342
- if self.permitted_models:
343
- result["permitted_models"] = self.permitted_models.to_dict()
344
-
345
- if self.gepa_config:
346
- result["gepa_config"] = self.gepa_config.to_dict()
347
-
348
- if self.mipro_config:
349
- result["mipro_config"] = self.mipro_config.to_dict()
350
-
351
- if self.initial_prompt:
352
- result["initial_prompt"] = self.initial_prompt
353
-
354
- if self.pipeline_entrypoint:
355
- result["pipeline_entrypoint"] = self.pipeline_entrypoint
356
-
357
- return result