synth-ai 0.4.1__py3-none-any.whl → 0.4.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- synth_ai/__init__.py +13 -13
- synth_ai/cli/__init__.py +6 -15
- synth_ai/cli/commands/eval/__init__.py +6 -15
- synth_ai/cli/commands/eval/config.py +338 -0
- synth_ai/cli/commands/eval/core.py +236 -1091
- synth_ai/cli/commands/eval/runner.py +704 -0
- synth_ai/cli/commands/eval/validation.py +44 -117
- synth_ai/cli/commands/filter/core.py +7 -7
- synth_ai/cli/commands/filter/validation.py +2 -2
- synth_ai/cli/commands/smoke/core.py +7 -17
- synth_ai/cli/commands/status/__init__.py +1 -64
- synth_ai/cli/commands/status/client.py +50 -151
- synth_ai/cli/commands/status/config.py +3 -83
- synth_ai/cli/commands/status/errors.py +4 -13
- synth_ai/cli/commands/status/subcommands/__init__.py +2 -8
- synth_ai/cli/commands/status/subcommands/config.py +13 -0
- synth_ai/cli/commands/status/subcommands/files.py +18 -63
- synth_ai/cli/commands/status/subcommands/jobs.py +28 -311
- synth_ai/cli/commands/status/subcommands/models.py +18 -62
- synth_ai/cli/commands/status/subcommands/runs.py +16 -63
- synth_ai/cli/commands/status/subcommands/session.py +67 -172
- synth_ai/cli/commands/status/subcommands/summary.py +24 -32
- synth_ai/cli/commands/status/subcommands/utils.py +41 -0
- synth_ai/cli/commands/status/utils.py +16 -107
- synth_ai/cli/commands/train/__init__.py +18 -20
- synth_ai/cli/commands/train/errors.py +3 -3
- synth_ai/cli/commands/train/prompt_learning_validation.py +15 -16
- synth_ai/cli/commands/train/validation.py +7 -7
- synth_ai/cli/commands/train/{judge_schemas.py → verifier_schemas.py} +33 -34
- synth_ai/cli/commands/train/verifier_validation.py +235 -0
- synth_ai/cli/demo_apps/demo_task_apps/math/config.toml +0 -1
- synth_ai/cli/demo_apps/demo_task_apps/math/modal_task_app.py +2 -6
- synth_ai/cli/demo_apps/math/config.toml +0 -1
- synth_ai/cli/demo_apps/math/modal_task_app.py +2 -6
- synth_ai/cli/demo_apps/mipro/task_app.py +25 -47
- synth_ai/cli/lib/apps/task_app.py +12 -13
- synth_ai/cli/lib/task_app_discovery.py +6 -6
- synth_ai/cli/lib/train_cfgs.py +10 -10
- synth_ai/cli/task_apps/__init__.py +11 -0
- synth_ai/cli/task_apps/commands.py +7 -15
- synth_ai/core/env.py +12 -1
- synth_ai/core/errors.py +1 -2
- synth_ai/core/integrations/cloudflare.py +209 -33
- synth_ai/core/tracing_v3/abstractions.py +46 -0
- synth_ai/data/__init__.py +3 -30
- synth_ai/data/enums.py +1 -20
- synth_ai/data/rewards.py +100 -3
- synth_ai/products/graph_evolve/__init__.py +1 -2
- synth_ai/products/graph_evolve/config.py +16 -16
- synth_ai/products/graph_evolve/converters/__init__.py +3 -3
- synth_ai/products/graph_evolve/converters/openai_sft.py +7 -7
- synth_ai/products/graph_evolve/examples/hotpotqa/config.toml +1 -1
- synth_ai/products/graph_gepa/__init__.py +23 -0
- synth_ai/products/graph_gepa/converters/__init__.py +19 -0
- synth_ai/products/graph_gepa/converters/openai_sft.py +29 -0
- synth_ai/sdk/__init__.py +45 -35
- synth_ai/sdk/api/eval/__init__.py +33 -0
- synth_ai/sdk/api/eval/job.py +732 -0
- synth_ai/sdk/api/research_agent/__init__.py +276 -66
- synth_ai/sdk/api/train/builders.py +181 -0
- synth_ai/sdk/api/train/cli.py +41 -33
- synth_ai/sdk/api/train/configs/__init__.py +6 -4
- synth_ai/sdk/api/train/configs/prompt_learning.py +127 -33
- synth_ai/sdk/api/train/configs/rl.py +264 -16
- synth_ai/sdk/api/train/configs/sft.py +165 -1
- synth_ai/sdk/api/train/graph_validators.py +12 -12
- synth_ai/sdk/api/train/graphgen.py +169 -51
- synth_ai/sdk/api/train/graphgen_models.py +95 -45
- synth_ai/sdk/api/train/local_api.py +10 -0
- synth_ai/sdk/api/train/pollers.py +36 -0
- synth_ai/sdk/api/train/prompt_learning.py +390 -60
- synth_ai/sdk/api/train/rl.py +41 -5
- synth_ai/sdk/api/train/sft.py +2 -0
- synth_ai/sdk/api/train/task_app.py +20 -0
- synth_ai/sdk/api/train/validators.py +17 -17
- synth_ai/sdk/graphs/completions.py +239 -33
- synth_ai/sdk/{judging/schemas.py → graphs/verifier_schemas.py} +23 -23
- synth_ai/sdk/learning/__init__.py +35 -5
- synth_ai/sdk/learning/context_learning_client.py +531 -0
- synth_ai/sdk/learning/context_learning_types.py +294 -0
- synth_ai/sdk/learning/prompt_learning_client.py +1 -1
- synth_ai/sdk/learning/prompt_learning_types.py +2 -1
- synth_ai/sdk/learning/rl/__init__.py +0 -4
- synth_ai/sdk/learning/rl/contracts.py +0 -4
- synth_ai/sdk/localapi/__init__.py +40 -0
- synth_ai/sdk/localapi/apps/__init__.py +28 -0
- synth_ai/sdk/localapi/client.py +10 -0
- synth_ai/sdk/localapi/contracts.py +10 -0
- synth_ai/sdk/localapi/helpers.py +519 -0
- synth_ai/sdk/localapi/rollouts.py +93 -0
- synth_ai/sdk/localapi/server.py +29 -0
- synth_ai/sdk/localapi/template.py +49 -0
- synth_ai/sdk/streaming/handlers.py +6 -6
- synth_ai/sdk/streaming/streamer.py +10 -6
- synth_ai/sdk/task/__init__.py +18 -5
- synth_ai/sdk/task/apps/__init__.py +37 -1
- synth_ai/sdk/task/client.py +9 -1
- synth_ai/sdk/task/config.py +6 -11
- synth_ai/sdk/task/contracts.py +137 -95
- synth_ai/sdk/task/in_process.py +32 -22
- synth_ai/sdk/task/in_process_runner.py +9 -4
- synth_ai/sdk/task/rubrics/__init__.py +2 -3
- synth_ai/sdk/task/rubrics/loaders.py +4 -4
- synth_ai/sdk/task/rubrics/strict.py +3 -4
- synth_ai/sdk/task/server.py +76 -16
- synth_ai/sdk/task/trace_correlation_helpers.py +190 -139
- synth_ai/sdk/task/validators.py +34 -49
- synth_ai/sdk/training/__init__.py +7 -16
- synth_ai/sdk/tunnels/__init__.py +118 -0
- synth_ai/sdk/tunnels/cleanup.py +83 -0
- synth_ai/sdk/tunnels/ports.py +120 -0
- synth_ai/sdk/tunnels/tunneled_api.py +363 -0
- {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/METADATA +71 -4
- {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/RECORD +118 -128
- synth_ai/cli/commands/baseline/__init__.py +0 -12
- synth_ai/cli/commands/baseline/core.py +0 -636
- synth_ai/cli/commands/baseline/list.py +0 -94
- synth_ai/cli/commands/eval/errors.py +0 -81
- synth_ai/cli/commands/status/formatters.py +0 -164
- synth_ai/cli/commands/status/subcommands/pricing.py +0 -23
- synth_ai/cli/commands/status/subcommands/usage.py +0 -203
- synth_ai/cli/commands/train/judge_validation.py +0 -305
- synth_ai/cli/usage.py +0 -159
- synth_ai/data/specs.py +0 -36
- synth_ai/sdk/api/research_agent/cli.py +0 -428
- synth_ai/sdk/api/research_agent/config.py +0 -357
- synth_ai/sdk/api/research_agent/job.py +0 -717
- synth_ai/sdk/baseline/__init__.py +0 -25
- synth_ai/sdk/baseline/config.py +0 -209
- synth_ai/sdk/baseline/discovery.py +0 -216
- synth_ai/sdk/baseline/execution.py +0 -154
- synth_ai/sdk/judging/__init__.py +0 -15
- synth_ai/sdk/judging/base.py +0 -24
- synth_ai/sdk/judging/client.py +0 -191
- synth_ai/sdk/judging/types.py +0 -42
- synth_ai/sdk/research_agent/__init__.py +0 -34
- synth_ai/sdk/research_agent/container_builder.py +0 -328
- synth_ai/sdk/research_agent/container_spec.py +0 -198
- synth_ai/sdk/research_agent/defaults.py +0 -34
- synth_ai/sdk/research_agent/results_collector.py +0 -69
- synth_ai/sdk/specs/__init__.py +0 -46
- synth_ai/sdk/specs/dataclasses.py +0 -149
- synth_ai/sdk/specs/loader.py +0 -144
- synth_ai/sdk/specs/serializer.py +0 -199
- synth_ai/sdk/specs/validation.py +0 -250
- synth_ai/sdk/tracing/__init__.py +0 -39
- synth_ai/sdk/usage/__init__.py +0 -37
- synth_ai/sdk/usage/client.py +0 -171
- synth_ai/sdk/usage/models.py +0 -261
- {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/WHEEL +0 -0
- {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/top_level.txt +0 -0
|
@@ -196,11 +196,11 @@ class ParetoFloorsConfig(BaseModel):
|
|
|
196
196
|
|
|
197
197
|
|
|
198
198
|
# ============================================================================
|
|
199
|
-
#
|
|
199
|
+
# Graph Opt Dataset Format Models
|
|
200
200
|
# ============================================================================
|
|
201
201
|
|
|
202
202
|
class TaskInput(BaseModel):
|
|
203
|
-
"""A single task/example in
|
|
203
|
+
"""A single task/example in a Graph Opt dataset.
|
|
204
204
|
|
|
205
205
|
For POLICY graphs: Contains the problem to solve.
|
|
206
206
|
For VERIFIER graphs: Contains a trace to evaluate.
|
|
@@ -267,8 +267,8 @@ class GoldOutput(BaseModel):
|
|
|
267
267
|
score: Optional[float] = Field(default=None, ge=0.0, le=1.0, description="Gold score (0.0-1.0)")
|
|
268
268
|
|
|
269
269
|
|
|
270
|
-
class ADASDatasetMetadata(BaseModel):
|
|
271
|
-
"""Metadata about
|
|
270
|
+
class GraphOptDatasetMetadata(BaseModel):
|
|
271
|
+
"""Metadata about a Graph Opt dataset.
|
|
272
272
|
|
|
273
273
|
Provides context for graph generation and optimization.
|
|
274
274
|
"""
|
|
@@ -279,8 +279,8 @@ class ADASDatasetMetadata(BaseModel):
|
|
|
279
279
|
domain: Optional[str] = Field(default=None, description="Domain (qa, code, games, etc.)")
|
|
280
280
|
|
|
281
281
|
|
|
282
|
-
class ADASDataset(BaseModel):
|
|
283
|
-
"""Complete
|
|
282
|
+
class GraphOptDataset(BaseModel):
|
|
283
|
+
"""Complete Graph Opt dataset format for inline upload.
|
|
284
284
|
|
|
285
285
|
This is the schema for the `dataset` field in GraphOptimizationConfig
|
|
286
286
|
when uploading data directly instead of using a pre-registered dataset.
|
|
@@ -303,7 +303,7 @@ class ADASDataset(BaseModel):
|
|
|
303
303
|
"""
|
|
304
304
|
tasks: List[TaskInput] = Field(..., min_length=1, description="List of tasks/examples")
|
|
305
305
|
gold_outputs: List[GoldOutput] = Field(..., min_length=1, description="Ground truth for each task")
|
|
306
|
-
metadata: ADASDatasetMetadata = Field(default_factory=ADASDatasetMetadata)
|
|
306
|
+
metadata: GraphOptDatasetMetadata = Field(default_factory=GraphOptDatasetMetadata)
|
|
307
307
|
|
|
308
308
|
@field_validator("tasks", mode="before")
|
|
309
309
|
@classmethod
|
|
@@ -443,7 +443,7 @@ class GraphOptimizationConfig(BaseModel):
|
|
|
443
443
|
# Format: {"name": str, "task_description": str, "examples": [...]}
|
|
444
444
|
dataset: Optional[Dict[str, Any]] = Field(
|
|
445
445
|
default=None,
|
|
446
|
-
description="Inline dataset for upload (
|
|
446
|
+
description="Inline dataset for upload (GraphGen format). If provided, dataset_name is used as identifier."
|
|
447
447
|
)
|
|
448
448
|
|
|
449
449
|
# Task context for initial graph generation (when dataset doesn't provide it)
|
|
@@ -464,8 +464,8 @@ class GraphOptimizationConfig(BaseModel):
|
|
|
464
464
|
)
|
|
465
465
|
|
|
466
466
|
# Scoring configuration
|
|
467
|
-
|
|
468
|
-
|
|
467
|
+
verifier_mode: str = Field(default="rubric", description="Verifier mode: 'rubric', 'contrastive', 'fewshot'")
|
|
468
|
+
verifier_model: str = Field(default="gpt-4o-mini", description="Model for LLM verifier scoring")
|
|
469
469
|
|
|
470
470
|
@field_validator("graph_type", mode="before")
|
|
471
471
|
@classmethod
|
|
@@ -529,8 +529,8 @@ class GraphOptimizationConfig(BaseModel):
|
|
|
529
529
|
"graph_structure": self.graph_structure.value,
|
|
530
530
|
"allowed_policy_models": self.allowed_policy_models,
|
|
531
531
|
"dataset_config": self.dataset_config,
|
|
532
|
-
"
|
|
533
|
-
"
|
|
532
|
+
"verifier_mode": self.verifier_mode,
|
|
533
|
+
"verifier_model": self.verifier_model,
|
|
534
534
|
}
|
|
535
535
|
|
|
536
536
|
if self.max_llm_calls_per_run is not None:
|
|
@@ -551,19 +551,19 @@ class GraphOptimizationConfig(BaseModel):
|
|
|
551
551
|
if self.dataset:
|
|
552
552
|
# Validate dataset structure using Pydantic model
|
|
553
553
|
try:
|
|
554
|
-
validated = ADASDataset(**self.dataset)
|
|
554
|
+
validated = GraphOptDataset(**self.dataset)
|
|
555
555
|
# Check for task ID consistency (non-fatal warnings)
|
|
556
556
|
warnings = validated.validate_task_ids()
|
|
557
557
|
if warnings:
|
|
558
558
|
import logging
|
|
559
559
|
logger = logging.getLogger(__name__)
|
|
560
560
|
for w in warnings:
|
|
561
|
-
logger.warning(f"[
|
|
561
|
+
logger.warning(f"[GraphOptDataset] {w}")
|
|
562
562
|
except Exception as e:
|
|
563
563
|
raise ValueError(
|
|
564
|
-
f"Invalid
|
|
564
|
+
f"Invalid Graph Opt dataset format: {e}\n"
|
|
565
565
|
f"Expected format: {{'tasks': [...], 'gold_outputs': [...], 'metadata': {{...}}}}\n"
|
|
566
|
-
f"See
|
|
566
|
+
f"See GraphOptDataset model for full schema.\n"
|
|
567
567
|
f"Got keys: {list(self.dataset.keys())}"
|
|
568
568
|
)
|
|
569
569
|
request["dataset"] = self.dataset
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"""Dataset converters for Graph GEPA.
|
|
2
2
|
|
|
3
3
|
This module provides converters to transform common dataset formats
|
|
4
|
-
into
|
|
4
|
+
into Graph Opt format for use with Graph GEPA optimization.
|
|
5
5
|
|
|
6
6
|
Supported formats:
|
|
7
7
|
- OpenAI SFT: JSONL with messages array (system, user, assistant roles)
|
|
@@ -11,13 +11,13 @@ Example:
|
|
|
11
11
|
>>>
|
|
12
12
|
>>> # Convert from file
|
|
13
13
|
>>> result = convert_openai_sft("training_data.jsonl")
|
|
14
|
-
>>>
|
|
14
|
+
>>> graph_opt_dataset = result.dataset
|
|
15
15
|
>>>
|
|
16
16
|
>>> # Use in GraphOptimizationConfig
|
|
17
17
|
>>> from synth_ai.products.graph_gepa import GraphOptimizationConfig
|
|
18
18
|
>>> config = GraphOptimizationConfig(
|
|
19
19
|
... dataset_name="my_qa_task",
|
|
20
|
-
... dataset=
|
|
20
|
+
... dataset=graph_opt_dataset,
|
|
21
21
|
... graph_type="policy",
|
|
22
22
|
... ...
|
|
23
23
|
... )
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
"""OpenAI SFT format to
|
|
1
|
+
"""OpenAI SFT format to Graph Opt dataset converter.
|
|
2
2
|
|
|
3
|
-
This module converts OpenAI SFT format (JSONL with messages array) to
|
|
3
|
+
This module converts OpenAI SFT format (JSONL with messages array) to Graph Opt format
|
|
4
4
|
for use with Graph GEPA optimization.
|
|
5
5
|
|
|
6
6
|
Example OpenAI SFT format:
|
|
@@ -10,7 +10,7 @@ Example OpenAI SFT format:
|
|
|
10
10
|
{"role": "assistant", "content": "Paris"}
|
|
11
11
|
]}
|
|
12
12
|
|
|
13
|
-
Example
|
|
13
|
+
Example Graph Opt output:
|
|
14
14
|
{
|
|
15
15
|
"tasks": [{"task_id": "sft_0000", "input": {"user_message": "..."}}],
|
|
16
16
|
"gold_outputs": [{"task_id": "sft_0000", "output": {"response": "..."}, "score": 1.0}],
|
|
@@ -59,10 +59,10 @@ class ConversionWarning:
|
|
|
59
59
|
|
|
60
60
|
@dataclass
|
|
61
61
|
class ConversionResult:
|
|
62
|
-
"""Result of converting SFT to
|
|
62
|
+
"""Result of converting SFT to Graph Opt.
|
|
63
63
|
|
|
64
64
|
Attributes:
|
|
65
|
-
dataset: The
|
|
65
|
+
dataset: The Graph Opt dataset dict
|
|
66
66
|
warnings: Non-fatal issues encountered
|
|
67
67
|
stats: Conversion statistics
|
|
68
68
|
"""
|
|
@@ -343,7 +343,7 @@ def convert_openai_sft(
|
|
|
343
343
|
detect_template: bool = True,
|
|
344
344
|
max_examples: int | None = None,
|
|
345
345
|
) -> ConversionResult:
|
|
346
|
-
"""Convert OpenAI SFT format to
|
|
346
|
+
"""Convert OpenAI SFT format to Graph Opt dataset.
|
|
347
347
|
|
|
348
348
|
Args:
|
|
349
349
|
source: Path to JSONL file, or list of SFT example dicts
|
|
@@ -352,7 +352,7 @@ def convert_openai_sft(
|
|
|
352
352
|
max_examples: Maximum number of examples to include (None for all)
|
|
353
353
|
|
|
354
354
|
Returns:
|
|
355
|
-
ConversionResult containing the
|
|
355
|
+
ConversionResult containing the Graph Opt dataset, warnings, and stats
|
|
356
356
|
|
|
357
357
|
Raises:
|
|
358
358
|
ConversionError: If no valid examples found
|
|
@@ -7,7 +7,7 @@ algorithm = "graph_gepa"
|
|
|
7
7
|
|
|
8
8
|
# What we're optimizing
|
|
9
9
|
dataset_name = "hotpotqa"
|
|
10
|
-
graph_type = "policy" # "policy" (solves tasks), "verifier" (
|
|
10
|
+
graph_type = "policy" # "policy" (solves tasks), "verifier" (verifies results), or "rlm" (massive context via tools)
|
|
11
11
|
graph_structure = "dag" # "single_prompt", "dag", or "conditional"
|
|
12
12
|
|
|
13
13
|
# Custom topology guidance (optional - adds detail to graph_structure)
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""Backward-compatible Graph GEPA package alias."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from synth_ai.products.graph_evolve import (
|
|
6
|
+
GraphOptimizationClient,
|
|
7
|
+
GraphOptimizationConfig,
|
|
8
|
+
ConversionError,
|
|
9
|
+
ConversionResult,
|
|
10
|
+
ConversionWarning,
|
|
11
|
+
convert_openai_sft,
|
|
12
|
+
preview_conversion,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
__all__ = [
|
|
16
|
+
"GraphOptimizationConfig",
|
|
17
|
+
"GraphOptimizationClient",
|
|
18
|
+
"convert_openai_sft",
|
|
19
|
+
"preview_conversion",
|
|
20
|
+
"ConversionResult",
|
|
21
|
+
"ConversionWarning",
|
|
22
|
+
"ConversionError",
|
|
23
|
+
]
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""Graph GEPA converters (compatibility layer)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from synth_ai.products.graph_evolve.converters import (
|
|
6
|
+
ConversionError,
|
|
7
|
+
ConversionResult,
|
|
8
|
+
ConversionWarning,
|
|
9
|
+
convert_openai_sft,
|
|
10
|
+
preview_conversion,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
__all__ = [
|
|
14
|
+
"convert_openai_sft",
|
|
15
|
+
"preview_conversion",
|
|
16
|
+
"ConversionResult",
|
|
17
|
+
"ConversionWarning",
|
|
18
|
+
"ConversionError",
|
|
19
|
+
]
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
"""Compatibility wrapper for OpenAI SFT converters."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from synth_ai.products.graph_evolve.converters.openai_sft import (
|
|
6
|
+
ConversionError,
|
|
7
|
+
ConversionResult,
|
|
8
|
+
ConversionWarning,
|
|
9
|
+
convert_openai_sft,
|
|
10
|
+
detect_system_prompt,
|
|
11
|
+
extract_fields,
|
|
12
|
+
infer_template,
|
|
13
|
+
parse_sft_example,
|
|
14
|
+
preview_conversion,
|
|
15
|
+
validate_sft_examples,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
__all__ = [
|
|
19
|
+
"ConversionError",
|
|
20
|
+
"ConversionResult",
|
|
21
|
+
"ConversionWarning",
|
|
22
|
+
"convert_openai_sft",
|
|
23
|
+
"detect_system_prompt",
|
|
24
|
+
"extract_fields",
|
|
25
|
+
"infer_template",
|
|
26
|
+
"parse_sft_example",
|
|
27
|
+
"preview_conversion",
|
|
28
|
+
"validate_sft_examples",
|
|
29
|
+
]
|
synth_ai/sdk/__init__.py
CHANGED
|
@@ -1,19 +1,16 @@
|
|
|
1
1
|
"""Synth AI SDK Layer.
|
|
2
2
|
|
|
3
3
|
This module provides the user-facing programmatic API for:
|
|
4
|
-
- Training (prompt learning, SFT, RL)
|
|
4
|
+
- Training (prompt learning, SFT, RL, graph generation)
|
|
5
5
|
- Task apps (in-process, deployed, Modal)
|
|
6
|
-
-
|
|
6
|
+
- Graphs (verifiers, completions)
|
|
7
7
|
- Inference (model inference via Synth)
|
|
8
|
-
- Tracing (session traces)
|
|
9
|
-
- Specs (system specifications)
|
|
10
|
-
- Research agents (scaffold tuning, evaluation)
|
|
11
8
|
|
|
12
9
|
Usage:
|
|
13
10
|
from synth_ai.sdk import (
|
|
14
11
|
PromptLearningJob,
|
|
15
12
|
InProcessTaskApp,
|
|
16
|
-
|
|
13
|
+
VerifierClient,
|
|
17
14
|
InferenceClient,
|
|
18
15
|
)
|
|
19
16
|
|
|
@@ -24,32 +21,22 @@ Dependency rules:
|
|
|
24
21
|
|
|
25
22
|
from __future__ import annotations
|
|
26
23
|
|
|
27
|
-
# Research Agent
|
|
28
|
-
from synth_ai.sdk.api.research_agent import ResearchAgentJob, ResearchAgentJobConfig
|
|
29
|
-
|
|
30
24
|
# Inference
|
|
31
25
|
from synth_ai.sdk.inference import InferenceClient
|
|
32
26
|
|
|
33
27
|
# Jobs API Client
|
|
34
28
|
from synth_ai.sdk.jobs import JobsClient
|
|
35
29
|
|
|
36
|
-
#
|
|
37
|
-
from synth_ai.sdk.judging import JudgeClient, JudgeOptions, JudgeScoreResponse
|
|
30
|
+
# Verifier types and graph clients
|
|
38
31
|
from synth_ai.sdk.graphs import GraphCompletionsClient, GraphTarget, VerifierClient
|
|
39
|
-
|
|
40
|
-
# Specs
|
|
41
|
-
from synth_ai.sdk.specs import (
|
|
42
|
-
load_spec_from_dict,
|
|
43
|
-
load_spec_from_file,
|
|
44
|
-
spec_to_prompt_context,
|
|
45
|
-
validate_spec_dict,
|
|
46
|
-
validate_spec_file,
|
|
47
|
-
)
|
|
32
|
+
from synth_ai.sdk.graphs.verifier_schemas import VerifierOptions, VerifierScoreResponse
|
|
48
33
|
|
|
49
34
|
# Task Apps
|
|
50
35
|
from synth_ai.sdk.task import (
|
|
51
36
|
InProcessJobResult,
|
|
52
37
|
InProcessTaskApp,
|
|
38
|
+
LocalAPIClient,
|
|
39
|
+
LocalAPIConfig,
|
|
53
40
|
TaskAppConfig,
|
|
54
41
|
create_task_app,
|
|
55
42
|
merge_dot_overrides,
|
|
@@ -69,10 +56,27 @@ from synth_ai.sdk.training import (
|
|
|
69
56
|
GraphGenTask,
|
|
70
57
|
GraphGenGoldOutput,
|
|
71
58
|
GraphGenRubric,
|
|
72
|
-
|
|
59
|
+
GraphGenVerifierConfig,
|
|
73
60
|
load_graphgen_taskset,
|
|
74
61
|
)
|
|
75
62
|
|
|
63
|
+
# Evaluation
|
|
64
|
+
from synth_ai.sdk.api.eval import EvalJob, EvalJobConfig
|
|
65
|
+
|
|
66
|
+
# Tunnels - commonly used functions for notebook/script usage
|
|
67
|
+
from synth_ai.sdk.tunnels import (
|
|
68
|
+
rotate_tunnel,
|
|
69
|
+
open_managed_tunnel,
|
|
70
|
+
stop_tunnel,
|
|
71
|
+
track_process,
|
|
72
|
+
cleanup_all,
|
|
73
|
+
verify_tunnel_dns_resolution,
|
|
74
|
+
wait_for_health_check,
|
|
75
|
+
kill_port,
|
|
76
|
+
is_port_available,
|
|
77
|
+
find_available_port,
|
|
78
|
+
)
|
|
79
|
+
|
|
76
80
|
__all__ = [
|
|
77
81
|
# Training
|
|
78
82
|
"PromptLearningJob",
|
|
@@ -85,8 +89,11 @@ __all__ = [
|
|
|
85
89
|
"GraphGenTask",
|
|
86
90
|
"GraphGenGoldOutput",
|
|
87
91
|
"GraphGenRubric",
|
|
88
|
-
"
|
|
92
|
+
"GraphGenVerifierConfig",
|
|
89
93
|
"load_graphgen_taskset",
|
|
94
|
+
# Evaluation
|
|
95
|
+
"EvalJob",
|
|
96
|
+
"EvalJobConfig",
|
|
90
97
|
# Task Apps
|
|
91
98
|
"InProcessTaskApp",
|
|
92
99
|
"InProcessJobResult",
|
|
@@ -94,26 +101,29 @@ __all__ = [
|
|
|
94
101
|
"resolve_backend_api_base",
|
|
95
102
|
"run_in_process_job",
|
|
96
103
|
"run_in_process_job_sync",
|
|
104
|
+
"LocalAPIClient",
|
|
105
|
+
"LocalAPIConfig",
|
|
97
106
|
"TaskAppConfig",
|
|
98
107
|
"create_task_app",
|
|
99
|
-
#
|
|
100
|
-
"JudgeClient",
|
|
108
|
+
# Graphs / Verifier
|
|
101
109
|
"VerifierClient",
|
|
102
|
-
"
|
|
103
|
-
"
|
|
110
|
+
"VerifierOptions",
|
|
111
|
+
"VerifierScoreResponse",
|
|
104
112
|
"GraphCompletionsClient",
|
|
105
113
|
"GraphTarget",
|
|
106
114
|
# Inference
|
|
107
115
|
"InferenceClient",
|
|
108
|
-
# Specs
|
|
109
|
-
"load_spec_from_dict",
|
|
110
|
-
"load_spec_from_file",
|
|
111
|
-
"spec_to_prompt_context",
|
|
112
|
-
"validate_spec_dict",
|
|
113
|
-
"validate_spec_file",
|
|
114
|
-
# Research Agent
|
|
115
|
-
"ResearchAgentJob",
|
|
116
|
-
"ResearchAgentJobConfig",
|
|
117
116
|
# Jobs API Client
|
|
118
117
|
"JobsClient",
|
|
118
|
+
# Tunnels
|
|
119
|
+
"rotate_tunnel",
|
|
120
|
+
"open_managed_tunnel",
|
|
121
|
+
"stop_tunnel",
|
|
122
|
+
"track_process",
|
|
123
|
+
"cleanup_all",
|
|
124
|
+
"verify_tunnel_dns_resolution",
|
|
125
|
+
"wait_for_health_check",
|
|
126
|
+
"kill_port",
|
|
127
|
+
"is_port_available",
|
|
128
|
+
"find_available_port",
|
|
119
129
|
]
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""First-class SDK API for evaluation jobs.
|
|
2
|
+
|
|
3
|
+
This module provides high-level abstractions for running evaluation jobs
|
|
4
|
+
both via CLI and programmatically in Python scripts.
|
|
5
|
+
|
|
6
|
+
Example CLI usage:
|
|
7
|
+
python -m synth_ai.cli eval --config banking77_eval.toml --backend http://localhost:8000
|
|
8
|
+
|
|
9
|
+
Example SDK usage:
|
|
10
|
+
from synth_ai.sdk.api.eval import EvalJob, EvalResult
|
|
11
|
+
|
|
12
|
+
job = EvalJob(config)
|
|
13
|
+
job.submit()
|
|
14
|
+
|
|
15
|
+
# progress=True provides built-in status printing:
|
|
16
|
+
# [00:05] running | 3/10 completed
|
|
17
|
+
# [00:10] running | 7/10 completed
|
|
18
|
+
# [00:15] completed | mean_score: 0.85
|
|
19
|
+
result = job.poll_until_complete(progress=True)
|
|
20
|
+
|
|
21
|
+
# Typed result access (not raw dict)
|
|
22
|
+
if result.succeeded:
|
|
23
|
+
print(f"Mean score: {result.mean_score}")
|
|
24
|
+
print(f"Total cost: ${result.total_cost_usd:.4f}")
|
|
25
|
+
|
|
26
|
+
See Also:
|
|
27
|
+
- `synth_ai.cli.commands.eval`: CLI implementation
|
|
28
|
+
- Backend API: POST /api/eval/jobs
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
from .job import EvalJob, EvalJobConfig, EvalResult, EvalStatus
|
|
32
|
+
|
|
33
|
+
__all__ = ["EvalJob", "EvalJobConfig", "EvalResult", "EvalStatus"]
|