synth-ai 0.4.1__py3-none-any.whl → 0.4.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic.
- synth_ai/__init__.py +13 -13
- synth_ai/cli/__init__.py +6 -15
- synth_ai/cli/commands/eval/__init__.py +6 -15
- synth_ai/cli/commands/eval/config.py +338 -0
- synth_ai/cli/commands/eval/core.py +236 -1091
- synth_ai/cli/commands/eval/runner.py +704 -0
- synth_ai/cli/commands/eval/validation.py +44 -117
- synth_ai/cli/commands/filter/core.py +7 -7
- synth_ai/cli/commands/filter/validation.py +2 -2
- synth_ai/cli/commands/smoke/core.py +7 -17
- synth_ai/cli/commands/status/__init__.py +1 -64
- synth_ai/cli/commands/status/client.py +50 -151
- synth_ai/cli/commands/status/config.py +3 -83
- synth_ai/cli/commands/status/errors.py +4 -13
- synth_ai/cli/commands/status/subcommands/__init__.py +2 -8
- synth_ai/cli/commands/status/subcommands/config.py +13 -0
- synth_ai/cli/commands/status/subcommands/files.py +18 -63
- synth_ai/cli/commands/status/subcommands/jobs.py +28 -311
- synth_ai/cli/commands/status/subcommands/models.py +18 -62
- synth_ai/cli/commands/status/subcommands/runs.py +16 -63
- synth_ai/cli/commands/status/subcommands/session.py +67 -172
- synth_ai/cli/commands/status/subcommands/summary.py +24 -32
- synth_ai/cli/commands/status/subcommands/utils.py +41 -0
- synth_ai/cli/commands/status/utils.py +16 -107
- synth_ai/cli/commands/train/__init__.py +18 -20
- synth_ai/cli/commands/train/errors.py +3 -3
- synth_ai/cli/commands/train/prompt_learning_validation.py +15 -16
- synth_ai/cli/commands/train/validation.py +7 -7
- synth_ai/cli/commands/train/{judge_schemas.py → verifier_schemas.py} +33 -34
- synth_ai/cli/commands/train/verifier_validation.py +235 -0
- synth_ai/cli/demo_apps/demo_task_apps/math/config.toml +0 -1
- synth_ai/cli/demo_apps/demo_task_apps/math/modal_task_app.py +2 -6
- synth_ai/cli/demo_apps/math/config.toml +0 -1
- synth_ai/cli/demo_apps/math/modal_task_app.py +2 -6
- synth_ai/cli/demo_apps/mipro/task_app.py +25 -47
- synth_ai/cli/lib/apps/task_app.py +12 -13
- synth_ai/cli/lib/task_app_discovery.py +6 -6
- synth_ai/cli/lib/train_cfgs.py +10 -10
- synth_ai/cli/task_apps/__init__.py +11 -0
- synth_ai/cli/task_apps/commands.py +7 -15
- synth_ai/core/env.py +12 -1
- synth_ai/core/errors.py +1 -2
- synth_ai/core/integrations/cloudflare.py +209 -33
- synth_ai/core/tracing_v3/abstractions.py +46 -0
- synth_ai/data/__init__.py +3 -30
- synth_ai/data/enums.py +1 -20
- synth_ai/data/rewards.py +100 -3
- synth_ai/products/graph_evolve/__init__.py +1 -2
- synth_ai/products/graph_evolve/config.py +16 -16
- synth_ai/products/graph_evolve/converters/__init__.py +3 -3
- synth_ai/products/graph_evolve/converters/openai_sft.py +7 -7
- synth_ai/products/graph_evolve/examples/hotpotqa/config.toml +1 -1
- synth_ai/products/graph_gepa/__init__.py +23 -0
- synth_ai/products/graph_gepa/converters/__init__.py +19 -0
- synth_ai/products/graph_gepa/converters/openai_sft.py +29 -0
- synth_ai/sdk/__init__.py +45 -35
- synth_ai/sdk/api/eval/__init__.py +33 -0
- synth_ai/sdk/api/eval/job.py +732 -0
- synth_ai/sdk/api/research_agent/__init__.py +276 -66
- synth_ai/sdk/api/train/builders.py +181 -0
- synth_ai/sdk/api/train/cli.py +41 -33
- synth_ai/sdk/api/train/configs/__init__.py +6 -4
- synth_ai/sdk/api/train/configs/prompt_learning.py +127 -33
- synth_ai/sdk/api/train/configs/rl.py +264 -16
- synth_ai/sdk/api/train/configs/sft.py +165 -1
- synth_ai/sdk/api/train/graph_validators.py +12 -12
- synth_ai/sdk/api/train/graphgen.py +169 -51
- synth_ai/sdk/api/train/graphgen_models.py +95 -45
- synth_ai/sdk/api/train/local_api.py +10 -0
- synth_ai/sdk/api/train/pollers.py +36 -0
- synth_ai/sdk/api/train/prompt_learning.py +390 -60
- synth_ai/sdk/api/train/rl.py +41 -5
- synth_ai/sdk/api/train/sft.py +2 -0
- synth_ai/sdk/api/train/task_app.py +20 -0
- synth_ai/sdk/api/train/validators.py +17 -17
- synth_ai/sdk/graphs/completions.py +239 -33
- synth_ai/sdk/{judging/schemas.py → graphs/verifier_schemas.py} +23 -23
- synth_ai/sdk/learning/__init__.py +35 -5
- synth_ai/sdk/learning/context_learning_client.py +531 -0
- synth_ai/sdk/learning/context_learning_types.py +294 -0
- synth_ai/sdk/learning/prompt_learning_client.py +1 -1
- synth_ai/sdk/learning/prompt_learning_types.py +2 -1
- synth_ai/sdk/learning/rl/__init__.py +0 -4
- synth_ai/sdk/learning/rl/contracts.py +0 -4
- synth_ai/sdk/localapi/__init__.py +40 -0
- synth_ai/sdk/localapi/apps/__init__.py +28 -0
- synth_ai/sdk/localapi/client.py +10 -0
- synth_ai/sdk/localapi/contracts.py +10 -0
- synth_ai/sdk/localapi/helpers.py +519 -0
- synth_ai/sdk/localapi/rollouts.py +93 -0
- synth_ai/sdk/localapi/server.py +29 -0
- synth_ai/sdk/localapi/template.py +49 -0
- synth_ai/sdk/streaming/handlers.py +6 -6
- synth_ai/sdk/streaming/streamer.py +10 -6
- synth_ai/sdk/task/__init__.py +18 -5
- synth_ai/sdk/task/apps/__init__.py +37 -1
- synth_ai/sdk/task/client.py +9 -1
- synth_ai/sdk/task/config.py +6 -11
- synth_ai/sdk/task/contracts.py +137 -95
- synth_ai/sdk/task/in_process.py +32 -22
- synth_ai/sdk/task/in_process_runner.py +9 -4
- synth_ai/sdk/task/rubrics/__init__.py +2 -3
- synth_ai/sdk/task/rubrics/loaders.py +4 -4
- synth_ai/sdk/task/rubrics/strict.py +3 -4
- synth_ai/sdk/task/server.py +76 -16
- synth_ai/sdk/task/trace_correlation_helpers.py +190 -139
- synth_ai/sdk/task/validators.py +34 -49
- synth_ai/sdk/training/__init__.py +7 -16
- synth_ai/sdk/tunnels/__init__.py +118 -0
- synth_ai/sdk/tunnels/cleanup.py +83 -0
- synth_ai/sdk/tunnels/ports.py +120 -0
- synth_ai/sdk/tunnels/tunneled_api.py +363 -0
- {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/METADATA +71 -4
- {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/RECORD +118 -128
- synth_ai/cli/commands/baseline/__init__.py +0 -12
- synth_ai/cli/commands/baseline/core.py +0 -636
- synth_ai/cli/commands/baseline/list.py +0 -94
- synth_ai/cli/commands/eval/errors.py +0 -81
- synth_ai/cli/commands/status/formatters.py +0 -164
- synth_ai/cli/commands/status/subcommands/pricing.py +0 -23
- synth_ai/cli/commands/status/subcommands/usage.py +0 -203
- synth_ai/cli/commands/train/judge_validation.py +0 -305
- synth_ai/cli/usage.py +0 -159
- synth_ai/data/specs.py +0 -36
- synth_ai/sdk/api/research_agent/cli.py +0 -428
- synth_ai/sdk/api/research_agent/config.py +0 -357
- synth_ai/sdk/api/research_agent/job.py +0 -717
- synth_ai/sdk/baseline/__init__.py +0 -25
- synth_ai/sdk/baseline/config.py +0 -209
- synth_ai/sdk/baseline/discovery.py +0 -216
- synth_ai/sdk/baseline/execution.py +0 -154
- synth_ai/sdk/judging/__init__.py +0 -15
- synth_ai/sdk/judging/base.py +0 -24
- synth_ai/sdk/judging/client.py +0 -191
- synth_ai/sdk/judging/types.py +0 -42
- synth_ai/sdk/research_agent/__init__.py +0 -34
- synth_ai/sdk/research_agent/container_builder.py +0 -328
- synth_ai/sdk/research_agent/container_spec.py +0 -198
- synth_ai/sdk/research_agent/defaults.py +0 -34
- synth_ai/sdk/research_agent/results_collector.py +0 -69
- synth_ai/sdk/specs/__init__.py +0 -46
- synth_ai/sdk/specs/dataclasses.py +0 -149
- synth_ai/sdk/specs/loader.py +0 -144
- synth_ai/sdk/specs/serializer.py +0 -199
- synth_ai/sdk/specs/validation.py +0 -250
- synth_ai/sdk/tracing/__init__.py +0 -39
- synth_ai/sdk/usage/__init__.py +0 -37
- synth_ai/sdk/usage/client.py +0 -171
- synth_ai/sdk/usage/models.py +0 -261
- {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/WHEEL +0 -0
- {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/top_level.txt +0 -0
synth_ai/sdk/api/train/graphgen_models.py +95 -45

@@ -1,8 +1,8 @@
-"""GraphGen (…
+"""GraphGen (Graph Opt) data models.
 
 This module provides Pydantic models for defining GraphGen datasets and job configurations.
 GraphGen is a simplified "Workflows API" for prompt optimization that wraps GEPA with
-auto-generated task apps and built-in …
+auto-generated task apps and built-in verifier configurations.
 
 Example:
     from synth_ai.sdk.api.train.graphgen_models import (
@@ -11,6 +11,7 @@ Example:
         GraphGenGoldOutput,
         GraphGenRubric,
         GraphGenJobConfig,
+        GraphGenVerifierConfig,
     )
 
     # Create a dataset
@@ -24,7 +25,7 @@ Example:
             GraphGenGoldOutput(output={"answer": "4"}, task_id="task1"),
             GraphGenGoldOutput(output={"answer": "Paris"}, task_id="task2"),
         ],
-        …
+        verifier_config=GraphGenVerifierConfig(mode="rubric"),
     )
 """
 
@@ -201,7 +202,7 @@ class GraphGenGoldOutput(BaseModel):
     """A gold/reference output.
 
     Can be linked to a specific task via task_id, or standalone (for reference examples).
-    Standalone gold outputs (no task_id) are used as reference pool for contrastive …
+    Standalone gold outputs (no task_id) are used as reference pool for contrastive verification.
     """
 
     output: Dict[str, Any] = Field(
@@ -217,16 +218,16 @@ class GraphGenGoldOutput(BaseModel):
 
 
 # Improvement 4: Define supported providers as a Literal type
-…
+VerifierProviderType = Literal["groq", "openai", "google", "anthropic"]
 
 
-class …
-    """Configuration for the …
+class GraphGenVerifierConfig(BaseModel):
+    """Configuration for the verifier used during optimization."""
 
     mode: Literal["rubric", "contrastive", "gold_examples"] = Field(
         default="rubric",
         description=(
-            "…
+            "Verifier mode: "
             "'rubric' = evaluate against criteria, "
             "'contrastive' = compare to gold output, "
             "'gold_examples' = use gold examples as few-shot context"
@@ -234,12 +235,12 @@ class GraphGenJudgeConfig(BaseModel):
     )
     model: str = Field(
         default="llama-3.3-70b-versatile",
-        description="Model to use for …
+        description="Model to use for verification",
     )
     # Improvement 4: Changed from str to Literal type for better type safety
-    provider: …
+    provider: VerifierProviderType = Field(
         default="groq",
-        description="Provider for …
+        description="Provider for verifier model (groq, openai, google, anthropic)",
     )
 
 
@@ -247,7 +248,7 @@ class GraphGenTaskSet(BaseModel):
     """The complete GraphGen dataset format.
 
     Contains tasks with arbitrary JSON inputs, gold outputs (optionally linked to tasks),
-    rubrics (task-specific and/or default), and …
+    rubrics (task-specific and/or default), and verifier configuration.
 
     Example:
         dataset = GraphGenTaskSet(
@@ -274,9 +275,9 @@ class GraphGenTaskSet(BaseModel):
         default=None,
         description="Default rubric applied to all tasks (merged with task-specific rubrics)",
     )
-    …
-        default_factory=…
-        description="Configuration for the …
+    verifier_config: GraphGenVerifierConfig = Field(
+        default_factory=GraphGenVerifierConfig,
+        description="Configuration for the verifier",
     )
     # Optional schemas (also accepted at top-level for backward/forward compatibility).
     input_schema: Optional[Dict[str, Any]] = Field(
@@ -417,7 +418,7 @@ class GraphGenTaskSet(BaseModel):
         return None
 
     def get_standalone_gold_outputs(self) -> List[GraphGenGoldOutput]:
-        """Get gold outputs not linked to any task (reference pool for contrastive …
+        """Get gold outputs not linked to any task (reference pool for contrastive verifier)."""
        return [gold for gold in self.gold_outputs if gold.task_id is None]
 
 
@@ -445,8 +446,8 @@ SUPPORTED_POLICY_MODELS = {
     "claude-3-5-haiku-latest",
 }
 
-# Supported …
-…
+# Supported verifier models
+SUPPORTED_VERIFIER_MODELS = {
     # Groq (fast, cheap)
     "llama-3.3-70b-versatile",
     "llama-3.1-70b-versatile",
@@ -457,8 +458,8 @@ SUPPORTED_JUDGE_MODELS = {
 
 # Default models
 DEFAULT_POLICY_MODEL = "gpt-4o-mini"
-…
-…
+DEFAULT_VERIFIER_MODEL = "llama-3.3-70b-versatile"
+DEFAULT_VERIFIER_PROVIDER = "groq"
 
 
 class EventInput(BaseModel):
@@ -484,7 +485,7 @@ class SessionTimeStepInput(BaseModel):
 
 
 class SessionTraceInput(BaseModel):
-    """V3-compatible session trace input for …
+    """V3-compatible session trace input for verifier evaluation."""
 
     model_config = ConfigDict(extra="allow")
 
@@ -505,7 +506,7 @@ class SessionTraceInput(BaseModel):
         return data
 
 
-class …
+class GraphGenGraphVerifierRequest(BaseModel):
     """Request for verifier graph inference."""
 
     model_config = ConfigDict(extra="forbid")
@@ -576,7 +577,7 @@ class OutcomeRewardResponse(BaseModel):
     annotation: Optional[Dict[str, Any]] = Field(default=None, description="Additional annotations (free-form)")
 
 
-class …
+class GraphGenGraphVerifierResponse(BaseModel):
     """Response from verifier graph inference."""
 
     started_at: datetime = Field(..., description="When inference request started (UTC)")
@@ -589,32 +590,81 @@ class GraphGenGraphJudgeResponse(BaseModel):
     event_rewards: List[EventRewardResponse] = Field(default_factory=list, description="Per-event rewards")
     outcome_reward: Optional[OutcomeRewardResponse] = Field(default=None, description="Episode-level outcome reward")
 
-    # Legacy fields (kept for backward compatibility)
-    score: float = Field(..., ge=0.0, le=1.0, description="Evaluation score (0-1)")
-    reasoning: Optional[str] = Field(default=None, description="Explanation for the score")
-    sub_scores: Optional[Dict[str, float]] = Field(default=None, description="Breakdown scores by criteria")
     raw_output: Optional[Dict[str, Any]] = Field(default=None, description="Full raw output from the verifier graph")
 
     usage: List[GraphGenGraphCompletionsModelUsage] = Field(default_factory=list, description="Token usage per model")
 
 
-class GraphGenGraphVerifierRequest(GraphGenGraphJudgeRequest):
-    """Alias for GraphGenGraphJudgeRequest with verifier terminology."""
-
-
-class GraphGenGraphVerifierResponse(GraphGenGraphJudgeResponse):
-    """Alias for GraphGenGraphJudgeResponse with verifier terminology."""
 
 
 class GraphGenJobConfig(BaseModel):
-    """Configuration for …
+    """Configuration for a GraphGen (Graph Opt) optimization job.
+
+    GraphGen provides a simplified API for training optimized graphs/workflows without
+    managing task apps manually. It supports three graph types:
+    - **policy**: Standard input-to-output graphs for classification, QA, generation
+    - **verifier**: Trace-to-score graphs for verifying/evaluating agent behavior
+    - **rlm**: Recursive Language Model graphs for massive contexts via tool-based search
 
     Example:
+        ```python
+        from synth_ai.sdk.api.train.graphgen_models import GraphGenJobConfig
+
         config = GraphGenJobConfig(
+            graph_type="policy",
             policy_model="gpt-4o-mini",
             rollout_budget=100,
             proposer_effort="medium",
+            problem_spec="Classify customer support messages into categories.",
         )
+        ```
+
+    Attributes:
+        graph_type: Type of graph - "policy", "verifier", or "rlm".
+        policy_model: Model for policy inference (e.g., "gpt-4o-mini", "claude-3-5-sonnet").
+        policy_provider: Provider for policy model (auto-detected if not specified).
+        rollout_budget: Total rollouts (evaluations) for optimization. Range: 10-10000.
+        proposer_effort: Mutation quality/cost level - "medium" or "high".
+            Note: "low" is not allowed (gpt-4.1-mini too weak for graph generation).
+        verifier_model: Override verifier model from dataset.
+        verifier_provider: Override verifier provider from dataset.
+        population_size: GEPA population size. Range: 2-20. Default: 4.
+        num_generations: Number of generations (auto-calculated from budget if not specified).
+        num_parents: Number of parents for selection. Range: 1-10. Default: 2.
+        evaluation_seeds: Specific seeds for evaluation (auto-generated if not specified).
+        problem_spec: Detailed problem specification for the graph proposer.
+            Include domain info like valid output labels, constraints, format requirements.
+        target_llm_calls: Target LLM calls per graph run (1-10). Default: 5.
+        configured_tools: Tool bindings for RLM graphs. Required for graph_type="rlm".
+
+    Returns:
+        After training completes via GraphGenJob, you receive a result dict:
+        ```python
+        {
+            "status": "succeeded",
+            "graphgen_job_id": "graphgen_abc123",
+            "best_score": 0.89,
+            "best_snapshot_id": "snap_xyz789",
+            "dataset_name": "My Classification Tasks",
+            "task_count": 50,
+        }
+        ```
+
+    Events:
+        During training, you'll receive streaming events via GraphGenJob.stream_until_complete():
+        - `graphgen.created` - Job created
+        - `graphgen.running` - Training started
+        - `graphgen.generation.started` - New generation of candidates started
+        - `graphgen.candidate.evaluated` - A candidate graph was evaluated
+        - `graphgen.generation.completed` - Generation finished with metrics
+        - `graphgen.optimization.completed` - Training finished successfully
+        - `graphgen.failed` - Job encountered an error
+
+    See Also:
+        - GraphGenJob: High-level SDK class for running jobs
+        - GraphGenTaskSet: Dataset format for tasks and gold outputs
+        - Training reference: /training/graph-evolve
+        - Quickstart: /quickstart/graph-evolve
     """
 
     # Graph type
@@ -656,14 +706,14 @@ class GraphGenJobConfig(BaseModel):
         ),
     )
 
-    # …
-    …
+    # Verifier settings (if not specified in dataset)
+    verifier_model: Optional[str] = Field(
         default=None,
-        description="Override …
+        description="Override verifier model from dataset",
     )
-    …
+    verifier_provider: Optional[str] = Field(
         default=None,
-        description="Override …
+        description="Override verifier provider from dataset",
     )
 
     # Advanced settings
@@ -792,7 +842,7 @@ GraphGenRubric = GraphGenRubric
 GraphGenRubricCriterion = GraphGenRubricCriterion
 GraphGenRubricOutcome = GraphGenRubricOutcome
 GraphGenRubricEvents = GraphGenRubricEvents
-…
+GraphGenVerifierConfig = GraphGenVerifierConfig
 GraphGenJobConfig = GraphGenJobConfig
 parse_graphgen_taskset = parse_graphgen_taskset
 load_graphgen_taskset = load_graphgen_taskset
@@ -800,7 +850,7 @@ load_graphgen_taskset = load_graphgen_taskset
 __all__ = [
     # Core types (new)
     "OutputConfig",
-    "…
+    "VerifierProviderType",
     # GraphGen names (preferred)
     "GraphGenTaskSet",
     "GraphGenTaskSetMetadata",
@@ -810,14 +860,14 @@ __all__ = [
     "GraphGenRubricCriterion",
     "GraphGenRubricOutcome",
     "GraphGenRubricEvents",
-    "…
+    "GraphGenVerifierConfig",
     "GraphGenJobConfig",
     "parse_graphgen_taskset",
     "load_graphgen_taskset",
     # Constants
     "SUPPORTED_POLICY_MODELS",
-    "…
+    "SUPPORTED_VERIFIER_MODELS",
     "DEFAULT_POLICY_MODEL",
-    "…
-    "…
+    "DEFAULT_VERIFIER_MODEL",
+    "DEFAULT_VERIFIER_PROVIDER",
 ]
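Read together, these hunks replace the judge-era names visible in the hunk headers (GraphGenJudgeConfig, SUPPORTED_JUDGE_MODELS) with verifier equivalents and drop the legacy alias classes and score fields. A minimal sketch of how the 0.4.4 surface reads, using only names and field values that appear in the hunks above; it has not been run against the released wheel, so treat it as illustrative:

```python
from synth_ai.sdk.api.train.graphgen_models import (
    GraphGenJobConfig,
    GraphGenVerifierConfig,
)

# Verifier config replaces the old GraphGenJudgeConfig; the mode, model, and
# provider values below mirror the Field defaults shown in the diff.
verifier = GraphGenVerifierConfig(
    mode="rubric",                      # or "contrastive" / "gold_examples"
    model="llama-3.3-70b-versatile",
    provider="groq",
)

# Job config using the new verifier override fields (values taken from the
# docstring example added in this release).
config = GraphGenJobConfig(
    graph_type="policy",
    policy_model="gpt-4o-mini",
    rollout_budget=100,
    proposer_effort="medium",
    problem_spec="Classify customer support messages into categories.",
    verifier_model="llama-3.3-70b-versatile",
    verifier_provider="groq",
)
```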
synth_ai/sdk/api/train/local_api.py +10 -0

@@ -0,0 +1,10 @@
+"""LocalAPI health helpers.
+
+Prefer this module over synth_ai.sdk.api.train.task_app for LocalAPI naming.
+"""
+
+from __future__ import annotations
+
+from synth_ai.sdk.api.train.task_app import LocalAPIHealth, check_local_api_health
+
+__all__ = ["LocalAPIHealth", "check_local_api_health"]
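The new module is a thin re-export shim, so both import paths should resolve to the same objects. A small sketch under that assumption (not verified against the wheel; the call signature of check_local_api_health is not part of this diff):

```python
# Old path (still present) and new LocalAPI-named path expose the same helpers.
from synth_ai.sdk.api.train.local_api import LocalAPIHealth, check_local_api_health
from synth_ai.sdk.api.train.task_app import check_local_api_health as _legacy_check

# Because local_api.py only re-exports, the function objects are identical.
assert check_local_api_health is _legacy_check
```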
synth_ai/sdk/api/train/pollers.py +36 -0

@@ -116,9 +116,45 @@ class PromptLearningJobPoller(JobPoller):
         return super().poll(f"/api/prompt-learning/online/jobs/{job_id}")
 
 
+class EvalJobPoller(JobPoller):
+    """Poller for evaluation jobs.
+
+    Polls the backend eval job API to check job status until completion.
+
+    Example:
+        >>> poller = EvalJobPoller(
+        ...     base_url="https://api.usesynth.ai",
+        ...     api_key="sk_live_...",
+        ...     interval=2.0,
+        ...     timeout=1200.0,
+        ... )
+        >>> outcome = poller.poll_job("eval-abc123")
+        >>> if outcome.status == "completed":
+        ...     print(outcome.payload)
+
+    See Also:
+        - `synth_ai.sdk.api.eval.EvalJob`: High-level eval job API
+        - Backend API: GET /api/eval/jobs/{job_id}
+    """
+
+    def poll_job(self, job_id: str) -> PollOutcome:
+        """Poll an eval job by ID.
+
+        Args:
+            job_id: Job ID (e.g., "eval-abc123")
+
+        Returns:
+            PollOutcome with status and payload
+        """
+        ctx: dict[str, Any] = {"job_id": job_id, "job_type": "eval"}
+        log_info("EvalJobPoller.poll_job invoked", ctx=ctx)
+        return super().poll(f"/api/eval/jobs/{job_id}")
+
+
 __all__ = [
     "PollOutcome",
     "RLJobPoller",
     "SFTJobPoller",
     "PromptLearningJobPoller",
+    "EvalJobPoller",
 ]
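A short sketch of the new poller in use, lifted from the docstring example in the hunk above; the constructor keywords (base_url, api_key, interval, timeout), the PollOutcome fields (status, payload), and the endpoint path come from that hunk, while the surrounding handling is assumed:

```python
from synth_ai.sdk.api.train.pollers import EvalJobPoller

poller = EvalJobPoller(
    base_url="https://api.usesynth.ai",
    api_key="sk_live_...",   # placeholder credential
    interval=2.0,            # seconds between status checks
    timeout=1200.0,          # stop polling after 20 minutes
)

# Polls GET /api/eval/jobs/{job_id} until the job reaches a terminal state.
outcome = poller.poll_job("eval-abc123")
if outcome.status == "completed":
    print(outcome.payload)
else:
    print(f"eval job finished with status: {outcome.status}")
```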