synth-ai 0.4.1__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of synth-ai might be problematic.

Files changed (153)
  1. synth_ai/__init__.py +13 -13
  2. synth_ai/cli/__init__.py +6 -15
  3. synth_ai/cli/commands/eval/__init__.py +6 -15
  4. synth_ai/cli/commands/eval/config.py +338 -0
  5. synth_ai/cli/commands/eval/core.py +236 -1091
  6. synth_ai/cli/commands/eval/runner.py +704 -0
  7. synth_ai/cli/commands/eval/validation.py +44 -117
  8. synth_ai/cli/commands/filter/core.py +7 -7
  9. synth_ai/cli/commands/filter/validation.py +2 -2
  10. synth_ai/cli/commands/smoke/core.py +7 -17
  11. synth_ai/cli/commands/status/__init__.py +1 -64
  12. synth_ai/cli/commands/status/client.py +50 -151
  13. synth_ai/cli/commands/status/config.py +3 -83
  14. synth_ai/cli/commands/status/errors.py +4 -13
  15. synth_ai/cli/commands/status/subcommands/__init__.py +2 -8
  16. synth_ai/cli/commands/status/subcommands/config.py +13 -0
  17. synth_ai/cli/commands/status/subcommands/files.py +18 -63
  18. synth_ai/cli/commands/status/subcommands/jobs.py +28 -311
  19. synth_ai/cli/commands/status/subcommands/models.py +18 -62
  20. synth_ai/cli/commands/status/subcommands/runs.py +16 -63
  21. synth_ai/cli/commands/status/subcommands/session.py +67 -172
  22. synth_ai/cli/commands/status/subcommands/summary.py +24 -32
  23. synth_ai/cli/commands/status/subcommands/utils.py +41 -0
  24. synth_ai/cli/commands/status/utils.py +16 -107
  25. synth_ai/cli/commands/train/__init__.py +18 -20
  26. synth_ai/cli/commands/train/errors.py +3 -3
  27. synth_ai/cli/commands/train/prompt_learning_validation.py +15 -16
  28. synth_ai/cli/commands/train/validation.py +7 -7
  29. synth_ai/cli/commands/train/{judge_schemas.py → verifier_schemas.py} +33 -34
  30. synth_ai/cli/commands/train/verifier_validation.py +235 -0
  31. synth_ai/cli/demo_apps/demo_task_apps/math/config.toml +0 -1
  32. synth_ai/cli/demo_apps/demo_task_apps/math/modal_task_app.py +2 -6
  33. synth_ai/cli/demo_apps/math/config.toml +0 -1
  34. synth_ai/cli/demo_apps/math/modal_task_app.py +2 -6
  35. synth_ai/cli/demo_apps/mipro/task_app.py +25 -47
  36. synth_ai/cli/lib/apps/task_app.py +12 -13
  37. synth_ai/cli/lib/task_app_discovery.py +6 -6
  38. synth_ai/cli/lib/train_cfgs.py +10 -10
  39. synth_ai/cli/task_apps/__init__.py +11 -0
  40. synth_ai/cli/task_apps/commands.py +7 -15
  41. synth_ai/core/env.py +12 -1
  42. synth_ai/core/errors.py +1 -2
  43. synth_ai/core/integrations/cloudflare.py +209 -33
  44. synth_ai/core/tracing_v3/abstractions.py +46 -0
  45. synth_ai/data/__init__.py +3 -30
  46. synth_ai/data/enums.py +1 -20
  47. synth_ai/data/rewards.py +100 -3
  48. synth_ai/products/graph_evolve/__init__.py +1 -2
  49. synth_ai/products/graph_evolve/config.py +16 -16
  50. synth_ai/products/graph_evolve/converters/__init__.py +3 -3
  51. synth_ai/products/graph_evolve/converters/openai_sft.py +7 -7
  52. synth_ai/products/graph_evolve/examples/hotpotqa/config.toml +1 -1
  53. synth_ai/products/graph_gepa/__init__.py +23 -0
  54. synth_ai/products/graph_gepa/converters/__init__.py +19 -0
  55. synth_ai/products/graph_gepa/converters/openai_sft.py +29 -0
  56. synth_ai/sdk/__init__.py +45 -35
  57. synth_ai/sdk/api/eval/__init__.py +33 -0
  58. synth_ai/sdk/api/eval/job.py +732 -0
  59. synth_ai/sdk/api/research_agent/__init__.py +276 -66
  60. synth_ai/sdk/api/train/builders.py +181 -0
  61. synth_ai/sdk/api/train/cli.py +41 -33
  62. synth_ai/sdk/api/train/configs/__init__.py +6 -4
  63. synth_ai/sdk/api/train/configs/prompt_learning.py +127 -33
  64. synth_ai/sdk/api/train/configs/rl.py +264 -16
  65. synth_ai/sdk/api/train/configs/sft.py +165 -1
  66. synth_ai/sdk/api/train/graph_validators.py +12 -12
  67. synth_ai/sdk/api/train/graphgen.py +169 -51
  68. synth_ai/sdk/api/train/graphgen_models.py +95 -45
  69. synth_ai/sdk/api/train/local_api.py +10 -0
  70. synth_ai/sdk/api/train/pollers.py +36 -0
  71. synth_ai/sdk/api/train/prompt_learning.py +390 -60
  72. synth_ai/sdk/api/train/rl.py +41 -5
  73. synth_ai/sdk/api/train/sft.py +2 -0
  74. synth_ai/sdk/api/train/task_app.py +20 -0
  75. synth_ai/sdk/api/train/validators.py +17 -17
  76. synth_ai/sdk/graphs/completions.py +239 -33
  77. synth_ai/sdk/{judging/schemas.py → graphs/verifier_schemas.py} +23 -23
  78. synth_ai/sdk/learning/__init__.py +35 -5
  79. synth_ai/sdk/learning/context_learning_client.py +531 -0
  80. synth_ai/sdk/learning/context_learning_types.py +294 -0
  81. synth_ai/sdk/learning/prompt_learning_client.py +1 -1
  82. synth_ai/sdk/learning/prompt_learning_types.py +2 -1
  83. synth_ai/sdk/learning/rl/__init__.py +0 -4
  84. synth_ai/sdk/learning/rl/contracts.py +0 -4
  85. synth_ai/sdk/localapi/__init__.py +40 -0
  86. synth_ai/sdk/localapi/apps/__init__.py +28 -0
  87. synth_ai/sdk/localapi/client.py +10 -0
  88. synth_ai/sdk/localapi/contracts.py +10 -0
  89. synth_ai/sdk/localapi/helpers.py +519 -0
  90. synth_ai/sdk/localapi/rollouts.py +93 -0
  91. synth_ai/sdk/localapi/server.py +29 -0
  92. synth_ai/sdk/localapi/template.py +49 -0
  93. synth_ai/sdk/streaming/handlers.py +6 -6
  94. synth_ai/sdk/streaming/streamer.py +10 -6
  95. synth_ai/sdk/task/__init__.py +18 -5
  96. synth_ai/sdk/task/apps/__init__.py +37 -1
  97. synth_ai/sdk/task/client.py +9 -1
  98. synth_ai/sdk/task/config.py +6 -11
  99. synth_ai/sdk/task/contracts.py +137 -95
  100. synth_ai/sdk/task/in_process.py +32 -22
  101. synth_ai/sdk/task/in_process_runner.py +9 -4
  102. synth_ai/sdk/task/rubrics/__init__.py +2 -3
  103. synth_ai/sdk/task/rubrics/loaders.py +4 -4
  104. synth_ai/sdk/task/rubrics/strict.py +3 -4
  105. synth_ai/sdk/task/server.py +76 -16
  106. synth_ai/sdk/task/trace_correlation_helpers.py +190 -139
  107. synth_ai/sdk/task/validators.py +34 -49
  108. synth_ai/sdk/training/__init__.py +7 -16
  109. synth_ai/sdk/tunnels/__init__.py +118 -0
  110. synth_ai/sdk/tunnels/cleanup.py +83 -0
  111. synth_ai/sdk/tunnels/ports.py +120 -0
  112. synth_ai/sdk/tunnels/tunneled_api.py +363 -0
  113. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/METADATA +71 -4
  114. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/RECORD +118 -128
  115. synth_ai/cli/commands/baseline/__init__.py +0 -12
  116. synth_ai/cli/commands/baseline/core.py +0 -636
  117. synth_ai/cli/commands/baseline/list.py +0 -94
  118. synth_ai/cli/commands/eval/errors.py +0 -81
  119. synth_ai/cli/commands/status/formatters.py +0 -164
  120. synth_ai/cli/commands/status/subcommands/pricing.py +0 -23
  121. synth_ai/cli/commands/status/subcommands/usage.py +0 -203
  122. synth_ai/cli/commands/train/judge_validation.py +0 -305
  123. synth_ai/cli/usage.py +0 -159
  124. synth_ai/data/specs.py +0 -36
  125. synth_ai/sdk/api/research_agent/cli.py +0 -428
  126. synth_ai/sdk/api/research_agent/config.py +0 -357
  127. synth_ai/sdk/api/research_agent/job.py +0 -717
  128. synth_ai/sdk/baseline/__init__.py +0 -25
  129. synth_ai/sdk/baseline/config.py +0 -209
  130. synth_ai/sdk/baseline/discovery.py +0 -216
  131. synth_ai/sdk/baseline/execution.py +0 -154
  132. synth_ai/sdk/judging/__init__.py +0 -15
  133. synth_ai/sdk/judging/base.py +0 -24
  134. synth_ai/sdk/judging/client.py +0 -191
  135. synth_ai/sdk/judging/types.py +0 -42
  136. synth_ai/sdk/research_agent/__init__.py +0 -34
  137. synth_ai/sdk/research_agent/container_builder.py +0 -328
  138. synth_ai/sdk/research_agent/container_spec.py +0 -198
  139. synth_ai/sdk/research_agent/defaults.py +0 -34
  140. synth_ai/sdk/research_agent/results_collector.py +0 -69
  141. synth_ai/sdk/specs/__init__.py +0 -46
  142. synth_ai/sdk/specs/dataclasses.py +0 -149
  143. synth_ai/sdk/specs/loader.py +0 -144
  144. synth_ai/sdk/specs/serializer.py +0 -199
  145. synth_ai/sdk/specs/validation.py +0 -250
  146. synth_ai/sdk/tracing/__init__.py +0 -39
  147. synth_ai/sdk/usage/__init__.py +0 -37
  148. synth_ai/sdk/usage/client.py +0 -171
  149. synth_ai/sdk/usage/models.py +0 -261
  150. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/WHEEL +0 -0
  151. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/entry_points.txt +0 -0
  152. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/licenses/LICENSE +0 -0
  153. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/top_level.txt +0 -0
synth_ai/sdk/api/train/graphgen_models.py

@@ -1,8 +1,8 @@
-"""GraphGen (Automated Design of Agentic Systems) data models.
+"""GraphGen (Graph Opt) data models.
 
 This module provides Pydantic models for defining GraphGen datasets and job configurations.
 GraphGen is a simplified "Workflows API" for prompt optimization that wraps GEPA with
-auto-generated task apps and built-in judge configurations.
+auto-generated task apps and built-in verifier configurations.
 
 Example:
     from synth_ai.sdk.api.train.graphgen_models import (
@@ -11,6 +11,7 @@ Example:
         GraphGenGoldOutput,
         GraphGenRubric,
         GraphGenJobConfig,
+        GraphGenVerifierConfig,
     )
 
     # Create a dataset
@@ -24,7 +25,7 @@ Example:
             GraphGenGoldOutput(output={"answer": "4"}, task_id="task1"),
             GraphGenGoldOutput(output={"answer": "Paris"}, task_id="task2"),
         ],
-        judge_config=GraphGenJudgeConfig(mode="rubric"),
+        verifier_config=GraphGenVerifierConfig(mode="rubric"),
     )
 """
@@ -201,7 +202,7 @@ class GraphGenGoldOutput(BaseModel):
     """A gold/reference output.
 
     Can be linked to a specific task via task_id, or standalone (for reference examples).
-    Standalone gold outputs (no task_id) are used as reference pool for contrastive judging.
+    Standalone gold outputs (no task_id) are used as reference pool for contrastive verification.
     """
 
     output: Dict[str, Any] = Field(
@@ -217,16 +218,16 @@ class GraphGenGoldOutput(BaseModel):
 
 
 # Improvement 4: Define supported providers as a Literal type
-JudgeProviderType = Literal["groq", "openai", "google", "anthropic"]
+VerifierProviderType = Literal["groq", "openai", "google", "anthropic"]
 
 
-class GraphGenJudgeConfig(BaseModel):
-    """Configuration for the judge used during optimization."""
+class GraphGenVerifierConfig(BaseModel):
+    """Configuration for the verifier used during optimization."""
 
     mode: Literal["rubric", "contrastive", "gold_examples"] = Field(
         default="rubric",
         description=(
-            "Judge mode: "
+            "Verifier mode: "
             "'rubric' = evaluate against criteria, "
             "'contrastive' = compare to gold output, "
             "'gold_examples' = use gold examples as few-shot context"
@@ -234,12 +235,12 @@ class GraphGenJudgeConfig(BaseModel):
     )
     model: str = Field(
         default="llama-3.3-70b-versatile",
-        description="Model to use for judging",
+        description="Model to use for verification",
     )
     # Improvement 4: Changed from str to Literal type for better type safety
-    provider: JudgeProviderType = Field(
+    provider: VerifierProviderType = Field(
         default="groq",
-        description="Provider for judge model (groq, openai, google, anthropic)",
+        description="Provider for verifier model (groq, openai, google, anthropic)",
     )
 
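For 0.4.1 callers, the judge-to-verifier rename above is mechanical. A minimal sketch of the 0.4.4 surface, using only the fields and defaults visible in these hunks (the import path comes from the module docstring):

```python
# Sketch based on the fields shown in the hunks above; not an official recipe.
# 0.4.1 spelling was GraphGenJudgeConfig(mode=..., model=..., provider=...).
from synth_ai.sdk.api.train.graphgen_models import GraphGenVerifierConfig

verifier = GraphGenVerifierConfig(
    mode="contrastive",               # "rubric" (default) | "contrastive" | "gold_examples"
    model="llama-3.3-70b-versatile",  # default shown in the diff
    provider="groq",                  # Literal: "groq" | "openai" | "google" | "anthropic"
)
```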
@@ -247,7 +248,7 @@ class GraphGenTaskSet(BaseModel):
     """The complete GraphGen dataset format.
 
     Contains tasks with arbitrary JSON inputs, gold outputs (optionally linked to tasks),
-    rubrics (task-specific and/or default), and judge configuration.
+    rubrics (task-specific and/or default), and verifier configuration.
 
     Example:
         dataset = GraphGenTaskSet(
@@ -274,9 +275,9 @@ class GraphGenTaskSet(BaseModel):
         default=None,
         description="Default rubric applied to all tasks (merged with task-specific rubrics)",
     )
-    judge_config: GraphGenJudgeConfig = Field(
-        default_factory=GraphGenJudgeConfig,
-        description="Configuration for the judge",
+    verifier_config: GraphGenVerifierConfig = Field(
+        default_factory=GraphGenVerifierConfig,
+        description="Configuration for the verifier",
     )
     # Optional schemas (also accepted at top-level for backward/forward compatibility).
     input_schema: Optional[Dict[str, Any]] = Field(
@@ -417,7 +418,7 @@ class GraphGenTaskSet(BaseModel):
         return None
 
     def get_standalone_gold_outputs(self) -> List[GraphGenGoldOutput]:
-        """Get gold outputs not linked to any task (reference pool for contrastive judge)."""
+        """Get gold outputs not linked to any task (reference pool for contrastive verifier)."""
         return [gold for gold in self.gold_outputs if gold.task_id is None]
 
 
@@ -445,8 +446,8 @@ SUPPORTED_POLICY_MODELS = {
     "claude-3-5-haiku-latest",
 }
 
-# Supported judge models
-SUPPORTED_JUDGE_MODELS = {
+# Supported verifier models
+SUPPORTED_VERIFIER_MODELS = {
     # Groq (fast, cheap)
     "llama-3.3-70b-versatile",
     "llama-3.1-70b-versatile",
@@ -457,8 +458,8 @@ SUPPORTED_JUDGE_MODELS = {
 
 # Default models
 DEFAULT_POLICY_MODEL = "gpt-4o-mini"
-DEFAULT_JUDGE_MODEL = "llama-3.3-70b-versatile"
-DEFAULT_JUDGE_PROVIDER = "groq"
+DEFAULT_VERIFIER_MODEL = "llama-3.3-70b-versatile"
+DEFAULT_VERIFIER_PROVIDER = "groq"
 
 
 class EventInput(BaseModel):
@@ -484,7 +485,7 @@ class SessionTimeStepInput(BaseModel):
 
 
 class SessionTraceInput(BaseModel):
-    """V3-compatible session trace input for judge evaluation."""
+    """V3-compatible session trace input for verifier evaluation."""
 
     model_config = ConfigDict(extra="allow")
 
@@ -505,7 +506,7 @@ class SessionTraceInput(BaseModel):
         return data
 
 
-class GraphGenGraphJudgeRequest(BaseModel):
+class GraphGenGraphVerifierRequest(BaseModel):
     """Request for verifier graph inference."""
 
     model_config = ConfigDict(extra="forbid")
@@ -576,7 +577,7 @@ class OutcomeRewardResponse(BaseModel):
     annotation: Optional[Dict[str, Any]] = Field(default=None, description="Additional annotations (free-form)")
 
 
-class GraphGenGraphJudgeResponse(BaseModel):
+class GraphGenGraphVerifierResponse(BaseModel):
     """Response from verifier graph inference."""
 
     started_at: datetime = Field(..., description="When inference request started (UTC)")
@@ -589,32 +590,81 @@ class GraphGenGraphJudgeResponse(BaseModel):
     event_rewards: List[EventRewardResponse] = Field(default_factory=list, description="Per-event rewards")
     outcome_reward: Optional[OutcomeRewardResponse] = Field(default=None, description="Episode-level outcome reward")
 
-    # Legacy fields (kept for backward compatibility)
-    score: float = Field(..., ge=0.0, le=1.0, description="Evaluation score (0-1)")
-    reasoning: Optional[str] = Field(default=None, description="Explanation for the score")
-    sub_scores: Optional[Dict[str, float]] = Field(default=None, description="Breakdown scores by criteria")
     raw_output: Optional[Dict[str, Any]] = Field(default=None, description="Full raw output from the verifier graph")
 
     usage: List[GraphGenGraphCompletionsModelUsage] = Field(default_factory=list, description="Token usage per model")
 
 
-class GraphGenGraphVerifierRequest(GraphGenGraphJudgeRequest):
-    """Alias for GraphGenGraphJudgeRequest with verifier terminology."""
-
-
-class GraphGenGraphVerifierResponse(GraphGenGraphJudgeResponse):
-    """Alias for GraphGenGraphJudgeResponse with verifier terminology."""
 
 
 class GraphGenJobConfig(BaseModel):
-    """Configuration for an GraphGen optimization job.
+    """Configuration for a GraphGen (Graph Opt) optimization job.
+
+    GraphGen provides a simplified API for training optimized graphs/workflows without
+    managing task apps manually. It supports three graph types:
+    - **policy**: Standard input-to-output graphs for classification, QA, generation
+    - **verifier**: Trace-to-score graphs for verifying/evaluating agent behavior
+    - **rlm**: Recursive Language Model graphs for massive contexts via tool-based search
 
     Example:
+        ```python
+        from synth_ai.sdk.api.train.graphgen_models import GraphGenJobConfig
+
         config = GraphGenJobConfig(
+            graph_type="policy",
             policy_model="gpt-4o-mini",
             rollout_budget=100,
             proposer_effort="medium",
+            problem_spec="Classify customer support messages into categories.",
         )
+        ```
+
+    Attributes:
+        graph_type: Type of graph - "policy", "verifier", or "rlm".
+        policy_model: Model for policy inference (e.g., "gpt-4o-mini", "claude-3-5-sonnet").
+        policy_provider: Provider for policy model (auto-detected if not specified).
+        rollout_budget: Total rollouts (evaluations) for optimization. Range: 10-10000.
+        proposer_effort: Mutation quality/cost level - "medium" or "high".
+            Note: "low" is not allowed (gpt-4.1-mini too weak for graph generation).
+        verifier_model: Override verifier model from dataset.
+        verifier_provider: Override verifier provider from dataset.
+        population_size: GEPA population size. Range: 2-20. Default: 4.
+        num_generations: Number of generations (auto-calculated from budget if not specified).
+        num_parents: Number of parents for selection. Range: 1-10. Default: 2.
+        evaluation_seeds: Specific seeds for evaluation (auto-generated if not specified).
+        problem_spec: Detailed problem specification for the graph proposer.
+            Include domain info like valid output labels, constraints, format requirements.
+        target_llm_calls: Target LLM calls per graph run (1-10). Default: 5.
+        configured_tools: Tool bindings for RLM graphs. Required for graph_type="rlm".
+
+    Returns:
+        After training completes via GraphGenJob, you receive a result dict:
+        ```python
+        {
+            "status": "succeeded",
+            "graphgen_job_id": "graphgen_abc123",
+            "best_score": 0.89,
+            "best_snapshot_id": "snap_xyz789",
+            "dataset_name": "My Classification Tasks",
+            "task_count": 50,
+        }
+        ```
+
+    Events:
+        During training, you'll receive streaming events via GraphGenJob.stream_until_complete():
+        - `graphgen.created` - Job created
+        - `graphgen.running` - Training started
+        - `graphgen.generation.started` - New generation of candidates started
+        - `graphgen.candidate.evaluated` - A candidate graph was evaluated
+        - `graphgen.generation.completed` - Generation finished with metrics
+        - `graphgen.optimization.completed` - Training finished successfully
+        - `graphgen.failed` - Job encountered an error
+
+    See Also:
+        - GraphGenJob: High-level SDK class for running jobs
+        - GraphGenTaskSet: Dataset format for tasks and gold outputs
+        - Training reference: /training/graph-evolve
+        - Quickstart: /quickstart/graph-evolve
     """
 
     # Graph type
@@ -656,14 +706,14 @@ class GraphGenJobConfig(BaseModel):
         ),
     )
 
-    # Judge settings (if not specified in dataset)
-    judge_model: Optional[str] = Field(
+    # Verifier settings (if not specified in dataset)
+    verifier_model: Optional[str] = Field(
         default=None,
-        description="Override judge model from dataset",
+        description="Override verifier model from dataset",
     )
-    judge_provider: Optional[str] = Field(
+    verifier_provider: Optional[str] = Field(
         default=None,
-        description="Override judge provider from dataset",
+        description="Override verifier provider from dataset",
     )
 
     # Advanced settings
@@ -792,7 +842,7 @@ GraphGenRubric = GraphGenRubric
 GraphGenRubricCriterion = GraphGenRubricCriterion
 GraphGenRubricOutcome = GraphGenRubricOutcome
 GraphGenRubricEvents = GraphGenRubricEvents
-GraphGenJudgeConfig = GraphGenJudgeConfig
+GraphGenVerifierConfig = GraphGenVerifierConfig
 GraphGenJobConfig = GraphGenJobConfig
 parse_graphgen_taskset = parse_graphgen_taskset
 load_graphgen_taskset = load_graphgen_taskset
@@ -800,7 +850,7 @@ load_graphgen_taskset = load_graphgen_taskset
 __all__ = [
     # Core types (new)
     "OutputConfig",
-    "JudgeProviderType",
+    "VerifierProviderType",
     # GraphGen names (preferred)
     "GraphGenTaskSet",
     "GraphGenTaskSetMetadata",
@@ -810,14 +860,14 @@ __all__ = [
     "GraphGenRubricCriterion",
     "GraphGenRubricOutcome",
     "GraphGenRubricEvents",
-    "GraphGenJudgeConfig",
+    "GraphGenVerifierConfig",
     "GraphGenJobConfig",
     "parse_graphgen_taskset",
     "load_graphgen_taskset",
     # Constants
     "SUPPORTED_POLICY_MODELS",
-    "SUPPORTED_JUDGE_MODELS",
+    "SUPPORTED_VERIFIER_MODELS",
     "DEFAULT_POLICY_MODEL",
-    "DEFAULT_JUDGE_MODEL",
-    "DEFAULT_JUDGE_PROVIDER",
+    "DEFAULT_VERIFIER_MODEL",
+    "DEFAULT_VERIFIER_PROVIDER",
 ]
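Taken together, the graphgen_models.py hunks are a rename plus a cleanup: the judge-named symbols are gone with no aliases left behind, and GraphGenGraphVerifierResponse drops the legacy score/reasoning/sub_scores fields in favor of event_rewards and outcome_reward. A hedged migration sketch, using only names that appear in these hunks:

```python
# 0.4.1 -> 0.4.4 rename map (all names taken from the hunks above):
#   GraphGenJudgeConfig          -> GraphGenVerifierConfig
#   judge_config=                -> verifier_config=
#   judge_model / judge_provider -> verifier_model / verifier_provider
#   JudgeProviderType            -> VerifierProviderType
#   SUPPORTED_JUDGE_MODELS       -> SUPPORTED_VERIFIER_MODELS
#   DEFAULT_JUDGE_MODEL/PROVIDER -> DEFAULT_VERIFIER_MODEL/PROVIDER
from synth_ai.sdk.api.train.graphgen_models import GraphGenJobConfig

config = GraphGenJobConfig(
    graph_type="policy",
    policy_model="gpt-4o-mini",
    rollout_budget=100,
    proposer_effort="medium",
    verifier_model="llama-3.3-70b-versatile",  # 0.4.1: judge_model
    verifier_provider="groq",                  # 0.4.1: judge_provider
    problem_spec="Classify customer support messages into categories.",
)
```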
synth_ai/sdk/api/train/local_api.py (new file)

@@ -0,0 +1,10 @@
+"""LocalAPI health helpers.
+
+Prefer this module over synth_ai.sdk.api.train.task_app for LocalAPI naming.
+"""
+
+from __future__ import annotations
+
+from synth_ai.sdk.api.train.task_app import LocalAPIHealth, check_local_api_health
+
+__all__ = ["LocalAPIHealth", "check_local_api_health"]
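The new module is a two-name re-export. A usage sketch, assuming this is the ten-line synth_ai/sdk/api/train/local_api.py file from the list above; the signature of check_local_api_health lives in synth_ai.sdk.api.train.task_app and is not part of this diff, so the argument below is illustrative only:

```python
from synth_ai.sdk.api.train.local_api import LocalAPIHealth, check_local_api_health

# Illustrative call; consult synth_ai.sdk.api.train.task_app for the real signature.
health: LocalAPIHealth = check_local_api_health("http://127.0.0.1:8000")
```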
synth_ai/sdk/api/train/pollers.py

@@ -116,9 +116,45 @@ class PromptLearningJobPoller(JobPoller):
         return super().poll(f"/api/prompt-learning/online/jobs/{job_id}")
 
 
+class EvalJobPoller(JobPoller):
+    """Poller for evaluation jobs.
+
+    Polls the backend eval job API to check job status until completion.
+
+    Example:
+        >>> poller = EvalJobPoller(
+        ...     base_url="https://api.usesynth.ai",
+        ...     api_key="sk_live_...",
+        ...     interval=2.0,
+        ...     timeout=1200.0,
+        ... )
+        >>> outcome = poller.poll_job("eval-abc123")
+        >>> if outcome.status == "completed":
+        ...     print(outcome.payload)
+
+    See Also:
+        - `synth_ai.sdk.api.eval.EvalJob`: High-level eval job API
+        - Backend API: GET /api/eval/jobs/{job_id}
+    """
+
+    def poll_job(self, job_id: str) -> PollOutcome:
+        """Poll an eval job by ID.
+
+        Args:
+            job_id: Job ID (e.g., "eval-abc123")
+
+        Returns:
+            PollOutcome with status and payload
+        """
+        ctx: dict[str, Any] = {"job_id": job_id, "job_type": "eval"}
+        log_info("EvalJobPoller.poll_job invoked", ctx=ctx)
+        return super().poll(f"/api/eval/jobs/{job_id}")
+
+
 __all__ = [
     "PollOutcome",
     "RLJobPoller",
     "SFTJobPoller",
     "PromptLearningJobPoller",
+    "EvalJobPoller",
 ]