synth-ai 0.4.1__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (153)
  1. synth_ai/__init__.py +13 -13
  2. synth_ai/cli/__init__.py +6 -15
  3. synth_ai/cli/commands/eval/__init__.py +6 -15
  4. synth_ai/cli/commands/eval/config.py +338 -0
  5. synth_ai/cli/commands/eval/core.py +236 -1091
  6. synth_ai/cli/commands/eval/runner.py +704 -0
  7. synth_ai/cli/commands/eval/validation.py +44 -117
  8. synth_ai/cli/commands/filter/core.py +7 -7
  9. synth_ai/cli/commands/filter/validation.py +2 -2
  10. synth_ai/cli/commands/smoke/core.py +7 -17
  11. synth_ai/cli/commands/status/__init__.py +1 -64
  12. synth_ai/cli/commands/status/client.py +50 -151
  13. synth_ai/cli/commands/status/config.py +3 -83
  14. synth_ai/cli/commands/status/errors.py +4 -13
  15. synth_ai/cli/commands/status/subcommands/__init__.py +2 -8
  16. synth_ai/cli/commands/status/subcommands/config.py +13 -0
  17. synth_ai/cli/commands/status/subcommands/files.py +18 -63
  18. synth_ai/cli/commands/status/subcommands/jobs.py +28 -311
  19. synth_ai/cli/commands/status/subcommands/models.py +18 -62
  20. synth_ai/cli/commands/status/subcommands/runs.py +16 -63
  21. synth_ai/cli/commands/status/subcommands/session.py +67 -172
  22. synth_ai/cli/commands/status/subcommands/summary.py +24 -32
  23. synth_ai/cli/commands/status/subcommands/utils.py +41 -0
  24. synth_ai/cli/commands/status/utils.py +16 -107
  25. synth_ai/cli/commands/train/__init__.py +18 -20
  26. synth_ai/cli/commands/train/errors.py +3 -3
  27. synth_ai/cli/commands/train/prompt_learning_validation.py +15 -16
  28. synth_ai/cli/commands/train/validation.py +7 -7
  29. synth_ai/cli/commands/train/{judge_schemas.py → verifier_schemas.py} +33 -34
  30. synth_ai/cli/commands/train/verifier_validation.py +235 -0
  31. synth_ai/cli/demo_apps/demo_task_apps/math/config.toml +0 -1
  32. synth_ai/cli/demo_apps/demo_task_apps/math/modal_task_app.py +2 -6
  33. synth_ai/cli/demo_apps/math/config.toml +0 -1
  34. synth_ai/cli/demo_apps/math/modal_task_app.py +2 -6
  35. synth_ai/cli/demo_apps/mipro/task_app.py +25 -47
  36. synth_ai/cli/lib/apps/task_app.py +12 -13
  37. synth_ai/cli/lib/task_app_discovery.py +6 -6
  38. synth_ai/cli/lib/train_cfgs.py +10 -10
  39. synth_ai/cli/task_apps/__init__.py +11 -0
  40. synth_ai/cli/task_apps/commands.py +7 -15
  41. synth_ai/core/env.py +12 -1
  42. synth_ai/core/errors.py +1 -2
  43. synth_ai/core/integrations/cloudflare.py +209 -33
  44. synth_ai/core/tracing_v3/abstractions.py +46 -0
  45. synth_ai/data/__init__.py +3 -30
  46. synth_ai/data/enums.py +1 -20
  47. synth_ai/data/rewards.py +100 -3
  48. synth_ai/products/graph_evolve/__init__.py +1 -2
  49. synth_ai/products/graph_evolve/config.py +16 -16
  50. synth_ai/products/graph_evolve/converters/__init__.py +3 -3
  51. synth_ai/products/graph_evolve/converters/openai_sft.py +7 -7
  52. synth_ai/products/graph_evolve/examples/hotpotqa/config.toml +1 -1
  53. synth_ai/products/graph_gepa/__init__.py +23 -0
  54. synth_ai/products/graph_gepa/converters/__init__.py +19 -0
  55. synth_ai/products/graph_gepa/converters/openai_sft.py +29 -0
  56. synth_ai/sdk/__init__.py +45 -35
  57. synth_ai/sdk/api/eval/__init__.py +33 -0
  58. synth_ai/sdk/api/eval/job.py +732 -0
  59. synth_ai/sdk/api/research_agent/__init__.py +276 -66
  60. synth_ai/sdk/api/train/builders.py +181 -0
  61. synth_ai/sdk/api/train/cli.py +41 -33
  62. synth_ai/sdk/api/train/configs/__init__.py +6 -4
  63. synth_ai/sdk/api/train/configs/prompt_learning.py +127 -33
  64. synth_ai/sdk/api/train/configs/rl.py +264 -16
  65. synth_ai/sdk/api/train/configs/sft.py +165 -1
  66. synth_ai/sdk/api/train/graph_validators.py +12 -12
  67. synth_ai/sdk/api/train/graphgen.py +169 -51
  68. synth_ai/sdk/api/train/graphgen_models.py +95 -45
  69. synth_ai/sdk/api/train/local_api.py +10 -0
  70. synth_ai/sdk/api/train/pollers.py +36 -0
  71. synth_ai/sdk/api/train/prompt_learning.py +390 -60
  72. synth_ai/sdk/api/train/rl.py +41 -5
  73. synth_ai/sdk/api/train/sft.py +2 -0
  74. synth_ai/sdk/api/train/task_app.py +20 -0
  75. synth_ai/sdk/api/train/validators.py +17 -17
  76. synth_ai/sdk/graphs/completions.py +239 -33
  77. synth_ai/sdk/{judging/schemas.py → graphs/verifier_schemas.py} +23 -23
  78. synth_ai/sdk/learning/__init__.py +35 -5
  79. synth_ai/sdk/learning/context_learning_client.py +531 -0
  80. synth_ai/sdk/learning/context_learning_types.py +294 -0
  81. synth_ai/sdk/learning/prompt_learning_client.py +1 -1
  82. synth_ai/sdk/learning/prompt_learning_types.py +2 -1
  83. synth_ai/sdk/learning/rl/__init__.py +0 -4
  84. synth_ai/sdk/learning/rl/contracts.py +0 -4
  85. synth_ai/sdk/localapi/__init__.py +40 -0
  86. synth_ai/sdk/localapi/apps/__init__.py +28 -0
  87. synth_ai/sdk/localapi/client.py +10 -0
  88. synth_ai/sdk/localapi/contracts.py +10 -0
  89. synth_ai/sdk/localapi/helpers.py +519 -0
  90. synth_ai/sdk/localapi/rollouts.py +93 -0
  91. synth_ai/sdk/localapi/server.py +29 -0
  92. synth_ai/sdk/localapi/template.py +49 -0
  93. synth_ai/sdk/streaming/handlers.py +6 -6
  94. synth_ai/sdk/streaming/streamer.py +10 -6
  95. synth_ai/sdk/task/__init__.py +18 -5
  96. synth_ai/sdk/task/apps/__init__.py +37 -1
  97. synth_ai/sdk/task/client.py +9 -1
  98. synth_ai/sdk/task/config.py +6 -11
  99. synth_ai/sdk/task/contracts.py +137 -95
  100. synth_ai/sdk/task/in_process.py +32 -22
  101. synth_ai/sdk/task/in_process_runner.py +9 -4
  102. synth_ai/sdk/task/rubrics/__init__.py +2 -3
  103. synth_ai/sdk/task/rubrics/loaders.py +4 -4
  104. synth_ai/sdk/task/rubrics/strict.py +3 -4
  105. synth_ai/sdk/task/server.py +76 -16
  106. synth_ai/sdk/task/trace_correlation_helpers.py +190 -139
  107. synth_ai/sdk/task/validators.py +34 -49
  108. synth_ai/sdk/training/__init__.py +7 -16
  109. synth_ai/sdk/tunnels/__init__.py +118 -0
  110. synth_ai/sdk/tunnels/cleanup.py +83 -0
  111. synth_ai/sdk/tunnels/ports.py +120 -0
  112. synth_ai/sdk/tunnels/tunneled_api.py +363 -0
  113. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/METADATA +71 -4
  114. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/RECORD +118 -128
  115. synth_ai/cli/commands/baseline/__init__.py +0 -12
  116. synth_ai/cli/commands/baseline/core.py +0 -636
  117. synth_ai/cli/commands/baseline/list.py +0 -94
  118. synth_ai/cli/commands/eval/errors.py +0 -81
  119. synth_ai/cli/commands/status/formatters.py +0 -164
  120. synth_ai/cli/commands/status/subcommands/pricing.py +0 -23
  121. synth_ai/cli/commands/status/subcommands/usage.py +0 -203
  122. synth_ai/cli/commands/train/judge_validation.py +0 -305
  123. synth_ai/cli/usage.py +0 -159
  124. synth_ai/data/specs.py +0 -36
  125. synth_ai/sdk/api/research_agent/cli.py +0 -428
  126. synth_ai/sdk/api/research_agent/config.py +0 -357
  127. synth_ai/sdk/api/research_agent/job.py +0 -717
  128. synth_ai/sdk/baseline/__init__.py +0 -25
  129. synth_ai/sdk/baseline/config.py +0 -209
  130. synth_ai/sdk/baseline/discovery.py +0 -216
  131. synth_ai/sdk/baseline/execution.py +0 -154
  132. synth_ai/sdk/judging/__init__.py +0 -15
  133. synth_ai/sdk/judging/base.py +0 -24
  134. synth_ai/sdk/judging/client.py +0 -191
  135. synth_ai/sdk/judging/types.py +0 -42
  136. synth_ai/sdk/research_agent/__init__.py +0 -34
  137. synth_ai/sdk/research_agent/container_builder.py +0 -328
  138. synth_ai/sdk/research_agent/container_spec.py +0 -198
  139. synth_ai/sdk/research_agent/defaults.py +0 -34
  140. synth_ai/sdk/research_agent/results_collector.py +0 -69
  141. synth_ai/sdk/specs/__init__.py +0 -46
  142. synth_ai/sdk/specs/dataclasses.py +0 -149
  143. synth_ai/sdk/specs/loader.py +0 -144
  144. synth_ai/sdk/specs/serializer.py +0 -199
  145. synth_ai/sdk/specs/validation.py +0 -250
  146. synth_ai/sdk/tracing/__init__.py +0 -39
  147. synth_ai/sdk/usage/__init__.py +0 -37
  148. synth_ai/sdk/usage/client.py +0 -171
  149. synth_ai/sdk/usage/models.py +0 -261
  150. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/WHEEL +0 -0
  151. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/entry_points.txt +0 -0
  152. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/licenses/LICENSE +0 -0
  153. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/top_level.txt +0 -0
@@ -1,114 +1,23 @@
1
- """Shared utilities for status commands."""
1
+ """Utility helpers for status commands."""
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- import asyncio
6
- from collections.abc import Callable, Coroutine
7
- from datetime import UTC, datetime, timedelta
8
- from typing import Any, TypeVar
5
+ from typing import Any
9
6
 
10
- import click
11
- from rich.console import Console
12
7
 
13
- from .config import DEFAULT_TIMEOUT, BackendConfig, resolve_backend_config
8
+ def build_headers(api_key: str | None) -> dict[str, str]:
9
+ if not api_key:
10
+ return {}
11
+ return {"Authorization": f"Bearer {api_key}", "X-API-Key": api_key}
14
12
 
15
- T = TypeVar("T")
16
13
 
17
- console = Console()
18
-
19
-
20
- def parse_relative_time(value: str | None) -> str | None:
21
- """Convert relative time expressions (e.g., '5m', '2h', '1d') to ISO strings."""
22
- if not value:
23
- return None
24
- token = value.strip().lower()
25
- if not token:
26
- return None
27
- multiplier = 1.0
28
- if token.endswith("ms"):
29
- multiplier = 0.001
30
- token = token[:-2]
31
- elif token.endswith("s"):
32
- multiplier = 1.0
33
- token = token[:-1]
34
- elif token.endswith("m"):
35
- multiplier = 60.0
36
- token = token[:-1]
37
- elif token.endswith("h"):
38
- multiplier = 3600.0
39
- token = token[:-1]
40
- elif token.endswith("d"):
41
- multiplier = 86400.0
42
- token = token[:-1]
43
-
44
- try:
45
- seconds = float(token) * multiplier
46
- except ValueError:
47
- return value
48
-
49
- dt = datetime.now(UTC) - timedelta(seconds=seconds)
50
- return dt.isoformat()
51
-
52
-
53
- def ensure_async(fn: Callable[..., Coroutine[Any, Any, T]]) -> Callable[..., T]:
54
- """Decorator to run an async callable via asyncio.run inside Click commands."""
55
-
56
- def wrapper(*args, **kwargs):
57
- return asyncio.run(fn(*args, **kwargs))
58
-
59
- return wrapper
60
-
61
-
62
- def resolve_context_config(
63
- ctx: click.Context,
64
- *,
65
- base_url: str | None,
66
- api_key: str | None,
67
- timeout: float | None,
68
- ) -> BackendConfig:
69
- if base_url is not None or api_key is not None or timeout not in (None, DEFAULT_TIMEOUT):
70
- return resolve_backend_config(base_url=base_url, api_key=api_key, timeout=timeout)
71
- obj = ctx.find_object(dict)
72
- if obj and isinstance(obj.get("status_backend_config"), BackendConfig):
73
- return obj["status_backend_config"]
74
- return resolve_backend_config(base_url=base_url, api_key=api_key, timeout=timeout)
75
-
76
-
77
- def warn(message: str) -> None:
78
- console.print(f"[yellow]{message}[/yellow]")
79
-
80
-
81
- def bail(message: str) -> None:
82
- raise click.ClickException(message)
83
-
84
-
85
- def common_options() -> Callable[[Callable[..., T]], Callable[..., T]]:
86
- """Apply shared backend CLI options to a command."""
87
-
88
- def decorator(func: Callable[..., T]) -> Callable[..., T]:
89
- options = [
90
- click.option(
91
- "--base-url",
92
- envvar="SYNTH_STATUS_BASE_URL",
93
- default=None,
94
- help="Override the Synth backend base URL for this command.",
95
- ),
96
- click.option(
97
- "--api-key",
98
- envvar="SYNTH_STATUS_API_KEY",
99
- default=None,
100
- help="API key for the Synth backend.",
101
- ),
102
- click.option(
103
- "--timeout",
104
- default=DEFAULT_TIMEOUT,
105
- show_default=True,
106
- type=float,
107
- help="HTTP request timeout in seconds.",
108
- ),
109
- ]
110
- for option in reversed(options):
111
- func = option(func)
112
- return func
113
-
114
- return decorator
14
+ def ensure_status_ok(response) -> dict[str, Any]:
15
+ if response.status_code >= 400:
16
+ detail = ""
17
+ try:
18
+ payload = response.json()
19
+ detail = payload.get("detail", "")
20
+ except Exception:
21
+ detail = response.text
22
+ raise RuntimeError(detail or f"Request failed ({response.status_code})")
23
+ return response.json()
@@ -1,22 +1,21 @@
1
1
  from .core import register, train_command
2
2
  from .errors import (
3
- InvalidJudgeConfigError,
4
3
  InvalidRubricConfigError,
4
+ InvalidVerifierConfigError,
5
5
  TrainCliError,
6
6
  )
7
- from .judge_schemas import (
8
- JudgeConfig,
9
- JudgeOptionsConfig,
10
- JudgeRequestPayload,
7
+ from .verifier_schemas import (
11
8
  RubricConfig,
12
9
  RubricWeightsConfig,
13
- build_judge_http_options,
10
+ VerifierConfig,
11
+ VerifierOptionsConfig,
12
+ VerifierRequestPayload,
13
+ build_verifier_http_options,
14
14
  )
15
- from .judge_validation import (
16
- check_for_deprecated_fields,
17
- extract_and_validate_judge_rubric,
18
- validate_judge_config,
15
+ from .verifier_validation import (
16
+ extract_and_validate_verifier_rubric,
19
17
  validate_rubric_config,
18
+ validate_verifier_config,
20
19
  )
21
20
  from .validation import (
22
21
  load_and_validate_rl,
@@ -31,23 +30,22 @@ __all__ = [
31
30
  "train_command",
32
31
  # Errors
33
32
  "TrainCliError",
34
- "InvalidJudgeConfigError",
33
+ "InvalidVerifierConfigError",
35
34
  "InvalidRubricConfigError",
36
35
  # SFT/RL validation
37
36
  "validate_sft_config",
38
37
  "validate_rl_config",
39
38
  "load_and_validate_sft",
40
39
  "load_and_validate_rl",
41
- # Judge/Rubric schemas
40
+ # Verifier/Rubric schemas
42
41
  "RubricWeightsConfig",
43
42
  "RubricConfig",
44
- "JudgeOptionsConfig",
45
- "JudgeConfig",
46
- "JudgeRequestPayload",
47
- "build_judge_http_options",
48
- # Judge/Rubric validation
43
+ "VerifierOptionsConfig",
44
+ "VerifierConfig",
45
+ "VerifierRequestPayload",
46
+ "build_verifier_http_options",
47
+ # Verifier/Rubric validation
49
48
  "validate_rubric_config",
50
- "validate_judge_config",
51
- "extract_and_validate_judge_rubric",
52
- "check_for_deprecated_fields",
49
+ "validate_verifier_config",
50
+ "extract_and_validate_verifier_rubric",
53
51
  ]
@@ -82,8 +82,8 @@ class InvalidTopologyError(TrainCliError):
82
82
 
83
83
 
84
84
  @dataclass(slots=True)
85
- class InvalidJudgeConfigError(TrainCliError):
86
- """Raised when judge configuration validation fails."""
85
+ class InvalidVerifierConfigError(TrainCliError):
86
+ """Raised when verifier configuration validation fails."""
87
87
  detail: str
88
88
 
89
89
  def __str__(self) -> str:
@@ -112,6 +112,6 @@ __all__ = [
112
112
  "UnsupportedAlgorithmError",
113
113
  "InvalidHyperparametersError",
114
114
  "InvalidTopologyError",
115
- "InvalidJudgeConfigError",
115
+ "InvalidVerifierConfigError",
116
116
  "InvalidRubricConfigError",
117
117
  ]
@@ -29,7 +29,7 @@ KNOWN_PROMPT_LEARNING_FIELDS = {
29
29
  "policy",
30
30
  "mipro",
31
31
  "gepa",
32
- "judge",
32
+ "verifier",
33
33
  "proxy_models",
34
34
  "env_config",
35
35
  "env_name",
@@ -87,7 +87,7 @@ KNOWN_GEPA_FIELDS = {
87
87
  "population",
88
88
  "archive",
89
89
  "token",
90
- "judge",
90
+ "verifier",
91
91
  "proxy_models",
92
92
  "adaptive_pool",
93
93
  "adaptive_batch",
@@ -204,7 +204,7 @@ KNOWN_MIPRO_FIELDS = {
204
204
  "demo",
205
205
  "grounding",
206
206
  "meta_update",
207
- "judge",
207
+ "verifier",
208
208
  "proxy_models",
209
209
  "adaptive_pool",
210
210
  "spec_path",
@@ -219,16 +219,15 @@ KNOWN_MIPRO_FIELDS = {
219
219
  "min_bootstrap_demos",
220
220
  }
221
221
 
222
- # Known fields in [prompt_learning.judge]
223
- KNOWN_JUDGE_FIELDS = {
222
+ # Known fields in [prompt_learning.verifier]
223
+ KNOWN_VERIFIER_FIELDS = {
224
224
  "enabled",
225
225
  "reward_source",
226
226
  "backend_base",
227
227
  "backend_api_key_env",
228
228
  "backend_provider",
229
229
  "backend_model",
230
- "synth_verifier_id",
231
- "backend_rubric_id",
230
+ "verifier_graph_id",
232
231
  "backend_event_enabled",
233
232
  "backend_outcome_enabled",
234
233
  "backend_options",
@@ -300,7 +299,7 @@ DEPRECATED_FIELDS = {
300
299
  "max_concurrent_rollouts": "Use [prompt_learning.gepa.rollout].max_concurrent instead.",
301
300
  "evaluation_seeds": "Use [prompt_learning.gepa.evaluation].seeds instead of flat evaluation_seeds.",
302
301
  "validation_seeds": "Use [prompt_learning.gepa.evaluation].validation_seeds instead.",
303
- "backend_rubric_id": "Use 'synth_verifier_id' instead of 'backend_rubric_id' in [prompt_learning.judge].",
302
+ "backend_rubric_id": "Use 'verifier_graph_id' in [prompt_learning.verifier].",
304
303
  }
305
304
 
306
305
 
@@ -444,10 +443,10 @@ def validate_prompt_learning_config(
444
443
  "termination_config is supported and will create backend TerminationManager conditions"
445
444
  )
446
445
 
447
- # Validate [prompt_learning.judge] if present
448
- judge = pl_config.get("judge")
449
- if judge and isinstance(judge, dict):
450
- _check_unknown_fields(judge, KNOWN_JUDGE_FIELDS, "prompt_learning.judge", result)
446
+ # Validate [prompt_learning.verifier] if present
447
+ verifier = pl_config.get("verifier")
448
+ if verifier and isinstance(verifier, dict):
449
+ _check_unknown_fields(verifier, KNOWN_VERIFIER_FIELDS, "prompt_learning.verifier", result)
451
450
 
452
451
  # Validate [prompt_learning.proxy_models] if present
453
452
  proxy_models = pl_config.get("proxy_models")
@@ -553,9 +552,9 @@ def _validate_gepa_config(
553
552
  result,
554
553
  )
555
554
 
556
- if "judge" in gepa and isinstance(gepa["judge"], dict):
555
+ if "verifier" in gepa and isinstance(gepa["verifier"], dict):
557
556
  _check_unknown_fields(
558
- gepa["judge"], KNOWN_JUDGE_FIELDS, "prompt_learning.gepa.judge", result
557
+ gepa["verifier"], KNOWN_VERIFIER_FIELDS, "prompt_learning.gepa.verifier", result
559
558
  )
560
559
 
561
560
 
@@ -575,9 +574,9 @@ def _validate_mipro_config(
575
574
  _check_unknown_fields(mipro, KNOWN_MIPRO_FIELDS, "prompt_learning.mipro", result)
576
575
 
577
576
  # Validate nested sections
578
- if "judge" in mipro and isinstance(mipro["judge"], dict):
577
+ if "verifier" in mipro and isinstance(mipro["verifier"], dict):
579
578
  _check_unknown_fields(
580
- mipro["judge"], KNOWN_JUDGE_FIELDS, "prompt_learning.mipro.judge", result
579
+ mipro["verifier"], KNOWN_VERIFIER_FIELDS, "prompt_learning.mipro.verifier", result
581
580
  )
582
581
 
583
582
  if "adaptive_pool" in mipro and isinstance(mipro["adaptive_pool"], dict):
@@ -12,10 +12,10 @@ from synth_ai.sdk.api.train.configs.sft import SFTConfig
12
12
  from synth_ai.sdk.api.train.utils import load_toml
13
13
 
14
14
  from .errors import (
15
- InvalidJudgeConfigError,
16
15
  InvalidRLConfigError,
17
16
  InvalidRubricConfigError,
18
17
  InvalidSFTConfigError,
18
+ InvalidVerifierConfigError,
19
19
  MissingAlgorithmError,
20
20
  MissingComputeError,
21
21
  MissingDatasetError,
@@ -23,7 +23,7 @@ from .errors import (
23
23
  TomlParseError,
24
24
  UnsupportedAlgorithmError,
25
25
  )
26
- from .judge_validation import extract_and_validate_judge_rubric
26
+ from .verifier_validation import extract_and_validate_verifier_rubric
27
27
 
28
28
  __all__ = [
29
29
  "validate_sft_config",
@@ -317,16 +317,16 @@ def validate_rl_config(config: MutableMapping[str, Any]) -> dict[str, Any]:
317
317
  if "reference_placement" not in config["compute"]["topology"]:
318
318
  config["compute"]["topology"]["reference_placement"] = "none"
319
319
 
320
- # Validate judge/rubric configuration with formalized Pydantic models
320
+ # Validate verifier/rubric configuration with formalized Pydantic models
321
321
  # This will emit deprecation warnings for dead fields and validate structure
322
322
  try:
323
- rubric_config, judge_config = extract_and_validate_judge_rubric(config)
323
+ rubric_config, verifier_config = extract_and_validate_verifier_rubric(config)
324
324
  # Validation passed - configs are clean and ready for use
325
325
  # The validated Pydantic models can be used by training code if needed
326
- except (InvalidJudgeConfigError, InvalidRubricConfigError) as exc:
326
+ except (InvalidVerifierConfigError, InvalidRubricConfigError) as exc:
327
327
  raise InvalidRLConfigError(
328
- detail=f"Judge/Rubric validation failed: {exc.detail}",
329
- hint="Check JUDGE_RUBRIC_CLEANUP_GUIDE.md for migration help."
328
+ detail=f"Verifier/Rubric validation failed: {exc.detail}",
329
+ hint="Check the verifier/rubric cleanup guide for migration help."
330
330
  ) from exc
331
331
 
332
332
  # Validate using Pydantic model
@@ -1,7 +1,7 @@
1
1
  """
2
- Pydantic schemas for judge/rubric configuration.
2
+ Pydantic schemas for verifier/rubric configuration.
3
3
 
4
- These models define the ACTUAL fields used by the backend judge service,
4
+ These models define the ACTUAL fields used by the backend verifier service,
5
5
  with all dead code removed. This is the single source of truth for what
6
6
  gets sent in HTTP requests.
7
7
  """
@@ -17,9 +17,9 @@ from synth_ai.sdk.api.train.configs.shared import ExtraModel
17
17
  __all__ = [
18
18
  "RubricWeightsConfig",
19
19
  "RubricConfig",
20
- "JudgeOptionsConfig",
21
- "JudgeConfig",
22
- "JudgeRequestPayload",
20
+ "VerifierOptionsConfig",
21
+ "VerifierConfig",
22
+ "VerifierRequestPayload",
23
23
  ]
24
24
 
25
25
 
@@ -27,8 +27,8 @@ class RubricWeightsConfig(ExtraModel):
27
27
  """
28
28
  Reward blending weights (client-side only, not sent to backend).
29
29
 
30
- These weights control how env rewards, event judge scores, and outcome
31
- judge scores are combined into a final reward signal for policy gradients.
30
+ These weights control how env rewards, event verifier scores, and outcome
31
+ verifier scores are combined into a final reward signal for policy gradients.
32
32
 
33
33
  Formula:
34
34
  total_reward = (env * env_return) + (event * sum(event_scores)) + (outcome * outcome_score)
@@ -40,12 +40,12 @@ class RubricWeightsConfig(ExtraModel):
40
40
  )
41
41
  event: float = Field(
42
42
  default=0.0,
43
- description="Weight for per-event judge scores (step-level judging)",
43
+ description="Weight for per-event verifier scores (step-level verification)",
44
44
  ge=0.0,
45
45
  )
46
46
  outcome: float = Field(
47
47
  default=0.0,
48
- description="Weight for outcome judge score (episode-level judging)",
48
+ description="Weight for outcome verifier score (episode-level verification)",
49
49
  ge=0.0,
50
50
  )
51
51
 
@@ -61,11 +61,11 @@ class RubricConfig(ExtraModel):
61
61
  """
62
62
  Top-level rubric configuration.
63
63
 
64
- Controls whether rubric-based judging is enabled and how rewards are blended.
64
+ Controls whether rubric-based verification is enabled and how rewards are blended.
65
65
  """
66
66
  enabled: bool = Field(
67
67
  default=False,
68
- description="Master switch for rubric-based judging",
68
+ description="Master switch for rubric-based verification",
69
69
  )
70
70
  weights: RubricWeightsConfig = Field(
71
71
  default_factory=RubricWeightsConfig,
@@ -73,16 +73,16 @@ class RubricConfig(ExtraModel):
73
73
  )
74
74
 
75
75
 
76
- class JudgeOptionsConfig(ExtraModel):
76
+ class VerifierOptionsConfig(ExtraModel):
77
77
  """
78
- Judge provider options (sent to backend in HTTP request).
78
+ Verifier provider options (sent to backend in HTTP request).
79
79
 
80
- These fields are sent in the "options" object of the judge score request.
81
- All fields here map directly to the backend JudgeOptions schema.
80
+ These fields are sent in the "options" object of the verifier request.
81
+ All fields here map directly to the backend verifier options schema.
82
82
  """
83
83
  provider: str = Field(
84
84
  ...,
85
- description="Judge provider type ('openai', 'groq', 'gemini')",
85
+ description="Verifier provider type ('openai', 'groq', 'gemini')",
86
86
  pattern=r"^(openai|groq|gemini)$",
87
87
  )
88
88
  model: str = Field(
@@ -96,11 +96,11 @@ class JudgeOptionsConfig(ExtraModel):
96
96
  )
97
97
  event: bool = Field(
98
98
  default=True,
99
- description="Enable per-event (step-level) judging",
99
+ description="Enable per-event (step-level) verification",
100
100
  )
101
101
  outcome: bool = Field(
102
102
  default=True,
103
- description="Enable outcome (episode-level) judging",
103
+ description="Enable outcome (episode-level) verification",
104
104
  )
105
105
  timeout_s: Optional[float] = Field(
106
106
  default=None,
@@ -120,38 +120,38 @@ class JudgeOptionsConfig(ExtraModel):
120
120
  )
121
121
 
122
122
  @model_validator(mode="after")
123
- def _validate_at_least_one_enabled(self) -> JudgeOptionsConfig:
124
- """Ensure at least one judging type is enabled."""
123
+ def _validate_at_least_one_enabled(self) -> VerifierOptionsConfig:
124
+ """Ensure at least one verification type is enabled."""
125
125
  if not self.event and not self.outcome:
126
126
  raise ValueError("At least one of 'event' or 'outcome' must be enabled")
127
127
  return self
128
128
 
129
129
 
130
- class JudgeConfig(ExtraModel):
130
+ class VerifierConfig(ExtraModel):
131
131
  """
132
- Top-level judge configuration.
132
+ Top-level verifier configuration.
133
133
 
134
- This is parsed from TOML [judge] section and contains all judge-related settings.
134
+ This is parsed from TOML [verifier] section and contains all verifier-related settings.
135
135
  """
136
- options: JudgeOptionsConfig = Field(
136
+ options: VerifierOptionsConfig = Field(
137
137
  ...,
138
- description="Judge provider options (sent to backend)",
138
+ description="Verifier provider options (sent to backend)",
139
139
  )
140
140
 
141
141
 
142
142
  # HTTP Request Payload Structures (for documentation/type safety)
143
143
 
144
- class JudgeRequestPayload(ExtraModel):
144
+ class VerifierRequestPayload(ExtraModel):
145
145
  """
146
- HTTP request payload structure for POST /api/judge/v1/score.
146
+ HTTP request payload structure for POST /api/graphs/verifiers/completions.
147
147
 
148
- This is the ACTUAL payload sent to the backend judge service.
148
+ This is the ACTUAL payload sent to the backend verifier service.
149
149
  Used for type safety and documentation only.
150
150
  """
151
151
  policy_name: str = Field(..., description="Name of the policy being evaluated")
152
152
  task_app: dict[str, Any] = Field(..., description="Task app metadata (id, base_url)")
153
153
  trace: dict[str, Any] = Field(..., description="Tracing v3 payload (event_history, metadata)")
154
- options: dict[str, Any] = Field(..., description="Judge options (provider, model, etc.)")
154
+ options: dict[str, Any] = Field(..., description="Verifier options (provider, model, etc.)")
155
155
 
156
156
  class Config:
157
157
  extra = "allow" # Backend might add extra fields
@@ -159,16 +159,16 @@ class JudgeRequestPayload(ExtraModel):
159
159
 
160
160
  # Helper to convert to backend request format
161
161
 
162
- def build_judge_http_options(
163
- options_config: JudgeOptionsConfig,
162
+ def build_verifier_http_options(
163
+ options_config: VerifierOptionsConfig,
164
164
  *,
165
165
  rubric_overrides_from_task_info: Optional[dict[str, Any]] = None,
166
166
  ) -> dict[str, Any]:
167
167
  """
168
- Build the 'options' dict for HTTP request to backend judge.
168
+ Build the 'options' dict for HTTP request to backend verifier.
169
169
 
170
170
  Args:
171
- options_config: Validated judge options from TOML
171
+ options_config: Validated verifier options from TOML
172
172
  rubric_overrides_from_task_info: Dynamic overrides fetched from TaskInfo (takes priority)
173
173
 
174
174
  Returns:
@@ -198,4 +198,3 @@ def build_judge_http_options(
198
198
  payload["rubric_overrides"] = options_config.rubric_overrides
199
199
 
200
200
  return payload
201
-