synth-ai 0.4.1__py3-none-any.whl → 0.4.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- synth_ai/__init__.py +13 -13
- synth_ai/cli/__init__.py +6 -15
- synth_ai/cli/commands/eval/__init__.py +6 -15
- synth_ai/cli/commands/eval/config.py +338 -0
- synth_ai/cli/commands/eval/core.py +236 -1091
- synth_ai/cli/commands/eval/runner.py +704 -0
- synth_ai/cli/commands/eval/validation.py +44 -117
- synth_ai/cli/commands/filter/core.py +7 -7
- synth_ai/cli/commands/filter/validation.py +2 -2
- synth_ai/cli/commands/smoke/core.py +7 -17
- synth_ai/cli/commands/status/__init__.py +1 -64
- synth_ai/cli/commands/status/client.py +50 -151
- synth_ai/cli/commands/status/config.py +3 -83
- synth_ai/cli/commands/status/errors.py +4 -13
- synth_ai/cli/commands/status/subcommands/__init__.py +2 -8
- synth_ai/cli/commands/status/subcommands/config.py +13 -0
- synth_ai/cli/commands/status/subcommands/files.py +18 -63
- synth_ai/cli/commands/status/subcommands/jobs.py +28 -311
- synth_ai/cli/commands/status/subcommands/models.py +18 -62
- synth_ai/cli/commands/status/subcommands/runs.py +16 -63
- synth_ai/cli/commands/status/subcommands/session.py +67 -172
- synth_ai/cli/commands/status/subcommands/summary.py +24 -32
- synth_ai/cli/commands/status/subcommands/utils.py +41 -0
- synth_ai/cli/commands/status/utils.py +16 -107
- synth_ai/cli/commands/train/__init__.py +18 -20
- synth_ai/cli/commands/train/errors.py +3 -3
- synth_ai/cli/commands/train/prompt_learning_validation.py +15 -16
- synth_ai/cli/commands/train/validation.py +7 -7
- synth_ai/cli/commands/train/{judge_schemas.py → verifier_schemas.py} +33 -34
- synth_ai/cli/commands/train/verifier_validation.py +235 -0
- synth_ai/cli/demo_apps/demo_task_apps/math/config.toml +0 -1
- synth_ai/cli/demo_apps/demo_task_apps/math/modal_task_app.py +2 -6
- synth_ai/cli/demo_apps/math/config.toml +0 -1
- synth_ai/cli/demo_apps/math/modal_task_app.py +2 -6
- synth_ai/cli/demo_apps/mipro/task_app.py +25 -47
- synth_ai/cli/lib/apps/task_app.py +12 -13
- synth_ai/cli/lib/task_app_discovery.py +6 -6
- synth_ai/cli/lib/train_cfgs.py +10 -10
- synth_ai/cli/task_apps/__init__.py +11 -0
- synth_ai/cli/task_apps/commands.py +7 -15
- synth_ai/core/env.py +12 -1
- synth_ai/core/errors.py +1 -2
- synth_ai/core/integrations/cloudflare.py +209 -33
- synth_ai/core/tracing_v3/abstractions.py +46 -0
- synth_ai/data/__init__.py +3 -30
- synth_ai/data/enums.py +1 -20
- synth_ai/data/rewards.py +100 -3
- synth_ai/products/graph_evolve/__init__.py +1 -2
- synth_ai/products/graph_evolve/config.py +16 -16
- synth_ai/products/graph_evolve/converters/__init__.py +3 -3
- synth_ai/products/graph_evolve/converters/openai_sft.py +7 -7
- synth_ai/products/graph_evolve/examples/hotpotqa/config.toml +1 -1
- synth_ai/products/graph_gepa/__init__.py +23 -0
- synth_ai/products/graph_gepa/converters/__init__.py +19 -0
- synth_ai/products/graph_gepa/converters/openai_sft.py +29 -0
- synth_ai/sdk/__init__.py +45 -35
- synth_ai/sdk/api/eval/__init__.py +33 -0
- synth_ai/sdk/api/eval/job.py +732 -0
- synth_ai/sdk/api/research_agent/__init__.py +276 -66
- synth_ai/sdk/api/train/builders.py +181 -0
- synth_ai/sdk/api/train/cli.py +41 -33
- synth_ai/sdk/api/train/configs/__init__.py +6 -4
- synth_ai/sdk/api/train/configs/prompt_learning.py +127 -33
- synth_ai/sdk/api/train/configs/rl.py +264 -16
- synth_ai/sdk/api/train/configs/sft.py +165 -1
- synth_ai/sdk/api/train/graph_validators.py +12 -12
- synth_ai/sdk/api/train/graphgen.py +169 -51
- synth_ai/sdk/api/train/graphgen_models.py +95 -45
- synth_ai/sdk/api/train/local_api.py +10 -0
- synth_ai/sdk/api/train/pollers.py +36 -0
- synth_ai/sdk/api/train/prompt_learning.py +390 -60
- synth_ai/sdk/api/train/rl.py +41 -5
- synth_ai/sdk/api/train/sft.py +2 -0
- synth_ai/sdk/api/train/task_app.py +20 -0
- synth_ai/sdk/api/train/validators.py +17 -17
- synth_ai/sdk/graphs/completions.py +239 -33
- synth_ai/sdk/{judging/schemas.py → graphs/verifier_schemas.py} +23 -23
- synth_ai/sdk/learning/__init__.py +35 -5
- synth_ai/sdk/learning/context_learning_client.py +531 -0
- synth_ai/sdk/learning/context_learning_types.py +294 -0
- synth_ai/sdk/learning/prompt_learning_client.py +1 -1
- synth_ai/sdk/learning/prompt_learning_types.py +2 -1
- synth_ai/sdk/learning/rl/__init__.py +0 -4
- synth_ai/sdk/learning/rl/contracts.py +0 -4
- synth_ai/sdk/localapi/__init__.py +40 -0
- synth_ai/sdk/localapi/apps/__init__.py +28 -0
- synth_ai/sdk/localapi/client.py +10 -0
- synth_ai/sdk/localapi/contracts.py +10 -0
- synth_ai/sdk/localapi/helpers.py +519 -0
- synth_ai/sdk/localapi/rollouts.py +93 -0
- synth_ai/sdk/localapi/server.py +29 -0
- synth_ai/sdk/localapi/template.py +49 -0
- synth_ai/sdk/streaming/handlers.py +6 -6
- synth_ai/sdk/streaming/streamer.py +10 -6
- synth_ai/sdk/task/__init__.py +18 -5
- synth_ai/sdk/task/apps/__init__.py +37 -1
- synth_ai/sdk/task/client.py +9 -1
- synth_ai/sdk/task/config.py +6 -11
- synth_ai/sdk/task/contracts.py +137 -95
- synth_ai/sdk/task/in_process.py +32 -22
- synth_ai/sdk/task/in_process_runner.py +9 -4
- synth_ai/sdk/task/rubrics/__init__.py +2 -3
- synth_ai/sdk/task/rubrics/loaders.py +4 -4
- synth_ai/sdk/task/rubrics/strict.py +3 -4
- synth_ai/sdk/task/server.py +76 -16
- synth_ai/sdk/task/trace_correlation_helpers.py +190 -139
- synth_ai/sdk/task/validators.py +34 -49
- synth_ai/sdk/training/__init__.py +7 -16
- synth_ai/sdk/tunnels/__init__.py +118 -0
- synth_ai/sdk/tunnels/cleanup.py +83 -0
- synth_ai/sdk/tunnels/ports.py +120 -0
- synth_ai/sdk/tunnels/tunneled_api.py +363 -0
- {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/METADATA +71 -4
- {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/RECORD +118 -128
- synth_ai/cli/commands/baseline/__init__.py +0 -12
- synth_ai/cli/commands/baseline/core.py +0 -636
- synth_ai/cli/commands/baseline/list.py +0 -94
- synth_ai/cli/commands/eval/errors.py +0 -81
- synth_ai/cli/commands/status/formatters.py +0 -164
- synth_ai/cli/commands/status/subcommands/pricing.py +0 -23
- synth_ai/cli/commands/status/subcommands/usage.py +0 -203
- synth_ai/cli/commands/train/judge_validation.py +0 -305
- synth_ai/cli/usage.py +0 -159
- synth_ai/data/specs.py +0 -36
- synth_ai/sdk/api/research_agent/cli.py +0 -428
- synth_ai/sdk/api/research_agent/config.py +0 -357
- synth_ai/sdk/api/research_agent/job.py +0 -717
- synth_ai/sdk/baseline/__init__.py +0 -25
- synth_ai/sdk/baseline/config.py +0 -209
- synth_ai/sdk/baseline/discovery.py +0 -216
- synth_ai/sdk/baseline/execution.py +0 -154
- synth_ai/sdk/judging/__init__.py +0 -15
- synth_ai/sdk/judging/base.py +0 -24
- synth_ai/sdk/judging/client.py +0 -191
- synth_ai/sdk/judging/types.py +0 -42
- synth_ai/sdk/research_agent/__init__.py +0 -34
- synth_ai/sdk/research_agent/container_builder.py +0 -328
- synth_ai/sdk/research_agent/container_spec.py +0 -198
- synth_ai/sdk/research_agent/defaults.py +0 -34
- synth_ai/sdk/research_agent/results_collector.py +0 -69
- synth_ai/sdk/specs/__init__.py +0 -46
- synth_ai/sdk/specs/dataclasses.py +0 -149
- synth_ai/sdk/specs/loader.py +0 -144
- synth_ai/sdk/specs/serializer.py +0 -199
- synth_ai/sdk/specs/validation.py +0 -250
- synth_ai/sdk/tracing/__init__.py +0 -39
- synth_ai/sdk/usage/__init__.py +0 -37
- synth_ai/sdk/usage/client.py +0 -171
- synth_ai/sdk/usage/models.py +0 -261
- {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/WHEEL +0 -0
- {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/top_level.txt +0 -0
|
@@ -1,114 +1,23 @@
|
|
|
1
|
-
"""
|
|
1
|
+
"""Utility helpers for status commands."""
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
-
import
|
|
6
|
-
from collections.abc import Callable, Coroutine
|
|
7
|
-
from datetime import UTC, datetime, timedelta
|
|
8
|
-
from typing import Any, TypeVar
|
|
5
|
+
from typing import Any
|
|
9
6
|
|
|
10
|
-
import click
|
|
11
|
-
from rich.console import Console
|
|
12
7
|
|
|
13
|
-
|
|
8
|
+
def build_headers(api_key: str | None) -> dict[str, str]:
|
|
9
|
+
if not api_key:
|
|
10
|
+
return {}
|
|
11
|
+
return {"Authorization": f"Bearer {api_key}", "X-API-Key": api_key}
|
|
14
12
|
|
|
15
|
-
T = TypeVar("T")
|
|
16
13
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
multiplier = 1.0
|
|
28
|
-
if token.endswith("ms"):
|
|
29
|
-
multiplier = 0.001
|
|
30
|
-
token = token[:-2]
|
|
31
|
-
elif token.endswith("s"):
|
|
32
|
-
multiplier = 1.0
|
|
33
|
-
token = token[:-1]
|
|
34
|
-
elif token.endswith("m"):
|
|
35
|
-
multiplier = 60.0
|
|
36
|
-
token = token[:-1]
|
|
37
|
-
elif token.endswith("h"):
|
|
38
|
-
multiplier = 3600.0
|
|
39
|
-
token = token[:-1]
|
|
40
|
-
elif token.endswith("d"):
|
|
41
|
-
multiplier = 86400.0
|
|
42
|
-
token = token[:-1]
|
|
43
|
-
|
|
44
|
-
try:
|
|
45
|
-
seconds = float(token) * multiplier
|
|
46
|
-
except ValueError:
|
|
47
|
-
return value
|
|
48
|
-
|
|
49
|
-
dt = datetime.now(UTC) - timedelta(seconds=seconds)
|
|
50
|
-
return dt.isoformat()
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
def ensure_async(fn: Callable[..., Coroutine[Any, Any, T]]) -> Callable[..., T]:
|
|
54
|
-
"""Decorator to run an async callable via asyncio.run inside Click commands."""
|
|
55
|
-
|
|
56
|
-
def wrapper(*args, **kwargs):
|
|
57
|
-
return asyncio.run(fn(*args, **kwargs))
|
|
58
|
-
|
|
59
|
-
return wrapper
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
def resolve_context_config(
|
|
63
|
-
ctx: click.Context,
|
|
64
|
-
*,
|
|
65
|
-
base_url: str | None,
|
|
66
|
-
api_key: str | None,
|
|
67
|
-
timeout: float | None,
|
|
68
|
-
) -> BackendConfig:
|
|
69
|
-
if base_url is not None or api_key is not None or timeout not in (None, DEFAULT_TIMEOUT):
|
|
70
|
-
return resolve_backend_config(base_url=base_url, api_key=api_key, timeout=timeout)
|
|
71
|
-
obj = ctx.find_object(dict)
|
|
72
|
-
if obj and isinstance(obj.get("status_backend_config"), BackendConfig):
|
|
73
|
-
return obj["status_backend_config"]
|
|
74
|
-
return resolve_backend_config(base_url=base_url, api_key=api_key, timeout=timeout)
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
def warn(message: str) -> None:
|
|
78
|
-
console.print(f"[yellow]{message}[/yellow]")
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
def bail(message: str) -> None:
|
|
82
|
-
raise click.ClickException(message)
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
def common_options() -> Callable[[Callable[..., T]], Callable[..., T]]:
|
|
86
|
-
"""Apply shared backend CLI options to a command."""
|
|
87
|
-
|
|
88
|
-
def decorator(func: Callable[..., T]) -> Callable[..., T]:
|
|
89
|
-
options = [
|
|
90
|
-
click.option(
|
|
91
|
-
"--base-url",
|
|
92
|
-
envvar="SYNTH_STATUS_BASE_URL",
|
|
93
|
-
default=None,
|
|
94
|
-
help="Override the Synth backend base URL for this command.",
|
|
95
|
-
),
|
|
96
|
-
click.option(
|
|
97
|
-
"--api-key",
|
|
98
|
-
envvar="SYNTH_STATUS_API_KEY",
|
|
99
|
-
default=None,
|
|
100
|
-
help="API key for the Synth backend.",
|
|
101
|
-
),
|
|
102
|
-
click.option(
|
|
103
|
-
"--timeout",
|
|
104
|
-
default=DEFAULT_TIMEOUT,
|
|
105
|
-
show_default=True,
|
|
106
|
-
type=float,
|
|
107
|
-
help="HTTP request timeout in seconds.",
|
|
108
|
-
),
|
|
109
|
-
]
|
|
110
|
-
for option in reversed(options):
|
|
111
|
-
func = option(func)
|
|
112
|
-
return func
|
|
113
|
-
|
|
114
|
-
return decorator
|
|
14
|
+
def ensure_status_ok(response) -> dict[str, Any]:
|
|
15
|
+
if response.status_code >= 400:
|
|
16
|
+
detail = ""
|
|
17
|
+
try:
|
|
18
|
+
payload = response.json()
|
|
19
|
+
detail = payload.get("detail", "")
|
|
20
|
+
except Exception:
|
|
21
|
+
detail = response.text
|
|
22
|
+
raise RuntimeError(detail or f"Request failed ({response.status_code})")
|
|
23
|
+
return response.json()
|
|
@@ -1,22 +1,21 @@
|
|
|
1
1
|
from .core import register, train_command
|
|
2
2
|
from .errors import (
|
|
3
|
-
InvalidJudgeConfigError,
|
|
4
3
|
InvalidRubricConfigError,
|
|
4
|
+
InvalidVerifierConfigError,
|
|
5
5
|
TrainCliError,
|
|
6
6
|
)
|
|
7
|
-
from .
|
|
8
|
-
JudgeConfig,
|
|
9
|
-
JudgeOptionsConfig,
|
|
10
|
-
JudgeRequestPayload,
|
|
7
|
+
from .verifier_schemas import (
|
|
11
8
|
RubricConfig,
|
|
12
9
|
RubricWeightsConfig,
|
|
13
|
-
|
|
10
|
+
VerifierConfig,
|
|
11
|
+
VerifierOptionsConfig,
|
|
12
|
+
VerifierRequestPayload,
|
|
13
|
+
build_verifier_http_options,
|
|
14
14
|
)
|
|
15
|
-
from .
|
|
16
|
-
|
|
17
|
-
extract_and_validate_judge_rubric,
|
|
18
|
-
validate_judge_config,
|
|
15
|
+
from .verifier_validation import (
|
|
16
|
+
extract_and_validate_verifier_rubric,
|
|
19
17
|
validate_rubric_config,
|
|
18
|
+
validate_verifier_config,
|
|
20
19
|
)
|
|
21
20
|
from .validation import (
|
|
22
21
|
load_and_validate_rl,
|
|
@@ -31,23 +30,22 @@ __all__ = [
|
|
|
31
30
|
"train_command",
|
|
32
31
|
# Errors
|
|
33
32
|
"TrainCliError",
|
|
34
|
-
"
|
|
33
|
+
"InvalidVerifierConfigError",
|
|
35
34
|
"InvalidRubricConfigError",
|
|
36
35
|
# SFT/RL validation
|
|
37
36
|
"validate_sft_config",
|
|
38
37
|
"validate_rl_config",
|
|
39
38
|
"load_and_validate_sft",
|
|
40
39
|
"load_and_validate_rl",
|
|
41
|
-
#
|
|
40
|
+
# Verifier/Rubric schemas
|
|
42
41
|
"RubricWeightsConfig",
|
|
43
42
|
"RubricConfig",
|
|
44
|
-
"
|
|
45
|
-
"
|
|
46
|
-
"
|
|
47
|
-
"
|
|
48
|
-
#
|
|
43
|
+
"VerifierOptionsConfig",
|
|
44
|
+
"VerifierConfig",
|
|
45
|
+
"VerifierRequestPayload",
|
|
46
|
+
"build_verifier_http_options",
|
|
47
|
+
# Verifier/Rubric validation
|
|
49
48
|
"validate_rubric_config",
|
|
50
|
-
"
|
|
51
|
-
"
|
|
52
|
-
"check_for_deprecated_fields",
|
|
49
|
+
"validate_verifier_config",
|
|
50
|
+
"extract_and_validate_verifier_rubric",
|
|
53
51
|
]
|
|
@@ -82,8 +82,8 @@ class InvalidTopologyError(TrainCliError):
|
|
|
82
82
|
|
|
83
83
|
|
|
84
84
|
@dataclass(slots=True)
|
|
85
|
-
class
|
|
86
|
-
"""Raised when
|
|
85
|
+
class InvalidVerifierConfigError(TrainCliError):
|
|
86
|
+
"""Raised when verifier configuration validation fails."""
|
|
87
87
|
detail: str
|
|
88
88
|
|
|
89
89
|
def __str__(self) -> str:
|
|
@@ -112,6 +112,6 @@ __all__ = [
|
|
|
112
112
|
"UnsupportedAlgorithmError",
|
|
113
113
|
"InvalidHyperparametersError",
|
|
114
114
|
"InvalidTopologyError",
|
|
115
|
-
"
|
|
115
|
+
"InvalidVerifierConfigError",
|
|
116
116
|
"InvalidRubricConfigError",
|
|
117
117
|
]
|
|
@@ -29,7 +29,7 @@ KNOWN_PROMPT_LEARNING_FIELDS = {
|
|
|
29
29
|
"policy",
|
|
30
30
|
"mipro",
|
|
31
31
|
"gepa",
|
|
32
|
-
"
|
|
32
|
+
"verifier",
|
|
33
33
|
"proxy_models",
|
|
34
34
|
"env_config",
|
|
35
35
|
"env_name",
|
|
@@ -87,7 +87,7 @@ KNOWN_GEPA_FIELDS = {
|
|
|
87
87
|
"population",
|
|
88
88
|
"archive",
|
|
89
89
|
"token",
|
|
90
|
-
"
|
|
90
|
+
"verifier",
|
|
91
91
|
"proxy_models",
|
|
92
92
|
"adaptive_pool",
|
|
93
93
|
"adaptive_batch",
|
|
@@ -204,7 +204,7 @@ KNOWN_MIPRO_FIELDS = {
|
|
|
204
204
|
"demo",
|
|
205
205
|
"grounding",
|
|
206
206
|
"meta_update",
|
|
207
|
-
"
|
|
207
|
+
"verifier",
|
|
208
208
|
"proxy_models",
|
|
209
209
|
"adaptive_pool",
|
|
210
210
|
"spec_path",
|
|
@@ -219,16 +219,15 @@ KNOWN_MIPRO_FIELDS = {
|
|
|
219
219
|
"min_bootstrap_demos",
|
|
220
220
|
}
|
|
221
221
|
|
|
222
|
-
# Known fields in [prompt_learning.
|
|
223
|
-
|
|
222
|
+
# Known fields in [prompt_learning.verifier]
|
|
223
|
+
KNOWN_VERIFIER_FIELDS = {
|
|
224
224
|
"enabled",
|
|
225
225
|
"reward_source",
|
|
226
226
|
"backend_base",
|
|
227
227
|
"backend_api_key_env",
|
|
228
228
|
"backend_provider",
|
|
229
229
|
"backend_model",
|
|
230
|
-
"
|
|
231
|
-
"backend_rubric_id",
|
|
230
|
+
"verifier_graph_id",
|
|
232
231
|
"backend_event_enabled",
|
|
233
232
|
"backend_outcome_enabled",
|
|
234
233
|
"backend_options",
|
|
@@ -300,7 +299,7 @@ DEPRECATED_FIELDS = {
|
|
|
300
299
|
"max_concurrent_rollouts": "Use [prompt_learning.gepa.rollout].max_concurrent instead.",
|
|
301
300
|
"evaluation_seeds": "Use [prompt_learning.gepa.evaluation].seeds instead of flat evaluation_seeds.",
|
|
302
301
|
"validation_seeds": "Use [prompt_learning.gepa.evaluation].validation_seeds instead.",
|
|
303
|
-
"backend_rubric_id": "Use '
|
|
302
|
+
"backend_rubric_id": "Use 'verifier_graph_id' in [prompt_learning.verifier].",
|
|
304
303
|
}
|
|
305
304
|
|
|
306
305
|
|
|
@@ -444,10 +443,10 @@ def validate_prompt_learning_config(
|
|
|
444
443
|
"termination_config is supported and will create backend TerminationManager conditions"
|
|
445
444
|
)
|
|
446
445
|
|
|
447
|
-
# Validate [prompt_learning.
|
|
448
|
-
|
|
449
|
-
if
|
|
450
|
-
_check_unknown_fields(
|
|
446
|
+
# Validate [prompt_learning.verifier] if present
|
|
447
|
+
verifier = pl_config.get("verifier")
|
|
448
|
+
if verifier and isinstance(verifier, dict):
|
|
449
|
+
_check_unknown_fields(verifier, KNOWN_VERIFIER_FIELDS, "prompt_learning.verifier", result)
|
|
451
450
|
|
|
452
451
|
# Validate [prompt_learning.proxy_models] if present
|
|
453
452
|
proxy_models = pl_config.get("proxy_models")
|
|
@@ -553,9 +552,9 @@ def _validate_gepa_config(
|
|
|
553
552
|
result,
|
|
554
553
|
)
|
|
555
554
|
|
|
556
|
-
if "
|
|
555
|
+
if "verifier" in gepa and isinstance(gepa["verifier"], dict):
|
|
557
556
|
_check_unknown_fields(
|
|
558
|
-
gepa["
|
|
557
|
+
gepa["verifier"], KNOWN_VERIFIER_FIELDS, "prompt_learning.gepa.verifier", result
|
|
559
558
|
)
|
|
560
559
|
|
|
561
560
|
|
|
@@ -575,9 +574,9 @@ def _validate_mipro_config(
|
|
|
575
574
|
_check_unknown_fields(mipro, KNOWN_MIPRO_FIELDS, "prompt_learning.mipro", result)
|
|
576
575
|
|
|
577
576
|
# Validate nested sections
|
|
578
|
-
if "
|
|
577
|
+
if "verifier" in mipro and isinstance(mipro["verifier"], dict):
|
|
579
578
|
_check_unknown_fields(
|
|
580
|
-
mipro["
|
|
579
|
+
mipro["verifier"], KNOWN_VERIFIER_FIELDS, "prompt_learning.mipro.verifier", result
|
|
581
580
|
)
|
|
582
581
|
|
|
583
582
|
if "adaptive_pool" in mipro and isinstance(mipro["adaptive_pool"], dict):
|
|
@@ -12,10 +12,10 @@ from synth_ai.sdk.api.train.configs.sft import SFTConfig
|
|
|
12
12
|
from synth_ai.sdk.api.train.utils import load_toml
|
|
13
13
|
|
|
14
14
|
from .errors import (
|
|
15
|
-
InvalidJudgeConfigError,
|
|
16
15
|
InvalidRLConfigError,
|
|
17
16
|
InvalidRubricConfigError,
|
|
18
17
|
InvalidSFTConfigError,
|
|
18
|
+
InvalidVerifierConfigError,
|
|
19
19
|
MissingAlgorithmError,
|
|
20
20
|
MissingComputeError,
|
|
21
21
|
MissingDatasetError,
|
|
@@ -23,7 +23,7 @@ from .errors import (
|
|
|
23
23
|
TomlParseError,
|
|
24
24
|
UnsupportedAlgorithmError,
|
|
25
25
|
)
|
|
26
|
-
from .
|
|
26
|
+
from .verifier_validation import extract_and_validate_verifier_rubric
|
|
27
27
|
|
|
28
28
|
__all__ = [
|
|
29
29
|
"validate_sft_config",
|
|
@@ -317,16 +317,16 @@ def validate_rl_config(config: MutableMapping[str, Any]) -> dict[str, Any]:
|
|
|
317
317
|
if "reference_placement" not in config["compute"]["topology"]:
|
|
318
318
|
config["compute"]["topology"]["reference_placement"] = "none"
|
|
319
319
|
|
|
320
|
-
# Validate
|
|
320
|
+
# Validate verifier/rubric configuration with formalized Pydantic models
|
|
321
321
|
# This will emit deprecation warnings for dead fields and validate structure
|
|
322
322
|
try:
|
|
323
|
-
rubric_config,
|
|
323
|
+
rubric_config, verifier_config = extract_and_validate_verifier_rubric(config)
|
|
324
324
|
# Validation passed - configs are clean and ready for use
|
|
325
325
|
# The validated Pydantic models can be used by training code if needed
|
|
326
|
-
except (
|
|
326
|
+
except (InvalidVerifierConfigError, InvalidRubricConfigError) as exc:
|
|
327
327
|
raise InvalidRLConfigError(
|
|
328
|
-
detail=f"
|
|
329
|
-
hint="Check
|
|
328
|
+
detail=f"Verifier/Rubric validation failed: {exc.detail}",
|
|
329
|
+
hint="Check the verifier/rubric cleanup guide for migration help."
|
|
330
330
|
) from exc
|
|
331
331
|
|
|
332
332
|
# Validate using Pydantic model
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"""
|
|
2
|
-
Pydantic schemas for
|
|
2
|
+
Pydantic schemas for verifier/rubric configuration.
|
|
3
3
|
|
|
4
|
-
These models define the ACTUAL fields used by the backend
|
|
4
|
+
These models define the ACTUAL fields used by the backend verifier service,
|
|
5
5
|
with all dead code removed. This is the single source of truth for what
|
|
6
6
|
gets sent in HTTP requests.
|
|
7
7
|
"""
|
|
@@ -17,9 +17,9 @@ from synth_ai.sdk.api.train.configs.shared import ExtraModel
|
|
|
17
17
|
__all__ = [
|
|
18
18
|
"RubricWeightsConfig",
|
|
19
19
|
"RubricConfig",
|
|
20
|
-
"
|
|
21
|
-
"
|
|
22
|
-
"
|
|
20
|
+
"VerifierOptionsConfig",
|
|
21
|
+
"VerifierConfig",
|
|
22
|
+
"VerifierRequestPayload",
|
|
23
23
|
]
|
|
24
24
|
|
|
25
25
|
|
|
@@ -27,8 +27,8 @@ class RubricWeightsConfig(ExtraModel):
|
|
|
27
27
|
"""
|
|
28
28
|
Reward blending weights (client-side only, not sent to backend).
|
|
29
29
|
|
|
30
|
-
These weights control how env rewards, event
|
|
31
|
-
|
|
30
|
+
These weights control how env rewards, event verifier scores, and outcome
|
|
31
|
+
verifier scores are combined into a final reward signal for policy gradients.
|
|
32
32
|
|
|
33
33
|
Formula:
|
|
34
34
|
total_reward = (env * env_return) + (event * sum(event_scores)) + (outcome * outcome_score)
|
|
@@ -40,12 +40,12 @@ class RubricWeightsConfig(ExtraModel):
|
|
|
40
40
|
)
|
|
41
41
|
event: float = Field(
|
|
42
42
|
default=0.0,
|
|
43
|
-
description="Weight for per-event
|
|
43
|
+
description="Weight for per-event verifier scores (step-level verification)",
|
|
44
44
|
ge=0.0,
|
|
45
45
|
)
|
|
46
46
|
outcome: float = Field(
|
|
47
47
|
default=0.0,
|
|
48
|
-
description="Weight for outcome
|
|
48
|
+
description="Weight for outcome verifier score (episode-level verification)",
|
|
49
49
|
ge=0.0,
|
|
50
50
|
)
|
|
51
51
|
|
|
@@ -61,11 +61,11 @@ class RubricConfig(ExtraModel):
|
|
|
61
61
|
"""
|
|
62
62
|
Top-level rubric configuration.
|
|
63
63
|
|
|
64
|
-
Controls whether rubric-based
|
|
64
|
+
Controls whether rubric-based verification is enabled and how rewards are blended.
|
|
65
65
|
"""
|
|
66
66
|
enabled: bool = Field(
|
|
67
67
|
default=False,
|
|
68
|
-
description="Master switch for rubric-based
|
|
68
|
+
description="Master switch for rubric-based verification",
|
|
69
69
|
)
|
|
70
70
|
weights: RubricWeightsConfig = Field(
|
|
71
71
|
default_factory=RubricWeightsConfig,
|
|
@@ -73,16 +73,16 @@ class RubricConfig(ExtraModel):
|
|
|
73
73
|
)
|
|
74
74
|
|
|
75
75
|
|
|
76
|
-
class
|
|
76
|
+
class VerifierOptionsConfig(ExtraModel):
|
|
77
77
|
"""
|
|
78
|
-
|
|
78
|
+
Verifier provider options (sent to backend in HTTP request).
|
|
79
79
|
|
|
80
|
-
These fields are sent in the "options" object of the
|
|
81
|
-
All fields here map directly to the backend
|
|
80
|
+
These fields are sent in the "options" object of the verifier request.
|
|
81
|
+
All fields here map directly to the backend verifier options schema.
|
|
82
82
|
"""
|
|
83
83
|
provider: str = Field(
|
|
84
84
|
...,
|
|
85
|
-
description="
|
|
85
|
+
description="Verifier provider type ('openai', 'groq', 'gemini')",
|
|
86
86
|
pattern=r"^(openai|groq|gemini)$",
|
|
87
87
|
)
|
|
88
88
|
model: str = Field(
|
|
@@ -96,11 +96,11 @@ class JudgeOptionsConfig(ExtraModel):
|
|
|
96
96
|
)
|
|
97
97
|
event: bool = Field(
|
|
98
98
|
default=True,
|
|
99
|
-
description="Enable per-event (step-level)
|
|
99
|
+
description="Enable per-event (step-level) verification",
|
|
100
100
|
)
|
|
101
101
|
outcome: bool = Field(
|
|
102
102
|
default=True,
|
|
103
|
-
description="Enable outcome (episode-level)
|
|
103
|
+
description="Enable outcome (episode-level) verification",
|
|
104
104
|
)
|
|
105
105
|
timeout_s: Optional[float] = Field(
|
|
106
106
|
default=None,
|
|
@@ -120,38 +120,38 @@ class JudgeOptionsConfig(ExtraModel):
|
|
|
120
120
|
)
|
|
121
121
|
|
|
122
122
|
@model_validator(mode="after")
|
|
123
|
-
def _validate_at_least_one_enabled(self) ->
|
|
124
|
-
"""Ensure at least one
|
|
123
|
+
def _validate_at_least_one_enabled(self) -> VerifierOptionsConfig:
|
|
124
|
+
"""Ensure at least one verification type is enabled."""
|
|
125
125
|
if not self.event and not self.outcome:
|
|
126
126
|
raise ValueError("At least one of 'event' or 'outcome' must be enabled")
|
|
127
127
|
return self
|
|
128
128
|
|
|
129
129
|
|
|
130
|
-
class
|
|
130
|
+
class VerifierConfig(ExtraModel):
|
|
131
131
|
"""
|
|
132
|
-
Top-level
|
|
132
|
+
Top-level verifier configuration.
|
|
133
133
|
|
|
134
|
-
This is parsed from TOML [
|
|
134
|
+
This is parsed from TOML [verifier] section and contains all verifier-related settings.
|
|
135
135
|
"""
|
|
136
|
-
options:
|
|
136
|
+
options: VerifierOptionsConfig = Field(
|
|
137
137
|
...,
|
|
138
|
-
description="
|
|
138
|
+
description="Verifier provider options (sent to backend)",
|
|
139
139
|
)
|
|
140
140
|
|
|
141
141
|
|
|
142
142
|
# HTTP Request Payload Structures (for documentation/type safety)
|
|
143
143
|
|
|
144
|
-
class
|
|
144
|
+
class VerifierRequestPayload(ExtraModel):
|
|
145
145
|
"""
|
|
146
|
-
HTTP request payload structure for POST /api/
|
|
146
|
+
HTTP request payload structure for POST /api/graphs/verifiers/completions.
|
|
147
147
|
|
|
148
|
-
This is the ACTUAL payload sent to the backend
|
|
148
|
+
This is the ACTUAL payload sent to the backend verifier service.
|
|
149
149
|
Used for type safety and documentation only.
|
|
150
150
|
"""
|
|
151
151
|
policy_name: str = Field(..., description="Name of the policy being evaluated")
|
|
152
152
|
task_app: dict[str, Any] = Field(..., description="Task app metadata (id, base_url)")
|
|
153
153
|
trace: dict[str, Any] = Field(..., description="Tracing v3 payload (event_history, metadata)")
|
|
154
|
-
options: dict[str, Any] = Field(..., description="
|
|
154
|
+
options: dict[str, Any] = Field(..., description="Verifier options (provider, model, etc.)")
|
|
155
155
|
|
|
156
156
|
class Config:
|
|
157
157
|
extra = "allow" # Backend might add extra fields
|
|
@@ -159,16 +159,16 @@ class JudgeRequestPayload(ExtraModel):
|
|
|
159
159
|
|
|
160
160
|
# Helper to convert to backend request format
|
|
161
161
|
|
|
162
|
-
def
|
|
163
|
-
options_config:
|
|
162
|
+
def build_verifier_http_options(
|
|
163
|
+
options_config: VerifierOptionsConfig,
|
|
164
164
|
*,
|
|
165
165
|
rubric_overrides_from_task_info: Optional[dict[str, Any]] = None,
|
|
166
166
|
) -> dict[str, Any]:
|
|
167
167
|
"""
|
|
168
|
-
Build the 'options' dict for HTTP request to backend
|
|
168
|
+
Build the 'options' dict for HTTP request to backend verifier.
|
|
169
169
|
|
|
170
170
|
Args:
|
|
171
|
-
options_config: Validated
|
|
171
|
+
options_config: Validated verifier options from TOML
|
|
172
172
|
rubric_overrides_from_task_info: Dynamic overrides fetched from TaskInfo (takes priority)
|
|
173
173
|
|
|
174
174
|
Returns:
|
|
@@ -198,4 +198,3 @@ def build_judge_http_options(
|
|
|
198
198
|
payload["rubric_overrides"] = options_config.rubric_overrides
|
|
199
199
|
|
|
200
200
|
return payload
|
|
201
|
-
|