synth-ai 0.2.16__py3-none-any.whl → 0.2.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/analyze_semantic_words.sh +2 -2
- examples/blog_posts/pokemon_vl/README.md +98 -0
- examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +25 -0
- examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
- examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
- examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +42 -0
- examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
- examples/blog_posts/warming_up_to_rl/README.md +158 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
- examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
- examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +41 -0
- examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
- examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
- examples/multi_step/configs/crafter_rl_outcome.toml +1 -1
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
- examples/multi_step/configs/verilog_rl_lora.toml +80 -123
- examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
- examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
- examples/qwen_coder/configs/coder_lora_small.toml +1 -3
- examples/qwen_vl/README.md +10 -12
- examples/qwen_vl/SETUP_COMPLETE.md +7 -8
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
- examples/qwen_vl/collect_data_via_cli.md +76 -84
- examples/qwen_vl/collect_vision_traces.py +4 -4
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
- examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
- examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
- examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
- examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
- examples/qwen_vl/run_vision_comparison.sh +6 -7
- examples/rl/README.md +5 -5
- examples/rl/configs/rl_from_base_qwen.toml +26 -1
- examples/rl/configs/rl_from_base_qwen17.toml +5 -2
- examples/rl/task_app/README.md +1 -2
- examples/rl/task_app/math_single_step.py +2 -2
- examples/run_crafter_demo.sh +2 -2
- examples/sft/README.md +1 -1
- examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
- examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
- examples/swe/task_app/README.md +32 -2
- examples/swe/task_app/grpo_swe_mini.py +4 -0
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
- examples/swe/task_app/hosted/inference/openai_client.py +4 -4
- examples/swe/task_app/morph_backend.py +178 -0
- examples/task_apps/crafter/task_app/README.md +1 -1
- examples/task_apps/crafter/task_app/grpo_crafter.py +66 -3
- examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +17 -49
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +13 -5
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +15 -1
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
- examples/task_apps/math/README.md +1 -2
- examples/task_apps/pokemon_red/README.md +3 -4
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
- examples/task_apps/pokemon_red/task_app.py +36 -5
- examples/task_apps/sokoban/README.md +2 -3
- examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
- examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
- examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
- examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -2
- examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
- examples/warming_up_to_rl/task_app/README.md +1 -1
- examples/warming_up_to_rl/task_app/grpo_crafter.py +134 -3
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +4 -4
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +6 -3
- examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +5 -0
- synth_ai/api/train/builders.py +9 -3
- synth_ai/api/train/cli.py +125 -10
- synth_ai/api/train/configs/__init__.py +8 -1
- synth_ai/api/train/configs/rl.py +32 -7
- synth_ai/api/train/configs/sft.py +6 -2
- synth_ai/api/train/configs/shared.py +59 -2
- synth_ai/auth/credentials.py +119 -0
- synth_ai/cli/__init__.py +12 -4
- synth_ai/cli/commands/__init__.py +17 -0
- synth_ai/cli/commands/demo/__init__.py +6 -0
- synth_ai/cli/commands/demo/core.py +163 -0
- synth_ai/cli/commands/deploy/__init__.py +23 -0
- synth_ai/cli/commands/deploy/core.py +614 -0
- synth_ai/cli/commands/deploy/errors.py +72 -0
- synth_ai/cli/commands/deploy/validation.py +11 -0
- synth_ai/cli/commands/eval/__init__.py +19 -0
- synth_ai/cli/commands/eval/core.py +1109 -0
- synth_ai/cli/commands/eval/errors.py +81 -0
- synth_ai/cli/commands/eval/validation.py +133 -0
- synth_ai/cli/commands/filter/__init__.py +12 -0
- synth_ai/cli/commands/filter/core.py +388 -0
- synth_ai/cli/commands/filter/errors.py +55 -0
- synth_ai/cli/commands/filter/validation.py +77 -0
- synth_ai/cli/commands/help/__init__.py +177 -0
- synth_ai/cli/commands/help/core.py +73 -0
- synth_ai/cli/commands/status/__init__.py +64 -0
- synth_ai/cli/commands/status/client.py +192 -0
- synth_ai/cli/commands/status/config.py +92 -0
- synth_ai/cli/commands/status/errors.py +20 -0
- synth_ai/cli/commands/status/formatters.py +164 -0
- synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
- synth_ai/cli/commands/status/subcommands/files.py +79 -0
- synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
- synth_ai/cli/commands/status/subcommands/models.py +79 -0
- synth_ai/cli/commands/status/subcommands/runs.py +81 -0
- synth_ai/cli/commands/status/subcommands/summary.py +47 -0
- synth_ai/cli/commands/status/utils.py +114 -0
- synth_ai/cli/commands/train/__init__.py +53 -0
- synth_ai/cli/commands/train/core.py +21 -0
- synth_ai/cli/commands/train/errors.py +117 -0
- synth_ai/cli/commands/train/judge_schemas.py +199 -0
- synth_ai/cli/commands/train/judge_validation.py +304 -0
- synth_ai/cli/commands/train/validation.py +443 -0
- synth_ai/cli/demo.py +2 -162
- synth_ai/cli/deploy/__init__.py +28 -0
- synth_ai/cli/deploy/core.py +5 -0
- synth_ai/cli/deploy/errors.py +23 -0
- synth_ai/cli/deploy/validation.py +5 -0
- synth_ai/cli/eval/__init__.py +36 -0
- synth_ai/cli/eval/core.py +5 -0
- synth_ai/cli/eval/errors.py +31 -0
- synth_ai/cli/eval/validation.py +5 -0
- synth_ai/cli/filter/__init__.py +28 -0
- synth_ai/cli/filter/core.py +5 -0
- synth_ai/cli/filter/errors.py +23 -0
- synth_ai/cli/filter/validation.py +5 -0
- synth_ai/cli/modal_serve/__init__.py +12 -0
- synth_ai/cli/modal_serve/core.py +14 -0
- synth_ai/cli/modal_serve/errors.py +8 -0
- synth_ai/cli/modal_serve/validation.py +11 -0
- synth_ai/cli/serve/__init__.py +12 -0
- synth_ai/cli/serve/core.py +14 -0
- synth_ai/cli/serve/errors.py +8 -0
- synth_ai/cli/serve/validation.py +11 -0
- synth_ai/cli/setup.py +20 -265
- synth_ai/cli/status.py +7 -126
- synth_ai/cli/task_app_deploy.py +1 -10
- synth_ai/cli/task_app_modal_serve.py +4 -9
- synth_ai/cli/task_app_serve.py +4 -11
- synth_ai/cli/task_apps.py +58 -1487
- synth_ai/cli/train/__init__.py +12 -0
- synth_ai/cli/train/core.py +21 -0
- synth_ai/cli/train/errors.py +8 -0
- synth_ai/cli/train/validation.py +24 -0
- synth_ai/cli/train.py +1 -14
- synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
- synth_ai/environments/examples/red/engine.py +33 -12
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
- synth_ai/environments/examples/red/environment.py +26 -0
- synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
- synth_ai/http.py +12 -0
- synth_ai/judge_schemas.py +10 -11
- synth_ai/learning/rl/client.py +3 -1
- synth_ai/streaming/__init__.py +29 -0
- synth_ai/streaming/config.py +94 -0
- synth_ai/streaming/handlers.py +469 -0
- synth_ai/streaming/streamer.py +301 -0
- synth_ai/streaming/types.py +95 -0
- synth_ai/task/validators.py +2 -2
- synth_ai/tracing_v3/migration_helper.py +1 -2
- synth_ai/utils/env.py +25 -18
- synth_ai/utils/http.py +4 -1
- synth_ai/utils/modal.py +2 -2
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/METADATA +8 -3
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/RECORD +184 -109
- examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
- synth_ai/cli/tui.py +0 -62
- synth_ai/tui/__init__.py +0 -5
- synth_ai/tui/__main__.py +0 -13
- synth_ai/tui/cli/__init__.py +0 -1
- synth_ai/tui/cli/query_experiments.py +0 -164
- synth_ai/tui/cli/query_experiments_v3.py +0 -164
- synth_ai/tui/dashboard.py +0 -911
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Validation logic for judge/rubric configuration from TOML.
|
|
3
|
+
|
|
4
|
+
This module validates and normalizes judge/rubric config, removing all dead fields
|
|
5
|
+
and ensuring only the fields actually used by the backend are present.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import warnings
|
|
11
|
+
from collections.abc import MutableMapping
|
|
12
|
+
from typing import Any, Optional, Tuple
|
|
13
|
+
|
|
14
|
+
from pydantic import ValidationError
|
|
15
|
+
|
|
16
|
+
from .errors import InvalidJudgeConfigError, InvalidRubricConfigError
|
|
17
|
+
from .judge_schemas import JudgeConfig, JudgeOptionsConfig, RubricConfig, RubricWeightsConfig
|
|
18
|
+
|
|
19
|
+
# Names exported by ``from <module> import *``.
# NOTE: ``check_for_deprecated_fields`` is defined further down in this module
# but is not listed here, so it has to be imported by name.
__all__ = [
    "validate_judge_config",
    "validate_rubric_config",
    "extract_and_validate_judge_rubric",
]
|
|
24
|
+
|
|
25
|
+
# Dead fields that should trigger deprecation warnings.
# Each set is intersected with the keys actually present in the matching TOML
# section (see _warn_deprecated_fields) to decide whether a warning is emitted.

# Keys of the [rubric] section. "event" and "outcome" additionally receive a
# dedicated section-level warning in validate_rubric_config().
DEPRECATED_RUBRIC_FIELDS = {
    "model",
    "api_base",
    "api_key_env",
    "event",
    "outcome",
}

# Top-level keys of the [judge] section.
DEPRECATED_JUDGE_FIELDS = {
    "type",
    "timeout_s",  # Moved to judge.options.timeout_s
}

# Keys of the [judge.options] section. validate_judge_config() also strips
# these keys before building JudgeOptionsConfig.
DEPRECATED_JUDGE_OPTIONS_FIELDS = {
    "max_concurrency",
    "tracks",
}
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _warn_deprecated_fields(section: str, fields: set[str], present_fields: set[str]) -> None:
    """Emit a single DeprecationWarning naming the dead keys found in a section.

    Args:
        section: TOML section name; rendered as the ``[section]`` message prefix.
        fields: Keys that are considered deprecated for this section.
        present_fields: Keys that actually appear in the user's section.

    No warning is issued when the two sets have nothing in common.
    """
    stale = sorted(fields & present_fields)
    if not stale:
        return

    message = (
        f"[{section}] contains deprecated fields that are no longer used: {', '.join(stale)}. "
        "These fields will be ignored and should be removed from your config. "
        "See judge/rubric cleanup guide for details."
    )
    # stacklevel=3 attributes the warning to the caller of the function that
    # invoked this helper rather than to the helper or its direct caller.
    warnings.warn(message, DeprecationWarning, stacklevel=3)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def validate_rubric_config(config: MutableMapping[str, Any]) -> RubricConfig:
    """
    Validate and normalize rubric configuration from TOML.

    Only the ``enabled`` and ``weights`` keys are carried into the result;
    deprecated keys merely trigger ``DeprecationWarning``s and are dropped.

    Args:
        config: Raw [rubric] section from TOML

    Returns:
        Validated RubricConfig instance. A missing or empty section yields
        ``RubricConfig(enabled=False)``.

    Raises:
        InvalidRubricConfigError: If validation fails, including when the
            section itself is not a table (e.g. ``rubric = true``).
    """
    if not config:
        # Default: rubric disabled
        return RubricConfig(enabled=False)

    try:
        config_dict = dict(config)
    except (TypeError, ValueError) as exc:
        # A scalar or otherwise non-table value cannot be converted; surface
        # it as the documented domain error instead of a raw TypeError.
        raise InvalidRubricConfigError(
            detail="Rubric validation failed: [rubric] must be a dictionary"
        ) from exc

    # Warn about deprecated fields
    _warn_deprecated_fields("rubric", DEPRECATED_RUBRIC_FIELDS, set(config_dict.keys()))

    # Warn about deprecated subsections. "event"/"outcome" are also members of
    # DEPRECATED_RUBRIC_FIELDS, so they are mentioned by the generic warning
    # above as well as by the more specific guidance below.
    if "event" in config_dict:
        warnings.warn(
            "[rubric.event] section is deprecated and no longer used. "
            "Criteria are now fetched dynamically from TaskInfo or specified in "
            "[judge.options.rubric_overrides]. This section will be ignored.",
            DeprecationWarning,
            stacklevel=2,
        )

    if "outcome" in config_dict:
        warnings.warn(
            "[rubric.outcome] section is deprecated and no longer used. "
            "Criteria are now fetched dynamically from TaskInfo or specified in "
            "[judge.options.rubric_overrides]. This section will be ignored.",
            DeprecationWarning,
            stacklevel=2,
        )

    # Extract only valid fields
    enabled = config_dict.get("enabled", False)
    weights_dict = config_dict.get("weights", {})

    # Validate using Pydantic
    try:
        if not isinstance(weights_dict, dict):
            # Routed through the generic handler below so the message gets the
            # same "Rubric validation failed:" prefix as other failures.
            raise ValueError("[rubric.weights] must be a dictionary")

        weights = RubricWeightsConfig(**weights_dict)
        return RubricConfig(enabled=enabled, weights=weights)

    except ValidationError as exc:
        # Flatten Pydantic's structured errors into one bullet per field.
        errors = []
        for error in exc.errors():
            loc = ".".join(str(x) for x in error["loc"])
            msg = error["msg"]
            errors.append(f" • rubric.{loc}: {msg}")
        raise InvalidRubricConfigError(
            detail="Rubric validation failed:\n" + "\n".join(errors)
        ) from exc
    except Exception as exc:
        # Boundary handler: any other failure is re-raised as the domain error.
        raise InvalidRubricConfigError(
            detail=f"Rubric validation failed: {exc}"
        ) from exc
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def validate_judge_config(config: MutableMapping[str, Any]) -> Optional[JudgeConfig]:
    """
    Validate and normalize judge configuration from TOML.

    Only the ``options`` sub-table is carried into the result. Deprecated
    option keys are stripped, and a top-level ``timeout_s`` is copied into the
    options when the options do not already define it.

    Args:
        config: Raw [judge] section from TOML

    Returns:
        Validated JudgeConfig instance, or None if not present

    Raises:
        InvalidJudgeConfigError: If validation fails, including when the
            section itself is not a table (e.g. ``judge = "x"``) or when
            ``[judge.options]`` is missing, empty, or not a dictionary.
    """
    if not config:
        return None

    try:
        config_dict = dict(config)
    except (TypeError, ValueError) as exc:
        # A scalar or otherwise non-table value cannot be converted; surface
        # it as the documented domain error instead of a raw TypeError.
        raise InvalidJudgeConfigError(
            detail="[judge] must be a dictionary"
        ) from exc

    # Warn about deprecated top-level fields
    _warn_deprecated_fields("judge", DEPRECATED_JUDGE_FIELDS, set(config_dict.keys()))

    # Extract judge.options (required)
    options_dict = config_dict.get("options")
    if not options_dict:
        raise InvalidJudgeConfigError(
            detail="[judge.options] section is required when [judge] is present"
        )

    if not isinstance(options_dict, dict):
        raise InvalidJudgeConfigError(
            detail="[judge.options] must be a dictionary"
        )

    # Warn about deprecated options fields
    _warn_deprecated_fields(
        "judge.options",
        DEPRECATED_JUDGE_OPTIONS_FIELDS,
        set(options_dict.keys()),
    )

    # Remove deprecated fields from options (builds a new dict, so the
    # caller's options table is not modified)
    options_dict = {
        k: v for k, v in options_dict.items()
        if k not in DEPRECATED_JUDGE_OPTIONS_FIELDS
    }

    # Migrate judge.timeout_s to judge.options.timeout_s if present; an
    # explicit options value always wins over the legacy top-level one.
    if "timeout_s" in config_dict and "timeout_s" not in options_dict:
        warnings.warn(
            "[judge].timeout_s is deprecated. Use [judge.options].timeout_s instead. "
            "Auto-migrating for now.",
            DeprecationWarning,
            stacklevel=2,
        )
        options_dict["timeout_s"] = config_dict["timeout_s"]

    # Validate using Pydantic
    try:
        options = JudgeOptionsConfig(**options_dict)
        return JudgeConfig(options=options)

    except ValidationError as exc:
        # Flatten Pydantic's structured errors into one bullet per field.
        errors = []
        for error in exc.errors():
            loc = ".".join(str(x) for x in error["loc"])
            msg = error["msg"]
            errors.append(f" • judge.options.{loc}: {msg}")
        raise InvalidJudgeConfigError(
            detail="Judge validation failed:\n" + "\n".join(errors)
        ) from exc
    except Exception as exc:
        # Boundary handler: any other failure is re-raised as the domain error.
        raise InvalidJudgeConfigError(
            detail=f"Judge validation failed: {exc}"
        ) from exc
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def extract_and_validate_judge_rubric(
    toml_config: MutableMapping[str, Any]
) -> Tuple[RubricConfig, Optional[JudgeConfig]]:
    """
    Pull the ``rubric`` and ``judge`` sections out of a parsed TOML config,
    validate each, and cross-check them against one another.

    Args:
        toml_config: Full TOML configuration dict

    Returns:
        Tuple of (validated_rubric, validated_judge_or_none). When the rubric
        is enabled but no judge config is available, the returned rubric has
        ``enabled`` reset to False and a UserWarning is emitted.

    Raises:
        InvalidRubricConfigError: If rubric validation fails
        InvalidJudgeConfigError: If judge validation fails
    """
    rubric_section = toml_config.get("rubric", {})
    judge_section = toml_config.get("judge", {})

    # The rubric is always validated; the judge only when a section exists.
    rubric_config = validate_rubric_config(rubric_section)

    judge_config = None
    if judge_section:
        judge_config = validate_judge_config(judge_section)

    # Nothing to cross-check when rubric scoring is off.
    if not rubric_config.enabled:
        return rubric_config, judge_config

    # An enabled rubric without a judge cannot score anything: downgrade it.
    if not judge_config:
        warnings.warn(
            "[rubric].enabled=true but [judge] section is missing. "
            "Rubric-based judging requires judge configuration. "
            "Rubric scoring will be disabled.",
            UserWarning,
            stacklevel=2,
        )
        rubric_config.enabled = False
        return rubric_config, judge_config

    # Both are active: flag weights that target a judging type that is off.
    rubric_weights = rubric_config.weights
    judge_options = judge_config.options

    if rubric_weights.event > 0 and not judge_options.event:
        warnings.warn(
            "[rubric.weights].event > 0 but [judge.options].event=false. "
            "Event-level judge scores will be 0 (no event judging enabled).",
            UserWarning,
            stacklevel=2,
        )

    if rubric_weights.outcome > 0 and not judge_options.outcome:
        warnings.warn(
            "[rubric.weights].outcome > 0 but [judge.options].outcome=false. "
            "Outcome judge score will be 0 (no outcome judging enabled).",
            UserWarning,
            stacklevel=2,
        )

    return rubric_config, judge_config
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
# Helper to check if config has any deprecated fields (for testing/migration)
|
|
264
|
+
|
|
265
|
+
def check_for_deprecated_fields(toml_config: MutableMapping[str, Any]) -> dict[str, list[str]]:
    """
    Check TOML config for deprecated fields without validation.

    Sections that are missing, empty, or not mappings are skipped rather than
    inspected, so malformed input never raises here and never produces
    substring-based false positives.

    Args:
        toml_config: Full TOML configuration dict

    Returns:
        Dict of {section: [deprecated_field_names]} for reporting. Sections
        without deprecated fields are omitted. Field names are listed in
        alphabetical order so the report is stable between runs; for the
        rubric section, the ``"... (entire section)"`` markers follow them.
    """
    # Local import: the module only imports MutableMapping at the top, and
    # read-only mappings should still be accepted here.
    from collections.abc import Mapping

    deprecated: dict[str, list[str]] = {}

    rubric_dict = toml_config.get("rubric", {})
    if isinstance(rubric_dict, Mapping) and rubric_dict:
        # sorted(): iterating the constant set directly gives an order that
        # varies with string hash randomization.
        found = sorted(
            field for field in DEPRECATED_RUBRIC_FIELDS
            if field in rubric_dict
        )
        # Kept alongside the plain "event"/"outcome" names for backward
        # compatibility with existing consumers of this report.
        if "event" in rubric_dict:
            found.append("event (entire section)")
        if "outcome" in rubric_dict:
            found.append("outcome (entire section)")
        if found:
            deprecated["rubric"] = found

    judge_dict = toml_config.get("judge", {})
    if isinstance(judge_dict, Mapping) and judge_dict:
        found = sorted(
            field for field in DEPRECATED_JUDGE_FIELDS
            if field in judge_dict
        )
        if found:
            deprecated["judge"] = found

        options_dict = judge_dict.get("options", {})
        if isinstance(options_dict, Mapping) and options_dict:
            options_found = sorted(
                field for field in DEPRECATED_JUDGE_OPTIONS_FIELDS
                if field in options_dict
            )
            if options_found:
                deprecated["judge.options"] = options_found

    return deprecated
|