synth-ai 0.2.13.dev2__py3-none-any.whl → 0.2.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/README.md +1 -0
- examples/multi_step/SFT_README.md +147 -0
- examples/multi_step/configs/README_verilog_rl.md +77 -0
- examples/multi_step/configs/VERILOG_REWARDS.md +90 -0
- examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +183 -0
- examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
- examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +12 -11
- examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
- examples/multi_step/configs/crafter_synth_backend.md +40 -0
- examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
- examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
- examples/multi_step/configs/verilog_rl_lora.toml +190 -0
- examples/multi_step/convert_traces_to_sft.py +84 -0
- examples/multi_step/judges/crafter_backend_judge.py +220 -0
- examples/multi_step/judges/verilog_backend_judge.py +234 -0
- examples/multi_step/readme.md +48 -0
- examples/multi_step/run_sft_qwen30b.sh +45 -0
- examples/multi_step/verilog_rl_lora.md +218 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +3 -2
- examples/qwen_coder/configs/coder_lora_4b.toml +2 -1
- examples/qwen_coder/configs/coder_lora_small.toml +2 -1
- examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
- examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
- examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
- examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
- examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
- examples/qwen_vl/QUICKSTART.md +327 -0
- examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
- examples/qwen_vl/README.md +154 -0
- examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
- examples/qwen_vl/RL_VISION_TESTING.md +333 -0
- examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
- examples/qwen_vl/SETUP_COMPLETE.md +275 -0
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +490 -0
- examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
- examples/qwen_vl/__init__.py +2 -0
- examples/qwen_vl/collect_data_via_cli.md +423 -0
- examples/qwen_vl/collect_vision_traces.py +368 -0
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +127 -0
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +60 -0
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +43 -0
- examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +45 -0
- examples/qwen_vl/configs/eval_qwen2vl_vision.toml +44 -0
- examples/qwen_vl/configs/filter_qwen2vl_sft.toml +50 -0
- examples/qwen_vl/configs/filter_vision_sft.toml +53 -0
- examples/qwen_vl/configs/filter_vision_test.toml +8 -0
- examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
- examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
- examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
- examples/qwen_vl/run_vision_comparison.sh +62 -0
- examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
- examples/qwen_vl/test_image_validation.py +201 -0
- examples/qwen_vl/test_sft_vision_data.py +110 -0
- examples/rl/README.md +1 -1
- examples/rl/configs/eval_base_qwen.toml +17 -0
- examples/rl/configs/eval_rl_qwen.toml +13 -0
- examples/rl/configs/rl_from_base_qwen.toml +37 -0
- examples/rl/configs/rl_from_base_qwen17.toml +76 -0
- examples/rl/configs/rl_from_ft_qwen.toml +37 -0
- examples/rl/run_eval.py +436 -0
- examples/rl/run_rl_and_save.py +111 -0
- examples/rl/task_app/README.md +22 -0
- examples/rl/task_app/math_single_step.py +990 -0
- examples/rl/task_app/math_task_app.py +111 -0
- examples/sft/README.md +5 -5
- examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -2
- examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -3
- examples/sft/evaluate.py +4 -4
- examples/sft/export_dataset.py +7 -4
- examples/sft/generate_traces.py +2 -0
- examples/swe/task_app/README.md +1 -1
- examples/swe/task_app/grpo_swe_mini.py +1 -1
- examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +13 -13
- examples/swe/task_app/hosted/policy_routes.py +0 -2
- examples/swe/task_app/hosted/rollout.py +2 -8
- examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
- examples/task_apps/crafter/CREATE_SFT_DATASET.md +273 -0
- examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
- examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +174 -0
- examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +268 -0
- examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
- examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
- examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
- examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
- examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
- examples/task_apps/crafter/task_app/__init__.py +3 -0
- examples/task_apps/crafter/task_app/grpo_crafter.py +309 -14
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +10 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +75 -4
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +17 -2
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +55 -3
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +114 -32
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +127 -27
- examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +156 -0
- examples/task_apps/enron/__init__.py +1 -0
- examples/task_apps/enron/filter_sft.toml +5 -0
- examples/task_apps/enron/tests/__init__.py +2 -0
- examples/task_apps/enron/tests/integration/__init__.py +2 -0
- examples/task_apps/enron/tests/integration/test_enron_eval.py +2 -0
- examples/task_apps/enron/tests/unit/__init__.py +2 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
- examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +415 -0
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +29 -0
- examples/task_apps/pokemon_red/pallet_town_rl_config.toml +2 -0
- examples/task_apps/pokemon_red/task_app.py +199 -6
- examples/task_apps/pokemon_red/test_pallet_town_rewards.py +2 -0
- examples/task_apps/sokoban/filter_sft.toml +5 -0
- examples/task_apps/sokoban/tests/__init__.py +2 -0
- examples/task_apps/sokoban/tests/integration/__init__.py +2 -0
- examples/task_apps/sokoban/tests/unit/__init__.py +2 -0
- examples/task_apps/verilog/eval_groq_qwen32b.toml +8 -4
- examples/task_apps/verilog/filter_sft.toml +5 -0
- examples/task_apps/verilog/task_app/grpo_verilog.py +258 -23
- examples/task_apps/verilog/tests/__init__.py +2 -0
- examples/task_apps/verilog/tests/integration/__init__.py +2 -0
- examples/task_apps/verilog/tests/integration/test_verilog_eval.py +2 -0
- examples/task_apps/verilog/tests/unit/__init__.py +2 -0
- examples/vlm/README.md +3 -3
- examples/vlm/configs/crafter_vlm_gpt4o.toml +2 -0
- examples/vlm/crafter_openai_vlm_agent.py +3 -5
- examples/vlm/filter_image_rows.py +1 -1
- examples/vlm/run_crafter_vlm_benchmark.py +2 -2
- examples/warming_up_to_rl/_utils.py +92 -0
- examples/warming_up_to_rl/analyze_trace_db.py +1 -1
- examples/warming_up_to_rl/configs/crafter_fft.toml +2 -0
- examples/warming_up_to_rl/configs/crafter_fft_4b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
- examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
- examples/warming_up_to_rl/export_trace_sft.py +174 -60
- examples/warming_up_to_rl/groq_test.py +2 -0
- examples/warming_up_to_rl/readme.md +63 -132
- examples/warming_up_to_rl/run_fft_and_save.py +1 -1
- examples/warming_up_to_rl/run_local_rollout.py +2 -0
- examples/warming_up_to_rl/run_local_rollout_modal.py +2 -0
- examples/warming_up_to_rl/run_local_rollout_parallel.py +2 -0
- examples/warming_up_to_rl/run_local_rollout_traced.py +2 -0
- examples/warming_up_to_rl/run_rl_and_save.py +1 -1
- examples/warming_up_to_rl/run_rollout_remote.py +2 -0
- examples/warming_up_to_rl/task_app/README.md +42 -0
- examples/warming_up_to_rl/task_app/grpo_crafter.py +696 -0
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +478 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1081 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
- synth_ai/__init__.py +44 -30
- synth_ai/_utils/__init__.py +47 -0
- synth_ai/_utils/base_url.py +10 -0
- synth_ai/_utils/http.py +10 -0
- synth_ai/_utils/prompts.py +10 -0
- synth_ai/_utils/task_app_state.py +12 -0
- synth_ai/_utils/user_config.py +10 -0
- synth_ai/api/models/supported.py +145 -7
- synth_ai/api/train/__init__.py +13 -1
- synth_ai/api/train/cli.py +30 -7
- synth_ai/api/train/config_finder.py +18 -11
- synth_ai/api/train/env_resolver.py +13 -10
- synth_ai/cli/__init__.py +66 -49
- synth_ai/cli/_modal_wrapper.py +9 -6
- synth_ai/cli/_typer_patch.py +0 -2
- synth_ai/cli/_validate_task_app.py +22 -4
- synth_ai/cli/legacy_root_backup.py +3 -1
- synth_ai/cli/lib/__init__.py +10 -0
- synth_ai/cli/lib/task_app_discovery.py +7 -0
- synth_ai/cli/lib/task_app_env.py +518 -0
- synth_ai/cli/recent.py +1 -0
- synth_ai/cli/setup.py +266 -0
- synth_ai/cli/task_app_deploy.py +16 -0
- synth_ai/cli/task_app_list.py +25 -0
- synth_ai/cli/task_app_modal_serve.py +16 -0
- synth_ai/cli/task_app_serve.py +18 -0
- synth_ai/cli/task_apps.py +392 -141
- synth_ai/cli/train.py +18 -0
- synth_ai/cli/tui.py +62 -0
- synth_ai/demos/__init__.py +10 -0
- synth_ai/demos/core/__init__.py +28 -1
- synth_ai/demos/crafter/__init__.py +1 -0
- synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
- synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
- synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
- synth_ai/demos/demo_registry.py +176 -0
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
- synth_ai/demos/math/__init__.py +1 -0
- synth_ai/demos/math/_common.py +16 -0
- synth_ai/demos/math/app.py +38 -0
- synth_ai/demos/math/config.toml +76 -0
- synth_ai/demos/math/deploy_modal.py +54 -0
- synth_ai/demos/math/modal_task_app.py +702 -0
- synth_ai/demos/math/task_app_entry.py +51 -0
- synth_ai/environments/environment/core.py +7 -1
- synth_ai/environments/examples/bandit/engine.py +0 -1
- synth_ai/environments/examples/bandit/environment.py +0 -1
- synth_ai/environments/examples/crafter_classic/environment.py +1 -1
- synth_ai/environments/examples/verilog/engine.py +76 -10
- synth_ai/environments/examples/wordle/environment.py +0 -1
- synth_ai/evals/base.py +16 -5
- synth_ai/evals/client.py +1 -1
- synth_ai/inference/client.py +1 -1
- synth_ai/learning/client.py +1 -1
- synth_ai/learning/health.py +1 -1
- synth_ai/learning/jobs.py +1 -1
- synth_ai/learning/rl/client.py +1 -1
- synth_ai/learning/rl/env_keys.py +1 -1
- synth_ai/learning/rl/secrets.py +1 -1
- synth_ai/learning/sft/client.py +1 -1
- synth_ai/learning/sft/data.py +407 -4
- synth_ai/learning/validators.py +4 -1
- synth_ai/task/__init__.py +11 -1
- synth_ai/task/apps/__init__.py +5 -2
- synth_ai/task/config.py +259 -0
- synth_ai/task/contracts.py +15 -2
- synth_ai/task/rubrics/__init__.py +4 -2
- synth_ai/task/rubrics/loaders.py +27 -4
- synth_ai/task/rubrics/scoring.py +3 -0
- synth_ai/task/rubrics.py +219 -0
- synth_ai/task/trace_correlation_helpers.py +328 -0
- synth_ai/task/tracing_utils.py +14 -3
- synth_ai/task/validators.py +145 -2
- synth_ai/tracing_v3/config.py +15 -13
- synth_ai/tracing_v3/constants.py +21 -0
- synth_ai/tracing_v3/db_config.py +3 -1
- synth_ai/tracing_v3/decorators.py +10 -7
- synth_ai/tracing_v3/session_tracer.py +10 -0
- synth_ai/tracing_v3/turso/daemon.py +2 -2
- synth_ai/tracing_v3/turso/native_manager.py +108 -77
- synth_ai/tracing_v3/utils.py +1 -1
- synth_ai/tui/__init__.py +5 -0
- synth_ai/tui/__main__.py +13 -0
- synth_ai/tui/cli/__init__.py +1 -0
- synth_ai/tui/cli/query_experiments.py +164 -0
- synth_ai/tui/cli/query_experiments_v3.py +164 -0
- synth_ai/tui/dashboard.py +911 -0
- synth_ai/utils/__init__.py +101 -0
- synth_ai/utils/base_url.py +94 -0
- synth_ai/utils/cli.py +131 -0
- synth_ai/utils/env.py +287 -0
- synth_ai/utils/http.py +169 -0
- synth_ai/utils/modal.py +308 -0
- synth_ai/utils/process.py +212 -0
- synth_ai/utils/prompts.py +39 -0
- synth_ai/utils/sqld.py +122 -0
- synth_ai/utils/task_app_discovery.py +882 -0
- synth_ai/utils/task_app_env.py +186 -0
- synth_ai/utils/task_app_state.py +318 -0
- synth_ai/utils/user_config.py +137 -0
- synth_ai/v0/config/__init__.py +1 -5
- synth_ai/v0/config/base_url.py +1 -7
- synth_ai/v0/tracing/config.py +1 -1
- synth_ai/v0/tracing/decorators.py +1 -1
- synth_ai/v0/tracing/upload.py +1 -1
- synth_ai/v0/tracing_v1/config.py +1 -1
- synth_ai/v0/tracing_v1/decorators.py +1 -1
- synth_ai/v0/tracing_v1/upload.py +1 -1
- {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/METADATA +85 -31
- {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/RECORD +286 -135
- synth_ai/cli/man.py +0 -106
- synth_ai/compound/cais.py +0 -0
- synth_ai/core/experiment.py +0 -13
- synth_ai/core/system.py +0 -15
- synth_ai/demo_registry.py +0 -295
- synth_ai/handshake.py +0 -109
- synth_ai/http.py +0 -26
- {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"""Task app registry entry for the math demo Modal deployment."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from contextlib import suppress
|
|
6
|
+
from importlib import import_module
|
|
7
|
+
|
|
8
|
+
from synth_ai.task.apps import ModalDeploymentConfig, TaskAppEntry, register_task_app
|
|
9
|
+
|
|
10
|
+
try:
|
|
11
|
+
from synth_ai.task.apps.math_single_step import build_config as base_build_config
|
|
12
|
+
except ModuleNotFoundError:
|
|
13
|
+
base_module = import_module("examples.rl.task_app.math_single_step")
|
|
14
|
+
base_build_config = base_module.build_config
|
|
15
|
+
|
|
16
|
+
DEMO_MODAL_CONFIG = ModalDeploymentConfig(
|
|
17
|
+
app_name="hendrycks-math-task-app",
|
|
18
|
+
pip_packages=(
|
|
19
|
+
"fastapi>=0.110.0",
|
|
20
|
+
"uvicorn>=0.23.0",
|
|
21
|
+
"pydantic>=2.6.0",
|
|
22
|
+
"httpx>=0.24.0",
|
|
23
|
+
"numpy>=1.24.0",
|
|
24
|
+
"aiohttp>=3.8.0",
|
|
25
|
+
"datasets>=2.16.0",
|
|
26
|
+
"synth-ai",
|
|
27
|
+
),
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def build_config():
|
|
32
|
+
"""Reuse the shared math single-step TaskAppConfig."""
|
|
33
|
+
|
|
34
|
+
return base_build_config()
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def register_demo_entry() -> None:
|
|
38
|
+
entry = TaskAppEntry(
|
|
39
|
+
app_id="hendrycks-math-demo",
|
|
40
|
+
description="Demo math task app (Modal-focused) shipping with synth-ai demos.",
|
|
41
|
+
config_factory=build_config,
|
|
42
|
+
modal=DEMO_MODAL_CONFIG,
|
|
43
|
+
)
|
|
44
|
+
with suppress(ValueError):
|
|
45
|
+
register_task_app(entry=entry)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
register_demo_entry()
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
__all__ = ["DEMO_MODAL_CONFIG", "build_config", "register_demo_entry"]
|
|
@@ -397,7 +397,7 @@ class CrafterClassicEnvironment(StatefulEnvironment, ReproducibleEnvironment[Cra
|
|
|
397
397
|
priv_state, pub_state, self.custom_step_observation_callable
|
|
398
398
|
)
|
|
399
399
|
total_step_time = time.time() - step_start_time
|
|
400
|
-
logger.
|
|
400
|
+
logger.debug(
|
|
401
401
|
f"CrafterClassic step completed in {total_step_time:.3f}s (interact: {interact_time:.3f}s)"
|
|
402
402
|
)
|
|
403
403
|
return obs
|
|
@@ -46,7 +46,7 @@ class VerilogCompileSuccessComponent(RewardComponent):
|
|
|
46
46
|
if hasattr(action, "get") and action.get("type") == "compile":
|
|
47
47
|
# Check if compilation was successful (returncode 0)
|
|
48
48
|
if action.get("returncode") == 0:
|
|
49
|
-
return 0.1
|
|
49
|
+
return 0.01 # Normalized: 0.1 / 10.0 = 0.01
|
|
50
50
|
return 0.0
|
|
51
51
|
|
|
52
52
|
|
|
@@ -55,12 +55,12 @@ class VerilogSimulationPassComponent(RewardComponent):
|
|
|
55
55
|
if hasattr(action, "get") and action.get("type") == "simulate":
|
|
56
56
|
# Check if simulation passed
|
|
57
57
|
if action.get("passed", False):
|
|
58
|
-
return 1.0
|
|
58
|
+
return 0.1 # Normalized: 1.0 / 10.0 = 0.1
|
|
59
59
|
return 0.0
|
|
60
60
|
|
|
61
61
|
|
|
62
62
|
class VerilogStepPenaltyComponent(RewardComponent):
|
|
63
|
-
def __init__(self, penalty: float =
|
|
63
|
+
def __init__(self, penalty: float = 0.0): # No per-step reward - only reward accomplishments
|
|
64
64
|
self.penalty = penalty
|
|
65
65
|
|
|
66
66
|
async def score(self, state: Any, action: Any) -> float:
|
|
@@ -68,12 +68,12 @@ class VerilogStepPenaltyComponent(RewardComponent):
|
|
|
68
68
|
|
|
69
69
|
|
|
70
70
|
class VerilogSubmitSuccessComponent(RewardComponent):
|
|
71
|
-
"""Reward for successful submission (tests passed)."""
|
|
71
|
+
"""Reward for successful submission (tests passed). Max reward = 1.0 (normalized)."""
|
|
72
72
|
async def score(self, state: VerilogPublicState, action: Any) -> float:
|
|
73
73
|
if hasattr(action, "get") and action.get("type") == "submit":
|
|
74
74
|
# Check if submission passed
|
|
75
75
|
if action.get("passed", False):
|
|
76
|
-
return 10.0
|
|
76
|
+
return 1.0 # Normalized: Maximum reward is now 1.0
|
|
77
77
|
return 0.0
|
|
78
78
|
|
|
79
79
|
|
|
@@ -83,6 +83,9 @@ class VerilogEngine(StatefulEngine):
|
|
|
83
83
|
"""
|
|
84
84
|
|
|
85
85
|
def __init__(self, task_instance: TaskInstance):
|
|
86
|
+
# Validate required Verilog tools are available
|
|
87
|
+
self._validate_verilog_tools()
|
|
88
|
+
|
|
86
89
|
self.task_instance = task_instance
|
|
87
90
|
self._total_reward = 0.0
|
|
88
91
|
self._current_action_for_reward: Optional[Dict[str, Any]] = None
|
|
@@ -92,7 +95,7 @@ class VerilogEngine(StatefulEngine):
|
|
|
92
95
|
VerilogCompileSuccessComponent(),
|
|
93
96
|
VerilogSimulationPassComponent(),
|
|
94
97
|
VerilogSubmitSuccessComponent(),
|
|
95
|
-
VerilogStepPenaltyComponent(penalty
|
|
98
|
+
VerilogStepPenaltyComponent(penalty=0.0), # No per-step reward
|
|
96
99
|
]
|
|
97
100
|
)
|
|
98
101
|
|
|
@@ -103,6 +106,39 @@ class VerilogEngine(StatefulEngine):
|
|
|
103
106
|
# Track last compile/simulate outputs
|
|
104
107
|
self._last_compile_output: Optional[str] = None
|
|
105
108
|
self._last_simulate_output: Optional[str] = None
|
|
109
|
+
|
|
110
|
+
@staticmethod
|
|
111
|
+
def _validate_verilog_tools() -> None:
|
|
112
|
+
"""Validate that required Verilog tools (iverilog, vvp) are available."""
|
|
113
|
+
missing_tools = []
|
|
114
|
+
|
|
115
|
+
if not shutil.which("iverilog"):
|
|
116
|
+
missing_tools.append("iverilog")
|
|
117
|
+
if not shutil.which("vvp"):
|
|
118
|
+
missing_tools.append("vvp")
|
|
119
|
+
|
|
120
|
+
if missing_tools:
|
|
121
|
+
error_msg = (
|
|
122
|
+
f"🚨🚨🚨 CRITICAL CONFIGURATION ERROR 🚨🚨🚨\n"
|
|
123
|
+
f"\n"
|
|
124
|
+
f"Missing required Verilog tools: {', '.join(missing_tools)}\n"
|
|
125
|
+
f"\n"
|
|
126
|
+
f"The Verilog environment CANNOT function without these tools.\n"
|
|
127
|
+
f"ALL compile/simulate operations will FAIL.\n"
|
|
128
|
+
f"ALL rewards will be ZERO.\n"
|
|
129
|
+
f"Training or evaluation will be COMPLETELY BROKEN.\n"
|
|
130
|
+
f"\n"
|
|
131
|
+
f"🔧 FIX THIS NOW:\n"
|
|
132
|
+
f"1. Add 'iverilog' to apt_packages in Modal deployment config\n"
|
|
133
|
+
f"2. Location: examples/task_apps/verilog/task_app/grpo_verilog.py\n"
|
|
134
|
+
f"3. Look for: modal=ModalDeploymentConfig(\n"
|
|
135
|
+
f"4. Add: apt_packages=('iverilog',) # Provides both iverilog and vvp\n"
|
|
136
|
+
f"5. Redeploy: uvx synth-ai modal-serve grpo-verilog\n"
|
|
137
|
+
f"\n"
|
|
138
|
+
f"{'='*80}"
|
|
139
|
+
)
|
|
140
|
+
print(f"\n{'='*80}\n{error_msg}\n{'='*80}\n", flush=True)
|
|
141
|
+
raise RuntimeError(error_msg)
|
|
106
142
|
|
|
107
143
|
async def _reset_engine(
|
|
108
144
|
self, *, seed: Optional[int] = None
|
|
@@ -133,6 +169,13 @@ class VerilogEngine(StatefulEngine):
|
|
|
133
169
|
) -> Tuple[VerilogPrivateState, VerilogPublicState]:
|
|
134
170
|
"""Process an action result and update engine state."""
|
|
135
171
|
self._current_action_for_reward = action_result
|
|
172
|
+
|
|
173
|
+
# DEBUG: Print action_result
|
|
174
|
+
print(f"\n[ENGINE DEBUG] _step_engine called")
|
|
175
|
+
print(f" action_result: {action_result}")
|
|
176
|
+
print(f" action_result.type: {action_result.get('type')}")
|
|
177
|
+
print(f" action_result.returncode: {action_result.get('returncode')}")
|
|
178
|
+
print(f" action_result.ok: {action_result.get('ok')}")
|
|
136
179
|
|
|
137
180
|
# Update last outputs if this is a compile or simulate action
|
|
138
181
|
if action_result.get("type") == "compile":
|
|
@@ -147,18 +190,21 @@ class VerilogEngine(StatefulEngine):
|
|
|
147
190
|
current_pub_state = VerilogPublicState(
|
|
148
191
|
files=self._get_file_contents(),
|
|
149
192
|
build_dir=str(self.build_dir),
|
|
150
|
-
task_completed=action_result.get("passed", False),
|
|
193
|
+
task_completed=action_result.get("submitted", False) and action_result.get("passed", False),
|
|
151
194
|
)
|
|
152
195
|
|
|
153
196
|
reward_from_stack = await self.reward_stack.step_reward(
|
|
154
197
|
state=current_pub_state, action=self._current_action_for_reward
|
|
155
198
|
)
|
|
156
199
|
self._current_action_for_reward = None
|
|
200
|
+
|
|
201
|
+
# DEBUG: Print reward
|
|
202
|
+
print(f"[ENGINE DEBUG] reward_from_stack: {reward_from_stack}")
|
|
157
203
|
|
|
158
204
|
self._total_reward += reward_from_stack
|
|
159
205
|
|
|
160
|
-
# Check termination conditions
|
|
161
|
-
terminated = action_result.get("
|
|
206
|
+
# Check termination conditions - only terminate if submitted (regardless of pass/fail)
|
|
207
|
+
terminated = action_result.get("submitted", False)
|
|
162
208
|
|
|
163
209
|
priv = VerilogPrivateState(
|
|
164
210
|
reward_last=reward_from_stack,
|
|
@@ -170,7 +216,7 @@ class VerilogEngine(StatefulEngine):
|
|
|
170
216
|
pub = VerilogPublicState(
|
|
171
217
|
files=self._get_file_contents(),
|
|
172
218
|
build_dir=str(self.build_dir),
|
|
173
|
-
task_completed=action_result.get("passed", False),
|
|
219
|
+
task_completed=action_result.get("submitted", False) and action_result.get("passed", False),
|
|
174
220
|
last_compile_output=self._last_compile_output,
|
|
175
221
|
last_simulate_output=self._last_simulate_output,
|
|
176
222
|
)
|
|
@@ -259,6 +305,16 @@ class VerilogEngine(StatefulEngine):
|
|
|
259
305
|
}
|
|
260
306
|
except subprocess.TimeoutExpired:
|
|
261
307
|
return {"ok": False, "error": "Compilation timeout", "type": "compile"}
|
|
308
|
+
except FileNotFoundError:
|
|
309
|
+
error_msg = (
|
|
310
|
+
"🚨 CRITICAL ERROR: 'iverilog' executable not found! 🚨\n"
|
|
311
|
+
"The Verilog compiler (iverilog) is not installed in this environment.\n"
|
|
312
|
+
"This will cause ALL compile operations to fail and result in ZERO rewards.\n"
|
|
313
|
+
"Fix: Add 'iverilog' to apt_packages in the Modal deployment config.\n"
|
|
314
|
+
"Location: examples/task_apps/verilog/task_app/grpo_verilog.py -> modal=ModalDeploymentConfig(apt_packages=('iverilog',))"
|
|
315
|
+
)
|
|
316
|
+
print(f"\n{'='*80}\n{error_msg}\n{'='*80}\n", flush=True)
|
|
317
|
+
raise RuntimeError(error_msg) from None
|
|
262
318
|
except Exception as e:
|
|
263
319
|
return {"ok": False, "error": str(e), "type": "compile"}
|
|
264
320
|
|
|
@@ -290,6 +346,16 @@ class VerilogEngine(StatefulEngine):
|
|
|
290
346
|
}
|
|
291
347
|
except subprocess.TimeoutExpired:
|
|
292
348
|
return {"ok": False, "error": "Simulation timeout", "type": "simulate"}
|
|
349
|
+
except FileNotFoundError:
|
|
350
|
+
error_msg = (
|
|
351
|
+
"🚨 CRITICAL ERROR: 'vvp' executable not found! 🚨\n"
|
|
352
|
+
"The Verilog simulator (vvp) is not installed in this environment.\n"
|
|
353
|
+
"This will cause ALL simulate operations to fail and result in ZERO rewards.\n"
|
|
354
|
+
"Fix: Add 'iverilog' to apt_packages in the Modal deployment config (provides both iverilog and vvp).\n"
|
|
355
|
+
"Location: examples/task_apps/verilog/task_app/grpo_verilog.py -> modal=ModalDeploymentConfig(apt_packages=('iverilog',))"
|
|
356
|
+
)
|
|
357
|
+
print(f"\n{'='*80}\n{error_msg}\n{'='*80}\n", flush=True)
|
|
358
|
+
raise RuntimeError(error_msg) from None
|
|
293
359
|
except Exception as e:
|
|
294
360
|
return {"ok": False, "error": str(e), "type": "simulate"}
|
|
295
361
|
|
synth_ai/evals/base.py
CHANGED
|
@@ -1,13 +1,24 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
|
|
1
7
|
class Judgement:
|
|
2
8
|
def __init__(
|
|
3
|
-
self,
|
|
4
|
-
|
|
9
|
+
self,
|
|
10
|
+
criteria: str,
|
|
11
|
+
score: float,
|
|
12
|
+
reasoning: str = "",
|
|
13
|
+
evidence: list[str] | None = None,
|
|
14
|
+
) -> None:
|
|
5
15
|
self.criteria = criteria
|
|
6
16
|
self.score = score
|
|
7
17
|
self.reasoning = reasoning
|
|
8
18
|
self.evidence = evidence or []
|
|
9
19
|
|
|
10
20
|
|
|
11
|
-
class BaseEval:
|
|
12
|
-
|
|
13
|
-
|
|
21
|
+
class BaseEval(ABC):
|
|
22
|
+
@abstractmethod
|
|
23
|
+
async def run(self, data: Any) -> list[Judgement]:
|
|
24
|
+
"""Execute the evaluation and return a list of judgements."""
|
synth_ai/evals/client.py
CHANGED
|
@@ -10,7 +10,7 @@ import os
|
|
|
10
10
|
import warnings
|
|
11
11
|
from typing import Any, Literal, TypedDict
|
|
12
12
|
|
|
13
|
-
from synth_ai.http import AsyncHttpClient, HTTPError
|
|
13
|
+
from synth_ai.http_client import AsyncHttpClient, HTTPError
|
|
14
14
|
from synth_ai.tracing_v3.serialization import normalize_for_json
|
|
15
15
|
|
|
16
16
|
Provider = Literal["groq", "gemini"]
|
synth_ai/inference/client.py
CHANGED
synth_ai/learning/client.py
CHANGED
|
@@ -11,7 +11,7 @@ from synth_ai.api.models.supported import (
|
|
|
11
11
|
)
|
|
12
12
|
from synth_ai.learning.sft.config import prepare_sft_job_payload
|
|
13
13
|
|
|
14
|
-
from ..http import AsyncHttpClient, HTTPError, sleep
|
|
14
|
+
from .._utils.http import AsyncHttpClient, HTTPError, sleep
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
class LearningClient:
|
synth_ai/learning/health.py
CHANGED
synth_ai/learning/jobs.py
CHANGED
|
@@ -5,7 +5,7 @@ from collections.abc import Callable
|
|
|
5
5
|
from contextlib import suppress
|
|
6
6
|
from typing import Any
|
|
7
7
|
|
|
8
|
-
from ..http import AsyncHttpClient, sleep
|
|
8
|
+
from .._utils.http import AsyncHttpClient, sleep
|
|
9
9
|
from .constants import TERMINAL_EVENT_FAILURE, TERMINAL_EVENT_SUCCESS, TERMINAL_STATUSES
|
|
10
10
|
|
|
11
11
|
|
synth_ai/learning/rl/client.py
CHANGED
synth_ai/learning/rl/env_keys.py
CHANGED
synth_ai/learning/rl/secrets.py
CHANGED
synth_ai/learning/sft/client.py
CHANGED
|
@@ -3,7 +3,7 @@ from __future__ import annotations
|
|
|
3
3
|
from pathlib import Path
|
|
4
4
|
from typing import Any
|
|
5
5
|
|
|
6
|
-
from ...http import AsyncHttpClient, HTTPError
|
|
6
|
+
from ..._utils.http import AsyncHttpClient, HTTPError
|
|
7
7
|
from .config import prepare_sft_job_payload
|
|
8
8
|
from .data import validate_jsonl_or_raise
|
|
9
9
|
|