synth-ai 0.2.14__py3-none-any.whl → 0.2.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/README.md +1 -0
- examples/analyze_semantic_words.sh +2 -2
- examples/blog_posts/pokemon_vl/README.md +98 -0
- examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +25 -0
- examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
- examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
- examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +42 -0
- examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
- examples/blog_posts/warming_up_to_rl/README.md +158 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
- examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
- examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +41 -0
- examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
- examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
- examples/multi_step/SFT_README.md +147 -0
- examples/multi_step/configs/crafter_rl_outcome.toml +1 -1
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +73 -115
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
- examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
- examples/multi_step/configs/verilog_rl_lora.toml +80 -123
- examples/multi_step/convert_traces_to_sft.py +84 -0
- examples/multi_step/run_sft_qwen30b.sh +45 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +1 -2
- examples/qwen_coder/configs/coder_lora_4b.toml +5 -1
- examples/qwen_coder/configs/coder_lora_small.toml +1 -2
- examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
- examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
- examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
- examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
- examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
- examples/qwen_vl/QUICKSTART.md +327 -0
- examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
- examples/qwen_vl/README.md +152 -0
- examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
- examples/qwen_vl/RL_VISION_TESTING.md +333 -0
- examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
- examples/qwen_vl/SETUP_COMPLETE.md +274 -0
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
- examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
- examples/qwen_vl/__init__.py +2 -0
- examples/qwen_vl/collect_data_via_cli.md +415 -0
- examples/qwen_vl/collect_vision_traces.py +368 -0
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
- examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
- examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
- examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
- examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
- examples/qwen_vl/configs/filter_vision_test.toml +8 -0
- examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
- examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
- examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
- examples/qwen_vl/run_vision_comparison.sh +61 -0
- examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
- examples/qwen_vl/test_image_validation.py +201 -0
- examples/qwen_vl/test_sft_vision_data.py +110 -0
- examples/rl/README.md +6 -6
- examples/rl/configs/eval_base_qwen.toml +17 -0
- examples/rl/configs/eval_rl_qwen.toml +13 -0
- examples/rl/configs/rl_from_base_qwen.toml +62 -0
- examples/rl/configs/rl_from_base_qwen17.toml +79 -0
- examples/rl/configs/rl_from_ft_qwen.toml +37 -0
- examples/rl/run_eval.py +436 -0
- examples/rl/run_rl_and_save.py +111 -0
- examples/rl/task_app/README.md +21 -0
- examples/rl/task_app/math_single_step.py +990 -0
- examples/rl/task_app/math_task_app.py +111 -0
- examples/run_crafter_demo.sh +2 -2
- examples/sft/README.md +6 -6
- examples/sft/configs/crafter_fft_qwen0p6b.toml +7 -2
- examples/sft/configs/crafter_lora_qwen0p6b.toml +7 -3
- examples/sft/evaluate.py +2 -4
- examples/sft/export_dataset.py +7 -4
- examples/swe/task_app/README.md +33 -3
- examples/swe/task_app/grpo_swe_mini.py +4 -1
- examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +50 -23
- examples/swe/task_app/hosted/inference/openai_client.py +4 -4
- examples/swe/task_app/hosted/policy_routes.py +0 -2
- examples/swe/task_app/hosted/rollout.py +0 -8
- examples/swe/task_app/morph_backend.py +178 -0
- examples/task_apps/crafter/task_app/README.md +1 -1
- examples/task_apps/crafter/task_app/grpo_crafter.py +70 -10
- examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +63 -27
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +48 -50
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +75 -36
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +31 -15
- examples/task_apps/enron/__init__.py +1 -0
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
- examples/task_apps/math/README.md +1 -2
- examples/task_apps/pokemon_red/README.md +3 -4
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
- examples/task_apps/pokemon_red/task_app.py +36 -5
- examples/task_apps/sokoban/README.md +2 -3
- examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
- examples/vlm/README.md +3 -3
- examples/vlm/configs/crafter_vlm_gpt4o.toml +5 -0
- examples/vlm/crafter_openai_vlm_agent.py +3 -5
- examples/vlm/filter_image_rows.py +1 -1
- examples/vlm/run_crafter_vlm_benchmark.py +2 -2
- examples/warming_up_to_rl/_utils.py +92 -0
- examples/warming_up_to_rl/analyze_trace_db.py +1 -1
- examples/warming_up_to_rl/configs/crafter_fft.toml +5 -0
- examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
- examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
- examples/warming_up_to_rl/export_trace_sft.py +174 -60
- examples/warming_up_to_rl/readme.md +63 -132
- examples/warming_up_to_rl/run_fft_and_save.py +1 -1
- examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
- examples/warming_up_to_rl/run_rl_and_save.py +1 -1
- examples/warming_up_to_rl/task_app/README.md +42 -0
- examples/warming_up_to_rl/task_app/grpo_crafter.py +827 -0
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1084 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +5 -0
- synth_ai/__init__.py +44 -30
- synth_ai/_utils/__init__.py +47 -0
- synth_ai/_utils/base_url.py +10 -0
- synth_ai/_utils/http.py +10 -0
- synth_ai/_utils/prompts.py +10 -0
- synth_ai/_utils/task_app_state.py +12 -0
- synth_ai/_utils/user_config.py +10 -0
- synth_ai/api/models/supported.py +144 -7
- synth_ai/api/train/__init__.py +13 -1
- synth_ai/api/train/builders.py +9 -3
- synth_ai/api/train/cli.py +155 -17
- synth_ai/api/train/config_finder.py +18 -11
- synth_ai/api/train/configs/__init__.py +8 -1
- synth_ai/api/train/configs/rl.py +32 -7
- synth_ai/api/train/configs/sft.py +6 -2
- synth_ai/api/train/configs/shared.py +59 -2
- synth_ai/api/train/env_resolver.py +13 -10
- synth_ai/auth/credentials.py +119 -0
- synth_ai/cli/__init__.py +61 -69
- synth_ai/cli/_modal_wrapper.py +7 -5
- synth_ai/cli/_typer_patch.py +0 -2
- synth_ai/cli/_validate_task_app.py +22 -4
- synth_ai/cli/commands/__init__.py +17 -0
- synth_ai/cli/commands/demo/__init__.py +6 -0
- synth_ai/cli/commands/demo/core.py +163 -0
- synth_ai/cli/commands/deploy/__init__.py +23 -0
- synth_ai/cli/commands/deploy/core.py +614 -0
- synth_ai/cli/commands/deploy/errors.py +72 -0
- synth_ai/cli/commands/deploy/validation.py +11 -0
- synth_ai/cli/commands/eval/__init__.py +19 -0
- synth_ai/cli/commands/eval/core.py +1109 -0
- synth_ai/cli/commands/eval/errors.py +81 -0
- synth_ai/cli/commands/eval/validation.py +133 -0
- synth_ai/cli/commands/filter/__init__.py +12 -0
- synth_ai/cli/commands/filter/core.py +388 -0
- synth_ai/cli/commands/filter/errors.py +55 -0
- synth_ai/cli/commands/filter/validation.py +77 -0
- synth_ai/cli/commands/help/__init__.py +177 -0
- synth_ai/cli/commands/help/core.py +73 -0
- synth_ai/cli/commands/status/__init__.py +64 -0
- synth_ai/cli/commands/status/client.py +192 -0
- synth_ai/cli/commands/status/config.py +92 -0
- synth_ai/cli/commands/status/errors.py +20 -0
- synth_ai/cli/commands/status/formatters.py +164 -0
- synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
- synth_ai/cli/commands/status/subcommands/files.py +79 -0
- synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
- synth_ai/cli/commands/status/subcommands/models.py +79 -0
- synth_ai/cli/commands/status/subcommands/runs.py +81 -0
- synth_ai/cli/commands/status/subcommands/summary.py +47 -0
- synth_ai/cli/commands/status/utils.py +114 -0
- synth_ai/cli/commands/train/__init__.py +53 -0
- synth_ai/cli/commands/train/core.py +21 -0
- synth_ai/cli/commands/train/errors.py +117 -0
- synth_ai/cli/commands/train/judge_schemas.py +199 -0
- synth_ai/cli/commands/train/judge_validation.py +304 -0
- synth_ai/cli/commands/train/validation.py +443 -0
- synth_ai/cli/demo.py +2 -162
- synth_ai/cli/deploy/__init__.py +28 -0
- synth_ai/cli/deploy/core.py +5 -0
- synth_ai/cli/deploy/errors.py +23 -0
- synth_ai/cli/deploy/validation.py +5 -0
- synth_ai/cli/eval/__init__.py +36 -0
- synth_ai/cli/eval/core.py +5 -0
- synth_ai/cli/eval/errors.py +31 -0
- synth_ai/cli/eval/validation.py +5 -0
- synth_ai/cli/filter/__init__.py +28 -0
- synth_ai/cli/filter/core.py +5 -0
- synth_ai/cli/filter/errors.py +23 -0
- synth_ai/cli/filter/validation.py +5 -0
- synth_ai/cli/legacy_root_backup.py +3 -1
- synth_ai/cli/lib/__init__.py +10 -0
- synth_ai/cli/lib/task_app_discovery.py +7 -0
- synth_ai/cli/lib/task_app_env.py +518 -0
- synth_ai/cli/modal_serve/__init__.py +12 -0
- synth_ai/cli/modal_serve/core.py +14 -0
- synth_ai/cli/modal_serve/errors.py +8 -0
- synth_ai/cli/modal_serve/validation.py +11 -0
- synth_ai/cli/recent.py +2 -1
- synth_ai/cli/serve/__init__.py +12 -0
- synth_ai/cli/serve/core.py +14 -0
- synth_ai/cli/serve/errors.py +8 -0
- synth_ai/cli/serve/validation.py +11 -0
- synth_ai/cli/setup.py +21 -0
- synth_ai/cli/status.py +7 -126
- synth_ai/cli/task_app_deploy.py +7 -0
- synth_ai/cli/task_app_list.py +25 -0
- synth_ai/cli/task_app_modal_serve.py +11 -0
- synth_ai/cli/task_app_serve.py +11 -0
- synth_ai/cli/task_apps.py +110 -1499
- synth_ai/cli/traces.py +1 -1
- synth_ai/cli/train/__init__.py +12 -0
- synth_ai/cli/train/core.py +21 -0
- synth_ai/cli/train/errors.py +8 -0
- synth_ai/cli/train/validation.py +24 -0
- synth_ai/cli/train.py +5 -0
- synth_ai/cli/turso.py +1 -1
- synth_ai/cli/watch.py +1 -1
- synth_ai/demos/__init__.py +10 -0
- synth_ai/demos/core/__init__.py +28 -1
- synth_ai/demos/crafter/__init__.py +1 -0
- synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
- synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
- synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
- synth_ai/demos/demo_registry.py +176 -0
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
- synth_ai/demos/math/__init__.py +1 -0
- synth_ai/demos/math/_common.py +16 -0
- synth_ai/demos/math/app.py +38 -0
- synth_ai/demos/math/config.toml +76 -0
- synth_ai/demos/math/deploy_modal.py +54 -0
- synth_ai/demos/math/modal_task_app.py +702 -0
- synth_ai/demos/math/task_app_entry.py +51 -0
- synth_ai/environments/environment/core.py +7 -1
- synth_ai/environments/examples/bandit/engine.py +0 -1
- synth_ai/environments/examples/bandit/environment.py +0 -1
- synth_ai/environments/examples/red/engine.py +33 -12
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
- synth_ai/environments/examples/red/environment.py +26 -0
- synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
- synth_ai/environments/examples/wordle/environment.py +0 -1
- synth_ai/evals/base.py +16 -5
- synth_ai/evals/client.py +1 -1
- synth_ai/http.py +8 -22
- synth_ai/inference/client.py +1 -1
- synth_ai/judge_schemas.py +4 -5
- synth_ai/learning/client.py +1 -1
- synth_ai/learning/health.py +1 -1
- synth_ai/learning/jobs.py +1 -1
- synth_ai/learning/rl/client.py +4 -2
- synth_ai/learning/rl/env_keys.py +1 -1
- synth_ai/learning/rl/secrets.py +1 -1
- synth_ai/learning/sft/client.py +1 -1
- synth_ai/learning/sft/data.py +407 -4
- synth_ai/learning/validators.py +4 -1
- synth_ai/streaming/__init__.py +29 -0
- synth_ai/streaming/config.py +94 -0
- synth_ai/streaming/handlers.py +469 -0
- synth_ai/streaming/streamer.py +301 -0
- synth_ai/streaming/types.py +95 -0
- synth_ai/task/apps/__init__.py +4 -2
- synth_ai/task/config.py +6 -4
- synth_ai/task/rubrics/__init__.py +1 -2
- synth_ai/task/rubrics/loaders.py +14 -10
- synth_ai/task/rubrics.py +219 -0
- synth_ai/task/trace_correlation_helpers.py +24 -11
- synth_ai/task/tracing_utils.py +14 -3
- synth_ai/task/validators.py +0 -1
- synth_ai/tracing_v3/abstractions.py +3 -3
- synth_ai/tracing_v3/config.py +15 -13
- synth_ai/tracing_v3/constants.py +21 -0
- synth_ai/tracing_v3/db_config.py +3 -1
- synth_ai/tracing_v3/decorators.py +10 -7
- synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
- synth_ai/tracing_v3/migration_helper.py +1 -2
- synth_ai/tracing_v3/session_tracer.py +7 -7
- synth_ai/tracing_v3/storage/base.py +29 -29
- synth_ai/tracing_v3/storage/config.py +3 -3
- synth_ai/tracing_v3/turso/daemon.py +8 -9
- synth_ai/tracing_v3/turso/native_manager.py +80 -72
- synth_ai/tracing_v3/utils.py +2 -2
- synth_ai/utils/__init__.py +101 -0
- synth_ai/utils/base_url.py +94 -0
- synth_ai/utils/cli.py +131 -0
- synth_ai/utils/env.py +294 -0
- synth_ai/utils/http.py +172 -0
- synth_ai/utils/modal.py +308 -0
- synth_ai/utils/process.py +212 -0
- synth_ai/utils/prompts.py +39 -0
- synth_ai/utils/sqld.py +122 -0
- synth_ai/utils/task_app_discovery.py +882 -0
- synth_ai/utils/task_app_env.py +186 -0
- synth_ai/utils/task_app_state.py +318 -0
- synth_ai/utils/user_config.py +137 -0
- synth_ai/v0/config/__init__.py +1 -5
- synth_ai/v0/config/base_url.py +1 -7
- synth_ai/v0/tracing/config.py +1 -1
- synth_ai/v0/tracing/decorators.py +1 -1
- synth_ai/v0/tracing/upload.py +1 -1
- synth_ai/v0/tracing_v1/config.py +1 -1
- synth_ai/v0/tracing_v1/decorators.py +1 -1
- synth_ai/v0/tracing_v1/upload.py +1 -1
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/METADATA +91 -32
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/RECORD +341 -154
- synth_ai/cli/man.py +0 -106
- synth_ai/cli/tui.py +0 -57
- synth_ai/compound/cais.py +0 -0
- synth_ai/core/experiment.py +0 -13
- synth_ai/core/system.py +0 -15
- synth_ai/demo_registry.py +0 -295
- synth_ai/handshake.py +0 -109
- synth_ai/tui/__init__.py +0 -5
- synth_ai/tui/__main__.py +0 -13
- synth_ai/tui/cli/__init__.py +0 -1
- synth_ai/tui/cli/query_experiments.py +0 -164
- synth_ai/tui/cli/query_experiments_v3.py +0 -164
- synth_ai/tui/dashboard.py +0 -906
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/top_level.txt +0 -0
|
@@ -417,8 +417,6 @@ async def step_policy(
|
|
|
417
417
|
inf_req = meta["inference_request"]
|
|
418
418
|
msgs = inf_req["messages"]
|
|
419
419
|
model_name = inf_req.get("model") or getattr(policy, "model", None) or ""
|
|
420
|
-
system_messages: list[str] = []
|
|
421
|
-
user_messages: list[str] = []
|
|
422
420
|
if msgs and len(msgs) > 0 and msgs[0]["role"] == "system":
|
|
423
421
|
sys_text = msgs[0]["content"]
|
|
424
422
|
policy_name = getattr(policy, "name", "") or type(policy).__name__.lower()
|
|
@@ -464,6 +462,8 @@ async def step_policy(
|
|
|
464
462
|
)
|
|
465
463
|
|
|
466
464
|
# Emit full system/user prompts for observability (no secrets included)
|
|
465
|
+
system_prompt_records: list[dict[str, Any]] = []
|
|
466
|
+
user_prompt_records: list[dict[str, Any]] = []
|
|
467
467
|
try:
|
|
468
468
|
|
|
469
469
|
def _as_text(content: object) -> str:
|
|
@@ -483,8 +483,6 @@ async def step_policy(
|
|
|
483
483
|
return "".join(parts)
|
|
484
484
|
return str(content)
|
|
485
485
|
|
|
486
|
-
system_prompt_records: list[dict[str, Any]] = []
|
|
487
|
-
user_prompt_records: list[dict[str, Any]] = []
|
|
488
486
|
for message in msgs:
|
|
489
487
|
role = message.get("role")
|
|
490
488
|
raw_content = message.get("content")
|
|
@@ -527,6 +525,11 @@ async def step_policy(
|
|
|
527
525
|
|
|
528
526
|
if tracing_context is not None:
|
|
529
527
|
try:
|
|
528
|
+
logger.info(
|
|
529
|
+
"[TRACE_DEBUG] record_policy_prompts sys=%s user=%s",
|
|
530
|
+
len(system_prompt_records),
|
|
531
|
+
len(user_prompt_records),
|
|
532
|
+
)
|
|
530
533
|
await tracing_context.record_policy_prompts(
|
|
531
534
|
system_prompt_records, user_prompt_records
|
|
532
535
|
)
|
|
@@ -782,9 +785,10 @@ async def step_policy(
|
|
|
782
785
|
"sokoban-react",
|
|
783
786
|
"crafter-react",
|
|
784
787
|
) and getattr(policy, "use_tools", True):
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
+
inf_req = meta.get("inference_request", {})
|
|
789
|
+
req_tools = inf_req.get("tools")
|
|
790
|
+
req_tool_choice = inf_req.get("tool_choice")
|
|
791
|
+
req_stop_after = inf_req.get("stop_after_tool_calls")
|
|
788
792
|
logger.info(
|
|
789
793
|
f"TOOLCALL_CONFIG: policy={policy_name} tools_present={bool(req_tools)} tool_choice={req_tool_choice} stop_after={req_stop_after}"
|
|
790
794
|
)
|
|
@@ -793,6 +797,8 @@ async def step_policy(
|
|
|
793
797
|
status_code=500,
|
|
794
798
|
detail=f"TOOLCALL_ASSERTION_FAIL: Missing tools or tool_choice!=required for policy {policy_name}",
|
|
795
799
|
)
|
|
800
|
+
if req_stop_after is None:
|
|
801
|
+
inf_req["stop_after_tool_calls"] = 1
|
|
796
802
|
|
|
797
803
|
# Call inference service with retries for Flash cold-start (503)
|
|
798
804
|
import time as _t
|
|
@@ -901,38 +907,71 @@ async def step_policy(
|
|
|
901
907
|
req_body["temperature"] = 0.1
|
|
902
908
|
meta["inference_request"] = req_body
|
|
903
909
|
|
|
904
|
-
#
|
|
905
|
-
#
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
|
|
910
|
-
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
910
|
+
# Message flattening: Convert multimodal content to text-only for non-vision models.
|
|
911
|
+
# SKIP message flattening for vision models to preserve image_url parts!
|
|
912
|
+
# The old code here was flattening multimodal content (list) to text-only (str),
|
|
913
|
+
# which strips out image_url parts. This breaks vision models.
|
|
914
|
+
# Only flatten for non-vision models that can't handle multimodal format.
|
|
915
|
+
is_vision_model = False
|
|
916
|
+
try:
|
|
917
|
+
# Check if the policy is a vision-capable policy
|
|
918
|
+
if isinstance(policy, CrafterPolicy):
|
|
919
|
+
is_vision_model = getattr(policy, "use_vision", False)
|
|
920
|
+
except Exception:
|
|
921
|
+
pass
|
|
922
|
+
|
|
923
|
+
logger.debug(f"🔊 [POLICY_ROUTES] is_vision_model={is_vision_model}, will_flatten={not is_vision_model}")
|
|
924
|
+
|
|
925
|
+
if not is_vision_model:
|
|
926
|
+
# Only flatten for non-vision models (backward compatibility)
|
|
927
|
+
req_body2 = meta.get("inference_request", {})
|
|
928
|
+
if isinstance(req_body2, dict):
|
|
929
|
+
msgs = req_body2.get("messages")
|
|
930
|
+
if isinstance(msgs, list):
|
|
931
|
+
new_msgs = []
|
|
932
|
+
changed = False
|
|
933
|
+
for m in msgs:
|
|
934
|
+
try:
|
|
935
|
+
if isinstance(m, dict):
|
|
936
|
+
content = m.get("content")
|
|
937
|
+
if isinstance(content, list):
|
|
938
|
+
parts: list[str] = []
|
|
939
|
+
for seg in content:
|
|
940
|
+
if isinstance(seg, dict):
|
|
941
|
+
txt = seg.get("text") or seg.get("content")
|
|
942
|
+
if isinstance(txt, str) and txt:
|
|
943
|
+
parts.append(txt)
|
|
944
|
+
m2 = dict(m)
|
|
945
|
+
m2["content"] = "\n".join(parts)
|
|
946
|
+
new_msgs.append(m2)
|
|
947
|
+
changed = True
|
|
948
|
+
else:
|
|
949
|
+
new_msgs.append(m)
|
|
927
950
|
else:
|
|
928
951
|
new_msgs.append(m)
|
|
929
|
-
|
|
952
|
+
except Exception:
|
|
930
953
|
new_msgs.append(m)
|
|
931
|
-
|
|
932
|
-
new_msgs
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
954
|
+
if changed:
|
|
955
|
+
req_body2["messages"] = new_msgs
|
|
956
|
+
meta["inference_request"] = req_body2
|
|
957
|
+
logger.debug(f"🔊 [POLICY_ROUTES] Flattened messages for non-vision model")
|
|
958
|
+
else:
|
|
959
|
+
logger.debug(f"🔊 [POLICY_ROUTES] Preserving multimodal content for vision model")
|
|
960
|
+
|
|
961
|
+
# DEBUG: Log final message structure before calling inference
|
|
962
|
+
final_req = meta.get("inference_request", {})
|
|
963
|
+
if isinstance(final_req, dict):
|
|
964
|
+
final_msgs = final_req.get("messages", [])
|
|
965
|
+
logger.debug(f"🔊 [POLICY_ROUTES_FINAL] Sending {len(final_msgs)} messages to inference")
|
|
966
|
+
for idx, msg in enumerate(final_msgs):
|
|
967
|
+
if isinstance(msg, dict):
|
|
968
|
+
content = msg.get("content")
|
|
969
|
+
logger.debug(f"🔊 [POLICY_ROUTES_FINAL] Message[{idx}]: type={type(content).__name__}, is_list={isinstance(content, list)}")
|
|
970
|
+
if isinstance(content, list):
|
|
971
|
+
logger.debug(f"🔊 [POLICY_ROUTES_FINAL] Content list has {len(content)} items")
|
|
972
|
+
for part_idx, part in enumerate(content[:3]): # Show first 3 items
|
|
973
|
+
if isinstance(part, dict):
|
|
974
|
+
logger.debug(f"🔊 [POLICY_ROUTES_FINAL] Part[{part_idx}]: type={part.get('type')}")
|
|
936
975
|
|
|
937
976
|
_t_start = _t.time()
|
|
938
977
|
call_started_at = datetime.utcnow()
|
|
@@ -491,6 +491,11 @@ class RolloutTracingContext:
|
|
|
491
491
|
getattr(request.record, "trace_format", "compact") or "compact"
|
|
492
492
|
).lower()
|
|
493
493
|
self.return_trace = bool(getattr(request.record, "return_trace", False))
|
|
494
|
+
logger.warning(
|
|
495
|
+
"[TRACE_DEBUG] RolloutTracingContext init: trace_format=%s return_trace=%s",
|
|
496
|
+
self.trace_format,
|
|
497
|
+
self.return_trace,
|
|
498
|
+
)
|
|
494
499
|
self.sft_output_dir = getattr(fastapi_request.app.state, "sft_output_dir", None)
|
|
495
500
|
self.session_trace = None
|
|
496
501
|
self.metadata_updates: dict[str, Any] = {}
|
|
@@ -590,7 +595,7 @@ class RolloutTracingContext:
|
|
|
590
595
|
# Debug: Check message count
|
|
591
596
|
if self.tracer and self.tracer._current_trace:
|
|
592
597
|
msg_count = len(self.tracer._current_trace.markov_blanket_message_history)
|
|
593
|
-
logger.
|
|
598
|
+
logger.warning("[TRACE_DEBUG] After record_policy_prompts: %s messages", msg_count)
|
|
594
599
|
|
|
595
600
|
def _content_to_text(self, content: Any) -> str:
|
|
596
601
|
if isinstance(content, str):
|
|
@@ -669,6 +674,11 @@ class RolloutTracingContext:
|
|
|
669
674
|
message_type="assistant", # Map to standard assistant message type
|
|
670
675
|
metadata={**self._message_metadata(), "is_tool_call": True},
|
|
671
676
|
)
|
|
677
|
+
if self.tracer._current_trace:
|
|
678
|
+
logger.warning(
|
|
679
|
+
"[TRACE_DEBUG] After tool invocation: messages=%s",
|
|
680
|
+
len(self.tracer._current_trace.markov_blanket_message_history),
|
|
681
|
+
)
|
|
672
682
|
except Exception as exc:
|
|
673
683
|
logger.debug("TRACING_TOOL_MSG_FAIL: %s", exc)
|
|
674
684
|
|
|
@@ -985,10 +995,19 @@ class RolloutTracingContext:
|
|
|
985
995
|
def build_trace_payload(self, session_trace: Any) -> dict[str, Any] | None:
|
|
986
996
|
if not self.return_trace or session_trace is None:
|
|
987
997
|
return None
|
|
988
|
-
|
|
998
|
+
|
|
999
|
+
# For both "full" and "structured" formats, return the complete session trace
|
|
1000
|
+
# The CLI (synth-ai eval) expects this for proper trace storage
|
|
1001
|
+
if self.trace_format in ("full", "structured"):
|
|
989
1002
|
payload = session_trace.to_dict()
|
|
990
1003
|
payload.setdefault("metadata", {}).update(self.metadata_updates)
|
|
1004
|
+
logger.warning(
|
|
1005
|
+
"[TRACE_DEBUG] build_trace_payload returning structured trace with messages=%s",
|
|
1006
|
+
len(payload.get("markov_blanket_message_history") or []),
|
|
1007
|
+
)
|
|
991
1008
|
return payload
|
|
1009
|
+
|
|
1010
|
+
# For "compact" format, return only summary stats
|
|
992
1011
|
metadata = dict(session_trace.metadata)
|
|
993
1012
|
metadata.update(self.metadata_updates)
|
|
994
1013
|
return {
|
|
@@ -1173,14 +1192,6 @@ async def execute_rollout(
|
|
|
1173
1192
|
logger.debug(f"TRACER_FACTORY_FAIL: {exc}")
|
|
1174
1193
|
tracing_context = RolloutTracingContext(tracer_instance, request, req)
|
|
1175
1194
|
await tracing_context.start_session()
|
|
1176
|
-
# Print whether tracing is active for this rollout
|
|
1177
|
-
try:
|
|
1178
|
-
print(
|
|
1179
|
-
f"[rollout] tracing enabled={bool(tracing_context.enabled)} run_id={request.run_id}",
|
|
1180
|
-
flush=True,
|
|
1181
|
-
)
|
|
1182
|
-
except Exception:
|
|
1183
|
-
pass
|
|
1184
1195
|
|
|
1185
1196
|
# Register run
|
|
1186
1197
|
registry.register_run(request.run_id)
|
|
@@ -1625,16 +1636,21 @@ async def execute_rollout(
|
|
|
1625
1636
|
|
|
1626
1637
|
elif op == "env":
|
|
1627
1638
|
if not pending_tool_calls:
|
|
1639
|
+
# Instead of failing, inject a no-op action to keep the rollout going
|
|
1628
1640
|
with contextlib.suppress(Exception):
|
|
1629
1641
|
logger.warning(
|
|
1630
|
-
"
|
|
1642
|
+
"POLICY_STEP_NOOP: missing tool_calls; injecting noop action run_id=%s op_idx=%s",
|
|
1631
1643
|
request.run_id,
|
|
1632
1644
|
str(op_idx),
|
|
1633
1645
|
)
|
|
1634
|
-
|
|
1635
|
-
|
|
1636
|
-
|
|
1637
|
-
|
|
1646
|
+
# Create a noop tool call in the format expected by the environment
|
|
1647
|
+
pending_tool_calls = [
|
|
1648
|
+
{
|
|
1649
|
+
"id": f"noop_{op_idx}",
|
|
1650
|
+
"tool": "interact",
|
|
1651
|
+
"arguments": {"action": "noop"},
|
|
1652
|
+
}
|
|
1653
|
+
]
|
|
1638
1654
|
|
|
1639
1655
|
# Environment step
|
|
1640
1656
|
from .environment_routes import EnvStepRequest, step_environment
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
This mirrors the structure of the Crafter task app wrapper while delegating
|
|
4
4
|
all configuration to the colocated `grpo_enron.py` module. Normal usage should
|
|
5
|
-
prefer invoking `uvx synth-ai
|
|
5
|
+
prefer invoking `uvx synth-ai deploy --runtime uvicorn grpo-enron`, but this module remains for
|
|
6
6
|
direct execution or importing the FastAPI app object.
|
|
7
7
|
"""
|
|
8
8
|
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
This directory hosts the legacy entrypoint for the math single-step task app. Prefer starting the app via:
|
|
4
4
|
|
|
5
5
|
```bash
|
|
6
|
-
uvx synth-ai
|
|
6
|
+
uvx synth-ai deploy --runtime uvicorn math-single-step --env-file examples/rl/.env --port 8101
|
|
7
7
|
```
|
|
8
8
|
|
|
9
9
|
If you need to run it directly (e.g., for Modal `modal deploy` compatibility), use:
|
|
@@ -19,4 +19,3 @@ Environment variables:
|
|
|
19
19
|
- `MATH_DATASET_DEFAULT_SPLIT`, `MATH_DATASET_VALIDATION_SPLIT`, `MATH_DATASET_TEST_SPLIT`
|
|
20
20
|
|
|
21
21
|
The task app enforces a single `math_submit` tool call per episode, enabling RL to reward correct final answers and penalise missing or malformed submissions.
|
|
22
|
-
|
|
@@ -17,7 +17,7 @@ A reinforcement learning environment for Pokémon Red using PyBoy emulation with
|
|
|
17
17
|
|
|
18
18
|
```bash
|
|
19
19
|
# From synth-ai root
|
|
20
|
-
uv run -m synth_ai task-app
|
|
20
|
+
uv run -m synth_ai task-app deploy --runtime uvicorn pokemon_red --port 8913
|
|
21
21
|
```
|
|
22
22
|
|
|
23
23
|
### 2. Run a Random Rollout
|
|
@@ -232,7 +232,7 @@ uv add pyboy
|
|
|
232
232
|
lsof -ti :8913 | xargs -r kill -9
|
|
233
233
|
|
|
234
234
|
# Or use a different port
|
|
235
|
-
uv run -m synth_ai task-app
|
|
235
|
+
uv run -m synth_ai task-app deploy --runtime uvicorn pokemon_red --port 8914
|
|
236
236
|
```
|
|
237
237
|
|
|
238
238
|
## Examples
|
|
@@ -249,7 +249,7 @@ cd /Users/joshpurtell/Documents/GitHub/synth-ai
|
|
|
249
249
|
echo "OPENAI_API_KEY=sk-..." >> .env
|
|
250
250
|
|
|
251
251
|
# 2. Start the task app server (in background)
|
|
252
|
-
nohup sh -c 'printf "n\n" | uv run -m synth_ai task-app
|
|
252
|
+
nohup sh -c 'printf "n\n" | uv run -m synth_ai task-app deploy --runtime uvicorn pokemon_red --port 8913 --no-reload' > nohup_pokemon.log 2>&1 &
|
|
253
253
|
|
|
254
254
|
# Wait for startup
|
|
255
255
|
sleep 8
|
|
@@ -354,4 +354,3 @@ TOTAL REWARD: 705 points
|
|
|
354
354
|
- **PyBoy**: Game Boy emulator - https://github.com/Baekalfen/PyBoy
|
|
355
355
|
- **Pokémon Red Disassembly**: RAM map reference - https://github.com/pret/pokered
|
|
356
356
|
- **Datacrystal.org**: Memory address documentation
|
|
357
|
-
|
|
@@ -1,11 +1,12 @@
|
|
|
1
|
-
# Evaluation config for Pokemon Red with image-only input
|
|
1
|
+
# Evaluation config for Pokemon Red with image-only input and NEW REWARD SYSTEM
|
|
2
2
|
# This config uses GPT-4o mini with only image data (no text observations)
|
|
3
|
+
# Uses the comprehensive reward system with deterministic progress milestones
|
|
3
4
|
|
|
4
5
|
[eval]
|
|
5
6
|
app_id = "pokemon_red"
|
|
6
7
|
model = "gpt-4o-mini-2024-07-18"
|
|
7
|
-
seeds = [0, 1, 2, 3, 4
|
|
8
|
-
max_turns =
|
|
8
|
+
seeds = [0, 1, 2, 3, 4] # Test with fewer seeds for quick results
|
|
9
|
+
max_turns = 20 # Allow more turns to see progress
|
|
9
10
|
concurrency = 1 # Keep low initially to avoid issues
|
|
10
11
|
env_name = "pokemon_red"
|
|
11
12
|
policy_name = "pokemon_red_policy"
|
|
@@ -13,7 +14,7 @@ trace_format = "full"
|
|
|
13
14
|
return_trace = true
|
|
14
15
|
|
|
15
16
|
[eval.env_config]
|
|
16
|
-
max_steps_per_episode =
|
|
17
|
+
max_steps_per_episode = 20
|
|
17
18
|
|
|
18
19
|
[eval.policy_config]
|
|
19
20
|
provider = "openai"
|
|
@@ -24,6 +25,6 @@ top_p = 0.95
|
|
|
24
25
|
max_tokens = 512
|
|
25
26
|
use_vision = true
|
|
26
27
|
image_only_mode = true
|
|
27
|
-
max_llm_calls =
|
|
28
|
+
max_llm_calls = 20
|
|
28
29
|
|
|
29
30
|
|
|
@@ -129,7 +129,7 @@ async def main():
|
|
|
129
129
|
print("✓ Server is healthy")
|
|
130
130
|
except Exception as e:
|
|
131
131
|
print(f"❌ Server not responding: {e}")
|
|
132
|
-
print(f" Start it with: uv run -m synth_ai task-app
|
|
132
|
+
print(f" Start it with: uv run -m synth_ai task-app deploy --runtime uvicorn pokemon_red --port 8913")
|
|
133
133
|
return
|
|
134
134
|
|
|
135
135
|
# Check API key
|
|
@@ -222,4 +222,3 @@ async def main():
|
|
|
222
222
|
|
|
223
223
|
if __name__ == "__main__":
|
|
224
224
|
asyncio.run(main())
|
|
225
|
-
|
|
@@ -12,7 +12,7 @@ from synth_ai.environments.examples.red.taskset import INSTANCE as RED_DEFAULT_I
|
|
|
12
12
|
from synth_ai.environments.examples.red.engine_helpers.reward_library.pallet_town_progression import (
|
|
13
13
|
PalletTownProgressionCompositeReward,
|
|
14
14
|
)
|
|
15
|
-
from synth_ai.task.apps import TaskAppEntry, register_task_app
|
|
15
|
+
from synth_ai.task.apps import ModalDeploymentConfig, TaskAppEntry, register_task_app
|
|
16
16
|
from synth_ai.task.contracts import (
|
|
17
17
|
RolloutMetrics,
|
|
18
18
|
RolloutRequest,
|
|
@@ -260,8 +260,10 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
|
|
|
260
260
|
{
|
|
261
261
|
"role": "system",
|
|
262
262
|
"content": (
|
|
263
|
-
"You are controlling Pokémon Red.
|
|
264
|
-
"
|
|
263
|
+
"You are controlling Pokémon Red, a classic Game Boy game. You can see the game screen in the images provided. "
|
|
264
|
+
"Your goal is to make progress in the game. Use the execute_sequence tool to press buttons. "
|
|
265
|
+
"Choose appropriate button presses based on what you see in the game screen. "
|
|
266
|
+
"Always respond with exactly one tool call in the format: <tool_call>{\"name\": \"execute_sequence\", \"arguments\": {...}}</tool_call>"
|
|
265
267
|
),
|
|
266
268
|
},
|
|
267
269
|
{
|
|
@@ -788,11 +790,40 @@ def build_config() -> TaskAppConfig:
|
|
|
788
790
|
register_task_app(
|
|
789
791
|
entry=TaskAppEntry(
|
|
790
792
|
app_id="pokemon_red",
|
|
791
|
-
description="Pokémon Red demo task app",
|
|
793
|
+
description="Pokémon Red demo task app with vision support",
|
|
792
794
|
config_factory=build_config,
|
|
793
795
|
aliases=("pokemon_red_demo",),
|
|
794
796
|
env_files=(),
|
|
795
|
-
modal=
|
|
797
|
+
modal=ModalDeploymentConfig(
|
|
798
|
+
app_name="pokemon-red-vision-task-app",
|
|
799
|
+
python_version="3.11",
|
|
800
|
+
pip_packages=(
|
|
801
|
+
"fastapi>=0.100.0",
|
|
802
|
+
"uvicorn>=0.23.0",
|
|
803
|
+
"pydantic>=2.0.0",
|
|
804
|
+
"numpy>=1.24.0",
|
|
805
|
+
"aiohttp>=3.8.0",
|
|
806
|
+
"httpx>=0.24.0",
|
|
807
|
+
"python-dotenv>=1.0.1",
|
|
808
|
+
# Tracing/DB runtime deps
|
|
809
|
+
"sqlalchemy>=2.0.42",
|
|
810
|
+
"aiosqlite>=0.21.0",
|
|
811
|
+
"greenlet>=3.2.3",
|
|
812
|
+
# Pokemon Red environment
|
|
813
|
+
"pyboy>=2.0.0",
|
|
814
|
+
"pillow>=9.0.0",
|
|
815
|
+
),
|
|
816
|
+
extra_local_dirs=(
|
|
817
|
+
# Mount repo root so local modules resolve when deployed on Modal
|
|
818
|
+
("/Users/joshpurtell/Documents/GitHub/synth-ai", "/opt/synth_ai_repo"),
|
|
819
|
+
("/Users/joshpurtell/Documents/GitHub/synth-ai/synth_ai", "/opt/synth_ai_repo/synth_ai"),
|
|
820
|
+
("/Users/joshpurtell/Documents/GitHub/synth-ai/examples/task_apps/pokemon_red", "/opt/synth_ai_repo/examples/task_apps/pokemon_red"),
|
|
821
|
+
),
|
|
822
|
+
secret_names=("openai-api-key", "groq-api-key"),
|
|
823
|
+
memory=16384,
|
|
824
|
+
cpu=4.0,
|
|
825
|
+
max_containers=10,
|
|
826
|
+
),
|
|
796
827
|
)
|
|
797
828
|
)
|
|
798
829
|
|
|
@@ -20,7 +20,7 @@ Sokoban is a classic puzzle game where the player must push boxes onto target lo
|
|
|
20
20
|
cd /path/to/synth-ai
|
|
21
21
|
|
|
22
22
|
# Start the Sokoban task app on port 8911
|
|
23
|
-
uvx synth-ai task-app
|
|
23
|
+
uvx synth-ai task-app deploy --runtime uvicorn sokoban --port 8911
|
|
24
24
|
```
|
|
25
25
|
|
|
26
26
|
The server will be available at `http://localhost:8911`.
|
|
@@ -283,7 +283,7 @@ lsof -i :8911
|
|
|
283
283
|
kill -9 $(lsof -ti :8911)
|
|
284
284
|
|
|
285
285
|
# Restart
|
|
286
|
-
uvx synth-ai task-app
|
|
286
|
+
uvx synth-ai task-app deploy --runtime uvicorn sokoban --port 8911
|
|
287
287
|
```
|
|
288
288
|
|
|
289
289
|
## Examples
|
|
@@ -304,4 +304,3 @@ To add new features:
|
|
|
304
304
|
## License
|
|
305
305
|
|
|
306
306
|
MIT
|
|
307
|
-
|
|
@@ -1,24 +1,22 @@
|
|
|
1
1
|
# Verilog Eval Config for Groq Qwen3-32B
|
|
2
|
-
# Quick eval to test Verilog task app before RL training
|
|
3
|
-
|
|
4
|
-
[task_app]
|
|
5
|
-
# Update this with your Modal URL after deployment
|
|
6
|
-
url = "https://synth-laboratories--grpo-verilog-task-app-fastapi-app-dev.modal.run"
|
|
2
|
+
# Quick eval to test the Verilog task app before RL training
|
|
7
3
|
|
|
8
4
|
[eval]
|
|
9
|
-
|
|
5
|
+
app_id = "grpo-verilog"
|
|
6
|
+
task_app_url = "https://synth-laboratories--grpo-verilog-task-app-fastapi-app-dev.modal.run"
|
|
7
|
+
model = "groq:qwen3-32b"
|
|
10
8
|
seeds = [0, 1, 2]
|
|
11
|
-
|
|
9
|
+
max_turns = 15
|
|
10
|
+
concurrency = 1
|
|
11
|
+
return_trace = true
|
|
12
|
+
trace_format = "structured"
|
|
13
|
+
|
|
14
|
+
[eval.env_config]
|
|
15
|
+
difficulty = "medium"
|
|
12
16
|
|
|
13
|
-
[
|
|
17
|
+
[eval.policy_config]
|
|
14
18
|
provider = "groq"
|
|
15
19
|
model = "qwen/qwen3-32b"
|
|
16
20
|
temperature = 0.2
|
|
17
21
|
max_tokens = 768
|
|
18
22
|
inference_url = "https://api.groq.com/openai/v1/chat/completions"
|
|
19
|
-
|
|
20
|
-
[env]
|
|
21
|
-
difficulty = "medium" # Can be "easy", "medium", or "hard"
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"""Compatibility wrapper for the GRPO Verilog task app.
|
|
2
2
|
|
|
3
3
|
This mirrors the Crafter task app wrapper while delegating configuration to
|
|
4
|
-
`grpo_verilog.py`. Normal usage should prefer `uvx synth-ai
|
|
4
|
+
`grpo_verilog.py`. Normal usage should prefer `uvx synth-ai deploy --runtime uvicorn grpo-verilog`,
|
|
5
5
|
but the module remains for direct execution or importing the FastAPI app.
|
|
6
6
|
"""
|
|
7
7
|
|
examples/vlm/README.md
CHANGED
|
@@ -21,8 +21,8 @@ plumbing with lightweight utilities for dataset curation and training.
|
|
|
21
21
|
3. **Export multimodal SFT rows**
|
|
22
22
|
```
|
|
23
23
|
uv run python examples/warming_up_to_rl/export_trace_sft.py \
|
|
24
|
-
|
|
25
|
-
--output examples/vlm/output/
|
|
24
|
+
--db traces/v3/task_app_traces_<timestamp>.db \
|
|
25
|
+
--output examples/vlm/output/crafter_sft_full.jsonl
|
|
26
26
|
```
|
|
27
27
|
The exporter now emits `metadata.has_image`, `metadata.user_has_image`, and
|
|
28
28
|
`metadata.assistant_has_image` flags per turn.
|
|
@@ -30,7 +30,7 @@ plumbing with lightweight utilities for dataset curation and training.
|
|
|
30
30
|
4. **Filter to image-rich turns**
|
|
31
31
|
```
|
|
32
32
|
uv run python examples/vlm/filter_image_rows.py \
|
|
33
|
-
--input examples/vlm/output/
|
|
33
|
+
--input examples/vlm/output/crafter_sft_full.jsonl \
|
|
34
34
|
--output examples/vlm/output/crafter_vlm_dataset.jsonl
|
|
35
35
|
```
|
|
36
36
|
|
|
@@ -24,6 +24,7 @@ import asyncio
|
|
|
24
24
|
import base64
|
|
25
25
|
import json
|
|
26
26
|
import os
|
|
27
|
+
from contextlib import suppress
|
|
27
28
|
from pathlib import Path
|
|
28
29
|
from typing import Any
|
|
29
30
|
from uuid import uuid4
|
|
@@ -62,7 +63,7 @@ class EpisodeResult:
|
|
|
62
63
|
if unlocked:
|
|
63
64
|
self.achievements.add(str(name))
|
|
64
65
|
reward = obs.get("reward_last_step")
|
|
65
|
-
if isinstance(reward,
|
|
66
|
+
if isinstance(reward, int | float):
|
|
66
67
|
self.total_reward += float(reward)
|
|
67
68
|
|
|
68
69
|
|
|
@@ -107,11 +108,8 @@ def _decode_and_save_image(observation: dict[str, Any], path: Path) -> None:
|
|
|
107
108
|
if not isinstance(base64_data, str) or not base64_data:
|
|
108
109
|
return
|
|
109
110
|
path.parent.mkdir(parents=True, exist_ok=True)
|
|
110
|
-
|
|
111
|
+
with suppress(Exception):
|
|
111
112
|
path.write_bytes(base64.b64decode(base64_data))
|
|
112
|
-
except Exception:
|
|
113
|
-
# Best-effort; corrupted frames should not halt rollout
|
|
114
|
-
pass
|
|
115
113
|
|
|
116
114
|
|
|
117
115
|
def _normalise_openai_request(payload: dict[str, Any], model: str, temperature: float) -> dict[str, Any]:
|
|
@@ -8,7 +8,7 @@ output now that each record's metadata includes `has_image`, `user_has_image`, a
|
|
|
8
8
|
|
|
9
9
|
Usage:
|
|
10
10
|
uv run python examples/vlm/filter_image_rows.py \
|
|
11
|
-
--input examples/sft/ft_data/
|
|
11
|
+
--input examples/sft/ft_data/crafter_sft.jsonl \
|
|
12
12
|
--output examples/vlm/output/crafter_vlm_dataset.jsonl
|
|
13
13
|
"""
|
|
14
14
|
|
|
@@ -224,7 +224,7 @@ async def _run_episode(
|
|
|
224
224
|
if unlocked:
|
|
225
225
|
achievements.add(str(name))
|
|
226
226
|
reward = obs.get("reward_last_step")
|
|
227
|
-
if isinstance(reward,
|
|
227
|
+
if isinstance(reward, int | float):
|
|
228
228
|
total_reward += float(reward)
|
|
229
229
|
|
|
230
230
|
_save_observation_frame(env_response, frames_dir / f"step_{step_idx + 1:03d}.png")
|
|
@@ -263,7 +263,7 @@ def _summarise(results: list[EpisodeResult]) -> dict[str, Any]:
|
|
|
263
263
|
"mean_steps": round(mean_steps, 2),
|
|
264
264
|
"mean_achievements": round(mean_achievements, 2),
|
|
265
265
|
"total_tool_calls": sum(r.tool_calls for r in mode_results),
|
|
266
|
-
"achievements":
|
|
266
|
+
"achievements": dict(sorted(achievement_counts.items())),
|
|
267
267
|
}
|
|
268
268
|
return summary
|
|
269
269
|
|