synth-ai 0.2.9.dev0__py3-none-any.whl → 0.2.23.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- examples/README.md +1 -0
- examples/__init__.py +16 -0
- examples/analyze_semantic_words.sh +17 -0
- examples/baseline/banking77_baseline.py +243 -0
- examples/baseline/banking77_pipeline_baseline.py +294 -0
- examples/baseline/crafter_baseline.py +407 -0
- examples/baseline/pokemon_red_baseline.py +326 -0
- examples/baseline/simple_baseline.py +56 -0
- examples/baseline/warming_up_to_rl_baseline.py +239 -0
- examples/blog_posts/gepa/README.md +355 -0
- examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
- examples/blog_posts/gepa/configs/banking77_gepa_test.toml +80 -0
- examples/blog_posts/gepa/configs/banking77_mipro_local.toml +50 -0
- examples/blog_posts/gepa/configs/banking77_pipeline_gepa_local.toml +101 -0
- examples/blog_posts/gepa/configs/banking77_pipeline_gepa_test.toml +96 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +57 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +35 -0
- examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +51 -0
- examples/blog_posts/gepa/configs/hover_gepa_local.toml +57 -0
- examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +35 -0
- examples/blog_posts/gepa/configs/hover_mipro_local.toml +51 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +57 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +35 -0
- examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +51 -0
- examples/blog_posts/gepa/configs/pupa_gepa_local.toml +58 -0
- examples/blog_posts/gepa/configs/pupa_mipro_local.toml +52 -0
- examples/blog_posts/gepa/deploy_banking77_task_app.sh +54 -0
- examples/blog_posts/gepa/gepa_baseline.py +204 -0
- examples/blog_posts/gepa/query_prompts_example.py +97 -0
- examples/blog_posts/gepa/run_gepa_banking77.sh +112 -0
- examples/blog_posts/gepa/run_gepa_banking77_pipeline.sh +163 -0
- examples/blog_posts/gepa/task_apps.py +105 -0
- examples/blog_posts/gepa/test_gepa_local.sh +67 -0
- examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
- examples/blog_posts/mipro/README.md +415 -0
- examples/blog_posts/mipro/configs/banking77_mipro_local.toml +91 -0
- examples/blog_posts/mipro/configs/banking77_mipro_test.toml +87 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gemini_flash_lite_local.toml +98 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gpt41mini_local.toml +96 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_local.toml +94 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_test.toml +170 -0
- examples/blog_posts/mipro/deploy_banking77_pipeline_task_app.sh +59 -0
- examples/blog_posts/mipro/deploy_banking77_task_app.sh +41 -0
- examples/blog_posts/mipro/multi_step.md +79 -0
- examples/blog_posts/mipro/run_mipro_banking77.sh +191 -0
- examples/blog_posts/mipro/run_mipro_banking77_pipeline.sh +171 -0
- examples/blog_posts/mipro/run_mipro_banking77_pipeline_gemini_flash_lite.sh +177 -0
- examples/blog_posts/mipro/run_mipro_banking77_pipeline_gpt41mini.sh +173 -0
- examples/blog_posts/mipro/verify_banking77_setup.sh +117 -0
- examples/blog_posts/pokemon_vl/README.md +98 -0
- examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
- examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
- examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
- examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
- examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
- examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
- examples/blog_posts/pokemon_vl/extract_images.py +239 -0
- examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
- examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
- examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
- examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
- examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
- examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
- examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
- examples/blog_posts/warming_up_to_rl/README.md +158 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
- examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
- examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
- examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
- examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
- examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
- examples/crafter_debug_render.py +186 -0
- examples/dev/qwen3_32b_qlora_4xh100.toml +45 -0
- examples/gepa/banking77_pipeline_gepa.toml +96 -0
- examples/gepa/multi_stage_gepa_example.toml +84 -0
- examples/gepa/run_gepa_banking77_pipeline.sh +157 -0
- examples/multi_step/SFT_README.md +147 -0
- examples/multi_step/configs/README_verilog_rl.md +77 -0
- examples/multi_step/configs/VERILOG_REWARDS.md +103 -0
- examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +196 -0
- examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
- examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
- examples/multi_step/configs/crafter_rl_outcome.toml +75 -0
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +145 -0
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +84 -0
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +79 -0
- examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
- examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
- examples/multi_step/configs/crafter_synth_backend.md +40 -0
- examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
- examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
- examples/multi_step/configs/verilog_rl_lora.toml +147 -0
- examples/multi_step/convert_traces_to_sft.py +84 -0
- examples/multi_step/crafter_rl_lora.md +70 -0
- examples/multi_step/judges/crafter_backend_judge.py +220 -0
- examples/multi_step/judges/verilog_backend_judge.py +234 -0
- examples/multi_step/readme.md +48 -0
- examples/multi_step/run_sft_qwen30b.sh +45 -0
- examples/multi_step/sse_metrics_streaming_notes.md +357 -0
- examples/multi_step/task_app_config_notes.md +494 -0
- examples/multi_step/verilog_rl_lora.md +218 -0
- examples/qwen_coder/README.md +102 -0
- examples/qwen_coder/_shared.py +113 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +60 -0
- examples/qwen_coder/configs/coder_lora_4b.toml +61 -0
- examples/qwen_coder/configs/coder_lora_small.toml +57 -0
- examples/qwen_coder/generate_dataset.py +98 -0
- examples/qwen_coder/infer_ft_smoke.py +65 -0
- examples/qwen_coder/infer_prod_proxy.py +73 -0
- examples/qwen_coder/infer_via_synth.py +87 -0
- examples/qwen_coder/scripts/infer_coder.sh +19 -0
- examples/qwen_coder/scripts/train_coder_30b.sh +22 -0
- examples/qwen_coder/sft_full_17b.py +103 -0
- examples/qwen_coder/sft_lora_30b.py +110 -0
- examples/qwen_coder/subset_jsonl.py +39 -0
- examples/qwen_coder/todos.md +38 -0
- examples/qwen_coder/validate_jsonl.py +60 -0
- examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
- examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
- examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
- examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
- examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
- examples/qwen_vl/QUICKSTART.md +327 -0
- examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
- examples/qwen_vl/README.md +152 -0
- examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
- examples/qwen_vl/RL_VISION_TESTING.md +333 -0
- examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
- examples/qwen_vl/SETUP_COMPLETE.md +274 -0
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
- examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
- examples/qwen_vl/__init__.py +2 -0
- examples/qwen_vl/collect_data_via_cli.md +415 -0
- examples/qwen_vl/collect_vision_traces.py +368 -0
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
- examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
- examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
- examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
- examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
- examples/qwen_vl/configs/filter_vision_test.toml +8 -0
- examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
- examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
- examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
- examples/qwen_vl/run_vision_comparison.sh +61 -0
- examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
- examples/qwen_vl/test_image_validation.py +201 -0
- examples/qwen_vl/test_sft_vision_data.py +110 -0
- examples/rl/README.md +169 -0
- examples/rl/configs/eval_base_qwen.toml +17 -0
- examples/rl/configs/eval_rl_qwen.toml +13 -0
- examples/rl/configs/rl_from_base_qwen.toml +62 -0
- examples/rl/configs/rl_from_base_qwen17.toml +80 -0
- examples/rl/configs/rl_from_ft_qwen.toml +37 -0
- examples/rl/download_dataset.py +80 -0
- examples/rl/run_eval.py +436 -0
- examples/rl/run_rl_and_save.py +111 -0
- examples/rl/task_app/README.md +21 -0
- {synth_ai/task/apps → examples/rl/task_app}/math_single_step.py +188 -50
- examples/rl/task_app/math_task_app.py +111 -0
- examples/run_crafter_demo.sh +10 -0
- examples/sdk_prompt_learning_example.py +55 -0
- examples/sft/README.md +139 -0
- examples/sft/configs/crafter_fft_qwen0p6b.toml +49 -0
- examples/sft/configs/crafter_lora_qwen0p6b.toml +49 -0
- examples/sft/evaluate.py +117 -0
- examples/sft/export_dataset.py +120 -0
- examples/sft/generate_traces.py +164 -0
- examples/swe/__init__.py +12 -0
- examples/swe/task_app/README.md +135 -0
- examples/swe/task_app/__init__.py +2 -0
- examples/swe/task_app/grpo_swe_mini.py +604 -0
- examples/swe/task_app/grpo_swe_mini_task_app.py +124 -0
- examples/swe/task_app/hosted/README.md +173 -0
- examples/swe/task_app/hosted/__init__.py +5 -0
- examples/swe/task_app/hosted/branching.py +143 -0
- examples/swe/task_app/hosted/environment_routes.py +1289 -0
- examples/swe/task_app/hosted/envs/__init__.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
- examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
- examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
- examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
- examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
- examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +1191 -0
- examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
- examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
- examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
- examples/swe/task_app/hosted/hosted_app.py +204 -0
- examples/swe/task_app/hosted/inference/__init__.py +5 -0
- examples/swe/task_app/hosted/inference/openai_client.py +584 -0
- examples/swe/task_app/hosted/main.py +100 -0
- examples/swe/task_app/hosted/policy_routes.py +1094 -0
- examples/swe/task_app/hosted/registry.py +195 -0
- examples/swe/task_app/hosted/rollout.py +1905 -0
- examples/swe/task_app/hosted/storage/__init__.py +5 -0
- examples/swe/task_app/hosted/storage/volume.py +211 -0
- examples/swe/task_app/hosted/test_agents.py +161 -0
- examples/swe/task_app/hosted/test_service.py +136 -0
- examples/swe/task_app/hosted/utils.py +62 -0
- examples/swe/task_app/morph_backend.py +178 -0
- examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
- examples/task_apps/TESTING.md +275 -0
- examples/task_apps/banking77/__init__.py +6 -0
- examples/task_apps/banking77/banking77_task_app.py +912 -0
- examples/task_apps/banking77/deploy_wrapper.py +46 -0
- examples/task_apps/banking77_pipeline/__init__.py +6 -0
- examples/task_apps/banking77_pipeline/banking77_pipeline_task_app.py +489 -0
- examples/task_apps/banking77_pipeline/deploy_wrapper.py +50 -0
- examples/task_apps/crafter/CREATE_SFT_DATASET.md +286 -0
- examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
- examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +187 -0
- examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +281 -0
- examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
- examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
- examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
- examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
- examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
- examples/task_apps/crafter/task_app/README.md +42 -0
- examples/task_apps/crafter/task_app/__init__.py +5 -0
- examples/task_apps/crafter/task_app/grpo_crafter.py +1055 -0
- examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +146 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/README.md +173 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/branching.py +143 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/environment_routes.py +1226 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +532 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +583 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +122 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +253 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +999 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/main.py +100 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +1252 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/registry.py +195 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +2233 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/storage/volume.py +211 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/test_agents.py +161 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/test_service.py +136 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +411 -0
- examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
- examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
- examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
- examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
- examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
- examples/task_apps/enron/__init__.py +2 -0
- examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
- examples/task_apps/enron/filter_sft.toml +5 -0
- examples/task_apps/enron/task_app/README.md +14 -0
- examples/task_apps/enron/task_app/__init__.py +1 -0
- examples/task_apps/enron/task_app/grpo_enron.py +906 -0
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
- examples/task_apps/enron/tests/__init__.py +4 -0
- examples/task_apps/enron/tests/conftest.py +115 -0
- examples/task_apps/enron/tests/integration/__init__.py +4 -0
- examples/task_apps/enron/tests/integration/test_enron_eval.py +179 -0
- examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
- examples/task_apps/enron/tests/unit/__init__.py +4 -0
- examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
- examples/task_apps/gepa_benchmarks/__init__.py +7 -0
- examples/task_apps/gepa_benchmarks/common.py +260 -0
- examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
- examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
- examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
- examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
- examples/task_apps/math/README.md +21 -0
- examples/task_apps/math/math_single_step.py +1000 -0
- examples/task_apps/math/math_task_app.py +115 -0
- examples/task_apps/pokemon_battle/__init__.py +2 -0
- examples/task_apps/pokemon_battle/modal_app.py +104 -0
- examples/task_apps/pokemon_battle/task_app/README.md +68 -0
- examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
- examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
- examples/task_apps/pokemon_red/README.md +356 -0
- examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +428 -0
- examples/task_apps/pokemon_red/__init__.py +3 -0
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +30 -0
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +224 -0
- examples/task_apps/pokemon_red/pallet_town_rl_config.toml +75 -0
- examples/task_apps/pokemon_red/task_app.py +1048 -0
- examples/task_apps/pokemon_red/test_pallet_town_rewards.py +193 -0
- examples/task_apps/sokoban/README.md +306 -0
- examples/task_apps/sokoban/__init__.py +3 -0
- examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
- examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
- examples/task_apps/sokoban/filter_sft.toml +5 -0
- examples/task_apps/sokoban/task_app.py +1058 -0
- examples/task_apps/sokoban/tests/__init__.py +4 -0
- examples/task_apps/sokoban/tests/conftest.py +113 -0
- examples/task_apps/sokoban/tests/integration/__init__.py +4 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
- examples/task_apps/sokoban/tests/unit/__init__.py +4 -0
- examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
- examples/task_apps/verilog/__init__.py +1 -0
- examples/task_apps/verilog/eval_groq_qwen32b.toml +22 -0
- examples/task_apps/verilog/filter_sft.toml +5 -0
- examples/task_apps/verilog/task_app/README.md +12 -0
- examples/task_apps/verilog/task_app/__init__.py +1 -0
- examples/task_apps/verilog/task_app/grpo_verilog.py +1166 -0
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
- examples/task_apps/verilog/tests/__init__.py +4 -0
- examples/task_apps/verilog/tests/conftest.py +115 -0
- examples/task_apps/verilog/tests/integration/__init__.py +4 -0
- examples/task_apps/verilog/tests/integration/test_verilog_eval.py +181 -0
- examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
- examples/task_apps/verilog/tests/unit/__init__.py +4 -0
- examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
- examples/tunnel_gepa_banking77/README.md +106 -0
- examples/tunnel_gepa_banking77/banking77_gepa_tunnel.toml +95 -0
- examples/tunnel_gepa_banking77/keep_tunnel_running.py +60 -0
- examples/tunnel_gepa_banking77/run_gepa_with_tunnel.sh +226 -0
- examples/vlm/PROPOSAL.md +53 -0
- examples/vlm/README.md +68 -0
- examples/vlm/configs/crafter_vlm_gpt4o.toml +49 -0
- examples/vlm/crafter_image_only_agent.py +207 -0
- examples/vlm/crafter_openai_vlm_agent.py +275 -0
- examples/vlm/filter_image_rows.py +63 -0
- examples/vlm/run_crafter_vlm_benchmark.py +316 -0
- examples/warming_up_to_rl/_utils.py +92 -0
- examples/warming_up_to_rl/analyze_trace_db.py +422 -0
- examples/warming_up_to_rl/configs/crafter_fft.toml +53 -0
- examples/warming_up_to_rl/configs/crafter_fft_4b.toml +54 -0
- examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +22 -0
- examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +15 -0
- examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +24 -0
- examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +35 -0
- examples/warming_up_to_rl/configs/eval_stepwise_consistent.toml +26 -0
- examples/warming_up_to_rl/configs/eval_stepwise_per_achievement.toml +36 -0
- examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +32 -0
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +85 -0
- examples/warming_up_to_rl/configs/rl_from_ft.toml +58 -0
- examples/warming_up_to_rl/export_trace_sft.py +837 -0
- examples/warming_up_to_rl/groq_test.py +97 -0
- examples/warming_up_to_rl/manage_secrets.py +131 -0
- examples/warming_up_to_rl/old/event_rewards.md +234 -0
- examples/warming_up_to_rl/old/notes.md +73 -0
- examples/warming_up_to_rl/readme.md +110 -0
- examples/warming_up_to_rl/run_eval.py +736 -0
- examples/warming_up_to_rl/run_fft_and_save.py +380 -0
- examples/warming_up_to_rl/run_local_rollout.py +239 -0
- examples/warming_up_to_rl/run_local_rollout_modal.py +248 -0
- examples/warming_up_to_rl/run_local_rollout_parallel.py +405 -0
- examples/warming_up_to_rl/run_local_rollout_traced.py +477 -0
- examples/warming_up_to_rl/run_rl_and_save.py +124 -0
- examples/warming_up_to_rl/run_rollout_remote.py +156 -0
- examples/warming_up_to_rl/task_app/README.md +42 -0
- examples/warming_up_to_rl/task_app/grpo_crafter.py +876 -0
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +253 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +729 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1114 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1891 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +129 -0
- examples/workflows/math_rl/configs/eval_base_qwen.toml +15 -0
- examples/workflows/math_rl/configs/eval_rl_qwen.toml +11 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen.toml +62 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +80 -0
- examples/workflows/math_rl/configs/rl_from_ft_qwen.toml +35 -0
- examples/workflows/math_rl/download_dataset.py +80 -0
- examples/workflows/math_rl/run_eval.py +436 -0
- examples/workflows/math_rl/run_rl_and_save.py +111 -0
- synth_ai/__init__.py +47 -23
- synth_ai/_utils/__init__.py +47 -0
- synth_ai/_utils/base_url.py +10 -0
- synth_ai/_utils/http.py +10 -0
- synth_ai/_utils/prompts.py +10 -0
- synth_ai/_utils/task_app_state.py +12 -0
- synth_ai/_utils/user_config.py +10 -0
- synth_ai/api/models/supported.py +514 -0
- synth_ai/api/train/__init__.py +60 -2
- synth_ai/api/train/builders.py +347 -39
- synth_ai/api/train/cli.py +895 -160
- synth_ai/api/train/config_finder.py +103 -25
- synth_ai/api/train/configs/__init__.py +65 -0
- synth_ai/api/train/configs/prompt_learning.py +496 -0
- synth_ai/api/train/configs/rl.py +188 -0
- synth_ai/api/train/configs/sft.py +99 -0
- synth_ai/api/train/configs/shared.py +81 -0
- synth_ai/api/train/env_resolver.py +70 -20
- synth_ai/api/train/pollers.py +29 -4
- synth_ai/api/train/prompt_learning.py +425 -0
- synth_ai/api/train/sft.py +390 -0
- synth_ai/api/train/supported_algos.py +147 -0
- synth_ai/api/train/task_app.py +6 -4
- synth_ai/api/train/utils.py +64 -52
- synth_ai/api/train/validators.py +1117 -0
- synth_ai/api/tunnel.py +49 -0
- synth_ai/auth/credentials.py +94 -0
- synth_ai/baseline/__init__.py +25 -0
- synth_ai/baseline/config.py +209 -0
- synth_ai/baseline/discovery.py +214 -0
- synth_ai/baseline/execution.py +146 -0
- synth_ai/cfgs.py +227 -0
- synth_ai/cli/__init__.py +85 -63
- synth_ai/cli/_modal_wrapper.py +31 -0
- synth_ai/cli/_storage.py +20 -0
- synth_ai/cli/_typer_patch.py +47 -0
- synth_ai/cli/_validate_task_app.py +29 -0
- synth_ai/cli/balance.py +16 -4
- synth_ai/cli/calc.py +36 -21
- synth_ai/cli/claude.py +70 -0
- synth_ai/cli/codex.py +267 -0
- synth_ai/cli/commands/__init__.py +18 -0
- synth_ai/cli/commands/baseline/__init__.py +12 -0
- synth_ai/cli/commands/baseline/core.py +637 -0
- synth_ai/cli/commands/baseline/list.py +93 -0
- synth_ai/cli/commands/demo/__init__.py +6 -0
- synth_ai/cli/commands/demo/core.py +163 -0
- synth_ai/cli/commands/eval/__init__.py +19 -0
- synth_ai/cli/commands/eval/core.py +1112 -0
- synth_ai/cli/commands/eval/errors.py +81 -0
- synth_ai/cli/commands/eval/validation.py +133 -0
- synth_ai/cli/commands/filter/__init__.py +12 -0
- synth_ai/cli/commands/filter/core.py +424 -0
- synth_ai/cli/commands/filter/errors.py +55 -0
- synth_ai/cli/commands/filter/validation.py +77 -0
- synth_ai/cli/commands/help/__init__.py +185 -0
- synth_ai/cli/commands/help/core.py +72 -0
- synth_ai/cli/commands/smoke/__init__.py +7 -0
- synth_ai/cli/commands/smoke/core.py +1437 -0
- synth_ai/cli/commands/status/__init__.py +66 -0
- synth_ai/cli/commands/status/client.py +192 -0
- synth_ai/cli/commands/status/config.py +92 -0
- synth_ai/cli/commands/status/errors.py +20 -0
- synth_ai/cli/commands/status/formatters.py +164 -0
- synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
- synth_ai/cli/commands/status/subcommands/files.py +79 -0
- synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
- synth_ai/cli/commands/status/subcommands/models.py +79 -0
- synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
- synth_ai/cli/commands/status/subcommands/runs.py +81 -0
- synth_ai/cli/commands/status/subcommands/session.py +183 -0
- synth_ai/cli/commands/status/subcommands/summary.py +47 -0
- synth_ai/cli/commands/status/subcommands/usage.py +203 -0
- synth_ai/cli/commands/status/utils.py +114 -0
- synth_ai/cli/commands/train/__init__.py +53 -0
- synth_ai/cli/commands/train/core.py +21 -0
- synth_ai/cli/commands/train/errors.py +117 -0
- synth_ai/cli/commands/train/judge_schemas.py +200 -0
- synth_ai/cli/commands/train/judge_validation.py +305 -0
- synth_ai/cli/commands/train/validation.py +386 -0
- synth_ai/cli/demo.py +32 -140
- synth_ai/cli/deploy.py +233 -0
- synth_ai/cli/eval/__init__.py +36 -0
- synth_ai/cli/eval/core.py +5 -0
- synth_ai/cli/eval/errors.py +31 -0
- synth_ai/cli/eval/validation.py +5 -0
- synth_ai/cli/filter/__init__.py +28 -0
- synth_ai/cli/filter/core.py +5 -0
- synth_ai/cli/filter/errors.py +23 -0
- synth_ai/cli/filter/validation.py +5 -0
- synth_ai/cli/legacy_root_backup.py +28 -22
- synth_ai/cli/lib/__init__.py +10 -0
- synth_ai/cli/lib/task_app_discovery.py +7 -0
- synth_ai/cli/lib/task_app_env.py +518 -0
- synth_ai/cli/mcp.py +34 -0
- synth_ai/cli/modal_serve/__init__.py +12 -0
- synth_ai/cli/modal_serve/core.py +14 -0
- synth_ai/cli/modal_serve/errors.py +8 -0
- synth_ai/cli/modal_serve/validation.py +11 -0
- synth_ai/cli/opencode.py +256 -0
- synth_ai/cli/recent.py +13 -7
- synth_ai/cli/rl_demo.py +156 -116
- synth_ai/cli/root.py +131 -132
- synth_ai/cli/serve/__init__.py +12 -0
- synth_ai/cli/serve/core.py +14 -0
- synth_ai/cli/serve/errors.py +8 -0
- synth_ai/cli/serve/validation.py +11 -0
- synth_ai/cli/setup.py +49 -0
- synth_ai/cli/status.py +7 -125
- synth_ai/cli/task_app_deploy.py +7 -0
- synth_ai/cli/task_app_list.py +25 -0
- synth_ai/cli/task_app_modal_serve.py +11 -0
- synth_ai/cli/task_app_serve.py +11 -0
- synth_ai/cli/task_apps.py +2284 -257
- synth_ai/cli/traces.py +9 -5
- synth_ai/cli/train/__init__.py +12 -0
- synth_ai/cli/train/core.py +21 -0
- synth_ai/cli/train/errors.py +8 -0
- synth_ai/cli/train/validation.py +24 -0
- synth_ai/cli/train.py +5 -0
- synth_ai/cli/turso.py +73 -0
- synth_ai/cli/watch.py +13 -18
- synth_ai/demos/__init__.py +10 -0
- synth_ai/demos/core/__init__.py +28 -1
- synth_ai/demos/core/cli.py +579 -291
- synth_ai/demos/crafter/__init__.py +1 -0
- synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
- synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
- synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
- synth_ai/demos/demo_registry.py +176 -0
- synth_ai/demos/demo_task_apps/__init__.py +3 -3
- synth_ai/demos/demo_task_apps/core.py +64 -28
- synth_ai/demos/demo_task_apps/crafter/__init__.py +1 -0
- synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +53 -0
- synth_ai/demos/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +73 -0
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +184 -0
- synth_ai/demos/demo_task_apps/math/_common.py +1 -2
- synth_ai/demos/demo_task_apps/math/app.py +2 -1
- synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
- synth_ai/demos/demo_task_apps/math/modal_task_app.py +185 -83
- synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -2
- synth_ai/demos/math/__init__.py +1 -0
- synth_ai/demos/math/_common.py +16 -0
- synth_ai/demos/math/app.py +38 -0
- synth_ai/demos/math/config.toml +76 -0
- synth_ai/demos/math/deploy_modal.py +54 -0
- synth_ai/demos/math/modal_task_app.py +703 -0
- synth_ai/demos/math/task_app_entry.py +51 -0
- synth_ai/environments/environment/core.py +7 -1
- synth_ai/environments/examples/bandit/engine.py +12 -5
- synth_ai/environments/examples/bandit/environment.py +0 -1
- synth_ai/environments/examples/bandit/taskset.py +4 -4
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
- synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
- synth_ai/environments/examples/crafter_classic/environment.py +93 -2
- synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
- synth_ai/environments/examples/enron/engine.py +7 -2
- synth_ai/environments/examples/enron/environment.py +68 -0
- synth_ai/environments/examples/red/engine.py +60 -12
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
- synth_ai/environments/examples/red/environment.py +86 -0
- synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
- synth_ai/environments/examples/sokoban/taskset.py +116 -0
- synth_ai/environments/examples/verilog/engine.py +104 -12
- synth_ai/environments/examples/wordle/environment.py +0 -1
- synth_ai/environments/reproducibility/tree.py +5 -6
- synth_ai/environments/service/app.py +11 -12
- synth_ai/environments/service/core_routes.py +10 -9
- synth_ai/environments/stateful/engine.py +1 -1
- synth_ai/environments/tasks/core.py +1 -0
- synth_ai/environments/tasks/filters.py +5 -6
- synth_ai/environments/tasks/utils.py +4 -5
- synth_ai/evals/__init__.py +15 -0
- synth_ai/evals/base.py +14 -5
- synth_ai/evals/client.py +82 -0
- synth_ai/evals/types.py +42 -0
- synth_ai/http.py +8 -22
- synth_ai/http_client.py +45 -12
- synth_ai/inference/__init__.py +0 -2
- synth_ai/inference/client.py +21 -7
- synth_ai/jobs/client.py +129 -80
- synth_ai/judge_schemas.py +127 -0
- synth_ai/learning/__init__.py +51 -6
- synth_ai/learning/algorithms.py +14 -0
- synth_ai/learning/client.py +122 -30
- synth_ai/learning/config.py +2 -40
- synth_ai/learning/constants.py +0 -2
- synth_ai/learning/ft_client.py +4 -56
- synth_ai/learning/health.py +14 -8
- synth_ai/learning/jobs.py +43 -47
- synth_ai/learning/prompt_learning_client.py +276 -0
- synth_ai/learning/prompt_learning_types.py +185 -0
- synth_ai/{rl → learning/rl}/__init__.py +14 -5
- synth_ai/learning/rl/client.py +269 -0
- synth_ai/learning/rl/config.py +31 -0
- synth_ai/{rl → learning/rl}/contracts.py +5 -10
- synth_ai/{rl → learning/rl}/env_keys.py +45 -16
- synth_ai/learning/rl/secrets.py +13 -0
- synth_ai/learning/rl_client.py +2 -253
- synth_ai/learning/sft/__init__.py +29 -0
- synth_ai/learning/sft/client.py +68 -0
- synth_ai/learning/sft/config.py +270 -0
- synth_ai/learning/sft/data.py +698 -0
- synth_ai/learning/sse.py +25 -26
- synth_ai/learning/validators.py +29 -25
- synth_ai/mcp/__init__.py +5 -0
- synth_ai/mcp/__main__.py +8 -0
- synth_ai/mcp/main.py +254 -0
- synth_ai/mcp/setup.py +100 -0
- synth_ai/modal.py +257 -0
- synth_ai/pricing/__init__.py +3 -0
- synth_ai/pricing/model_pricing.py +64 -0
- synth_ai/session/__init__.py +75 -0
- synth_ai/session/client.py +383 -0
- synth_ai/session/constants.py +63 -0
- synth_ai/session/exceptions.py +105 -0
- synth_ai/session/manager.py +139 -0
- synth_ai/session/models.py +89 -0
- synth_ai/session/query.py +110 -0
- synth_ai/spec/__init__.py +46 -0
- synth_ai/spec/dataclasses.py +149 -0
- synth_ai/spec/loader.py +144 -0
- synth_ai/spec/serializer.py +199 -0
- synth_ai/spec/validation.py +250 -0
- synth_ai/streaming/__init__.py +29 -0
- synth_ai/streaming/config.py +94 -0
- synth_ai/streaming/handlers.py +589 -0
- synth_ai/streaming/streamer.py +320 -0
- synth_ai/streaming/types.py +95 -0
- synth_ai/task/__init__.py +50 -30
- synth_ai/task/apps/__init__.py +63 -19
- synth_ai/task/auth.py +35 -23
- synth_ai/task/client.py +15 -13
- synth_ai/task/config.py +261 -0
- synth_ai/task/contracts.py +165 -64
- synth_ai/task/datasets.py +9 -6
- synth_ai/task/errors.py +11 -10
- synth_ai/task/health.py +17 -11
- synth_ai/task/inference_api.py +101 -0
- synth_ai/task/json.py +58 -24
- synth_ai/task/proxy.py +59 -66
- synth_ai/task/rubrics/__init__.py +55 -0
- synth_ai/task/rubrics/loaders.py +156 -0
- synth_ai/task/rubrics/models.py +57 -0
- synth_ai/task/rubrics/scoring.py +116 -0
- synth_ai/task/rubrics/strict.py +149 -0
- synth_ai/task/rubrics.py +22 -15
- synth_ai/task/server.py +65 -31
- synth_ai/task/trace_correlation_helpers.py +328 -0
- synth_ai/task/tracing_utils.py +44 -28
- synth_ai/task/validators.py +449 -6
- synth_ai/task/vendors.py +5 -7
- synth_ai/tracing_v3/__init__.py +4 -0
- synth_ai/tracing_v3/abstractions.py +21 -4
- synth_ai/tracing_v3/config.py +167 -22
- synth_ai/tracing_v3/constants.py +21 -0
- synth_ai/tracing_v3/db_config.py +42 -29
- synth_ai/tracing_v3/decorators.py +80 -45
- synth_ai/tracing_v3/examples/basic_usage.py +15 -9
- synth_ai/tracing_v3/hooks.py +6 -4
- synth_ai/tracing_v3/llm_call_record_helpers.py +161 -61
- synth_ai/tracing_v3/migration_helper.py +1 -2
- synth_ai/tracing_v3/replica_sync.py +12 -7
- synth_ai/tracing_v3/serialization.py +130 -0
- synth_ai/tracing_v3/session_tracer.py +73 -16
- synth_ai/tracing_v3/storage/base.py +89 -1
- synth_ai/tracing_v3/storage/config.py +63 -16
- synth_ai/tracing_v3/storage/factory.py +11 -9
- synth_ai/tracing_v3/storage/utils.py +15 -11
- synth_ai/tracing_v3/trace_utils.py +317 -0
- synth_ai/tracing_v3/turso/__init__.py +8 -21
- synth_ai/tracing_v3/turso/daemon.py +123 -15
- synth_ai/tracing_v3/turso/models.py +5 -2
- synth_ai/tracing_v3/turso/native_manager.py +1293 -0
- synth_ai/tracing_v3/utils.py +5 -4
- synth_ai/tunnel.py +143 -0
- synth_ai/tunnel_deploy.py +278 -0
- synth_ai/types.py +8 -0
- synth_ai/urls.py +11 -0
- synth_ai/utils/__init__.py +166 -0
- synth_ai/utils/agents.py +74 -0
- synth_ai/utils/apps.py +152 -0
- synth_ai/utils/base_url.py +94 -0
- synth_ai/utils/bin.py +39 -0
- synth_ai/utils/claude.py +36 -0
- synth_ai/utils/cli.py +284 -0
- synth_ai/utils/config.py +81 -0
- synth_ai/utils/env.py +346 -0
- synth_ai/utils/errors.py +85 -0
- synth_ai/utils/http.py +172 -0
- synth_ai/utils/json.py +72 -0
- synth_ai/utils/log_filter.py +99 -0
- synth_ai/utils/logging.py +198 -0
- synth_ai/utils/modal.py +299 -0
- synth_ai/utils/paths.py +95 -0
- synth_ai/utils/process.py +233 -0
- synth_ai/utils/prompts.py +39 -0
- synth_ai/utils/sqld.py +122 -0
- synth_ai/utils/ssl.py +25 -0
- synth_ai/utils/task_app_discovery.py +882 -0
- synth_ai/utils/task_app_env.py +186 -0
- synth_ai/utils/task_app_state.py +318 -0
- synth_ai/utils/tunnel/__init__.py +12 -0
- synth_ai/utils/tunnel/config.py +55 -0
- synth_ai/utils/user_config.py +137 -0
- synth_ai/uvicorn.py +77 -0
- synth_ai-0.2.23.dev3.dist-info/METADATA +357 -0
- synth_ai-0.2.23.dev3.dist-info/RECORD +983 -0
- {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/entry_points.txt +0 -1
- {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/top_level.txt +1 -0
- synth_ai/cli/man.py +0 -106
- synth_ai/core/experiment.py +0 -15
- synth_ai/core/system.py +0 -15
- synth_ai/demo_registry.py +0 -258
- synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
- synth_ai/experimental/synth_oss.py +0 -446
- synth_ai/handshake.py +0 -107
- synth_ai/install_sqld.sh +0 -40
- synth_ai/learning/offline/dpo.py +0 -0
- synth_ai/learning/offline/providers.py +0 -7
- synth_ai/learning/offline/sft.py +0 -0
- synth_ai/learning/offline/shared.py +0 -0
- synth_ai/learning/online/grpo.py +0 -0
- synth_ai/learning/online/irft.py +0 -0
- synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
- synth_ai/learning/prompts/gepa.py +0 -0
- synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
- synth_ai/learning/prompts/mipro.py +0 -289
- synth_ai/learning/prompts/random_search.py +0 -246
- synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
- synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
- synth_ai/lm/__init__.py +0 -51
- synth_ai/lm/caching/constants.py +0 -6
- synth_ai/lm/caching/dbs.py +0 -0
- synth_ai/lm/caching/ephemeral.py +0 -102
- synth_ai/lm/caching/handler.py +0 -137
- synth_ai/lm/caching/initialize.py +0 -11
- synth_ai/lm/caching/persistent.py +0 -114
- synth_ai/lm/config.py +0 -110
- synth_ai/lm/constants.py +0 -32
- synth_ai/lm/core/__init__.py +0 -8
- synth_ai/lm/core/all.py +0 -73
- synth_ai/lm/core/exceptions.py +0 -7
- synth_ai/lm/core/main.py +0 -319
- synth_ai/lm/core/main_v3.py +0 -594
- synth_ai/lm/core/synth_models.py +0 -48
- synth_ai/lm/core/vendor_clients.py +0 -188
- synth_ai/lm/cost/monitor.py +0 -1
- synth_ai/lm/cost/statefulness.py +0 -1
- synth_ai/lm/injection.py +0 -80
- synth_ai/lm/overrides.py +0 -206
- synth_ai/lm/provider_support/__init__.py +0 -8
- synth_ai/lm/provider_support/anthropic.py +0 -972
- synth_ai/lm/provider_support/openai.py +0 -1139
- synth_ai/lm/provider_support/suppress_logging.py +0 -31
- synth_ai/lm/structured_outputs/handler.py +0 -440
- synth_ai/lm/structured_outputs/inject.py +0 -297
- synth_ai/lm/structured_outputs/rehabilitate.py +0 -185
- synth_ai/lm/tools/__init__.py +0 -3
- synth_ai/lm/tools/base.py +0 -172
- synth_ai/lm/unified_interface.py +0 -202
- synth_ai/lm/vendors/base.py +0 -81
- synth_ai/lm/vendors/core/anthropic_api.py +0 -387
- synth_ai/lm/vendors/core/gemini_api.py +0 -292
- synth_ai/lm/vendors/core/mistral_api.py +0 -322
- synth_ai/lm/vendors/core/openai_api.py +0 -225
- synth_ai/lm/vendors/core/synth_dev_api.py +0 -0
- synth_ai/lm/vendors/local/ollama.py +0 -0
- synth_ai/lm/vendors/openai_standard.py +0 -780
- synth_ai/lm/vendors/openai_standard_responses.py +0 -256
- synth_ai/lm/vendors/retries.py +0 -22
- synth_ai/lm/vendors/supported/custom_endpoint.py +0 -417
- synth_ai/lm/vendors/supported/deepseek.py +0 -69
- synth_ai/lm/vendors/supported/grok.py +0 -75
- synth_ai/lm/vendors/supported/groq.py +0 -16
- synth_ai/lm/vendors/supported/ollama.py +0 -15
- synth_ai/lm/vendors/supported/openrouter.py +0 -74
- synth_ai/lm/vendors/supported/together.py +0 -11
- synth_ai/lm/vendors/synth_client.py +0 -808
- synth_ai/lm/warmup.py +0 -186
- synth_ai/rl/secrets.py +0 -19
- synth_ai/scripts/verify_rewards.py +0 -100
- synth_ai/task/apps/grpo_crafter.py +0 -438
- synth_ai/tracing/__init__.py +0 -30
- synth_ai/tracing_v1/__init__.py +0 -33
- synth_ai/tracing_v3/turso/manager.py +0 -774
- synth_ai/v0/tracing/abstractions.py +0 -224
- synth_ai/v0/tracing/base_client.py +0 -91
- synth_ai/v0/tracing/client_manager.py +0 -131
- synth_ai/v0/tracing/config.py +0 -142
- synth_ai/v0/tracing/context.py +0 -146
- synth_ai/v0/tracing/decorators.py +0 -682
- synth_ai/v0/tracing/events/__init__.py +0 -0
- synth_ai/v0/tracing/events/manage.py +0 -147
- synth_ai/v0/tracing/events/scope.py +0 -86
- synth_ai/v0/tracing/events/store.py +0 -228
- synth_ai/v0/tracing/immediate_client.py +0 -151
- synth_ai/v0/tracing/local.py +0 -18
- synth_ai/v0/tracing/log_client_base.py +0 -73
- synth_ai/v0/tracing/retry_queue.py +0 -186
- synth_ai/v0/tracing/trackers.py +0 -515
- synth_ai/v0/tracing/upload.py +0 -512
- synth_ai/v0/tracing/utils.py +0 -9
- synth_ai/v0/tracing_v1/__init__.py +0 -16
- synth_ai/v0/tracing_v1/abstractions.py +0 -224
- synth_ai/v0/tracing_v1/base_client.py +0 -91
- synth_ai/v0/tracing_v1/client_manager.py +0 -131
- synth_ai/v0/tracing_v1/config.py +0 -142
- synth_ai/v0/tracing_v1/context.py +0 -146
- synth_ai/v0/tracing_v1/decorators.py +0 -703
- synth_ai/v0/tracing_v1/events/__init__.py +0 -0
- synth_ai/v0/tracing_v1/events/manage.py +0 -147
- synth_ai/v0/tracing_v1/events/scope.py +0 -86
- synth_ai/v0/tracing_v1/events/store.py +0 -228
- synth_ai/v0/tracing_v1/immediate_client.py +0 -151
- synth_ai/v0/tracing_v1/local.py +0 -18
- synth_ai/v0/tracing_v1/log_client_base.py +0 -73
- synth_ai/v0/tracing_v1/retry_queue.py +0 -186
- synth_ai/v0/tracing_v1/trackers.py +0 -515
- synth_ai/v0/tracing_v1/upload.py +0 -527
- synth_ai/v0/tracing_v1/utils.py +0 -9
- synth_ai/zyk/__init__.py +0 -30
- synth_ai-0.2.9.dev0.dist-info/METADATA +0 -131
- synth_ai-0.2.9.dev0.dist-info/RECORD +0 -444
- {synth_ai/lm/caching → examples/task_apps}/__init__.py +0 -0
- {synth_ai/lm/cost → examples/task_apps/crafter}/__init__.py +0 -0
- {synth_ai/lm/structured_outputs → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server}/__init__.py +0 -0
- {synth_ai/lm/vendors → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests}/__init__.py +0 -0
- {synth_ai/lm/vendors/core → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils}/__init__.py +0 -0
- {synth_ai/lm/vendors/local → examples/task_apps/math}/__init__.py +0 -0
- {synth_ai/lm/vendors/supported → examples/workflows}/__init__.py +0 -0
- {synth_ai/v0/tracing → examples/workflows/math_rl}/__init__.py +0 -0
- /synth_ai/{compound/cais.py → cli/__main__.py} +0 -0
- /synth_ai/{learning/filtering.py → py.typed} +0 -0
- {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Trace hooks for Pokemon Red environment - v3 version.
|
|
3
|
+
Captures reward information and saves to Turso database.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from datetime import datetime
|
|
7
|
+
from typing import Any, Dict, Optional
|
|
8
|
+
|
|
9
|
+
from synth_ai.tracing_v3.abstractions import BaseEvent, EnvironmentEvent
|
|
10
|
+
from synth_ai.tracing_v3.hooks import HookManager
|
|
11
|
+
|
|
12
|
+
# Pokemon Red achievement categories by reward value
|
|
13
|
+
EXPLORATION_ACHIEVEMENTS = {
|
|
14
|
+
0.02: "explore_new_area",
|
|
15
|
+
0.04: "explore_multiple_areas",
|
|
16
|
+
1.0: "leave_starting_area",
|
|
17
|
+
1.5: "enter_new_city",
|
|
18
|
+
2.0: "explore_new_route",
|
|
19
|
+
5.0: "enter_gym_building",
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
TRAINING_ACHIEVEMENTS = {
|
|
23
|
+
0.2: "pokemon_level_up",
|
|
24
|
+
0.3: "reach_power_level",
|
|
25
|
+
3.0: "pokemon_ready_for_battle",
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
BATTLE_ACHIEVEMENTS = {
|
|
29
|
+
0.1: "encounter_wild_pokemon",
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
RESOURCE_ACHIEVEMENTS = {
|
|
33
|
+
0.05: "keep_pokemon_healthy",
|
|
34
|
+
0.5: "find_valuable_item",
|
|
35
|
+
0.8: "visit_pokemon_center",
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
MAJOR_ACHIEVEMENTS = {
|
|
39
|
+
50.0: "defeat_brock_win_badge",
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
async def track_pokemon_rewards(event_obj: BaseEvent, **kwargs) -> Optional[Dict[str, Any]]:
|
|
44
|
+
"""Hook that captures detailed Pokemon Red reward information."""
|
|
45
|
+
# Only process EnvironmentEvents
|
|
46
|
+
if not isinstance(event_obj, EnvironmentEvent):
|
|
47
|
+
return None
|
|
48
|
+
|
|
49
|
+
reward = event_obj.reward
|
|
50
|
+
if reward is None or reward == 0.0:
|
|
51
|
+
return None
|
|
52
|
+
|
|
53
|
+
# Determine achievement type based on reward value
|
|
54
|
+
achievement_type = "unknown"
|
|
55
|
+
achievement_category = "other"
|
|
56
|
+
|
|
57
|
+
# Check each category
|
|
58
|
+
if reward in EXPLORATION_ACHIEVEMENTS:
|
|
59
|
+
achievement_type = EXPLORATION_ACHIEVEMENTS[reward]
|
|
60
|
+
achievement_category = "exploration"
|
|
61
|
+
elif reward in TRAINING_ACHIEVEMENTS:
|
|
62
|
+
achievement_type = TRAINING_ACHIEVEMENTS[reward]
|
|
63
|
+
achievement_category = "training"
|
|
64
|
+
elif reward in BATTLE_ACHIEVEMENTS:
|
|
65
|
+
achievement_type = BATTLE_ACHIEVEMENTS[reward]
|
|
66
|
+
achievement_category = "battle"
|
|
67
|
+
elif reward in RESOURCE_ACHIEVEMENTS:
|
|
68
|
+
achievement_type = RESOURCE_ACHIEVEMENTS[reward]
|
|
69
|
+
achievement_category = "resource"
|
|
70
|
+
elif reward in MAJOR_ACHIEVEMENTS:
|
|
71
|
+
achievement_type = MAJOR_ACHIEVEMENTS[reward]
|
|
72
|
+
achievement_category = "major"
|
|
73
|
+
|
|
74
|
+
return {
|
|
75
|
+
"reward_value": reward,
|
|
76
|
+
"achievement_type": achievement_type,
|
|
77
|
+
"achievement_category": achievement_category,
|
|
78
|
+
"timestamp": datetime.now().isoformat(),
|
|
79
|
+
"system_state_before": event_obj.system_state_before,
|
|
80
|
+
"system_state_after": event_obj.system_state_after,
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
async def track_pokemon_milestones(event_obj: BaseEvent, **kwargs) -> Optional[Dict[str, Any]]:
|
|
85
|
+
"""Hook that tracks significant Pokemon Red milestones."""
|
|
86
|
+
# Only process EnvironmentEvents
|
|
87
|
+
if not isinstance(event_obj, EnvironmentEvent):
|
|
88
|
+
return None
|
|
89
|
+
|
|
90
|
+
reward = event_obj.reward
|
|
91
|
+
if reward is None:
|
|
92
|
+
return None
|
|
93
|
+
|
|
94
|
+
# Track major milestones
|
|
95
|
+
if reward >= 1.0: # Significant progress rewards
|
|
96
|
+
return {
|
|
97
|
+
"milestone": "major_progress",
|
|
98
|
+
"reward": reward,
|
|
99
|
+
"timestamp": datetime.now().isoformat(),
|
|
100
|
+
}
|
|
101
|
+
elif reward >= 0.5: # Moderate rewards
|
|
102
|
+
return {
|
|
103
|
+
"milestone": "moderate_progress",
|
|
104
|
+
"reward": reward,
|
|
105
|
+
"timestamp": datetime.now().isoformat(),
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
return None
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
async def track_pokemon_outcomes(event_obj: BaseEvent, **kwargs) -> Optional[Dict[str, Any]]:
|
|
112
|
+
"""Hook that tracks episode outcomes for Pokemon Red."""
|
|
113
|
+
# Only process EnvironmentEvents
|
|
114
|
+
if not isinstance(event_obj, EnvironmentEvent):
|
|
115
|
+
return None
|
|
116
|
+
|
|
117
|
+
# Check for termination conditions
|
|
118
|
+
if event_obj.terminated or event_obj.truncated:
|
|
119
|
+
total_reward = getattr(event_obj, 'total_reward', 0.0)
|
|
120
|
+
steps_taken = getattr(event_obj, 'step_count', 0)
|
|
121
|
+
|
|
122
|
+
# Extract achievement information from system state
|
|
123
|
+
achievements_count = 0
|
|
124
|
+
if event_obj.system_state_after:
|
|
125
|
+
# Count positive rewards as achievements
|
|
126
|
+
# This is a simplified count - in practice you'd track actual achievements
|
|
127
|
+
achievements_count = max(1, int(total_reward / 0.1)) # Rough estimate
|
|
128
|
+
|
|
129
|
+
return {
|
|
130
|
+
"outcome_type": "episode_end",
|
|
131
|
+
"total_reward": total_reward,
|
|
132
|
+
"steps_taken": steps_taken,
|
|
133
|
+
"achievements_count": achievements_count,
|
|
134
|
+
"terminated": event_obj.terminated,
|
|
135
|
+
"truncated": event_obj.truncated,
|
|
136
|
+
"timestamp": datetime.now().isoformat(),
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
return None
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
# Create the global POKEMON_RED_HOOKS instance
|
|
143
|
+
POKEMON_RED_HOOKS = HookManager()
|
|
144
|
+
|
|
145
|
+
# Register all hooks
|
|
146
|
+
POKEMON_RED_HOOKS.register(
|
|
147
|
+
"event_recorded",
|
|
148
|
+
track_pokemon_rewards,
|
|
149
|
+
name="pokemon_rewards",
|
|
150
|
+
priority=10,
|
|
151
|
+
event_types=["environment"],
|
|
152
|
+
)
|
|
153
|
+
|
|
154
|
+
POKEMON_RED_HOOKS.register(
|
|
155
|
+
"event_recorded",
|
|
156
|
+
track_pokemon_milestones,
|
|
157
|
+
name="pokemon_milestones",
|
|
158
|
+
priority=5,
|
|
159
|
+
event_types=["environment"],
|
|
160
|
+
)
|
|
161
|
+
|
|
162
|
+
POKEMON_RED_HOOKS.register(
|
|
163
|
+
"event_recorded",
|
|
164
|
+
track_pokemon_outcomes,
|
|
165
|
+
name="pokemon_outcomes",
|
|
166
|
+
priority=5,
|
|
167
|
+
event_types=["environment"],
|
|
168
|
+
)
|
|
@@ -1,5 +1,8 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import logging
|
|
2
4
|
import os
|
|
5
|
+
from collections.abc import Iterable, Sequence
|
|
3
6
|
from dataclasses import asdict, dataclass, fields
|
|
4
7
|
from typing import List, Tuple
|
|
5
8
|
from uuid import UUID, uuid4
|
|
@@ -18,6 +21,7 @@ from synth_ai.environments.tasks.core import (
|
|
|
18
21
|
TaskInstanceMetadataFilter,
|
|
19
22
|
TaskInstanceSet,
|
|
20
23
|
)
|
|
24
|
+
from synth_ai.task.contracts import TaskInfo
|
|
21
25
|
|
|
22
26
|
logger = logging.getLogger(__name__)
|
|
23
27
|
|
|
@@ -96,6 +100,118 @@ class SokobanTaskInstance(TaskInstance):
|
|
|
96
100
|
return cls(**filtered_data)
|
|
97
101
|
|
|
98
102
|
|
|
103
|
+
def _base_task_info_template() -> TaskInfo:
|
|
104
|
+
return TaskInfo(
|
|
105
|
+
task={"id": "sokoban", "name": "Sokoban", "version": "1.0.0"},
|
|
106
|
+
environment="sokoban",
|
|
107
|
+
action_space={
|
|
108
|
+
"type": "tool_call",
|
|
109
|
+
"tools": [{"name": "interact", "schema": {"action": "int"}}],
|
|
110
|
+
"max_calls": 1,
|
|
111
|
+
},
|
|
112
|
+
observation={"summary": "Sokoban grid observation", "keys": ["grid", "player"]},
|
|
113
|
+
dataset={"id": "sokoban", "name": "Sokoban", "version": "1.0.0"},
|
|
114
|
+
rubric={"version": "1", "criteria_count": 1, "source": "inline"},
|
|
115
|
+
inference={"supports_proxy": False},
|
|
116
|
+
capabilities={"supports_rollout": True, "supports_env_lifecycle": True},
|
|
117
|
+
limits={"max_turns": 200},
|
|
118
|
+
)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
class SokobanTaskSet:
|
|
122
|
+
"""Minimal helper compatible with Task App expectations."""
|
|
123
|
+
|
|
124
|
+
def __init__(self) -> None:
|
|
125
|
+
self._taskset: TaskInstanceSet | None = None
|
|
126
|
+
self._seed_index: dict[int, SokobanTaskInstance] = {}
|
|
127
|
+
self._base_info = _base_task_info_template()
|
|
128
|
+
|
|
129
|
+
async def _ensure_loaded(self) -> TaskInstanceSet:
|
|
130
|
+
if self._taskset is None:
|
|
131
|
+
dataset = await create_sokoban_taskset()
|
|
132
|
+
self._taskset = dataset
|
|
133
|
+
self._seed_index.clear()
|
|
134
|
+
for inst in dataset.instances:
|
|
135
|
+
try:
|
|
136
|
+
seed_value = int(getattr(inst.metadata, "seed"))
|
|
137
|
+
except Exception:
|
|
138
|
+
continue
|
|
139
|
+
# Keep the first instance encountered for a seed
|
|
140
|
+
self._seed_index.setdefault(seed_value, inst)
|
|
141
|
+
return self._taskset
|
|
142
|
+
|
|
143
|
+
def describe(self) -> dict[str, object]:
|
|
144
|
+
if not self._taskset:
|
|
145
|
+
return {"id": "sokoban", "name": "Sokoban"}
|
|
146
|
+
return {
|
|
147
|
+
"id": "sokoban",
|
|
148
|
+
"name": self._taskset.name,
|
|
149
|
+
"description": self._taskset.description,
|
|
150
|
+
"instance_count": len(self._taskset.instances),
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
async def provide_task_instances(self, seeds: Sequence[int]) -> Iterable[TaskInfo]:
|
|
154
|
+
await self._ensure_loaded()
|
|
155
|
+
if not seeds:
|
|
156
|
+
return []
|
|
157
|
+
|
|
158
|
+
infos: list[TaskInfo] = []
|
|
159
|
+
for raw_seed in seeds:
|
|
160
|
+
try:
|
|
161
|
+
seed_value = int(raw_seed)
|
|
162
|
+
except Exception:
|
|
163
|
+
continue
|
|
164
|
+
|
|
165
|
+
instance = self._seed_index.get(seed_value)
|
|
166
|
+
if instance is None:
|
|
167
|
+
# Attempt to construct on the fly; try configured difficulties in order
|
|
168
|
+
for difficulty in DIFFICULTY_CONFIGS:
|
|
169
|
+
try:
|
|
170
|
+
instance = await create_task_instance_from_seed(difficulty, seed_value)
|
|
171
|
+
break
|
|
172
|
+
except Exception:
|
|
173
|
+
continue
|
|
174
|
+
if instance is None:
|
|
175
|
+
continue
|
|
176
|
+
self._seed_index[seed_value] = instance
|
|
177
|
+
|
|
178
|
+
metadata = getattr(instance, "metadata", None)
|
|
179
|
+
base_info = self._base_info.model_copy(deep=True)
|
|
180
|
+
|
|
181
|
+
observation = dict(base_info.observation)
|
|
182
|
+
dataset_info = dict(base_info.dataset)
|
|
183
|
+
task_metadata = {"seed": seed_value}
|
|
184
|
+
|
|
185
|
+
if metadata is not None:
|
|
186
|
+
for key in ("difficulty", "num_boxes", "dim_room", "max_steps", "shortest_path_length"):
|
|
187
|
+
value = getattr(metadata, key, None)
|
|
188
|
+
if value is not None:
|
|
189
|
+
observation[key] = value
|
|
190
|
+
task_metadata[key] = value
|
|
191
|
+
dataset_info.update(
|
|
192
|
+
{
|
|
193
|
+
"seed": getattr(metadata, "seed", seed_value),
|
|
194
|
+
"difficulty": getattr(metadata, "difficulty", None),
|
|
195
|
+
"num_boxes": getattr(metadata, "num_boxes", None),
|
|
196
|
+
"dim_room": getattr(metadata, "dim_room", None),
|
|
197
|
+
}
|
|
198
|
+
)
|
|
199
|
+
generation_params = getattr(metadata, "generation_params", None)
|
|
200
|
+
if generation_params is not None:
|
|
201
|
+
task_metadata["generation_params"] = generation_params
|
|
202
|
+
|
|
203
|
+
infos.append(
|
|
204
|
+
base_info.model_copy(
|
|
205
|
+
update={
|
|
206
|
+
"observation": observation,
|
|
207
|
+
"dataset": dataset_info,
|
|
208
|
+
"task_metadata": task_metadata,
|
|
209
|
+
}
|
|
210
|
+
)
|
|
211
|
+
)
|
|
212
|
+
return infos
|
|
213
|
+
|
|
214
|
+
|
|
99
215
|
async def create_sokoban_taskset() -> TaskInstanceSet:
|
|
100
216
|
"""Generates Sokoban task instances from pre-generated verified puzzles."""
|
|
101
217
|
instances = []
|
|
@@ -46,7 +46,7 @@ class VerilogCompileSuccessComponent(RewardComponent):
|
|
|
46
46
|
if hasattr(action, "get") and action.get("type") == "compile":
|
|
47
47
|
# Check if compilation was successful (returncode 0)
|
|
48
48
|
if action.get("returncode") == 0:
|
|
49
|
-
return 0.1
|
|
49
|
+
return 0.01 # Normalized: 0.1 / 10.0 = 0.01
|
|
50
50
|
return 0.0
|
|
51
51
|
|
|
52
52
|
|
|
@@ -55,24 +55,37 @@ class VerilogSimulationPassComponent(RewardComponent):
|
|
|
55
55
|
if hasattr(action, "get") and action.get("type") == "simulate":
|
|
56
56
|
# Check if simulation passed
|
|
57
57
|
if action.get("passed", False):
|
|
58
|
-
return 1.0
|
|
58
|
+
return 0.1 # Normalized: 1.0 / 10.0 = 0.1
|
|
59
59
|
return 0.0
|
|
60
60
|
|
|
61
61
|
|
|
62
62
|
class VerilogStepPenaltyComponent(RewardComponent):
|
|
63
|
-
def __init__(self, penalty: float =
|
|
63
|
+
def __init__(self, penalty: float = 0.0): # No per-step reward - only reward accomplishments
|
|
64
64
|
self.penalty = penalty
|
|
65
65
|
|
|
66
66
|
async def score(self, state: Any, action: Any) -> float:
|
|
67
67
|
return self.penalty
|
|
68
68
|
|
|
69
69
|
|
|
70
|
+
class VerilogSubmitSuccessComponent(RewardComponent):
|
|
71
|
+
"""Reward for successful submission (tests passed). Max reward = 1.0 (normalized)."""
|
|
72
|
+
async def score(self, state: VerilogPublicState, action: Any) -> float:
|
|
73
|
+
if hasattr(action, "get") and action.get("type") == "submit":
|
|
74
|
+
# Check if submission passed
|
|
75
|
+
if action.get("passed", False):
|
|
76
|
+
return 1.0 # Normalized: Maximum reward is now 1.0
|
|
77
|
+
return 0.0
|
|
78
|
+
|
|
79
|
+
|
|
70
80
|
class VerilogEngine(StatefulEngine):
|
|
71
81
|
"""
|
|
72
82
|
Stateful Verilog evaluation engine with persistent artifact snapshots.
|
|
73
83
|
"""
|
|
74
84
|
|
|
75
85
|
def __init__(self, task_instance: TaskInstance):
|
|
86
|
+
# Validate required Verilog tools are available
|
|
87
|
+
self._validate_verilog_tools()
|
|
88
|
+
|
|
76
89
|
self.task_instance = task_instance
|
|
77
90
|
self._total_reward = 0.0
|
|
78
91
|
self._current_action_for_reward: Optional[Dict[str, Any]] = None
|
|
@@ -81,7 +94,8 @@ class VerilogEngine(StatefulEngine):
|
|
|
81
94
|
components=[
|
|
82
95
|
VerilogCompileSuccessComponent(),
|
|
83
96
|
VerilogSimulationPassComponent(),
|
|
84
|
-
|
|
97
|
+
VerilogSubmitSuccessComponent(),
|
|
98
|
+
VerilogStepPenaltyComponent(penalty=0.0), # No per-step reward
|
|
85
99
|
]
|
|
86
100
|
)
|
|
87
101
|
|
|
@@ -92,6 +106,39 @@ class VerilogEngine(StatefulEngine):
|
|
|
92
106
|
# Track last compile/simulate outputs
|
|
93
107
|
self._last_compile_output: Optional[str] = None
|
|
94
108
|
self._last_simulate_output: Optional[str] = None
|
|
109
|
+
|
|
110
|
+
@staticmethod
|
|
111
|
+
def _validate_verilog_tools() -> None:
|
|
112
|
+
"""Validate that required Verilog tools (iverilog, vvp) are available."""
|
|
113
|
+
missing_tools = []
|
|
114
|
+
|
|
115
|
+
if not shutil.which("iverilog"):
|
|
116
|
+
missing_tools.append("iverilog")
|
|
117
|
+
if not shutil.which("vvp"):
|
|
118
|
+
missing_tools.append("vvp")
|
|
119
|
+
|
|
120
|
+
if missing_tools:
|
|
121
|
+
error_msg = (
|
|
122
|
+
f"🚨🚨🚨 CRITICAL CONFIGURATION ERROR 🚨🚨🚨\n"
|
|
123
|
+
f"\n"
|
|
124
|
+
f"Missing required Verilog tools: {', '.join(missing_tools)}\n"
|
|
125
|
+
f"\n"
|
|
126
|
+
f"The Verilog environment CANNOT function without these tools.\n"
|
|
127
|
+
f"ALL compile/simulate operations will FAIL.\n"
|
|
128
|
+
f"ALL rewards will be ZERO.\n"
|
|
129
|
+
f"Training or evaluation will be COMPLETELY BROKEN.\n"
|
|
130
|
+
f"\n"
|
|
131
|
+
f"🔧 FIX THIS NOW:\n"
|
|
132
|
+
f"1. Add 'iverilog' to apt_packages in Modal deployment config\n"
|
|
133
|
+
f"2. Location: examples/task_apps/verilog/task_app/grpo_verilog.py\n"
|
|
134
|
+
f"3. Look for: modal=ModalDeploymentConfig(\n"
|
|
135
|
+
f"4. Add: apt_packages=('iverilog',) # Provides both iverilog and vvp\n"
|
|
136
|
+
f"5. Redeploy: uvx synth-ai modal-serve grpo-verilog\n"
|
|
137
|
+
f"\n"
|
|
138
|
+
f"{'='*80}"
|
|
139
|
+
)
|
|
140
|
+
print(f"\n{'='*80}\n{error_msg}\n{'='*80}\n", flush=True)
|
|
141
|
+
raise RuntimeError(error_msg)
|
|
95
142
|
|
|
96
143
|
async def _reset_engine(
|
|
97
144
|
self, *, seed: Optional[int] = None
|
|
@@ -122,6 +169,13 @@ class VerilogEngine(StatefulEngine):
|
|
|
122
169
|
) -> Tuple[VerilogPrivateState, VerilogPublicState]:
|
|
123
170
|
"""Process an action result and update engine state."""
|
|
124
171
|
self._current_action_for_reward = action_result
|
|
172
|
+
|
|
173
|
+
# DEBUG: Print action_result
|
|
174
|
+
print(f"\n[ENGINE DEBUG] _step_engine called")
|
|
175
|
+
print(f" action_result: {action_result}")
|
|
176
|
+
print(f" action_result.type: {action_result.get('type')}")
|
|
177
|
+
print(f" action_result.returncode: {action_result.get('returncode')}")
|
|
178
|
+
print(f" action_result.ok: {action_result.get('ok')}")
|
|
125
179
|
|
|
126
180
|
# Update last outputs if this is a compile or simulate action
|
|
127
181
|
if action_result.get("type") == "compile":
|
|
@@ -136,18 +190,21 @@ class VerilogEngine(StatefulEngine):
|
|
|
136
190
|
current_pub_state = VerilogPublicState(
|
|
137
191
|
files=self._get_file_contents(),
|
|
138
192
|
build_dir=str(self.build_dir),
|
|
139
|
-
task_completed=action_result.get("passed", False),
|
|
193
|
+
task_completed=action_result.get("submitted", False) and action_result.get("passed", False),
|
|
140
194
|
)
|
|
141
195
|
|
|
142
196
|
reward_from_stack = await self.reward_stack.step_reward(
|
|
143
197
|
state=current_pub_state, action=self._current_action_for_reward
|
|
144
198
|
)
|
|
145
199
|
self._current_action_for_reward = None
|
|
200
|
+
|
|
201
|
+
# DEBUG: Print reward
|
|
202
|
+
print(f"[ENGINE DEBUG] reward_from_stack: {reward_from_stack}")
|
|
146
203
|
|
|
147
204
|
self._total_reward += reward_from_stack
|
|
148
205
|
|
|
149
|
-
# Check termination conditions
|
|
150
|
-
terminated = action_result.get("
|
|
206
|
+
# Check termination conditions - only terminate if submitted (regardless of pass/fail)
|
|
207
|
+
terminated = action_result.get("submitted", False)
|
|
151
208
|
|
|
152
209
|
priv = VerilogPrivateState(
|
|
153
210
|
reward_last=reward_from_stack,
|
|
@@ -159,7 +216,7 @@ class VerilogEngine(StatefulEngine):
|
|
|
159
216
|
pub = VerilogPublicState(
|
|
160
217
|
files=self._get_file_contents(),
|
|
161
218
|
build_dir=str(self.build_dir),
|
|
162
|
-
task_completed=action_result.get("passed", False),
|
|
219
|
+
task_completed=action_result.get("submitted", False) and action_result.get("passed", False),
|
|
163
220
|
last_compile_output=self._last_compile_output,
|
|
164
221
|
last_simulate_output=self._last_simulate_output,
|
|
165
222
|
)
|
|
@@ -248,6 +305,16 @@ class VerilogEngine(StatefulEngine):
|
|
|
248
305
|
}
|
|
249
306
|
except subprocess.TimeoutExpired:
|
|
250
307
|
return {"ok": False, "error": "Compilation timeout", "type": "compile"}
|
|
308
|
+
except FileNotFoundError:
|
|
309
|
+
error_msg = (
|
|
310
|
+
"🚨 CRITICAL ERROR: 'iverilog' executable not found! 🚨\n"
|
|
311
|
+
"The Verilog compiler (iverilog) is not installed in this environment.\n"
|
|
312
|
+
"This will cause ALL compile operations to fail and result in ZERO rewards.\n"
|
|
313
|
+
"Fix: Add 'iverilog' to apt_packages in the Modal deployment config.\n"
|
|
314
|
+
"Location: examples/task_apps/verilog/task_app/grpo_verilog.py -> modal=ModalDeploymentConfig(apt_packages=('iverilog',))"
|
|
315
|
+
)
|
|
316
|
+
print(f"\n{'='*80}\n{error_msg}\n{'='*80}\n", flush=True)
|
|
317
|
+
raise RuntimeError(error_msg) from None
|
|
251
318
|
except Exception as e:
|
|
252
319
|
return {"ok": False, "error": str(e), "type": "compile"}
|
|
253
320
|
|
|
@@ -279,18 +346,43 @@ class VerilogEngine(StatefulEngine):
|
|
|
279
346
|
}
|
|
280
347
|
except subprocess.TimeoutExpired:
|
|
281
348
|
return {"ok": False, "error": "Simulation timeout", "type": "simulate"}
|
|
349
|
+
except FileNotFoundError:
|
|
350
|
+
error_msg = (
|
|
351
|
+
"🚨 CRITICAL ERROR: 'vvp' executable not found! 🚨\n"
|
|
352
|
+
"The Verilog simulator (vvp) is not installed in this environment.\n"
|
|
353
|
+
"This will cause ALL simulate operations to fail and result in ZERO rewards.\n"
|
|
354
|
+
"Fix: Add 'iverilog' to apt_packages in the Modal deployment config (provides both iverilog and vvp).\n"
|
|
355
|
+
"Location: examples/task_apps/verilog/task_app/grpo_verilog.py -> modal=ModalDeploymentConfig(apt_packages=('iverilog',))"
|
|
356
|
+
)
|
|
357
|
+
print(f"\n{'='*80}\n{error_msg}\n{'='*80}\n", flush=True)
|
|
358
|
+
raise RuntimeError(error_msg) from None
|
|
282
359
|
except Exception as e:
|
|
283
360
|
return {"ok": False, "error": str(e), "type": "simulate"}
|
|
284
361
|
|
|
285
362
|
async def submit(self) -> Dict[str, Any]:
|
|
286
363
|
"""Submit solution for grading."""
|
|
287
|
-
#
|
|
288
|
-
#
|
|
364
|
+
# Check if the last simulation passed
|
|
365
|
+
# Parse the last simulation output to determine if tests passed
|
|
366
|
+
passed = False
|
|
367
|
+
detail = "No simulation run yet"
|
|
368
|
+
|
|
369
|
+
if self._last_simulate_output:
|
|
370
|
+
stdout = self._last_simulate_output
|
|
371
|
+
passed = (
|
|
372
|
+
"ALL_TESTS_PASSED" in stdout
|
|
373
|
+
or ("Mismatches: 0 " in stdout and "samples" in stdout)
|
|
374
|
+
or ("no mismatches" in stdout.lower() and "errors" not in stdout.lower())
|
|
375
|
+
)
|
|
376
|
+
if passed:
|
|
377
|
+
detail = "All tests passed"
|
|
378
|
+
else:
|
|
379
|
+
detail = "Tests failed - please review simulation output"
|
|
380
|
+
|
|
289
381
|
return {
|
|
290
382
|
"ok": True,
|
|
291
383
|
"type": "submit",
|
|
292
|
-
"passed":
|
|
293
|
-
"detail":
|
|
384
|
+
"passed": passed,
|
|
385
|
+
"detail": detail,
|
|
294
386
|
"submitted": True,
|
|
295
387
|
}
|
|
296
388
|
|
|
@@ -14,8 +14,10 @@ big “backend.production” code-base.
|
|
|
14
14
|
from __future__ import annotations
|
|
15
15
|
|
|
16
16
|
import gzip
|
|
17
|
+
import hashlib
|
|
17
18
|
import json
|
|
18
19
|
import logging
|
|
20
|
+
import os
|
|
19
21
|
import pickle
|
|
20
22
|
import sqlite3
|
|
21
23
|
from collections.abc import Iterable
|
|
@@ -32,11 +34,6 @@ log = logging.getLogger(__name__)
|
|
|
32
34
|
# --------------------------------------------------------------------------- #
|
|
33
35
|
# lightweight metadata record #
|
|
34
36
|
# --------------------------------------------------------------------------- #
|
|
35
|
-
import hashlib
|
|
36
|
-
import logging
|
|
37
|
-
import os
|
|
38
|
-
|
|
39
|
-
log = logging.getLogger(__name__)
|
|
40
37
|
|
|
41
38
|
# Default directory for storing snapshots relative to some base path
|
|
42
39
|
# This could be configured via environment variables or settings later.
|
|
@@ -256,7 +253,9 @@ class TrajectoryTreeStore:
|
|
|
256
253
|
def reconstruct_actions(self, snap_id: str) -> tuple[Any, ...]:
|
|
257
254
|
"""Return the sequence of *actions* from the root → `snap_id`."""
|
|
258
255
|
actions = []
|
|
259
|
-
for child, parent in zip(
|
|
256
|
+
for child, parent in zip(
|
|
257
|
+
self.path_to_root(snap_id)[:-1], self.path_to_root(snap_id)[1:], strict=False
|
|
258
|
+
):
|
|
260
259
|
actions.append(self.graph.edges[parent, child]["action"])
|
|
261
260
|
return tuple(reversed(actions))
|
|
262
261
|
|
|
@@ -1,6 +1,17 @@
|
|
|
1
|
+
import logging
|
|
1
2
|
import os # Added to ensure os is available before use
|
|
2
3
|
import sys
|
|
3
4
|
|
|
5
|
+
import synth_ai.environments.examples.crafter_classic.environment as cc
|
|
6
|
+
import synth_ai.environments.examples.crafter_custom.environment as ccustom
|
|
7
|
+
from fastapi import FastAPI
|
|
8
|
+
from synth_ai.environments.service.core_routes import api_router
|
|
9
|
+
from synth_ai.environments.service.external_registry import (
|
|
10
|
+
ExternalRegistryConfig,
|
|
11
|
+
load_external_environments,
|
|
12
|
+
)
|
|
13
|
+
from synth_ai.environments.service.registry import list_supported_env_types, register_environment
|
|
14
|
+
|
|
4
15
|
# Ensure repository root is on PYTHONPATH for dev installs
|
|
5
16
|
# Current file path: <repo>/synth_ai/environments/service/app.py
|
|
6
17
|
# We want sys.path to include <repo>, NOT <repo>/synth_ai to avoid shadowing stdlib 'http'
|
|
@@ -16,15 +27,6 @@ if _repo_root not in sys.path:
|
|
|
16
27
|
sys.path.insert(0, _repo_root)
|
|
17
28
|
|
|
18
29
|
print(f"SYS.PATH IN APP.PY: {sys.path}")
|
|
19
|
-
import logging
|
|
20
|
-
|
|
21
|
-
from fastapi import FastAPI
|
|
22
|
-
from synth_ai.environments.service.core_routes import api_router
|
|
23
|
-
from synth_ai.environments.service.external_registry import (
|
|
24
|
-
ExternalRegistryConfig,
|
|
25
|
-
load_external_environments,
|
|
26
|
-
)
|
|
27
|
-
from synth_ai.environments.service.registry import list_supported_env_types, register_environment
|
|
28
30
|
|
|
29
31
|
# Configure logging with more detail
|
|
30
32
|
logging.basicConfig(
|
|
@@ -38,11 +40,8 @@ logger = logging.getLogger(__name__)
|
|
|
38
40
|
logging.getLogger("uvicorn.access").setLevel(logging.INFO)
|
|
39
41
|
|
|
40
42
|
# Register built-in environments at import time
|
|
41
|
-
import synth_ai.environments.examples.crafter_classic.environment as cc
|
|
42
43
|
|
|
43
44
|
register_environment("CrafterClassic", cc.CrafterClassicEnvironment)
|
|
44
|
-
import synth_ai.environments.examples.crafter_custom.environment as ccustom
|
|
45
|
-
|
|
46
45
|
register_environment("CrafterCustom", ccustom.CrafterCustomEnvironment)
|
|
47
46
|
|
|
48
47
|
# Register Wordle example environment
|
|
@@ -97,15 +97,12 @@ def create_task_instance_for_environment(
|
|
|
97
97
|
task.initial_engine_snapshot["seed"] = config["seed"]
|
|
98
98
|
|
|
99
99
|
# For CrafterClassic, also handle difficulty
|
|
100
|
-
if env_name == "CrafterClassic" and config:
|
|
101
|
-
|
|
102
|
-
task.initial_engine_snapshot["difficulty"] = config["difficulty"]
|
|
100
|
+
if env_name == "CrafterClassic" and config and "difficulty" in config:
|
|
101
|
+
task.initial_engine_snapshot["difficulty"] = config["difficulty"]
|
|
103
102
|
|
|
104
103
|
# For MiniGrid, handle environment selection
|
|
105
|
-
if env_name == "MiniGrid" and config:
|
|
106
|
-
|
|
107
|
-
if "env_name" in config:
|
|
108
|
-
task.initial_engine_snapshot["env_name"] = config["env_name"]
|
|
104
|
+
if env_name == "MiniGrid" and config and "env_name" in config:
|
|
105
|
+
task.initial_engine_snapshot["env_name"] = config["env_name"]
|
|
109
106
|
|
|
110
107
|
return task
|
|
111
108
|
|
|
@@ -951,7 +948,9 @@ async def register_environment_api(request: RegisterEnvironmentRequest) -> dict[
|
|
|
951
948
|
) from e
|
|
952
949
|
except Exception as e:
|
|
953
950
|
logger.error(f"Failed to register environment {request.name}: {e}")
|
|
954
|
-
raise HTTPException(
|
|
951
|
+
raise HTTPException(
|
|
952
|
+
status_code=500, detail=f"Failed to register environment: {str(e)}"
|
|
953
|
+
) from e
|
|
955
954
|
|
|
956
955
|
|
|
957
956
|
@api_router.delete("/registry/environments/{env_name}")
|
|
@@ -984,7 +983,9 @@ async def unregister_environment_api(env_name: str) -> dict[str, Any]:
|
|
|
984
983
|
|
|
985
984
|
except Exception as e:
|
|
986
985
|
logger.error(f"Failed to unregister environment {env_name}: {e}")
|
|
987
|
-
raise HTTPException(
|
|
986
|
+
raise HTTPException(
|
|
987
|
+
status_code=500, detail=f"Failed to unregister environment: {str(e)}"
|
|
988
|
+
) from e
|
|
988
989
|
|
|
989
990
|
|
|
990
991
|
@api_router.get("/registry/environments")
|