synth-ai 0.2.9.dev0__py3-none-any.whl → 0.2.23.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- examples/README.md +1 -0
- examples/__init__.py +16 -0
- examples/analyze_semantic_words.sh +17 -0
- examples/baseline/banking77_baseline.py +243 -0
- examples/baseline/banking77_pipeline_baseline.py +294 -0
- examples/baseline/crafter_baseline.py +407 -0
- examples/baseline/pokemon_red_baseline.py +326 -0
- examples/baseline/simple_baseline.py +56 -0
- examples/baseline/warming_up_to_rl_baseline.py +239 -0
- examples/blog_posts/gepa/README.md +355 -0
- examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
- examples/blog_posts/gepa/configs/banking77_gepa_test.toml +80 -0
- examples/blog_posts/gepa/configs/banking77_mipro_local.toml +50 -0
- examples/blog_posts/gepa/configs/banking77_pipeline_gepa_local.toml +101 -0
- examples/blog_posts/gepa/configs/banking77_pipeline_gepa_test.toml +96 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +57 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +35 -0
- examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +51 -0
- examples/blog_posts/gepa/configs/hover_gepa_local.toml +57 -0
- examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +35 -0
- examples/blog_posts/gepa/configs/hover_mipro_local.toml +51 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +57 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +35 -0
- examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +51 -0
- examples/blog_posts/gepa/configs/pupa_gepa_local.toml +58 -0
- examples/blog_posts/gepa/configs/pupa_mipro_local.toml +52 -0
- examples/blog_posts/gepa/deploy_banking77_task_app.sh +54 -0
- examples/blog_posts/gepa/gepa_baseline.py +204 -0
- examples/blog_posts/gepa/query_prompts_example.py +97 -0
- examples/blog_posts/gepa/run_gepa_banking77.sh +112 -0
- examples/blog_posts/gepa/run_gepa_banking77_pipeline.sh +163 -0
- examples/blog_posts/gepa/task_apps.py +105 -0
- examples/blog_posts/gepa/test_gepa_local.sh +67 -0
- examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
- examples/blog_posts/mipro/README.md +415 -0
- examples/blog_posts/mipro/configs/banking77_mipro_local.toml +91 -0
- examples/blog_posts/mipro/configs/banking77_mipro_test.toml +87 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gemini_flash_lite_local.toml +98 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gpt41mini_local.toml +96 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_local.toml +94 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_test.toml +170 -0
- examples/blog_posts/mipro/deploy_banking77_pipeline_task_app.sh +59 -0
- examples/blog_posts/mipro/deploy_banking77_task_app.sh +41 -0
- examples/blog_posts/mipro/multi_step.md +79 -0
- examples/blog_posts/mipro/run_mipro_banking77.sh +191 -0
- examples/blog_posts/mipro/run_mipro_banking77_pipeline.sh +171 -0
- examples/blog_posts/mipro/run_mipro_banking77_pipeline_gemini_flash_lite.sh +177 -0
- examples/blog_posts/mipro/run_mipro_banking77_pipeline_gpt41mini.sh +173 -0
- examples/blog_posts/mipro/verify_banking77_setup.sh +117 -0
- examples/blog_posts/pokemon_vl/README.md +98 -0
- examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
- examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
- examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
- examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
- examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
- examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
- examples/blog_posts/pokemon_vl/extract_images.py +239 -0
- examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
- examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
- examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
- examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
- examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
- examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
- examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
- examples/blog_posts/warming_up_to_rl/README.md +158 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
- examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
- examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
- examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
- examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
- examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
- examples/crafter_debug_render.py +186 -0
- examples/dev/qwen3_32b_qlora_4xh100.toml +45 -0
- examples/gepa/banking77_pipeline_gepa.toml +96 -0
- examples/gepa/multi_stage_gepa_example.toml +84 -0
- examples/gepa/run_gepa_banking77_pipeline.sh +157 -0
- examples/multi_step/SFT_README.md +147 -0
- examples/multi_step/configs/README_verilog_rl.md +77 -0
- examples/multi_step/configs/VERILOG_REWARDS.md +103 -0
- examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +196 -0
- examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
- examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
- examples/multi_step/configs/crafter_rl_outcome.toml +75 -0
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +145 -0
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +84 -0
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +79 -0
- examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
- examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
- examples/multi_step/configs/crafter_synth_backend.md +40 -0
- examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
- examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
- examples/multi_step/configs/verilog_rl_lora.toml +147 -0
- examples/multi_step/convert_traces_to_sft.py +84 -0
- examples/multi_step/crafter_rl_lora.md +70 -0
- examples/multi_step/judges/crafter_backend_judge.py +220 -0
- examples/multi_step/judges/verilog_backend_judge.py +234 -0
- examples/multi_step/readme.md +48 -0
- examples/multi_step/run_sft_qwen30b.sh +45 -0
- examples/multi_step/sse_metrics_streaming_notes.md +357 -0
- examples/multi_step/task_app_config_notes.md +494 -0
- examples/multi_step/verilog_rl_lora.md +218 -0
- examples/qwen_coder/README.md +102 -0
- examples/qwen_coder/_shared.py +113 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +60 -0
- examples/qwen_coder/configs/coder_lora_4b.toml +61 -0
- examples/qwen_coder/configs/coder_lora_small.toml +57 -0
- examples/qwen_coder/generate_dataset.py +98 -0
- examples/qwen_coder/infer_ft_smoke.py +65 -0
- examples/qwen_coder/infer_prod_proxy.py +73 -0
- examples/qwen_coder/infer_via_synth.py +87 -0
- examples/qwen_coder/scripts/infer_coder.sh +19 -0
- examples/qwen_coder/scripts/train_coder_30b.sh +22 -0
- examples/qwen_coder/sft_full_17b.py +103 -0
- examples/qwen_coder/sft_lora_30b.py +110 -0
- examples/qwen_coder/subset_jsonl.py +39 -0
- examples/qwen_coder/todos.md +38 -0
- examples/qwen_coder/validate_jsonl.py +60 -0
- examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
- examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
- examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
- examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
- examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
- examples/qwen_vl/QUICKSTART.md +327 -0
- examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
- examples/qwen_vl/README.md +152 -0
- examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
- examples/qwen_vl/RL_VISION_TESTING.md +333 -0
- examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
- examples/qwen_vl/SETUP_COMPLETE.md +274 -0
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
- examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
- examples/qwen_vl/__init__.py +2 -0
- examples/qwen_vl/collect_data_via_cli.md +415 -0
- examples/qwen_vl/collect_vision_traces.py +368 -0
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
- examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
- examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
- examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
- examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
- examples/qwen_vl/configs/filter_vision_test.toml +8 -0
- examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
- examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
- examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
- examples/qwen_vl/run_vision_comparison.sh +61 -0
- examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
- examples/qwen_vl/test_image_validation.py +201 -0
- examples/qwen_vl/test_sft_vision_data.py +110 -0
- examples/rl/README.md +169 -0
- examples/rl/configs/eval_base_qwen.toml +17 -0
- examples/rl/configs/eval_rl_qwen.toml +13 -0
- examples/rl/configs/rl_from_base_qwen.toml +62 -0
- examples/rl/configs/rl_from_base_qwen17.toml +80 -0
- examples/rl/configs/rl_from_ft_qwen.toml +37 -0
- examples/rl/download_dataset.py +80 -0
- examples/rl/run_eval.py +436 -0
- examples/rl/run_rl_and_save.py +111 -0
- examples/rl/task_app/README.md +21 -0
- {synth_ai/task/apps → examples/rl/task_app}/math_single_step.py +188 -50
- examples/rl/task_app/math_task_app.py +111 -0
- examples/run_crafter_demo.sh +10 -0
- examples/sdk_prompt_learning_example.py +55 -0
- examples/sft/README.md +139 -0
- examples/sft/configs/crafter_fft_qwen0p6b.toml +49 -0
- examples/sft/configs/crafter_lora_qwen0p6b.toml +49 -0
- examples/sft/evaluate.py +117 -0
- examples/sft/export_dataset.py +120 -0
- examples/sft/generate_traces.py +164 -0
- examples/swe/__init__.py +12 -0
- examples/swe/task_app/README.md +135 -0
- examples/swe/task_app/__init__.py +2 -0
- examples/swe/task_app/grpo_swe_mini.py +604 -0
- examples/swe/task_app/grpo_swe_mini_task_app.py +124 -0
- examples/swe/task_app/hosted/README.md +173 -0
- examples/swe/task_app/hosted/__init__.py +5 -0
- examples/swe/task_app/hosted/branching.py +143 -0
- examples/swe/task_app/hosted/environment_routes.py +1289 -0
- examples/swe/task_app/hosted/envs/__init__.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
- examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
- examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
- examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
- examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
- examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +1191 -0
- examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
- examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
- examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
- examples/swe/task_app/hosted/hosted_app.py +204 -0
- examples/swe/task_app/hosted/inference/__init__.py +5 -0
- examples/swe/task_app/hosted/inference/openai_client.py +584 -0
- examples/swe/task_app/hosted/main.py +100 -0
- examples/swe/task_app/hosted/policy_routes.py +1094 -0
- examples/swe/task_app/hosted/registry.py +195 -0
- examples/swe/task_app/hosted/rollout.py +1905 -0
- examples/swe/task_app/hosted/storage/__init__.py +5 -0
- examples/swe/task_app/hosted/storage/volume.py +211 -0
- examples/swe/task_app/hosted/test_agents.py +161 -0
- examples/swe/task_app/hosted/test_service.py +136 -0
- examples/swe/task_app/hosted/utils.py +62 -0
- examples/swe/task_app/morph_backend.py +178 -0
- examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
- examples/task_apps/TESTING.md +275 -0
- examples/task_apps/banking77/__init__.py +6 -0
- examples/task_apps/banking77/banking77_task_app.py +912 -0
- examples/task_apps/banking77/deploy_wrapper.py +46 -0
- examples/task_apps/banking77_pipeline/__init__.py +6 -0
- examples/task_apps/banking77_pipeline/banking77_pipeline_task_app.py +489 -0
- examples/task_apps/banking77_pipeline/deploy_wrapper.py +50 -0
- examples/task_apps/crafter/CREATE_SFT_DATASET.md +286 -0
- examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
- examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +187 -0
- examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +281 -0
- examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
- examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
- examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
- examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
- examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
- examples/task_apps/crafter/task_app/README.md +42 -0
- examples/task_apps/crafter/task_app/__init__.py +5 -0
- examples/task_apps/crafter/task_app/grpo_crafter.py +1055 -0
- examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +146 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/README.md +173 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/branching.py +143 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/environment_routes.py +1226 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +532 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +583 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +122 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +253 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +999 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/main.py +100 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +1252 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/registry.py +195 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +2233 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/storage/volume.py +211 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/test_agents.py +161 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/test_service.py +136 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +411 -0
- examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
- examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
- examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
- examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
- examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
- examples/task_apps/enron/__init__.py +2 -0
- examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
- examples/task_apps/enron/filter_sft.toml +5 -0
- examples/task_apps/enron/task_app/README.md +14 -0
- examples/task_apps/enron/task_app/__init__.py +1 -0
- examples/task_apps/enron/task_app/grpo_enron.py +906 -0
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
- examples/task_apps/enron/tests/__init__.py +4 -0
- examples/task_apps/enron/tests/conftest.py +115 -0
- examples/task_apps/enron/tests/integration/__init__.py +4 -0
- examples/task_apps/enron/tests/integration/test_enron_eval.py +179 -0
- examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
- examples/task_apps/enron/tests/unit/__init__.py +4 -0
- examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
- examples/task_apps/gepa_benchmarks/__init__.py +7 -0
- examples/task_apps/gepa_benchmarks/common.py +260 -0
- examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
- examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
- examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
- examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
- examples/task_apps/math/README.md +21 -0
- examples/task_apps/math/math_single_step.py +1000 -0
- examples/task_apps/math/math_task_app.py +115 -0
- examples/task_apps/pokemon_battle/__init__.py +2 -0
- examples/task_apps/pokemon_battle/modal_app.py +104 -0
- examples/task_apps/pokemon_battle/task_app/README.md +68 -0
- examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
- examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
- examples/task_apps/pokemon_red/README.md +356 -0
- examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +428 -0
- examples/task_apps/pokemon_red/__init__.py +3 -0
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +30 -0
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +224 -0
- examples/task_apps/pokemon_red/pallet_town_rl_config.toml +75 -0
- examples/task_apps/pokemon_red/task_app.py +1048 -0
- examples/task_apps/pokemon_red/test_pallet_town_rewards.py +193 -0
- examples/task_apps/sokoban/README.md +306 -0
- examples/task_apps/sokoban/__init__.py +3 -0
- examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
- examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
- examples/task_apps/sokoban/filter_sft.toml +5 -0
- examples/task_apps/sokoban/task_app.py +1058 -0
- examples/task_apps/sokoban/tests/__init__.py +4 -0
- examples/task_apps/sokoban/tests/conftest.py +113 -0
- examples/task_apps/sokoban/tests/integration/__init__.py +4 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
- examples/task_apps/sokoban/tests/unit/__init__.py +4 -0
- examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
- examples/task_apps/verilog/__init__.py +1 -0
- examples/task_apps/verilog/eval_groq_qwen32b.toml +22 -0
- examples/task_apps/verilog/filter_sft.toml +5 -0
- examples/task_apps/verilog/task_app/README.md +12 -0
- examples/task_apps/verilog/task_app/__init__.py +1 -0
- examples/task_apps/verilog/task_app/grpo_verilog.py +1166 -0
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
- examples/task_apps/verilog/tests/__init__.py +4 -0
- examples/task_apps/verilog/tests/conftest.py +115 -0
- examples/task_apps/verilog/tests/integration/__init__.py +4 -0
- examples/task_apps/verilog/tests/integration/test_verilog_eval.py +181 -0
- examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
- examples/task_apps/verilog/tests/unit/__init__.py +4 -0
- examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
- examples/tunnel_gepa_banking77/README.md +106 -0
- examples/tunnel_gepa_banking77/banking77_gepa_tunnel.toml +95 -0
- examples/tunnel_gepa_banking77/keep_tunnel_running.py +60 -0
- examples/tunnel_gepa_banking77/run_gepa_with_tunnel.sh +226 -0
- examples/vlm/PROPOSAL.md +53 -0
- examples/vlm/README.md +68 -0
- examples/vlm/configs/crafter_vlm_gpt4o.toml +49 -0
- examples/vlm/crafter_image_only_agent.py +207 -0
- examples/vlm/crafter_openai_vlm_agent.py +275 -0
- examples/vlm/filter_image_rows.py +63 -0
- examples/vlm/run_crafter_vlm_benchmark.py +316 -0
- examples/warming_up_to_rl/_utils.py +92 -0
- examples/warming_up_to_rl/analyze_trace_db.py +422 -0
- examples/warming_up_to_rl/configs/crafter_fft.toml +53 -0
- examples/warming_up_to_rl/configs/crafter_fft_4b.toml +54 -0
- examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +22 -0
- examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +15 -0
- examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +24 -0
- examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +35 -0
- examples/warming_up_to_rl/configs/eval_stepwise_consistent.toml +26 -0
- examples/warming_up_to_rl/configs/eval_stepwise_per_achievement.toml +36 -0
- examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +32 -0
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +85 -0
- examples/warming_up_to_rl/configs/rl_from_ft.toml +58 -0
- examples/warming_up_to_rl/export_trace_sft.py +837 -0
- examples/warming_up_to_rl/groq_test.py +97 -0
- examples/warming_up_to_rl/manage_secrets.py +131 -0
- examples/warming_up_to_rl/old/event_rewards.md +234 -0
- examples/warming_up_to_rl/old/notes.md +73 -0
- examples/warming_up_to_rl/readme.md +110 -0
- examples/warming_up_to_rl/run_eval.py +736 -0
- examples/warming_up_to_rl/run_fft_and_save.py +380 -0
- examples/warming_up_to_rl/run_local_rollout.py +239 -0
- examples/warming_up_to_rl/run_local_rollout_modal.py +248 -0
- examples/warming_up_to_rl/run_local_rollout_parallel.py +405 -0
- examples/warming_up_to_rl/run_local_rollout_traced.py +477 -0
- examples/warming_up_to_rl/run_rl_and_save.py +124 -0
- examples/warming_up_to_rl/run_rollout_remote.py +156 -0
- examples/warming_up_to_rl/task_app/README.md +42 -0
- examples/warming_up_to_rl/task_app/grpo_crafter.py +876 -0
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +253 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +729 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1114 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1891 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +129 -0
- examples/workflows/math_rl/configs/eval_base_qwen.toml +15 -0
- examples/workflows/math_rl/configs/eval_rl_qwen.toml +11 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen.toml +62 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +80 -0
- examples/workflows/math_rl/configs/rl_from_ft_qwen.toml +35 -0
- examples/workflows/math_rl/download_dataset.py +80 -0
- examples/workflows/math_rl/run_eval.py +436 -0
- examples/workflows/math_rl/run_rl_and_save.py +111 -0
- synth_ai/__init__.py +47 -23
- synth_ai/_utils/__init__.py +47 -0
- synth_ai/_utils/base_url.py +10 -0
- synth_ai/_utils/http.py +10 -0
- synth_ai/_utils/prompts.py +10 -0
- synth_ai/_utils/task_app_state.py +12 -0
- synth_ai/_utils/user_config.py +10 -0
- synth_ai/api/models/supported.py +514 -0
- synth_ai/api/train/__init__.py +60 -2
- synth_ai/api/train/builders.py +347 -39
- synth_ai/api/train/cli.py +895 -160
- synth_ai/api/train/config_finder.py +103 -25
- synth_ai/api/train/configs/__init__.py +65 -0
- synth_ai/api/train/configs/prompt_learning.py +496 -0
- synth_ai/api/train/configs/rl.py +188 -0
- synth_ai/api/train/configs/sft.py +99 -0
- synth_ai/api/train/configs/shared.py +81 -0
- synth_ai/api/train/env_resolver.py +70 -20
- synth_ai/api/train/pollers.py +29 -4
- synth_ai/api/train/prompt_learning.py +425 -0
- synth_ai/api/train/sft.py +390 -0
- synth_ai/api/train/supported_algos.py +147 -0
- synth_ai/api/train/task_app.py +6 -4
- synth_ai/api/train/utils.py +64 -52
- synth_ai/api/train/validators.py +1117 -0
- synth_ai/api/tunnel.py +49 -0
- synth_ai/auth/credentials.py +94 -0
- synth_ai/baseline/__init__.py +25 -0
- synth_ai/baseline/config.py +209 -0
- synth_ai/baseline/discovery.py +214 -0
- synth_ai/baseline/execution.py +146 -0
- synth_ai/cfgs.py +227 -0
- synth_ai/cli/__init__.py +85 -63
- synth_ai/cli/_modal_wrapper.py +31 -0
- synth_ai/cli/_storage.py +20 -0
- synth_ai/cli/_typer_patch.py +47 -0
- synth_ai/cli/_validate_task_app.py +29 -0
- synth_ai/cli/balance.py +16 -4
- synth_ai/cli/calc.py +36 -21
- synth_ai/cli/claude.py +70 -0
- synth_ai/cli/codex.py +267 -0
- synth_ai/cli/commands/__init__.py +18 -0
- synth_ai/cli/commands/baseline/__init__.py +12 -0
- synth_ai/cli/commands/baseline/core.py +637 -0
- synth_ai/cli/commands/baseline/list.py +93 -0
- synth_ai/cli/commands/demo/__init__.py +6 -0
- synth_ai/cli/commands/demo/core.py +163 -0
- synth_ai/cli/commands/eval/__init__.py +19 -0
- synth_ai/cli/commands/eval/core.py +1112 -0
- synth_ai/cli/commands/eval/errors.py +81 -0
- synth_ai/cli/commands/eval/validation.py +133 -0
- synth_ai/cli/commands/filter/__init__.py +12 -0
- synth_ai/cli/commands/filter/core.py +424 -0
- synth_ai/cli/commands/filter/errors.py +55 -0
- synth_ai/cli/commands/filter/validation.py +77 -0
- synth_ai/cli/commands/help/__init__.py +185 -0
- synth_ai/cli/commands/help/core.py +72 -0
- synth_ai/cli/commands/smoke/__init__.py +7 -0
- synth_ai/cli/commands/smoke/core.py +1437 -0
- synth_ai/cli/commands/status/__init__.py +66 -0
- synth_ai/cli/commands/status/client.py +192 -0
- synth_ai/cli/commands/status/config.py +92 -0
- synth_ai/cli/commands/status/errors.py +20 -0
- synth_ai/cli/commands/status/formatters.py +164 -0
- synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
- synth_ai/cli/commands/status/subcommands/files.py +79 -0
- synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
- synth_ai/cli/commands/status/subcommands/models.py +79 -0
- synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
- synth_ai/cli/commands/status/subcommands/runs.py +81 -0
- synth_ai/cli/commands/status/subcommands/session.py +183 -0
- synth_ai/cli/commands/status/subcommands/summary.py +47 -0
- synth_ai/cli/commands/status/subcommands/usage.py +203 -0
- synth_ai/cli/commands/status/utils.py +114 -0
- synth_ai/cli/commands/train/__init__.py +53 -0
- synth_ai/cli/commands/train/core.py +21 -0
- synth_ai/cli/commands/train/errors.py +117 -0
- synth_ai/cli/commands/train/judge_schemas.py +200 -0
- synth_ai/cli/commands/train/judge_validation.py +305 -0
- synth_ai/cli/commands/train/validation.py +386 -0
- synth_ai/cli/demo.py +32 -140
- synth_ai/cli/deploy.py +233 -0
- synth_ai/cli/eval/__init__.py +36 -0
- synth_ai/cli/eval/core.py +5 -0
- synth_ai/cli/eval/errors.py +31 -0
- synth_ai/cli/eval/validation.py +5 -0
- synth_ai/cli/filter/__init__.py +28 -0
- synth_ai/cli/filter/core.py +5 -0
- synth_ai/cli/filter/errors.py +23 -0
- synth_ai/cli/filter/validation.py +5 -0
- synth_ai/cli/legacy_root_backup.py +28 -22
- synth_ai/cli/lib/__init__.py +10 -0
- synth_ai/cli/lib/task_app_discovery.py +7 -0
- synth_ai/cli/lib/task_app_env.py +518 -0
- synth_ai/cli/mcp.py +34 -0
- synth_ai/cli/modal_serve/__init__.py +12 -0
- synth_ai/cli/modal_serve/core.py +14 -0
- synth_ai/cli/modal_serve/errors.py +8 -0
- synth_ai/cli/modal_serve/validation.py +11 -0
- synth_ai/cli/opencode.py +256 -0
- synth_ai/cli/recent.py +13 -7
- synth_ai/cli/rl_demo.py +156 -116
- synth_ai/cli/root.py +131 -132
- synth_ai/cli/serve/__init__.py +12 -0
- synth_ai/cli/serve/core.py +14 -0
- synth_ai/cli/serve/errors.py +8 -0
- synth_ai/cli/serve/validation.py +11 -0
- synth_ai/cli/setup.py +49 -0
- synth_ai/cli/status.py +7 -125
- synth_ai/cli/task_app_deploy.py +7 -0
- synth_ai/cli/task_app_list.py +25 -0
- synth_ai/cli/task_app_modal_serve.py +11 -0
- synth_ai/cli/task_app_serve.py +11 -0
- synth_ai/cli/task_apps.py +2284 -257
- synth_ai/cli/traces.py +9 -5
- synth_ai/cli/train/__init__.py +12 -0
- synth_ai/cli/train/core.py +21 -0
- synth_ai/cli/train/errors.py +8 -0
- synth_ai/cli/train/validation.py +24 -0
- synth_ai/cli/train.py +5 -0
- synth_ai/cli/turso.py +73 -0
- synth_ai/cli/watch.py +13 -18
- synth_ai/demos/__init__.py +10 -0
- synth_ai/demos/core/__init__.py +28 -1
- synth_ai/demos/core/cli.py +579 -291
- synth_ai/demos/crafter/__init__.py +1 -0
- synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
- synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
- synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
- synth_ai/demos/demo_registry.py +176 -0
- synth_ai/demos/demo_task_apps/__init__.py +3 -3
- synth_ai/demos/demo_task_apps/core.py +64 -28
- synth_ai/demos/demo_task_apps/crafter/__init__.py +1 -0
- synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +53 -0
- synth_ai/demos/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +73 -0
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +184 -0
- synth_ai/demos/demo_task_apps/math/_common.py +1 -2
- synth_ai/demos/demo_task_apps/math/app.py +2 -1
- synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
- synth_ai/demos/demo_task_apps/math/modal_task_app.py +185 -83
- synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -2
- synth_ai/demos/math/__init__.py +1 -0
- synth_ai/demos/math/_common.py +16 -0
- synth_ai/demos/math/app.py +38 -0
- synth_ai/demos/math/config.toml +76 -0
- synth_ai/demos/math/deploy_modal.py +54 -0
- synth_ai/demos/math/modal_task_app.py +703 -0
- synth_ai/demos/math/task_app_entry.py +51 -0
- synth_ai/environments/environment/core.py +7 -1
- synth_ai/environments/examples/bandit/engine.py +12 -5
- synth_ai/environments/examples/bandit/environment.py +0 -1
- synth_ai/environments/examples/bandit/taskset.py +4 -4
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
- synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
- synth_ai/environments/examples/crafter_classic/environment.py +93 -2
- synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
- synth_ai/environments/examples/enron/engine.py +7 -2
- synth_ai/environments/examples/enron/environment.py +68 -0
- synth_ai/environments/examples/red/engine.py +60 -12
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
- synth_ai/environments/examples/red/environment.py +86 -0
- synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
- synth_ai/environments/examples/sokoban/taskset.py +116 -0
- synth_ai/environments/examples/verilog/engine.py +104 -12
- synth_ai/environments/examples/wordle/environment.py +0 -1
- synth_ai/environments/reproducibility/tree.py +5 -6
- synth_ai/environments/service/app.py +11 -12
- synth_ai/environments/service/core_routes.py +10 -9
- synth_ai/environments/stateful/engine.py +1 -1
- synth_ai/environments/tasks/core.py +1 -0
- synth_ai/environments/tasks/filters.py +5 -6
- synth_ai/environments/tasks/utils.py +4 -5
- synth_ai/evals/__init__.py +15 -0
- synth_ai/evals/base.py +14 -5
- synth_ai/evals/client.py +82 -0
- synth_ai/evals/types.py +42 -0
- synth_ai/http.py +8 -22
- synth_ai/http_client.py +45 -12
- synth_ai/inference/__init__.py +0 -2
- synth_ai/inference/client.py +21 -7
- synth_ai/jobs/client.py +129 -80
- synth_ai/judge_schemas.py +127 -0
- synth_ai/learning/__init__.py +51 -6
- synth_ai/learning/algorithms.py +14 -0
- synth_ai/learning/client.py +122 -30
- synth_ai/learning/config.py +2 -40
- synth_ai/learning/constants.py +0 -2
- synth_ai/learning/ft_client.py +4 -56
- synth_ai/learning/health.py +14 -8
- synth_ai/learning/jobs.py +43 -47
- synth_ai/learning/prompt_learning_client.py +276 -0
- synth_ai/learning/prompt_learning_types.py +185 -0
- synth_ai/{rl → learning/rl}/__init__.py +14 -5
- synth_ai/learning/rl/client.py +269 -0
- synth_ai/learning/rl/config.py +31 -0
- synth_ai/{rl → learning/rl}/contracts.py +5 -10
- synth_ai/{rl → learning/rl}/env_keys.py +45 -16
- synth_ai/learning/rl/secrets.py +13 -0
- synth_ai/learning/rl_client.py +2 -253
- synth_ai/learning/sft/__init__.py +29 -0
- synth_ai/learning/sft/client.py +68 -0
- synth_ai/learning/sft/config.py +270 -0
- synth_ai/learning/sft/data.py +698 -0
- synth_ai/learning/sse.py +25 -26
- synth_ai/learning/validators.py +29 -25
- synth_ai/mcp/__init__.py +5 -0
- synth_ai/mcp/__main__.py +8 -0
- synth_ai/mcp/main.py +254 -0
- synth_ai/mcp/setup.py +100 -0
- synth_ai/modal.py +257 -0
- synth_ai/pricing/__init__.py +3 -0
- synth_ai/pricing/model_pricing.py +64 -0
- synth_ai/session/__init__.py +75 -0
- synth_ai/session/client.py +383 -0
- synth_ai/session/constants.py +63 -0
- synth_ai/session/exceptions.py +105 -0
- synth_ai/session/manager.py +139 -0
- synth_ai/session/models.py +89 -0
- synth_ai/session/query.py +110 -0
- synth_ai/spec/__init__.py +46 -0
- synth_ai/spec/dataclasses.py +149 -0
- synth_ai/spec/loader.py +144 -0
- synth_ai/spec/serializer.py +199 -0
- synth_ai/spec/validation.py +250 -0
- synth_ai/streaming/__init__.py +29 -0
- synth_ai/streaming/config.py +94 -0
- synth_ai/streaming/handlers.py +589 -0
- synth_ai/streaming/streamer.py +320 -0
- synth_ai/streaming/types.py +95 -0
- synth_ai/task/__init__.py +50 -30
- synth_ai/task/apps/__init__.py +63 -19
- synth_ai/task/auth.py +35 -23
- synth_ai/task/client.py +15 -13
- synth_ai/task/config.py +261 -0
- synth_ai/task/contracts.py +165 -64
- synth_ai/task/datasets.py +9 -6
- synth_ai/task/errors.py +11 -10
- synth_ai/task/health.py +17 -11
- synth_ai/task/inference_api.py +101 -0
- synth_ai/task/json.py +58 -24
- synth_ai/task/proxy.py +59 -66
- synth_ai/task/rubrics/__init__.py +55 -0
- synth_ai/task/rubrics/loaders.py +156 -0
- synth_ai/task/rubrics/models.py +57 -0
- synth_ai/task/rubrics/scoring.py +116 -0
- synth_ai/task/rubrics/strict.py +149 -0
- synth_ai/task/rubrics.py +22 -15
- synth_ai/task/server.py +65 -31
- synth_ai/task/trace_correlation_helpers.py +328 -0
- synth_ai/task/tracing_utils.py +44 -28
- synth_ai/task/validators.py +449 -6
- synth_ai/task/vendors.py +5 -7
- synth_ai/tracing_v3/__init__.py +4 -0
- synth_ai/tracing_v3/abstractions.py +21 -4
- synth_ai/tracing_v3/config.py +167 -22
- synth_ai/tracing_v3/constants.py +21 -0
- synth_ai/tracing_v3/db_config.py +42 -29
- synth_ai/tracing_v3/decorators.py +80 -45
- synth_ai/tracing_v3/examples/basic_usage.py +15 -9
- synth_ai/tracing_v3/hooks.py +6 -4
- synth_ai/tracing_v3/llm_call_record_helpers.py +161 -61
- synth_ai/tracing_v3/migration_helper.py +1 -2
- synth_ai/tracing_v3/replica_sync.py +12 -7
- synth_ai/tracing_v3/serialization.py +130 -0
- synth_ai/tracing_v3/session_tracer.py +73 -16
- synth_ai/tracing_v3/storage/base.py +89 -1
- synth_ai/tracing_v3/storage/config.py +63 -16
- synth_ai/tracing_v3/storage/factory.py +11 -9
- synth_ai/tracing_v3/storage/utils.py +15 -11
- synth_ai/tracing_v3/trace_utils.py +317 -0
- synth_ai/tracing_v3/turso/__init__.py +8 -21
- synth_ai/tracing_v3/turso/daemon.py +123 -15
- synth_ai/tracing_v3/turso/models.py +5 -2
- synth_ai/tracing_v3/turso/native_manager.py +1293 -0
- synth_ai/tracing_v3/utils.py +5 -4
- synth_ai/tunnel.py +143 -0
- synth_ai/tunnel_deploy.py +278 -0
- synth_ai/types.py +8 -0
- synth_ai/urls.py +11 -0
- synth_ai/utils/__init__.py +166 -0
- synth_ai/utils/agents.py +74 -0
- synth_ai/utils/apps.py +152 -0
- synth_ai/utils/base_url.py +94 -0
- synth_ai/utils/bin.py +39 -0
- synth_ai/utils/claude.py +36 -0
- synth_ai/utils/cli.py +284 -0
- synth_ai/utils/config.py +81 -0
- synth_ai/utils/env.py +346 -0
- synth_ai/utils/errors.py +85 -0
- synth_ai/utils/http.py +172 -0
- synth_ai/utils/json.py +72 -0
- synth_ai/utils/log_filter.py +99 -0
- synth_ai/utils/logging.py +198 -0
- synth_ai/utils/modal.py +299 -0
- synth_ai/utils/paths.py +95 -0
- synth_ai/utils/process.py +233 -0
- synth_ai/utils/prompts.py +39 -0
- synth_ai/utils/sqld.py +122 -0
- synth_ai/utils/ssl.py +25 -0
- synth_ai/utils/task_app_discovery.py +882 -0
- synth_ai/utils/task_app_env.py +186 -0
- synth_ai/utils/task_app_state.py +318 -0
- synth_ai/utils/tunnel/__init__.py +12 -0
- synth_ai/utils/tunnel/config.py +55 -0
- synth_ai/utils/user_config.py +137 -0
- synth_ai/uvicorn.py +77 -0
- synth_ai-0.2.23.dev3.dist-info/METADATA +357 -0
- synth_ai-0.2.23.dev3.dist-info/RECORD +983 -0
- {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/entry_points.txt +0 -1
- {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/top_level.txt +1 -0
- synth_ai/cli/man.py +0 -106
- synth_ai/core/experiment.py +0 -15
- synth_ai/core/system.py +0 -15
- synth_ai/demo_registry.py +0 -258
- synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
- synth_ai/experimental/synth_oss.py +0 -446
- synth_ai/handshake.py +0 -107
- synth_ai/install_sqld.sh +0 -40
- synth_ai/learning/offline/dpo.py +0 -0
- synth_ai/learning/offline/providers.py +0 -7
- synth_ai/learning/offline/sft.py +0 -0
- synth_ai/learning/offline/shared.py +0 -0
- synth_ai/learning/online/grpo.py +0 -0
- synth_ai/learning/online/irft.py +0 -0
- synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
- synth_ai/learning/prompts/gepa.py +0 -0
- synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
- synth_ai/learning/prompts/mipro.py +0 -289
- synth_ai/learning/prompts/random_search.py +0 -246
- synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
- synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
- synth_ai/lm/__init__.py +0 -51
- synth_ai/lm/caching/constants.py +0 -6
- synth_ai/lm/caching/dbs.py +0 -0
- synth_ai/lm/caching/ephemeral.py +0 -102
- synth_ai/lm/caching/handler.py +0 -137
- synth_ai/lm/caching/initialize.py +0 -11
- synth_ai/lm/caching/persistent.py +0 -114
- synth_ai/lm/config.py +0 -110
- synth_ai/lm/constants.py +0 -32
- synth_ai/lm/core/__init__.py +0 -8
- synth_ai/lm/core/all.py +0 -73
- synth_ai/lm/core/exceptions.py +0 -7
- synth_ai/lm/core/main.py +0 -319
- synth_ai/lm/core/main_v3.py +0 -594
- synth_ai/lm/core/synth_models.py +0 -48
- synth_ai/lm/core/vendor_clients.py +0 -188
- synth_ai/lm/cost/monitor.py +0 -1
- synth_ai/lm/cost/statefulness.py +0 -1
- synth_ai/lm/injection.py +0 -80
- synth_ai/lm/overrides.py +0 -206
- synth_ai/lm/provider_support/__init__.py +0 -8
- synth_ai/lm/provider_support/anthropic.py +0 -972
- synth_ai/lm/provider_support/openai.py +0 -1139
- synth_ai/lm/provider_support/suppress_logging.py +0 -31
- synth_ai/lm/structured_outputs/handler.py +0 -440
- synth_ai/lm/structured_outputs/inject.py +0 -297
- synth_ai/lm/structured_outputs/rehabilitate.py +0 -185
- synth_ai/lm/tools/__init__.py +0 -3
- synth_ai/lm/tools/base.py +0 -172
- synth_ai/lm/unified_interface.py +0 -202
- synth_ai/lm/vendors/base.py +0 -81
- synth_ai/lm/vendors/core/anthropic_api.py +0 -387
- synth_ai/lm/vendors/core/gemini_api.py +0 -292
- synth_ai/lm/vendors/core/mistral_api.py +0 -322
- synth_ai/lm/vendors/core/openai_api.py +0 -225
- synth_ai/lm/vendors/core/synth_dev_api.py +0 -0
- synth_ai/lm/vendors/local/ollama.py +0 -0
- synth_ai/lm/vendors/openai_standard.py +0 -780
- synth_ai/lm/vendors/openai_standard_responses.py +0 -256
- synth_ai/lm/vendors/retries.py +0 -22
- synth_ai/lm/vendors/supported/custom_endpoint.py +0 -417
- synth_ai/lm/vendors/supported/deepseek.py +0 -69
- synth_ai/lm/vendors/supported/grok.py +0 -75
- synth_ai/lm/vendors/supported/groq.py +0 -16
- synth_ai/lm/vendors/supported/ollama.py +0 -15
- synth_ai/lm/vendors/supported/openrouter.py +0 -74
- synth_ai/lm/vendors/supported/together.py +0 -11
- synth_ai/lm/vendors/synth_client.py +0 -808
- synth_ai/lm/warmup.py +0 -186
- synth_ai/rl/secrets.py +0 -19
- synth_ai/scripts/verify_rewards.py +0 -100
- synth_ai/task/apps/grpo_crafter.py +0 -438
- synth_ai/tracing/__init__.py +0 -30
- synth_ai/tracing_v1/__init__.py +0 -33
- synth_ai/tracing_v3/turso/manager.py +0 -774
- synth_ai/v0/tracing/abstractions.py +0 -224
- synth_ai/v0/tracing/base_client.py +0 -91
- synth_ai/v0/tracing/client_manager.py +0 -131
- synth_ai/v0/tracing/config.py +0 -142
- synth_ai/v0/tracing/context.py +0 -146
- synth_ai/v0/tracing/decorators.py +0 -682
- synth_ai/v0/tracing/events/__init__.py +0 -0
- synth_ai/v0/tracing/events/manage.py +0 -147
- synth_ai/v0/tracing/events/scope.py +0 -86
- synth_ai/v0/tracing/events/store.py +0 -228
- synth_ai/v0/tracing/immediate_client.py +0 -151
- synth_ai/v0/tracing/local.py +0 -18
- synth_ai/v0/tracing/log_client_base.py +0 -73
- synth_ai/v0/tracing/retry_queue.py +0 -186
- synth_ai/v0/tracing/trackers.py +0 -515
- synth_ai/v0/tracing/upload.py +0 -512
- synth_ai/v0/tracing/utils.py +0 -9
- synth_ai/v0/tracing_v1/__init__.py +0 -16
- synth_ai/v0/tracing_v1/abstractions.py +0 -224
- synth_ai/v0/tracing_v1/base_client.py +0 -91
- synth_ai/v0/tracing_v1/client_manager.py +0 -131
- synth_ai/v0/tracing_v1/config.py +0 -142
- synth_ai/v0/tracing_v1/context.py +0 -146
- synth_ai/v0/tracing_v1/decorators.py +0 -703
- synth_ai/v0/tracing_v1/events/__init__.py +0 -0
- synth_ai/v0/tracing_v1/events/manage.py +0 -147
- synth_ai/v0/tracing_v1/events/scope.py +0 -86
- synth_ai/v0/tracing_v1/events/store.py +0 -228
- synth_ai/v0/tracing_v1/immediate_client.py +0 -151
- synth_ai/v0/tracing_v1/local.py +0 -18
- synth_ai/v0/tracing_v1/log_client_base.py +0 -73
- synth_ai/v0/tracing_v1/retry_queue.py +0 -186
- synth_ai/v0/tracing_v1/trackers.py +0 -515
- synth_ai/v0/tracing_v1/upload.py +0 -527
- synth_ai/v0/tracing_v1/utils.py +0 -9
- synth_ai/zyk/__init__.py +0 -30
- synth_ai-0.2.9.dev0.dist-info/METADATA +0 -131
- synth_ai-0.2.9.dev0.dist-info/RECORD +0 -444
- {synth_ai/lm/caching → examples/task_apps}/__init__.py +0 -0
- {synth_ai/lm/cost → examples/task_apps/crafter}/__init__.py +0 -0
- {synth_ai/lm/structured_outputs → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server}/__init__.py +0 -0
- {synth_ai/lm/vendors → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests}/__init__.py +0 -0
- {synth_ai/lm/vendors/core → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils}/__init__.py +0 -0
- {synth_ai/lm/vendors/local → examples/task_apps/math}/__init__.py +0 -0
- {synth_ai/lm/vendors/supported → examples/workflows}/__init__.py +0 -0
- {synth_ai/v0/tracing → examples/workflows/math_rl}/__init__.py +0 -0
- /synth_ai/{compound/cais.py → cli/__main__.py} +0 -0
- /synth_ai/{learning/filtering.py → py.typed} +0 -0
- {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,477 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Ultra-Rich Reward Shaping for Pallet Town First Section
|
|
3
|
+
|
|
4
|
+
This module provides fine-grained reward components that track important
|
|
5
|
+
achievements in the initial Pallet Town sequence: leaving the house, finding
|
|
6
|
+
Oak's lab, talking to Oak, starting the rival battle, attacking and damaging
|
|
7
|
+
the opponent, winning the battle, getting a party member, and leaving the lab.
|
|
8
|
+
|
|
9
|
+
Each milestone is carefully weighted to provide dense, meaningful feedback
|
|
10
|
+
for reinforcement learning agents learning to play Pokemon Red.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from typing import Any, Dict
|
|
14
|
+
|
|
15
|
+
from synth_ai.environments.environment.rewards.core import RewardComponent
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class LeaveBedroomReward(RewardComponent):
    """
    One-time bonus for the very first room transition of the game:
    walking from the bedroom down to the ground floor of the house.

    Reward: +20 points (one-time)
    """

    def __init__(self):
        # Latches to True once the bonus has been paid.
        self.triggered = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 20.0 on the first 38 -> 37 map transition, otherwise 0.0.

        The previous map id is read from ``action["prev_map_id"]`` and the
        current one from ``state["map_id"]``; both default to -1 when absent.
        """
        if self.triggered:
            return 0.0

        came_from = action.get("prev_map_id", -1)
        now_on = state.get("map_id", -1)

        # Per the original author: map 38 (0x26) is the bedroom and
        # map 37 (0x25) is the downstairs floor of Red's house.
        went_downstairs = came_from == 38 and now_on == 37
        if not went_downstairs:
            return 0.0

        self.triggered = True
        return 20.0
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class ExitHouseFirstTimeReward(RewardComponent):
    """
    One-time bonus for walking out of the starting house for the first time.
    Leaving through the door is treated as a major early milestone.

    Reward: +30 points (one-time)
    """

    def __init__(self):
        # Latches to True once the bonus has been paid.
        self.triggered = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 30.0 the first time the map changes from 37 to anything
        other than 37 or 38, otherwise 0.0.

        Missing ``prev_map_id`` / ``map_id`` values default to -1.
        """
        if self.triggered:
            return 0.0

        came_from = action.get("prev_map_id", -1)
        now_on = state.get("map_id", -1)

        # Map 37 is the ground floor of the house (per the original author);
        # any destination that is neither house floor counts as "outside".
        if came_from == 37 and now_on not in (37, 38):
            self.triggered = True
            return 30.0

        return 0.0
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class FindOakLabReward(RewardComponent):
    """
    One-time bonus for walking into Oak's Lab from the town map for the
    first time, i.e. for locating the lab while navigating Pallet Town.

    Reward: +40 points (one-time)
    """

    def __init__(self):
        # Latches to True once the lab has been entered and rewarded.
        self.lab_found = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 40.0 on the first 0 -> 3 map transition, otherwise 0.0.

        Missing ``prev_map_id`` / ``map_id`` values default to -1.
        """
        if self.lab_found:
            return 0.0

        transition = (action.get("prev_map_id", -1), state.get("map_id", -1))

        # Per the original author: map 0 is Pallet Town, map 3 is Oak's lab.
        if transition == (0, 3):
            self.lab_found = True
            return 40.0

        return 0.0
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
class TalkToOakReward(RewardComponent):
    """
    One-time bonus for the first dialogue opened inside Oak's lab.
    Detection is based on a text box becoming active while on the lab map.

    Reward: +50 points (one-time)
    """

    def __init__(self):
        # Latches to True once the first lab dialogue has been rewarded.
        self.oak_talked_to = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 50.0 the first time a text box opens on map 3, otherwise 0.0.

        A text box "opens" when ``state["text_box_active"]`` is truthy and
        ``action["prev_text_box_active"]`` is falsy (both default to False).
        """
        if self.oak_talked_to:
            return 0.0

        on_lab_map = state.get("map_id", -1) == 3
        if not on_lab_map or not state.get("text_box_active", False):
            return 0.0

        # Text box was already showing on the previous step: not a new dialogue.
        if action.get("prev_text_box_active", False):
            return 0.0

        self.oak_talked_to = True
        return 50.0
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
class ReceiveStarterPokemonReward(RewardComponent):
    """
    One-time bonus for obtaining the first Pokemon.
    Detected as the party size going from 0 to 1 while on the lab map.

    Reward: +100 points (one-time)
    """

    def __init__(self):
        # Latches to True once the starter has been rewarded.
        self.starter_received = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 100.0 when the party count goes 0 -> 1 on map 3, otherwise 0.0.

        ``action["prev_party_count"]`` and ``state["party_count"]`` both
        default to 0 when absent.
        """
        if self.starter_received:
            return 0.0

        party_before = action.get("prev_party_count", 0)
        party_now = state.get("party_count", 0)

        gained_first_pokemon = party_before == 0 and party_now == 1
        # The location check restricts the bonus to the lab map (id 3).
        if gained_first_pokemon and state.get("map_id", -1) == 3:
            self.starter_received = True
            return 100.0

        return 0.0
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
class EnterFirstBattleReward(RewardComponent):
    """
    One-time bonus for the first battle that starts inside Oak's lab
    (the rival battle), showing the agent got through the dialogue.

    Reward: +75 points (one-time)
    """

    def __init__(self):
        # Latches to True once the battle start has been rewarded.
        self.first_battle_entered = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 75.0 when a battle begins on map 3 for the first time, else 0.0.

        A battle "begins" when ``action["prev_in_battle"]`` is falsy and
        ``state["in_battle"]`` is truthy (both default to False).
        """
        if self.first_battle_entered:
            return 0.0

        was_fighting = action.get("prev_in_battle", False)
        is_fighting = state.get("in_battle", False)

        battle_started = is_fighting and not was_fighting
        # Only a battle starting on the lab map (id 3) qualifies.
        if battle_started and state.get("map_id", -1) == 3:
            self.first_battle_entered = True
            return 75.0

        return 0.0
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
class DealDamageToRivalReward(RewardComponent):
    """
    Reward for successfully attacking and damaging the opposing Pokemon.
    This is detected by a decrease in enemy HP during battle.

    Reward: +5 points per damaging hit, independent of how much HP the hit
    removed (cumulative, capped at 10 rewarded hits)

    Notes:
        * A hit that brings the enemy to exactly 0 HP is not counted, because
          the check requires the remaining HP to stay above zero.
        * No map check is performed, so damage dealt in any battle counts
          toward the cap, not only the rival battle.
    """

    def __init__(self):
        # Number of damaging hits rewarded so far.
        self.damage_instances = 0
        # Upper bound on rewarded hits (10 * 5.0 = 50 points maximum).
        self.max_instances = 10
        # Not read by score(); kept so existing attribute access keeps working.
        self.prev_enemy_hp = None

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 5.0 when the enemy lost HP (and survived) this step, else 0.0.

        Args:
            state: Current game state; reads ``in_battle`` and
                ``enemy_hp_current``.
            action: Previous-step info; reads ``prev_enemy_hp_current``, which
                falls back to the current HP (i.e. "no change") when absent.
        """
        if self.damage_instances >= self.max_instances:
            return 0.0

        # Track damage during battle
        if state.get("in_battle", False):
            current_enemy_hp = state.get("enemy_hp_current", 0)
            prev_enemy_hp = action.get("prev_enemy_hp_current", current_enemy_hp)

            # Detect HP decrease (damage dealt) while the enemy is still standing
            if prev_enemy_hp > current_enemy_hp > 0:
                self.damage_instances += 1
                # Flat +5 per damaging hit; the HP delta itself is not used.
                return 5.0

        return 0.0
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
class ReduceEnemyHPByHalfReward(RewardComponent):
    """
    One-time bonus for pushing the enemy's HP below the 50% mark,
    signalling significant progress in the battle.

    Reward: +25 points (one-time)
    """

    def __init__(self):
        # Latches to True once the 50% crossing has been rewarded.
        self.half_hp_achieved = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 25.0 on the step where enemy HP% crosses below 50, else 0.0.

        Only evaluated while ``state["in_battle"]`` is truthy. The current
        percentage defaults to 0.0 and the previous one to 100.0 when absent.
        """
        if self.half_hp_achieved:
            return 0.0
        if not state.get("in_battle", False):
            return 0.0

        pct_now = state.get("enemy_hp_percentage", 0.0)
        pct_before = action.get("prev_enemy_hp_percentage", 100.0)

        # A crossing means: at or above the threshold before, below it now.
        crossed_half = pct_before >= 50.0 and pct_now < 50.0
        if not crossed_half:
            return 0.0

        self.half_hp_achieved = True
        return 25.0
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
class ReduceEnemyHPToLowReward(RewardComponent):
    """
    One-time bonus for pushing the enemy's HP below the 25% mark
    (low/critical HP), meaning the battle is nearly won.

    Reward: +35 points (one-time)
    """

    def __init__(self):
        # Latches to True once the 25% crossing has been rewarded.
        self.low_hp_achieved = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 35.0 on the step where enemy HP% crosses below 25, else 0.0.

        Only evaluated while ``state["in_battle"]`` is truthy. The current
        percentage defaults to 0.0 and the previous one to 100.0 when absent.
        """
        if self.low_hp_achieved:
            return 0.0
        if not state.get("in_battle", False):
            return 0.0

        pct_now = state.get("enemy_hp_percentage", 0.0)
        pct_before = action.get("prev_enemy_hp_percentage", 100.0)

        # A crossing means: at or above the threshold before, below it now.
        crossed_quarter = pct_before >= 25.0 and pct_now < 25.0
        if not crossed_quarter:
            return 0.0

        self.low_hp_achieved = True
        return 35.0
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
class WinFirstBattleReward(RewardComponent):
    """
    One-time bonus for winning the battle fought inside Oak's lab
    (the rival battle), the climax of the opening sequence.

    Reward: +150 points (one-time)
    """

    def __init__(self):
        # Latches to True once the win has been rewarded.
        self.first_battle_won = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 150.0 on the step a battle on map 3 ends in a win, else 0.0.

        A win is the in-battle -> out-of-battle transition with
        ``state["battle_outcome"] == 1`` (0=ongoing, 1=win, 2=lose per the
        original comments).
        """
        if self.first_battle_won:
            return 0.0

        was_fighting = action.get("prev_in_battle", False)
        is_fighting = state.get("in_battle", False)
        outcome = state.get("battle_outcome", 0)

        battle_just_won = was_fighting and not is_fighting and outcome == 1
        # Restrict the bonus to a battle that ended on the lab map (id 3).
        if battle_just_won and state.get("map_id", -1) == 3:
            self.first_battle_won = True
            return 150.0

        return 0.0
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
class ExitLabAfterBattleReward(RewardComponent):
    """
    One-time bonus for walking out of Oak's Lab back into town while
    carrying at least one Pokemon, which closes the Pallet Town sequence.

    Reward: +60 points (one-time, requires having a party member)
    """

    def __init__(self):
        # Latches to True once the exit has been rewarded.
        self.exited_with_pokemon = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 60.0 on the first 3 -> 0 map transition made with a
        non-empty party, otherwise 0.0.

        Only the party count is checked; the battle result itself is not
        inspected here.
        """
        if self.exited_with_pokemon:
            return 0.0

        came_from = action.get("prev_map_id", -1)
        now_on = state.get("map_id", -1)

        left_lab_for_town = came_from == 3 and now_on == 0
        # Leaving empty-handed does not count and leaves the bonus available.
        if left_lab_for_town and state.get("party_count", 0) > 0:
            self.exited_with_pokemon = True
            return 60.0

        return 0.0
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
class FirstBattleEfficiencyReward(RewardComponent):
    """
    Bonus for finishing the lab battle in few turns, nudging the agent
    toward efficient battle strategies from the start.

    Reward: +20 points if won in ≤5 turns, +10 if ≤8 turns
    """

    def __init__(self):
        # Latches once the lab battle win has been assessed (even if it paid 0).
        self.efficiency_rewarded = False
        # Highest ``battle_turn`` value observed on any in-battle step so far.
        self.max_turns_seen = 0

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Track the turn counter during battle and pay out once on a lab win.

        Returns 20.0 or 10.0 on the step a battle on map 3 ends with
        ``battle_outcome == 1``, depending on the highest turn seen; 0.0 on
        every other step (including a win that took more than 8 turns).
        """
        if self.efficiency_rewarded:
            return 0.0

        is_fighting = state.get("in_battle", False)

        # Keep a running maximum of the turn counter while a battle is active.
        if is_fighting:
            self.max_turns_seen = max(self.max_turns_seen, state.get("battle_turn", 0))

        was_fighting = action.get("prev_in_battle", False)
        outcome = state.get("battle_outcome", 0)

        battle_just_won = was_fighting and not is_fighting and outcome == 1
        if not battle_just_won or state.get("map_id", -1) != 3:
            return 0.0

        # The assessment happens exactly once, whatever the payout.
        self.efficiency_rewarded = True
        if self.max_turns_seen <= 5:
            return 20.0
        if self.max_turns_seen <= 8:
            return 10.0
        return 0.0
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
class KeepPokemonHealthyReward(RewardComponent):
    """
    Bonus for ending the lab battle with the lead Pokemon above 50% HP,
    encouraging careful play and resource management.

    Reward: +30 points (one-time, checked when a battle in the lab is won)
    """

    def __init__(self):
        # Latches only when the bonus is actually paid out.
        self.health_bonus_given = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 30.0 when a battle on map 3 is won with the first party
        member's ``hp_percentage`` above 50, otherwise 0.0.

        The first entry of ``state["party_pokemon"]`` is the one inspected;
        an empty or missing party list yields 0.0.
        """
        if self.health_bonus_given:
            return 0.0

        was_fighting = action.get("prev_in_battle", False)
        is_fighting = state.get("in_battle", False)
        outcome = state.get("battle_outcome", 0)

        battle_just_won = was_fighting and not is_fighting and outcome == 1
        if not battle_just_won or state.get("map_id", -1) != 3:
            return 0.0

        party = state.get("party_pokemon", [])
        if len(party) == 0:
            return 0.0

        # Only the lead Pokemon's remaining health is considered.
        lead_hp_pct = party[0].get("hp_percentage", 0)
        if lead_hp_pct > 50.0:
            self.health_bonus_given = True
            return 30.0

        return 0.0
|
|
384
|
+
|
|
385
|
+
|
|
386
|
+
class NavigationSpeedReward(RewardComponent):
    """
    One-time bonus for finishing the Pallet Town sequence in few steps.

    Every call to ``score`` made before the bonus is paid counts as one step.
    The sequence is considered finished on the step where the map changes
    from 3 to 0 while ``party_count`` is positive.

    Reward: 50 (<= 40 steps), 30 (<= 60), 15 (<= 80), otherwise 5.
    """

    def __init__(self):
        # Number of score() calls made before the payout.
        self.step_count = 0
        # Mirrors reward_given; set on the same step.
        self.sequence_complete = False
        # Latched once the bonus has been paid.
        self.reward_given = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Count this step and pay the speed bonus when the sequence ends.

        Args:
            state: Current observation; reads "map_id" and "party_count".
            action: Step metadata; reads "prev_map_id".

        Returns:
            The tiered bonus on the completing step, otherwise 0.0.
        """
        if self.reward_given:
            return 0.0

        self.step_count += 1

        came_from = action.get("prev_map_id", -1)
        now_on = state.get("map_id", -1)

        # Completion = stepping from map 3 (lab) to map 0 ...
        left_lab = came_from == 3 and now_on == 0
        if not left_lab:
            return 0.0

        # ... while carrying at least one Pokemon.
        if not (state.get("party_count", 0) > 0):
            return 0.0

        self.sequence_complete = True
        self.reward_given = True

        # Tiers are checked from most to least efficient; the original
        # author notes the optimal path is roughly 30-40 steps.
        for step_ceiling, payout in ((40, 50.0), (60, 30.0), (80, 15.0)):
            if self.step_count <= step_ceiling:
                return payout
        return 5.0  # Completed but slow
|
|
427
|
+
|
|
428
|
+
|
|
429
|
+
# Composite reward for the complete Pallet Town sequence
|
|
430
|
+
class PalletTownProgressionCompositeReward(RewardComponent):
    """
    Aggregate of every Pallet Town progression reward component.

    Each call to ``score`` forwards the same state/action pair to all
    sub-components, in the order listed below, and returns the sum.

    Nominal maximum per milestone, as listed by the original author (values
    for components defined outside this section are not re-verified here;
    the original summary quotes a total of "~600+"):
    - Leave bedroom: 20
    - Exit house: 30
    - Find lab: 40
    - Talk to Oak: 50
    - Get starter: 100
    - Enter battle: 75
    - Deal damage: 50 (10 instances x 5)
    - Half HP: 25
    - Low HP: 35
    - Win battle: 150
    - Exit lab: 60
    - Efficiency: 20
    - Keep healthy: 30
    - Navigation: 50

    The intent is dense feedback across the whole sequence.
    """

    def __init__(self):
        # Instantiated in progression order; score() evaluates them in
        # this same order.
        milestone_types = (
            LeaveBedroomReward,
            ExitHouseFirstTimeReward,
            FindOakLabReward,
            TalkToOakReward,
            ReceiveStarterPokemonReward,
            EnterFirstBattleReward,
            DealDamageToRivalReward,
            ReduceEnemyHPByHalfReward,
            ReduceEnemyHPToLowReward,
            WinFirstBattleReward,
            ExitLabAfterBattleReward,
            FirstBattleEfficiencyReward,
            KeepPokemonHealthyReward,
            NavigationSpeedReward,
        )
        self.components = [milestone() for milestone in milestone_types]

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return the sum of all sub-component rewards for this step.

        Components are awaited one at a time, so their internal state is
        updated in list order.
        """
        accumulated = 0.0
        for part in self.components:
            accumulated = accumulated + await part.score(state, action)
        return accumulated
|
|
477
|
+
|
|
@@ -85,6 +85,33 @@ def extract_inventory(memory) -> List[Dict[str, Any]]:
|
|
|
85
85
|
return inventory
|
|
86
86
|
|
|
87
87
|
|
|
88
|
+
def extract_battle_state(memory) -> Dict[str, Any]:
    """Read the enemy/battle fields out of emulator memory.

    Args:
        memory: Memory object accepted by ``get_byte`` / ``get_word``.

    Returns:
        A dict with the keys "enemy_hp_current", "enemy_hp_max",
        "enemy_level", "enemy_species_id", "enemy_hp_percentage" and
        "battle_turn".  When the in-battle flag byte is not positive, all
        values are zero and no further memory is read.
    """
    battle_active = get_byte(memory, IN_BATTLE_FLAG) > 0

    if battle_active:
        hp_now = get_word(memory, ENEMY_HP_CURRENT)
        hp_cap = get_word(memory, ENEMY_HP_MAX)
        level = get_byte(memory, ENEMY_LEVEL)
        species = get_byte(memory, ENEMY_SPECIES)

        # Guard against division by zero when max HP has not been populated.
        if hp_cap > 0:
            raw_pct = hp_now / hp_cap * 100
        else:
            raw_pct = 0
        hp_pct = round(raw_pct, 1)

        turn = get_byte(memory, BATTLE_TURN)
    else:
        # Outside of battle every field is reported as zero.
        hp_now = 0
        hp_cap = 0
        level = 0
        species = 0
        hp_pct = 0.0
        turn = 0

    return {
        "enemy_hp_current": hp_now,
        "enemy_hp_max": hp_cap,
        "enemy_level": level,
        "enemy_species_id": species,
        "enemy_hp_percentage": hp_pct,
        "battle_turn": turn,
    }
|
|
113
|
+
|
|
114
|
+
|
|
88
115
|
def extract_game_state(memory) -> Dict[str, Any]:
|
|
89
116
|
"""Extract comprehensive game state from Game Boy memory"""
|
|
90
117
|
# Get party and inventory details
|
|
@@ -93,6 +120,9 @@ def extract_game_state(memory) -> Dict[str, Any]:
|
|
|
93
120
|
|
|
94
121
|
# Get money
|
|
95
122
|
money = get_bcd_3byte(memory, MONEY)
|
|
123
|
+
|
|
124
|
+
# Get battle state
|
|
125
|
+
battle_state = extract_battle_state(memory)
|
|
96
126
|
|
|
97
127
|
# Basic game state
|
|
98
128
|
state = {
|
|
@@ -111,6 +141,8 @@ def extract_game_state(memory) -> Dict[str, Any]:
|
|
|
111
141
|
"party_pokemon": party,
|
|
112
142
|
"inventory_count": len(inventory),
|
|
113
143
|
"inventory_items": inventory,
|
|
144
|
+
# Battle state
|
|
145
|
+
**battle_state,
|
|
114
146
|
# Legacy fields for compatibility (use first Pokemon if available)
|
|
115
147
|
"party_level": party[0]["level"] if party else 0,
|
|
116
148
|
"party_hp_current": party[0]["hp_current"] if party else 0,
|
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
from typing import Any, Dict, List, Optional, Union
|
|
4
|
+
import base64
|
|
5
|
+
import time
|
|
6
|
+
from io import BytesIO
|
|
4
7
|
|
|
5
8
|
from pydantic import BaseModel, Field
|
|
6
9
|
|
|
@@ -17,6 +20,14 @@ from synth_ai.environments.environment.tools import (
|
|
|
17
20
|
)
|
|
18
21
|
from synth_ai.environments.reproducibility.core import ReproducibleEnvironment
|
|
19
22
|
from synth_ai.environments.stateful.core import StatefulEnvironment
|
|
23
|
+
from synth_ai.tracing_v3.abstractions import EnvironmentEvent, TimeRecord
|
|
24
|
+
from synth_ai.tracing_v3.session_tracer import SessionTracer
|
|
25
|
+
try: # optional for image encoding
|
|
26
|
+
import numpy as _np # type: ignore
|
|
27
|
+
from PIL import Image as _PILImage # type: ignore
|
|
28
|
+
except Exception: # pragma: no cover - optional dependency
|
|
29
|
+
_np = None # type: ignore
|
|
30
|
+
_PILImage = None # type: ignore
|
|
20
31
|
|
|
21
32
|
# Import logging configuration to suppress JAX debug messages
|
|
22
33
|
from .engine import (
|
|
@@ -113,6 +124,7 @@ class PokemonRedEnvironment(StatefulEnvironment, ReproducibleEnvironment[Pokemon
|
|
|
113
124
|
task_instance: Optional[PokemonRedTaskInstance] = None,
|
|
114
125
|
custom_step_obs: Optional[GetObservationCallable] = None,
|
|
115
126
|
custom_ckpt_obs: Optional[GetObservationCallable] = None,
|
|
127
|
+
tracer: Optional[SessionTracer] = None,
|
|
116
128
|
):
|
|
117
129
|
self.name = "PokemonRed"
|
|
118
130
|
self.task_instance = task_instance or DEFAULT_TASK_INSTANCE
|
|
@@ -121,6 +133,7 @@ class PokemonRedEnvironment(StatefulEnvironment, ReproducibleEnvironment[Pokemon
|
|
|
121
133
|
custom_ckpt_obs or PokemonRedObservationCallable()
|
|
122
134
|
)
|
|
123
135
|
self.engine = PokemonRedEngine(self.task_instance)
|
|
136
|
+
self.tracer = tracer
|
|
124
137
|
|
|
125
138
|
# Register tools
|
|
126
139
|
self._press_button_tool = PressButtonTool(self.engine)
|
|
@@ -195,6 +208,27 @@ class PokemonRedEnvironment(StatefulEnvironment, ReproducibleEnvironment[Pokemon
|
|
|
195
208
|
if tool_result.error and hasattr(pub_state, "error_info"):
|
|
196
209
|
pub_state.error_info = tool_result.error
|
|
197
210
|
|
|
211
|
+
# Record EnvironmentEvent for tracing if tracer is available
|
|
212
|
+
if self.tracer and hasattr(priv_state, 'reward_last_step'):
|
|
213
|
+
# Get state information for the event
|
|
214
|
+
prev_state = getattr(self.engine, '_previous_state', None)
|
|
215
|
+
terminated = getattr(priv_state, 'terminated', False)
|
|
216
|
+
truncated = getattr(priv_state, 'truncated', False)
|
|
217
|
+
|
|
218
|
+
# Convert states to dict for serialization
|
|
219
|
+
pub_state_dict = pub_state.__dict__ if hasattr(pub_state, '__dict__') else pub_state
|
|
220
|
+
|
|
221
|
+
env_event = EnvironmentEvent(
|
|
222
|
+
system_instance_id="pokemon_red_env",
|
|
223
|
+
time_record=TimeRecord(event_time=time.time()),
|
|
224
|
+
reward=float(priv_state.reward_last_step),
|
|
225
|
+
terminated=terminated,
|
|
226
|
+
truncated=truncated,
|
|
227
|
+
system_state_before=prev_state if prev_state else None,
|
|
228
|
+
system_state_after=pub_state_dict,
|
|
229
|
+
)
|
|
230
|
+
await self.tracer.record_event(env_event)
|
|
231
|
+
|
|
198
232
|
return await self._to_observation(
|
|
199
233
|
priv_state, pub_state, self.custom_step_observation_callable
|
|
200
234
|
)
|
|
@@ -220,6 +254,58 @@ class PokemonRedEnvironment(StatefulEnvironment, ReproducibleEnvironment[Pokemon
|
|
|
220
254
|
"""Convert states to observation using the specified callback"""
|
|
221
255
|
active_obs_cb = obs_cb or PokemonRedObservationCallable()
|
|
222
256
|
observation = await active_obs_cb.get_observation(pub, priv)
|
|
257
|
+
|
|
258
|
+
# Include raw state fields for reward calculation
|
|
259
|
+
if isinstance(observation, dict):
|
|
260
|
+
observation["map_id"] = pub.world.map_id if pub.world else None
|
|
261
|
+
observation["player_x"] = pub.world.player_x if pub.world else None
|
|
262
|
+
observation["player_y"] = pub.world.player_y if pub.world else None
|
|
263
|
+
observation["party_count"] = len(pub.party) if pub.party else 0
|
|
264
|
+
observation["party_pokemon"] = [
|
|
265
|
+
{
|
|
266
|
+
"species_id": p.species_id,
|
|
267
|
+
"level": p.level,
|
|
268
|
+
"hp_current": p.hp_current,
|
|
269
|
+
"hp_max": p.hp_max,
|
|
270
|
+
"hp_percentage": (p.hp_current / p.hp_max * 100) if p.hp_max > 0 else 0,
|
|
271
|
+
}
|
|
272
|
+
for p in (pub.party or [])
|
|
273
|
+
]
|
|
274
|
+
observation["in_battle"] = pub.system.in_battle if pub.system else False
|
|
275
|
+
observation["battle_outcome"] = pub.system.battle_outcome if pub.system else 0
|
|
276
|
+
observation["text_box_active"] = pub.system.text_box_active if pub.system else False
|
|
277
|
+
observation["enemy_hp_current"] = pub.system.enemy_hp_current if pub.system else 0
|
|
278
|
+
observation["enemy_hp_max"] = pub.system.enemy_hp_max if pub.system else 0
|
|
279
|
+
observation["enemy_hp_percentage"] = pub.system.enemy_hp_percentage if pub.system else 0.0
|
|
280
|
+
observation["badges"] = pub.progress.badges if pub.progress else 0
|
|
281
|
+
# Attach latest PNG frame for VLM agents if available
|
|
282
|
+
try:
|
|
283
|
+
emulator = getattr(self.engine, "emulator", None)
|
|
284
|
+
screen = getattr(emulator, "screen", None)
|
|
285
|
+
if screen is not None and _np is not None and _PILImage is not None:
|
|
286
|
+
# Prefer documented ndarray property if present
|
|
287
|
+
frame = getattr(screen, "ndarray", None)
|
|
288
|
+
if frame is None and hasattr(screen, "image"):
|
|
289
|
+
frame = screen.image
|
|
290
|
+
if isinstance(frame, _np.ndarray) and frame.ndim == 3 and frame.shape[0] > 0 and frame.shape[1] > 0:
|
|
291
|
+
array_uint8 = (
|
|
292
|
+
frame.astype("uint8") if frame.dtype != _np.uint8 else frame
|
|
293
|
+
)
|
|
294
|
+
# PyBoy gives RGBA; convert to RGB
|
|
295
|
+
if array_uint8.shape[-1] == 4:
|
|
296
|
+
array_uint8 = array_uint8[:, :, :3]
|
|
297
|
+
img = _PILImage.fromarray(array_uint8, mode="RGB")
|
|
298
|
+
buf = BytesIO()
|
|
299
|
+
img.save(buf, format="PNG")
|
|
300
|
+
encoded = base64.b64encode(buf.getvalue()).decode("ascii")
|
|
301
|
+
if isinstance(observation, dict):
|
|
302
|
+
observation["observation_image_base64"] = encoded
|
|
303
|
+
observation["observation_image_format"] = "png"
|
|
304
|
+
observation["observation_image_width"] = int(array_uint8.shape[1])
|
|
305
|
+
observation["observation_image_height"] = int(array_uint8.shape[0])
|
|
306
|
+
observation["observation_image_data_url"] = f"data:image/png;base64,{encoded}"
|
|
307
|
+
except Exception:
|
|
308
|
+
pass
|
|
223
309
|
if extra_obs and isinstance(observation, dict):
|
|
224
310
|
observation.update(extra_obs)
|
|
225
311
|
return observation
|