synth-ai 0.2.13.dev1__py3-none-any.whl → 0.2.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/multi_step/configs/README_verilog_rl.md +77 -0
- examples/multi_step/configs/VERILOG_REWARDS.md +90 -0
- examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +183 -0
- examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
- examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +17 -5
- examples/multi_step/configs/crafter_synth_backend.md +40 -0
- examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
- examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
- examples/multi_step/configs/verilog_rl_lora.toml +190 -0
- examples/multi_step/judges/crafter_backend_judge.py +220 -0
- examples/multi_step/judges/verilog_backend_judge.py +234 -0
- examples/multi_step/readme.md +48 -0
- examples/multi_step/verilog_rl_lora.md +218 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +1 -1
- examples/sft/evaluate.py +2 -0
- examples/sft/generate_traces.py +2 -0
- examples/swe/task_app/grpo_swe_mini.py +56 -26
- examples/swe/task_app/hosted/rollout.py +42 -0
- examples/swe/task_app/hosted/test_service.py +5 -6
- examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
- examples/task_apps/TESTING.md +275 -0
- examples/task_apps/__init__.py +0 -0
- examples/task_apps/crafter/CREATE_SFT_DATASET.md +273 -0
- examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
- examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +174 -0
- examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +268 -0
- examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
- examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
- examples/task_apps/crafter/__init__.py +0 -0
- examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
- examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
- examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
- examples/task_apps/crafter/task_app/__init__.py +5 -0
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter.py +324 -21
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter_task_app.py +1 -1
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/environment.py +10 -0
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/policy.py +76 -7
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/react_agent.py +17 -2
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/openai_client.py +25 -3
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/policy_routes.py +77 -4
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/rollout.py +117 -9
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_service.py +5 -6
- examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +218 -0
- examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/__init__.py +0 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/__init__.py +0 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/__init__.py +0 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
- examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
- examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
- examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
- examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
- examples/task_apps/enron/__init__.py +1 -0
- examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
- examples/task_apps/enron/filter_sft.toml +5 -0
- examples/task_apps/enron/task_app/README.md +14 -0
- examples/task_apps/enron/task_app/__init__.py +1 -0
- examples/task_apps/enron/task_app/grpo_enron.py +906 -0
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
- examples/task_apps/enron/tests/__init__.py +4 -0
- examples/task_apps/enron/tests/conftest.py +115 -0
- examples/task_apps/enron/tests/integration/__init__.py +4 -0
- examples/task_apps/enron/tests/integration/test_enron_eval.py +179 -0
- examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
- examples/task_apps/enron/tests/unit/__init__.py +4 -0
- examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
- examples/task_apps/math/__init__.py +0 -0
- examples/{rl/task_app → task_apps/math}/math_single_step.py +19 -10
- examples/task_apps/pokemon_battle/__init__.py +2 -0
- examples/task_apps/pokemon_battle/modal_app.py +104 -0
- examples/task_apps/pokemon_battle/task_app/README.md +68 -0
- examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
- examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
- examples/task_apps/pokemon_red/README.md +357 -0
- examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +415 -0
- examples/task_apps/pokemon_red/__init__.py +3 -0
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +29 -0
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +225 -0
- examples/task_apps/pokemon_red/pallet_town_rl_config.toml +75 -0
- examples/task_apps/pokemon_red/task_app.py +799 -0
- examples/task_apps/pokemon_red/test_pallet_town_rewards.py +193 -0
- examples/task_apps/sokoban/README.md +307 -0
- examples/task_apps/sokoban/__init__.py +3 -0
- examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
- examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
- examples/task_apps/sokoban/filter_sft.toml +5 -0
- examples/task_apps/sokoban/task_app.py +1058 -0
- examples/task_apps/sokoban/tests/__init__.py +4 -0
- examples/task_apps/sokoban/tests/conftest.py +113 -0
- examples/task_apps/sokoban/tests/integration/__init__.py +4 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
- examples/task_apps/sokoban/tests/unit/__init__.py +4 -0
- examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
- examples/task_apps/verilog/__init__.py +1 -0
- examples/task_apps/verilog/eval_groq_qwen32b.toml +24 -0
- examples/task_apps/verilog/filter_sft.toml +5 -0
- examples/task_apps/verilog/task_app/README.md +12 -0
- examples/task_apps/verilog/task_app/__init__.py +1 -0
- examples/task_apps/verilog/task_app/grpo_verilog.py +1166 -0
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
- examples/task_apps/verilog/tests/__init__.py +4 -0
- examples/task_apps/verilog/tests/conftest.py +115 -0
- examples/task_apps/verilog/tests/integration/__init__.py +4 -0
- examples/task_apps/verilog/tests/integration/test_verilog_eval.py +181 -0
- examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
- examples/task_apps/verilog/tests/unit/__init__.py +4 -0
- examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
- examples/vlm/crafter_openai_vlm_agent.py +4 -4
- examples/vlm/run_crafter_vlm_benchmark.py +4 -4
- examples/warming_up_to_rl/groq_test.py +2 -0
- examples/warming_up_to_rl/run_local_rollout.py +2 -0
- examples/warming_up_to_rl/run_local_rollout_modal.py +2 -0
- examples/warming_up_to_rl/run_local_rollout_parallel.py +2 -0
- examples/warming_up_to_rl/run_local_rollout_traced.py +2 -0
- examples/warming_up_to_rl/run_rollout_remote.py +2 -0
- examples/workflows/__init__.py +0 -0
- examples/workflows/math_rl/__init__.py +0 -0
- examples/workflows/math_rl/download_dataset.py +80 -0
- synth_ai/__init__.py +2 -2
- synth_ai/api/models/supported.py +1 -0
- synth_ai/api/train/builders.py +25 -11
- synth_ai/api/train/cli.py +12 -6
- synth_ai/api/train/configs/__init__.py +10 -10
- synth_ai/api/train/configs/rl.py +5 -4
- synth_ai/api/train/configs/sft.py +4 -3
- synth_ai/api/train/env_resolver.py +5 -2
- synth_ai/api/train/supported_algos.py +10 -5
- synth_ai/api/train/utils.py +7 -4
- synth_ai/cli/__init__.py +48 -59
- synth_ai/cli/_modal_wrapper.py +3 -2
- synth_ai/cli/_storage.py +4 -3
- synth_ai/cli/_validate_task_app.py +11 -0
- synth_ai/cli/balance.py +4 -3
- synth_ai/cli/calc.py +2 -2
- synth_ai/cli/demo.py +14 -7
- synth_ai/cli/legacy_root_backup.py +1 -1
- synth_ai/cli/recent.py +1 -1
- synth_ai/cli/rl_demo.py +8 -7
- synth_ai/cli/root.py +0 -97
- synth_ai/cli/status.py +1 -1
- synth_ai/cli/task_apps.py +1922 -190
- synth_ai/cli/traces.py +1 -1
- synth_ai/cli/tui.py +57 -0
- synth_ai/cli/turso.py +1 -1
- synth_ai/cli/watch.py +1 -1
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +29 -17
- synth_ai/environments/examples/crafter_classic/environment.py +1 -1
- synth_ai/environments/examples/enron/engine.py +7 -2
- synth_ai/environments/examples/enron/environment.py +68 -0
- synth_ai/environments/examples/red/engine.py +27 -0
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
- synth_ai/environments/examples/red/environment.py +60 -0
- synth_ai/environments/examples/sokoban/taskset.py +116 -0
- synth_ai/environments/examples/verilog/engine.py +104 -12
- synth_ai/evals/client.py +58 -61
- synth_ai/jobs/client.py +16 -4
- synth_ai/judge_schemas.py +9 -9
- synth_ai/py.typed +0 -0
- synth_ai/task/__init__.py +24 -5
- synth_ai/task/apps/__init__.py +1 -0
- synth_ai/task/config.py +257 -0
- synth_ai/task/contracts.py +138 -39
- synth_ai/task/proxy.py +48 -56
- synth_ai/task/rubrics/__init__.py +56 -0
- synth_ai/task/rubrics/loaders.py +152 -0
- synth_ai/task/rubrics/models.py +57 -0
- synth_ai/task/rubrics/scoring.py +116 -0
- synth_ai/{rubrics/validators.py → task/rubrics/strict.py} +53 -30
- synth_ai/task/server.py +8 -7
- synth_ai/task/trace_correlation_helpers.py +315 -0
- synth_ai/task/validators.py +413 -6
- synth_ai/tracing_v3/abstractions.py +3 -3
- synth_ai/tracing_v3/decorators.py +7 -3
- synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
- synth_ai/tracing_v3/replica_sync.py +4 -4
- synth_ai/tracing_v3/serialization.py +5 -5
- synth_ai/tracing_v3/session_tracer.py +16 -6
- synth_ai/tracing_v3/storage/base.py +29 -29
- synth_ai/tracing_v3/storage/config.py +3 -3
- synth_ai/tracing_v3/trace_utils.py +317 -0
- synth_ai/tracing_v3/turso/daemon.py +8 -7
- synth_ai/tracing_v3/turso/native_manager.py +66 -43
- synth_ai/tracing_v3/utils.py +3 -3
- synth_ai/tui/__init__.py +5 -0
- synth_ai/tui/__main__.py +13 -0
- synth_ai/tui/cli/__init__.py +1 -0
- synth_ai/tui/cli/query_experiments.py +164 -0
- synth_ai/tui/cli/query_experiments_v3.py +164 -0
- synth_ai/tui/dashboard.py +906 -0
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/METADATA +4 -1
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/RECORD +278 -126
- examples/agora_ex/README_MoE.md +0 -224
- examples/agora_ex/__init__.py +0 -7
- examples/agora_ex/agora_ex.py +0 -65
- examples/agora_ex/agora_ex_task_app.py +0 -590
- examples/agora_ex/configs/rl_lora_qwen3_moe_2xh200.toml +0 -121
- examples/agora_ex/reward_fn_grpo-human.py +0 -129
- examples/agora_ex/system_prompt_CURRENT.md +0 -63
- examples/agora_ex/task_app/agora_ex_task_app.py +0 -590
- examples/agora_ex/task_app/reward_fn_grpo-human.py +0 -129
- examples/agora_ex/task_app/system_prompt_CURRENT.md +0 -63
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +0 -62
- synth_ai/rubrics/__init__.py +0 -22
- synth_ai/task/rubrics.py +0 -219
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/README.md +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/README.md +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/branching.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/environment_routes.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/app.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/shared.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/tools.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/hosted_app.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/main.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/registry.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/volume.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_agents.py +0 -0
- /examples/{rl/task_app → task_apps/math}/README.md +0 -0
- /examples/{rl/task_app → task_apps/math}/math_task_app.py +0 -0
- /examples/{rl → workflows/math_rl}/configs/eval_base_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/eval_rl_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen17.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/rl_from_ft_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/run_eval.py +0 -0
- /examples/{rl → workflows/math_rl}/run_rl_and_save.py +0 -0
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,357 @@
|
|
|
1
|
+
# Pokémon Red Task App
|
|
2
|
+
|
|
3
|
+
A reinforcement learning environment for Pokémon Red using PyBoy emulation with VLM support.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- **Full Game Boy Emulation**: Uses PyBoy to run authentic Pokémon Red ROM
|
|
8
|
+
- **VLM Support**: Base64-encoded PNG frames for vision models (GPT-4V, Qwen-VL, etc.)
|
|
9
|
+
- **Policy Proxy**: OpenAI/Groq API integration for LLM-driven gameplay
|
|
10
|
+
- **Rich State Extraction**: Comprehensive game state from RAM (HP, position, party, battle data)
|
|
11
|
+
- **Reward Shaping**: Ultra-dense reward functions for RL training
|
|
12
|
+
- **Instant Start**: Pre-configured init state skips intro (starts in Red's bedroom)
|
|
13
|
+
|
|
14
|
+
## Quick Start
|
|
15
|
+
|
|
16
|
+
### 1. Start the Task App Server
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
# From synth-ai root
|
|
20
|
+
uv run -m synth_ai task-app serve pokemon_red --port 8913
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
### 2. Run a Random Rollout
|
|
24
|
+
|
|
25
|
+
```python
|
|
26
|
+
import httpx
|
|
27
|
+
import asyncio
|
|
28
|
+
|
|
29
|
+
async def test_rollout():
|
|
30
|
+
async with httpx.AsyncClient(timeout=60.0) as client:
|
|
31
|
+
response = await client.post(
|
|
32
|
+
"http://127.0.0.1:8913/rollout",
|
|
33
|
+
json={
|
|
34
|
+
"ops": [
|
|
35
|
+
{"button": "DOWN", "frames": 10},
|
|
36
|
+
{"button": "A", "frames": 20},
|
|
37
|
+
{"button": "RIGHT", "frames": 15},
|
|
38
|
+
],
|
|
39
|
+
"policy": {"config": {}},
|
|
40
|
+
},
|
|
41
|
+
)
|
|
42
|
+
result = response.json()
|
|
43
|
+
print(f"Steps: {len(result['steps'])}")
|
|
44
|
+
|
|
45
|
+
asyncio.run(test_rollout())
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
### 3. Run with VLM Policy
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
# Runs the reward-function demo script (for an LLM-driven policy run, see "Policy Evaluation" under Examples)
|
|
52
|
+
uv run python examples/task_apps/pokemon_red/test_pallet_town_rewards.py
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
## Reward Functions
|
|
56
|
+
|
|
57
|
+
### Pallet Town Progression (Recommended for Beginners)
|
|
58
|
+
|
|
59
|
+
**Location**: `synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py`
|
|
60
|
+
|
|
61
|
+
Ultra-rich reward shaping for the opening sequence:
|
|
62
|
+
|
|
63
|
+
| Milestone | Reward | Description |
|
|
64
|
+
|-----------|--------|-------------|
|
|
65
|
+
| Leave bedroom | +20 | Go downstairs |
|
|
66
|
+
| Exit house | +30 | Enter Pallet Town |
|
|
67
|
+
| Find Oak's lab | +40 | Discover and enter lab |
|
|
68
|
+
| Talk to Oak | +50 | First dialogue |
|
|
69
|
+
| Get starter | +100 | Receive your first Pokémon |
|
|
70
|
+
| Enter battle | +75 | Start rival battle |
|
|
71
|
+
| Deal damage | +50 | Attack rival (10×5) |
|
|
72
|
+
| Half HP | +25 | Reduce enemy to <50% HP |
|
|
73
|
+
| Low HP | +35 | Reduce enemy to <25% HP |
|
|
74
|
+
| Win battle | +150 | Defeat rival |
|
|
75
|
+
| Exit lab | +60 | Leave with Pokémon |
|
|
76
|
+
| **Efficiency bonuses** | +100 | Fast navigation, healthy Pokémon |
|
|
77
|
+
|
|
78
|
+
**Total: ~635-735 points** (635 from milestones, plus up to 100 in efficiency bonuses)
|
|
79
|
+
|
|
80
|
+
See [`PALLET_TOWN_REWARDS.md`](../../../synth_ai/environments/examples/red/engine_helpers/reward_library/PALLET_TOWN_REWARDS.md) for full documentation.
|
|
81
|
+
|
|
82
|
+
### Usage in Training
|
|
83
|
+
|
|
84
|
+
```toml
|
|
85
|
+
# pallet_town_rl_config.toml
|
|
86
|
+
[reward]
|
|
87
|
+
reward_type = "composite"
|
|
88
|
+
reward_class = "synth_ai.environments.examples.red.engine_helpers.reward_library.pallet_town_progression.PalletTownProgressionCompositeReward"
|
|
89
|
+
|
|
90
|
+
[training]
|
|
91
|
+
algorithm = "ppo"
|
|
92
|
+
max_steps_per_episode = 500
|
|
93
|
+
num_episodes = 1000
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
## State Schema
|
|
97
|
+
|
|
98
|
+
The environment exposes comprehensive game state:
|
|
99
|
+
|
|
100
|
+
```python
|
|
101
|
+
{
|
|
102
|
+
# Position
|
|
103
|
+
"map_id": int, # Current location
|
|
104
|
+
"player_x": int,
|
|
105
|
+
"player_y": int,
|
|
106
|
+
|
|
107
|
+
# Party
|
|
108
|
+
"party_count": int,
|
|
109
|
+
"party_pokemon": [
|
|
110
|
+
{
|
|
111
|
+
"species_id": int,
|
|
112
|
+
"level": int,
|
|
113
|
+
"hp_current": int,
|
|
114
|
+
"hp_max": int,
|
|
115
|
+
"hp_percentage": float,
|
|
116
|
+
"xp": int,
|
|
117
|
+
}
|
|
118
|
+
],
|
|
119
|
+
|
|
120
|
+
# Battle
|
|
121
|
+
"in_battle": bool,
|
|
122
|
+
"battle_outcome": int, # 0=ongoing, 1=win, 2=lose
|
|
123
|
+
"enemy_hp_current": int,
|
|
124
|
+
"enemy_hp_max": int,
|
|
125
|
+
"enemy_hp_percentage": float,
|
|
126
|
+
"enemy_level": int,
|
|
127
|
+
"enemy_species_id": int,
|
|
128
|
+
"battle_turn": int,
|
|
129
|
+
|
|
130
|
+
# Dialogue & UI
|
|
131
|
+
"text_box_active": bool,
|
|
132
|
+
"menu_state": int,
|
|
133
|
+
|
|
134
|
+
# Progress
|
|
135
|
+
"badges": int, # Bitfield of earned badges
|
|
136
|
+
"money": int,
|
|
137
|
+
|
|
138
|
+
# VLM Support
|
|
139
|
+
"observation_image_base64": str, # PNG frame for vision models
|
|
140
|
+
}
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
## Action Space
|
|
144
|
+
|
|
145
|
+
### Button Actions
|
|
146
|
+
|
|
147
|
+
```python
|
|
148
|
+
{
|
|
149
|
+
"button": "A" | "B" | "START" | "SELECT" | "UP" | "DOWN" | "LEFT" | "RIGHT",
|
|
150
|
+
"frames": int, # How long to hold the button (60fps)
|
|
151
|
+
}
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
### Policy-Driven Actions
|
|
155
|
+
|
|
156
|
+
When using LLM policies, the task app proxies requests to OpenAI/Groq:
|
|
157
|
+
|
|
158
|
+
```python
|
|
159
|
+
{
|
|
160
|
+
"policy": {
|
|
161
|
+
"config": {
|
|
162
|
+
"model": "gpt-4-turbo",
|
|
163
|
+
"api_key": "...",
|
|
164
|
+
# or for Groq:
|
|
165
|
+
# "model": "qwen-2.5-7b",
|
|
166
|
+
# "base_url": "https://api.groq.com/openai/v1",
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
## Files
|
|
173
|
+
|
|
174
|
+
- **`task_app.py`**: Main task app entry point
|
|
175
|
+
- **`pallet_town_rl_config.toml`**: Training config for Pallet Town sequence
|
|
176
|
+
- **`test_pallet_town_rewards.py`**: Reward function test/demo script
|
|
177
|
+
- **`create_red_init_state.py`** (repo root): Script to generate init state
|
|
178
|
+
- **`Pokemon - Red Version (USA, Europe) (SGB Enhanced).gb`**: Your ROM (not committed)
|
|
179
|
+
|
|
180
|
+
## Creating Init States
|
|
181
|
+
|
|
182
|
+
The default init state starts in Red's bedroom with intro skipped. To create custom states:
|
|
183
|
+
|
|
184
|
+
```python
|
|
185
|
+
# See create_red_init_state.py in the synth-ai repo root
|
|
186
|
+
from pyboy import PyBoy
|
|
187
|
+
|
|
188
|
+
emulator = PyBoy("path/to/rom.gb", window="null")
|
|
189
|
+
|
|
190
|
+
# Navigate to desired starting point
|
|
191
|
+
# ... (button presses)
|
|
192
|
+
|
|
193
|
+
# Save state
|
|
194
|
+
with open("custom_init.state", "wb") as f:
|
|
195
|
+
emulator.save_state(f)
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
## Memory Addresses
|
|
199
|
+
|
|
200
|
+
Key RAM addresses are defined in `synth_ai/environments/examples/red/engine_helpers/memory_map.py`:
|
|
201
|
+
|
|
202
|
+
- `MAP_ID = 0xD35E`
|
|
203
|
+
- `PLAYER_X/Y = 0xD362/0xD361`
|
|
204
|
+
- `IN_BATTLE_FLAG = 0xD057`
|
|
205
|
+
- `ENEMY_HP_CURRENT = 0xCFE6`
|
|
206
|
+
- `PARTY_COUNT = 0xD163`
|
|
207
|
+
- `BADGE_FLAGS = 0xD356`
|
|
208
|
+
- (and many more)
|
|
209
|
+
|
|
210
|
+
## Troubleshooting
|
|
211
|
+
|
|
212
|
+
### ROM Not Found
|
|
213
|
+
|
|
214
|
+
```bash
|
|
215
|
+
# Set environment variable
|
|
216
|
+
export POKEMON_RED_ROM_PATH="/path/to/pokemon_red.gb"
|
|
217
|
+
|
|
218
|
+
# Or copy ROM to expected location
|
|
219
|
+
cp "Pokemon - Red Version.gb" synth_ai/environments/examples/red/roms/pokemon_red.gb
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
### PyBoy Not Installed
|
|
223
|
+
|
|
224
|
+
```bash
|
|
225
|
+
uv add pyboy
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
### Server Won't Start (Port in Use)
|
|
229
|
+
|
|
230
|
+
```bash
|
|
231
|
+
# Kill existing server
|
|
232
|
+
lsof -ti :8913 | xargs -r kill -9
|
|
233
|
+
|
|
234
|
+
# Or use a different port
|
|
235
|
+
uv run -m synth_ai task-app serve pokemon_red --port 8914
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
## Examples
|
|
239
|
+
|
|
240
|
+
### 1. Policy Evaluation with GPT-5-nano
|
|
241
|
+
|
|
242
|
+
Evaluate a GPT-5-nano policy across 10 episodes (10 policy calls each):
|
|
243
|
+
|
|
244
|
+
```bash
|
|
245
|
+
# From synth-ai root
|
|
246
|
+
cd /path/to/synth-ai
|
|
247
|
+
|
|
248
|
+
# 1. Make sure OpenAI API key is in .env
|
|
249
|
+
echo "OPENAI_API_KEY=sk-..." >> .env
|
|
250
|
+
|
|
251
|
+
# 2. Start the task app server (in background)
|
|
252
|
+
nohup sh -c 'printf "n\n" | uv run -m synth_ai task-app serve pokemon_red --port 8913 --no-reload' > nohup_pokemon.log 2>&1 &
|
|
253
|
+
|
|
254
|
+
# Wait for startup
|
|
255
|
+
sleep 8
|
|
256
|
+
|
|
257
|
+
# 3. Run the evaluation
|
|
258
|
+
uv run python examples/task_apps/pokemon_red/eval_pokemon_red_policy.py
|
|
259
|
+
```
|
|
260
|
+
|
|
261
|
+
**Expected Output:**
|
|
262
|
+
```
|
|
263
|
+
================================================================================
|
|
264
|
+
POKÉMON RED - POLICY EVALUATION
|
|
265
|
+
================================================================================
|
|
266
|
+
|
|
267
|
+
Task: Pallet Town Progression
|
|
268
|
+
Policy: gpt-5-nano
|
|
269
|
+
Episodes: 10
|
|
270
|
+
Max steps per episode: 10
|
|
271
|
+
|
|
272
|
+
✓ Server is healthy
|
|
273
|
+
✓ API key loaded
|
|
274
|
+
|
|
275
|
+
🎮 Running 10 episodes in parallel...
|
|
276
|
+
|
|
277
|
+
================================================================================
|
|
278
|
+
RESULTS SUMMARY
|
|
279
|
+
================================================================================
|
|
280
|
+
|
|
281
|
+
+-----------+----------+---------+-------------+---------+----------+--------------+
|
|
282
|
+
| Episode | Reward | Steps | Final Map | Party | Badges | Milestones |
|
|
283
|
+
+===========+==========+=========+=============+=========+==========+==============+
|
|
284
|
+
| 1 | 0 | 10 | Map38 | 0 | 0 | 0 |
|
|
285
|
+
| 2 | 0 | 9 | Map38 | 0 | 0 | 0 |
|
|
286
|
+
| 9 | 20 | 10 | Map38 | 0 | 0 | 1 |
|
|
287
|
+
+-----------+----------+---------+-------------+---------+----------+--------------+
|
|
288
|
+
|
|
289
|
+
Statistics:
|
|
290
|
+
Mean reward: 2.00
|
|
291
|
+
Max reward: 20.00
|
|
292
|
+
Success rate: 10% reached first milestone
|
|
293
|
+
|
|
294
|
+
Best Episode (#9):
|
|
295
|
+
Total reward: 20.0
|
|
296
|
+
Milestones achieved:
|
|
297
|
+
Step 5: Moved from Map38 to Map37 (+20.0)
|
|
298
|
+
```
|
|
299
|
+
|
|
300
|
+
**Key Features:**
|
|
301
|
+
- ✅ **Action Batching**: Each policy call returns 5-10 actions via `execute_sequence` tool
|
|
302
|
+
- ✅ **Parallel Execution**: All 10 episodes run concurrently
|
|
303
|
+
- ✅ **Rich Metrics**: Rewards, steps, maps, party status, milestones tracked
|
|
304
|
+
- ✅ **Fast Evaluation**: ~2-3 minutes for 10 episodes (vs 50+ min without batching)
|
|
305
|
+
|
|
306
|
+
**Customize the Evaluation:**
|
|
307
|
+
|
|
308
|
+
```python
|
|
309
|
+
# In eval_pokemon_red_policy.py
|
|
310
|
+
NUM_EPISODES = 10 # Number of episodes to run
|
|
311
|
+
MAX_STEPS_PER_EPISODE = 10 # Policy calls per episode (each returns 5-10 actions)
|
|
312
|
+
MODEL = "gpt-5-nano" # Or "gpt-4-turbo", "qwen-2.5-7b", etc.
|
|
313
|
+
```
|
|
314
|
+
|
|
315
|
+
### 2. Test Script (Random Actions)
|
|
316
|
+
|
|
317
|
+
```bash
|
|
318
|
+
cd /path/to/synth-ai
|
|
319
|
+
uv run python test_pokemon_red_rollout.py
|
|
320
|
+
```
|
|
321
|
+
|
|
322
|
+
### 3. Reward Function Demo
|
|
323
|
+
|
|
324
|
+
```bash
|
|
325
|
+
uv run python examples/task_apps/pokemon_red/test_pallet_town_rewards.py
|
|
326
|
+
```
|
|
327
|
+
|
|
328
|
+
Output:
|
|
329
|
+
```
|
|
330
|
+
======================================================================
|
|
331
|
+
PALLET TOWN PROGRESSION - REWARD SIMULATION
|
|
332
|
+
======================================================================
|
|
333
|
+
|
|
334
|
+
✓ Leave bedroom (Map 1→2): +20 points
|
|
335
|
+
✓ Exit house to Pallet Town (Map 2→0): +30 points
|
|
336
|
+
✓ Find and enter Oak's Lab (Map 0→3): +40 points
|
|
337
|
+
...
|
|
338
|
+
======================================================================
|
|
339
|
+
TOTAL REWARD: 705 points
|
|
340
|
+
======================================================================
|
|
341
|
+
```
|
|
342
|
+
|
|
343
|
+
## Future Work
|
|
344
|
+
|
|
345
|
+
- [ ] Route 1 exploration rewards
|
|
346
|
+
- [ ] Wild Pokémon encounter rewards
|
|
347
|
+
- [ ] Capture mechanics rewards
|
|
348
|
+
- [ ] Gym battle rewards
|
|
349
|
+
- [ ] Badge collection rewards
|
|
350
|
+
- [ ] Multi-environment curriculum (Pallet → Viridian → Pewter)
|
|
351
|
+
|
|
352
|
+
## Credits
|
|
353
|
+
|
|
354
|
+
- **PyBoy**: Game Boy emulator - https://github.com/Baekalfen/PyBoy
|
|
355
|
+
- **Pokémon Red Disassembly**: RAM map reference - https://github.com/pret/pokered
|
|
356
|
+
- **Datacrystal.org**: Memory address documentation
|
|
357
|
+
|