synth-ai 0.2.13.dev1__py3-none-any.whl → 0.2.13.dev2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +12 -1
- examples/swe/task_app/grpo_swe_mini.py +55 -26
- examples/swe/task_app/hosted/rollout.py +40 -0
- examples/swe/task_app/hosted/test_service.py +5 -6
- examples/task_apps/TESTING.md +275 -0
- examples/task_apps/__init__.py +0 -0
- examples/task_apps/crafter/__init__.py +0 -0
- examples/task_apps/crafter/task_app/__init__.py +2 -0
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter.py +18 -13
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter_task_app.py +1 -1
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/policy.py +60 -4
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/policy_routes.py +25 -3
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/rollout.py +10 -0
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_service.py +5 -6
- examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/__init__.py +0 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/__init__.py +0 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/__init__.py +0 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
- examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
- examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
- examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
- examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
- examples/task_apps/enron/__init__.py +1 -0
- examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
- examples/task_apps/enron/task_app/README.md +14 -0
- examples/task_apps/enron/task_app/__init__.py +1 -0
- examples/task_apps/enron/task_app/grpo_enron.py +906 -0
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
- examples/task_apps/enron/tests/__init__.py +2 -0
- examples/task_apps/enron/tests/conftest.py +115 -0
- examples/task_apps/enron/tests/integration/__init__.py +2 -0
- examples/task_apps/enron/tests/integration/test_enron_eval.py +177 -0
- examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
- examples/task_apps/enron/tests/unit/__init__.py +2 -0
- examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
- examples/task_apps/math/__init__.py +0 -0
- examples/{rl/task_app → task_apps/math}/math_single_step.py +19 -10
- examples/task_apps/pokemon_battle/__init__.py +2 -0
- examples/task_apps/pokemon_battle/modal_app.py +104 -0
- examples/task_apps/pokemon_battle/task_app/README.md +68 -0
- examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
- examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
- examples/task_apps/pokemon_red/README.md +357 -0
- examples/task_apps/pokemon_red/__init__.py +3 -0
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +225 -0
- examples/task_apps/pokemon_red/pallet_town_rl_config.toml +73 -0
- examples/task_apps/pokemon_red/task_app.py +606 -0
- examples/task_apps/pokemon_red/test_pallet_town_rewards.py +191 -0
- examples/task_apps/sokoban/README.md +307 -0
- examples/task_apps/sokoban/__init__.py +3 -0
- examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
- examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
- examples/task_apps/sokoban/task_app.py +1058 -0
- examples/task_apps/sokoban/tests/__init__.py +2 -0
- examples/task_apps/sokoban/tests/conftest.py +113 -0
- examples/task_apps/sokoban/tests/integration/__init__.py +2 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
- examples/task_apps/sokoban/tests/unit/__init__.py +2 -0
- examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
- examples/task_apps/verilog/__init__.py +1 -0
- examples/task_apps/verilog/eval_groq_qwen32b.toml +20 -0
- examples/task_apps/verilog/task_app/README.md +12 -0
- examples/task_apps/verilog/task_app/__init__.py +1 -0
- examples/task_apps/verilog/task_app/grpo_verilog.py +931 -0
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
- examples/task_apps/verilog/tests/__init__.py +2 -0
- examples/task_apps/verilog/tests/conftest.py +115 -0
- examples/task_apps/verilog/tests/integration/__init__.py +2 -0
- examples/task_apps/verilog/tests/integration/test_verilog_eval.py +179 -0
- examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
- examples/task_apps/verilog/tests/unit/__init__.py +2 -0
- examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
- examples/vlm/crafter_openai_vlm_agent.py +4 -4
- examples/vlm/run_crafter_vlm_benchmark.py +4 -4
- examples/workflows/__init__.py +0 -0
- examples/workflows/math_rl/__init__.py +0 -0
- examples/workflows/math_rl/download_dataset.py +80 -0
- synth_ai/__init__.py +2 -2
- synth_ai/api/train/builders.py +25 -11
- synth_ai/api/train/cli.py +12 -6
- synth_ai/api/train/configs/__init__.py +10 -10
- synth_ai/api/train/configs/rl.py +5 -4
- synth_ai/api/train/configs/sft.py +4 -3
- synth_ai/api/train/env_resolver.py +5 -2
- synth_ai/api/train/supported_algos.py +10 -5
- synth_ai/api/train/utils.py +7 -4
- synth_ai/cli/__init__.py +7 -51
- synth_ai/cli/_storage.py +4 -3
- synth_ai/cli/_validate_task_app.py +11 -0
- synth_ai/cli/balance.py +4 -3
- synth_ai/cli/calc.py +2 -2
- synth_ai/cli/demo.py +14 -7
- synth_ai/cli/legacy_root_backup.py +1 -1
- synth_ai/cli/rl_demo.py +8 -7
- synth_ai/cli/root.py +0 -97
- synth_ai/cli/task_apps.py +1707 -186
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +28 -16
- synth_ai/environments/examples/enron/engine.py +7 -2
- synth_ai/environments/examples/enron/environment.py +68 -0
- synth_ai/environments/examples/red/engine.py +27 -0
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
- synth_ai/environments/examples/red/environment.py +60 -0
- synth_ai/environments/examples/sokoban/taskset.py +116 -0
- synth_ai/environments/examples/verilog/engine.py +30 -4
- synth_ai/evals/client.py +58 -61
- synth_ai/jobs/client.py +16 -4
- synth_ai/judge_schemas.py +16 -16
- synth_ai/py.typed +0 -0
- synth_ai/task/__init__.py +14 -5
- synth_ai/task/contracts.py +124 -38
- synth_ai/task/proxy.py +48 -56
- synth_ai/task/rubrics/__init__.py +53 -0
- synth_ai/task/rubrics/loaders.py +133 -0
- synth_ai/task/rubrics/models.py +57 -0
- synth_ai/task/rubrics/scoring.py +113 -0
- synth_ai/{rubrics/validators.py → task/rubrics/strict.py} +53 -30
- synth_ai/task/server.py +8 -7
- synth_ai/task/validators.py +269 -6
- synth_ai/tracing_v3/decorators.py +7 -3
- synth_ai/tracing_v3/replica_sync.py +4 -4
- synth_ai/tracing_v3/serialization.py +5 -5
- synth_ai/tracing_v3/trace_utils.py +317 -0
- synth_ai/tracing_v3/turso/native_manager.py +3 -3
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.13.dev2.dist-info}/METADATA +4 -1
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.13.dev2.dist-info}/RECORD +214 -101
- examples/agora_ex/README_MoE.md +0 -224
- examples/agora_ex/__init__.py +0 -7
- examples/agora_ex/agora_ex.py +0 -65
- examples/agora_ex/agora_ex_task_app.py +0 -590
- examples/agora_ex/configs/rl_lora_qwen3_moe_2xh200.toml +0 -121
- examples/agora_ex/reward_fn_grpo-human.py +0 -129
- examples/agora_ex/system_prompt_CURRENT.md +0 -63
- examples/agora_ex/task_app/agora_ex_task_app.py +0 -590
- examples/agora_ex/task_app/reward_fn_grpo-human.py +0 -129
- examples/agora_ex/task_app/system_prompt_CURRENT.md +0 -63
- synth_ai/rubrics/__init__.py +0 -22
- synth_ai/task/rubrics.py +0 -219
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/README.md +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/README.md +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/branching.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/environment_routes.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/app.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/environment.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/react_agent.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/shared.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/tools.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/hosted_app.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/openai_client.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/main.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/registry.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/volume.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_agents.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/utils.py +0 -0
- /examples/{rl/task_app → task_apps/math}/README.md +0 -0
- /examples/{rl/task_app → task_apps/math}/math_task_app.py +0 -0
- /examples/{rl → workflows/math_rl}/configs/eval_base_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/eval_rl_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen17.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/rl_from_ft_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/run_eval.py +0 -0
- /examples/{rl → workflows/math_rl}/run_rl_and_save.py +0 -0
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.13.dev2.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.13.dev2.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.13.dev2.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.13.dev2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Agent modules for Pokemon Emerald speedrunning agent
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from utils.vlm import VLM
|
|
6
|
+
from .action import action_step
|
|
7
|
+
from .memory import memory_step
|
|
8
|
+
from .perception import perception_step
|
|
9
|
+
from .planning import planning_step
|
|
10
|
+
from .simple import SimpleAgent, get_simple_agent, simple_mode_processing_multiprocess, configure_simple_agent_defaults
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class Agent:
    """
    Unified agent interface that encapsulates all agent logic.
    The client just calls agent.step(game_state) and gets back an action.

    The agent runs in one of two modes, chosen at construction time:
    simple mode delegates every step to the shared SimpleAgent, while the
    default mode chains perception -> planning -> memory -> action.
    """

    def __init__(self, args=None):
        """
        Initialize the agent based on configuration.

        Args:
            args: Command line arguments with agent configuration. The
                attributes ``backend``, ``model_name`` and ``simple`` are
                read. When ``args`` is falsy, or an attribute is missing,
                the defaults "gemini", "gemini-2.5-flash" and ``False``
                are used instead of raising AttributeError.
        """
        # Extract configuration (tolerate partially-populated namespaces)
        backend = getattr(args, "backend", "gemini") if args else "gemini"
        model_name = getattr(args, "model_name", "gemini-2.5-flash") if args else "gemini-2.5-flash"
        simple_mode = getattr(args, "simple", False) if args else False

        # Initialize VLM
        self.vlm = VLM(backend=backend, model_name=model_name)
        print(f" VLM: {backend}/{model_name}")

        # Initialize agent mode
        self.simple_mode = simple_mode
        if simple_mode:
            # Use global SimpleAgent instance to enable checkpoint persistence
            self.simple_agent = get_simple_agent(self.vlm)
            print(" Mode: Simple (direct frame->action)")
        else:
            # Four-module agent context
            self.context = {
                'perception_output': None,
                'planning_output': None,
                'memory': []
            }
            print(" Mode: Four-module architecture")

    def step(self, game_state):
        """
        Process a game state and return an action.

        Args:
            game_state: Dictionary containing:
                - screenshot: PIL Image
                - game_state: Dict with game memory data
                - visual: Dict with visual observations
                - audio: Dict with audio observations
                - progress: Dict with milestone progress

        Returns:
            dict: Contains 'action' and optionally 'reasoning'.
            In four-module mode ``None`` is returned when any module raises;
            the error message and traceback are printed in that case.
        """
        if self.simple_mode:
            # Simple mode - delegate to SimpleAgent
            return self.simple_agent.step(game_state)

        # Four-module processing
        #
        # NOTE(review): action_step (agent/action.py) is declared as
        # (memory_context, current_plan, latest_observation, frame,
        # state_data, recent_actions, vlm) and memory_step (agent/memory.py)
        # as (memory_context, current_plan, recent_actions,
        # observation_buffer, vlm). The calls below pass four and three
        # positional arguments, so they look like they raise TypeError on
        # every step and end up in the except branch. The call shapes are
        # left untouched here because the perception_step / planning_step
        # signatures must be checked before rewiring - confirm and fix
        # together.
        try:
            # 1. Perception - understand what's happening
            perception_output = perception_step(
                self.vlm,
                game_state,
                self.context.get('memory', [])
            )
            self.context['perception_output'] = perception_output

            # 2. Planning - decide strategy
            planning_output = planning_step(
                self.vlm,
                perception_output,
                self.context.get('memory', [])
            )
            self.context['planning_output'] = planning_output

            # 3. Memory - update context
            memory_output = memory_step(
                perception_output,
                planning_output,
                self.context.get('memory', [])
            )
            self.context['memory'] = memory_output

            # 4. Action - choose button press
            action_output = action_step(
                self.vlm,
                game_state,
                planning_output,
                perception_output
            )

            return action_output

        except Exception as e:
            # Best-effort boundary: the caller keeps running on None, but the
            # full traceback is emitted so failures are diagnosable.
            import traceback

            print(f"❌ Agent error: {e}")
            traceback.print_exc()
            return None
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
# Names re-exported as the public surface of the agent package:
# the unified Agent wrapper, the four pipeline steps, and the
# simple-mode entry points.
__all__ = [
    "Agent",
    "action_step", "memory_step", "perception_step", "planning_step",
    "SimpleAgent", "get_simple_agent",
    "simple_mode_processing_multiprocess",
    "configure_simple_agent_defaults",
]
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import random
|
|
3
|
+
import sys
|
|
4
|
+
from agent.system_prompt import system_prompt
|
|
5
|
+
from utils.state_formatter import format_state_for_llm, format_state_summary, get_movement_options, get_party_health_summary
|
|
6
|
+
from utils.vlm import VLM
|
|
7
|
+
|
|
8
|
+
# Set up module logging
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
def _describe_battler(label, pokemon):
    """Format one battle participant as '<label>: <species> (Lv.N) HP: cur/max'."""
    species = pokemon.get('species_name', pokemon.get('species', 'Unknown'))
    return (
        f"{label}: {species} (Lv.{pokemon.get('level', '?')}) "
        f"HP: {pokemon.get('current_hp', '?')}/{pokemon.get('max_hp', '?')}"
    )


def _build_action_context(game_data, movement_options, party_health, recent_actions):
    """Build the 'enhanced action context' section of the prompt as one string."""
    lines = []

    # Battle vs Overworld context
    if game_data.get('in_battle', False):
        lines.append("=== BATTLE MODE ===")
        battle_info = game_data.get('battle_info', {})
        if battle_info:
            if 'player_pokemon' in battle_info:
                lines.append(_describe_battler("Your Pokemon", battle_info['player_pokemon']))
            if 'opponent_pokemon' in battle_info:
                lines.append(_describe_battler("Opponent", battle_info['opponent_pokemon']))
    else:
        lines.append("=== OVERWORLD MODE ===")

        # Movement options from utility
        if movement_options:
            lines.append("Movement Options:")
            for direction, description in movement_options.items():
                lines.append(f" {direction}: {description}")

    # Party health summary
    if party_health['total_count'] > 0:
        lines.append("=== PARTY STATUS ===")
        lines.append(f"Healthy Pokemon: {party_health['healthy_count']}/{party_health['total_count']}")
        if party_health['critical_pokemon']:
            lines.append("Critical Pokemon:")
            for critical in party_health['critical_pokemon']:
                lines.append(f" {critical}")

    # Recent actions context; str() keeps the join safe if an entry is not
    # already a string.
    if recent_actions:
        last_five = list(recent_actions)[-5:]
        lines.append(f"Recent Actions: {', '.join(str(a) for a in last_five)}")

    return "\n".join(lines)


def action_step(memory_context, current_plan, latest_observation, frame, state_data, recent_actions, vlm):
    """
    Decide and perform the next action button(s) based on memory, plan, observation, and comprehensive state.

    Args:
        memory_context: Memory summary interpolated into the prompt.
        current_plan: Current plan text; 'No plan yet' is shown when falsy.
        latest_observation: Latest observation interpolated into the prompt.
        frame: Accepted for interface compatibility; not read by this function.
        state_data: Comprehensive state dict; its 'game' entry is read here and
            the whole dict is passed to the state formatter utilities.
        recent_actions: Iterable of recent actions; the last five are shown.
        vlm: Object whose ``get_text_query(prompt, "ACTION")`` returns the
            model's reply.

    Returns:
        A list of at most 10 action buttons as strings. When the reply holds no
        valid button, a state-based default is returned: ['A'] in battle,
        ['A', 'A', 'A'] when the party is empty, otherwise one random button.
    """
    # Get formatted state context and useful summaries
    state_context = format_state_for_llm(state_data)
    state_summary = format_state_summary(state_data)
    movement_options = get_movement_options(state_data)
    party_health = get_party_health_summary(state_data)

    logger.info("[ACTION] Starting action decision")
    logger.info("[ACTION] State: %s", state_summary)
    logger.info(
        "[ACTION] Party health: %s/%s healthy",
        party_health['healthy_count'],
        party_health['total_count'],
    )
    if movement_options:
        logger.info("[ACTION] Movement options: %s", movement_options)

    # Extract key info for context
    game_data = state_data.get('game', {})

    # Build enhanced action context
    context_str = _build_action_context(game_data, movement_options, party_health, recent_actions)

    action_prompt = f"""
★★★ COMPREHENSIVE GAME STATE DATA ★★★

{state_context}

★★★ ENHANCED ACTION CONTEXT ★★★

{context_str}

★★★ ACTION DECISION TASK ★★★

You are the agent playing Pokemon Emerald with a speedrunning mindset. Make quick, efficient decisions.

Memory Context: {memory_context}
Current Plan: {current_plan if current_plan else 'No plan yet'}
Latest Observation: {latest_observation}

Based on the comprehensive state information above, decide your next action(s):

BATTLE STRATEGY:
- If in battle: Choose moves strategically based on type effectiveness and damage
- Consider switching pokemon if current one is weak/low HP
- Use items if pokemon is in critical condition

NAVIGATION STRATEGY:
- Use movement options analysis above for efficient navigation
- Avoid blocked tiles (marked as BLOCKED)
- Consider tall grass: avoid if party is weak, seek if need to train/catch
- Navigate around water unless you have Surf
- Use coordinates to track progress toward objectives

MENU/DIALOGUE STRATEGY:
- If in dialogue: A to advance text, B to cancel/skip if possible
- If in menu: Navigate with UP/DOWN/LEFT/RIGHT, A to select, B to cancel/back out
- If stuck in menu/interface: B repeatedly to exit to overworld
- In Pokemon Center: A to talk to Nurse Joy, A to confirm healing

HEALTH MANAGEMENT:
- If pokemon are low HP/fainted, head to Pokemon Center
- If no healthy pokemon, prioritize healing immediately
- Consider terrain: avoid wild encounters if party is weak

EFFICIENCY RULES:
1. Output sequences of actions when you know what's coming (e.g., "RIGHT, RIGHT, RIGHT, A" to enter a door)
2. For dialogue: "A, A, A, A, A" to mash through
3. For movement: repeat directions based on movement options (e.g., "UP, UP, UP, UP" if UP shows "Normal path")
4. If uncertain, output single action and reassess
5. Use traversability data: move toward open paths, avoid obstacles
6. If movement doesn't change coordinates (e.g., RIGHT but X doesn't increase), check map for walls (#) blocking your path

Valid buttons: A, B, SELECT, START, UP, DOWN, LEFT, RIGHT, L, R
- A: Interact with NPCs/objects, confirm selections, advance dialogue, use moves in battle
- B: Cancel menus, back out of interfaces, run faster (with running shoes), flee from battle
- START: Open main menu (Title sequence, Pokedex, Pokemon, Bag, etc.)
- SELECT: Use registered key item (typically unused)
- UP/DOWN/LEFT/RIGHT: Move character, navigate menus, select options
- L/R: Cycle through pages in some menus, switch Pokemon in battle (rare usage)

⚠️ CRITICAL WARNING: NEVER save the game using the in-game save menu! Saving will crash the entire run and end your progress. If you encounter a save prompt in the game, press B to cancel it immediately!

Return ONLY the button name(s) as a comma-separated list, nothing else.
Maximum 10 actions in sequence. Avoid repeating same button more than 6 times.
"""

    # Construct complete prompt for VLM
    complete_prompt = system_prompt + action_prompt

    # Treat a missing reply like an empty one so the state-based default
    # below is used instead of crashing on None.strip().
    raw_response = vlm.get_text_query(complete_prompt, "ACTION")
    action_response = (raw_response or "").strip().upper()
    valid_buttons = ['A', 'B', 'SELECT', 'START', 'UP', 'DOWN', 'LEFT', 'RIGHT', 'L', 'R']

    # Split the response by commas and keep only recognised button names
    actions = [btn.strip() for btn in action_response.split(',') if btn.strip() in valid_buttons]

    print(f"Parsed actions: {actions}")
    if not actions:
        # The fallback is chosen from the game state below; it is not always 'A'.
        print("❌ No valid actions parsed - falling back to a state-based default")
    print("-" * 80 + "\n")

    # Limit to maximum 10 actions. Repetition is only discouraged through the
    # prompt text; nothing here enforces it.
    actions = actions[:10]

    # If no valid actions found, make intelligent default based on state
    if not actions:
        if game_data.get('in_battle', False):
            actions = ['A']  # Attack in battle
        elif party_health['total_count'] == 0:
            actions = ['A', 'A', 'A']  # Try to progress dialogue/menu
        else:
            actions = [random.choice(['A', 'RIGHT', 'UP', 'DOWN', 'LEFT'])]  # Random exploration

    logger.info("[ACTION] Actions decided: %s", ', '.join(actions))
    return actions
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from collections import deque
|
|
3
|
+
from agent.system_prompt import system_prompt
|
|
4
|
+
from utils.state_formatter import format_state_summary, get_party_health_summary
|
|
5
|
+
from utils.vlm import VLM
|
|
6
|
+
|
|
7
|
+
# Set up module logging
|
|
8
|
+
logger = logging.getLogger(__name__)
|
|
9
|
+
|
|
10
|
+
def extract_key_state_info(state_data):
    """
    Extract key information from comprehensive state for memory storage using the utility functions.

    Args:
        state_data: State dict. The 'player', 'game' and 'map' entries are
            read here; a missing entry or one set to ``None`` is treated as
            an empty dict. The whole dict is also passed to the state
            formatter utilities.

    Returns:
        dict with the keys 'state_summary', 'player_name', 'money',
        'current_map', 'in_battle', 'party_health', 'critical_pokemon',
        'position' and 'traversability_summary', plus 'battle_opponent'
        when battle info is present.
    """
    # Use the state formatter utilities for consistency
    state_summary = format_state_summary(state_data)
    party_health = get_party_health_summary(state_data)

    # `or {}` also covers sections that are present but explicitly None,
    # which a plain .get(key, {}) default would let through.
    player_data = state_data.get('player') or {}
    game_data = state_data.get('game') or {}
    map_info = state_data.get('map') or {}

    key_info = {
        'state_summary': state_summary,
        'player_name': player_data.get('name', 'Player'),
        # Falls back to the game-level value when the player-level one is
        # missing or falsy.
        'money': player_data.get('money') or game_data.get('money', 0),
        'current_map': player_data.get('location', 'Unknown Location'),
        'in_battle': game_data.get('in_battle', False),
        'party_health': f"{party_health['healthy_count']}/{party_health['total_count']}",
        'critical_pokemon': party_health['critical_pokemon']
    }

    # Position info: prefer 'coordinates', then 'position', else empty
    if 'coordinates' in player_data:
        key_info['position'] = player_data['coordinates']
    elif 'position' in player_data:
        key_info['position'] = player_data['position']
    else:
        key_info['position'] = {}

    # Battle opponent
    battle = game_data.get('battle_info')
    if battle:
        opponent_pokemon = battle.get('opponent_pokemon') or {}
        key_info['battle_opponent'] = opponent_pokemon.get(
            'species_name', opponent_pokemon.get('species', 'Unknown Pokemon')
        )

    # Traversability summary
    traversability = map_info.get('traversability')
    if traversability:
        total_tiles = sum(len(row) for row in traversability)
        # A cell counts as blocked when its string form is '0', so both the
        # int 0 and the string '0' match.
        blocked_count = sum(1 for row in traversability for cell in row if str(cell) == '0')
        passable_tiles = total_tiles - blocked_count
        key_info['traversability_summary'] = f"{passable_tiles}/{total_tiles} passable"
    else:
        key_info['traversability_summary'] = "No data"

    return key_info
|
|
56
|
+
|
|
57
|
+
def memory_step(memory_context, current_plan, recent_actions, observation_buffer, vlm):
    """
    Maintain a rolling buffer of the previous 50 actions and observations with state information.

    Args:
        memory_context: Accepted for interface compatibility; the incoming
            value is not read and a freshly built context is returned.
        current_plan: Current plan text, or a falsy value when there is none.
        recent_actions: Iterable of actions to append to the buffer.
        observation_buffer: Sequence of dicts, each providing "frame_id" and
            "observation" and optionally "state".
        vlm: Accepted for interface compatibility; not used by this function.

    Returns:
        The updated memory_context string with the current state, the plan,
        up to five key events and the most recent buffer entries.
    """
    # The buffer is stored on the function object so it survives between calls.
    if not hasattr(memory_step, 'memory_buffer'):
        memory_step.memory_buffer = deque(maxlen=50)

    logger.info(f"[MEMORY] Processing {len(observation_buffer)} new observations")

    # Add new observations with state info to the buffer
    latest_state = None
    for obs in observation_buffer:
        state_info = extract_key_state_info(obs.get('state', {}))
        memory_step.memory_buffer.append({
            "type": "observation",
            "frame_id": obs["frame_id"],
            "content": obs["observation"],
            "state": state_info,
        })
        logger.info(f"[MEMORY] Added observation frame {obs['frame_id']}: {state_info['state_summary']}")
        # Remember the newest extracted state so it is not extracted twice.
        latest_state = state_info

    # Add recent actions to the buffer
    for action in recent_actions:
        memory_step.memory_buffer.append({
            "type": "action",
            "content": action,
        })

    # Create a formatted memory context from the buffer with state insights
    memory_entries = []
    key_events = []

    # Track significant state changes
    previous_map = None
    previous_battle_state = None

    for entry in memory_step.memory_buffer:
        if entry["type"] != "observation":
            memory_entries.append(f"Action: {entry['content']}")
            continue

        frame_id = entry['frame_id']
        description = entry['content']
        state = entry.get('state', {})

        # Use the consistent state summary
        state_summary = state.get('state_summary', '')

        # Check for significant events
        current_map = state.get('current_map', 'Unknown Location')
        current_battle = state.get('in_battle', False)

        # The first observation has no predecessor, so it is never a map change.
        if current_map != previous_map and previous_map is not None:
            key_events.append(f"Moved from {previous_map} to {current_map}")
            logger.info(f"[MEMORY] Key event: Map change from {previous_map} to {current_map}")

        # Compare by truthiness so the initial None state is not mistaken for
        # "was in battle": a first observation outside battle must not be
        # reported as "Exited battle".
        if current_battle and not previous_battle_state:
            opponent = state.get('battle_opponent', 'Unknown Pokemon')
            key_events.append(f"Entered battle vs {opponent}")
            logger.info(f"[MEMORY] Key event: Entered battle vs {opponent}")
        elif previous_battle_state and not current_battle:
            key_events.append("Exited battle")
            logger.info("[MEMORY] Key event: Exited battle")

        previous_map = current_map
        previous_battle_state = current_battle

        # Format observation entry
        if isinstance(description, dict):
            desc_text = description.get('description', str(description))
        else:
            desc_text = str(description)

        memory_entries.append(f"Frame {frame_id}: {desc_text} [{state_summary}]")

    # Get current state summary from the latest observation
    current_state_summary = ""
    if latest_state is not None:
        current_state_summary = latest_state.get('state_summary', 'No state data')

    # Combine into comprehensive memory context.
    # NOTE: the header mentions 50 entries (the buffer capacity) while only
    # the newest 30 are rendered below.
    memory_context = f"""★★★ COMPREHENSIVE MEMORY CONTEXT ★★★

CURRENT STATE: {current_state_summary}

CURRENT PLAN: {current_plan if current_plan else 'No plan yet'}

KEY EVENTS: {' -> '.join(key_events[-5:]) if key_events else 'None recently'}

RECENT MEMORY (last 50 entries):
{chr(10).join(memory_entries[-30:])}"""  # Show last 30 entries to avoid too much text

    logger.info(f"[MEMORY] Memory context updated with {len(memory_entries)} total entries")
    logger.info(f"[MEMORY] Current state: {current_state_summary}")
    logger.info(f"[MEMORY] Key events: {len(key_events)} tracked")

    return memory_context
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import time
|
|
2
|
+
import logging
|
|
3
|
+
from utils.vlm import VLM
|
|
4
|
+
from utils.state_formatter import format_state_for_llm, format_state_summary
|
|
5
|
+
from agent.system_prompt import system_prompt
|
|
6
|
+
|
|
7
|
+
# Set up module logging
|
|
8
|
+
logger = logging.getLogger(__name__)
|
|
9
|
+
|
|
10
|
+
def perception_step(frame, state_data, vlm):
    """
    Observe and describe your current situation using both visual and comprehensive state data.

    Args:
        frame: Current visual frame, passed through to ``vlm.get_query``.
        state_data: Game state passed to ``format_state_for_llm`` and
            ``format_state_summary``.
        vlm: Object providing ``get_query(frame, prompt, tag)``.

    Returns (observation, slow_thinking_needed), where ``observation`` is a
    dict with keys "description" (the VLM reply) and "state_data" (the
    formatted state context), and ``slow_thinking_needed`` is a bool.
    """
    # Local import: the module's import block does not bring in ``re``.
    import re

    # Format the comprehensive state context using the utility
    state_context = format_state_for_llm(state_data)

    # Log the state data being used
    state_summary = format_state_summary(state_data)
    logger.info("[PERCEPTION] Processing frame with comprehensive state data")
    logger.info(f"[PERCEPTION] State: {state_summary}")
    logger.info(f"[PERCEPTION] State context length: {len(state_context)} characters")

    perception_prompt = f"""
★★★ COMPREHENSIVE GAME STATE DATA ★★★

{state_context}

★★★ VISUAL ANALYSIS TASK ★★★

You are the agent, actively playing Pokemon Emerald. Observe and describe your current situation in detail using both the visual frame and the comprehensive game state data above.

Based on the visual frame and the above state data, describe your current situation:
- CUTSCENE or TITLE SCREEN: What does the cutscene or title screen show?
- MAP: You are navigating a terrain (city, forest, grassland, etc.). Are there any interactable locations (NPCs, items, doors)? What are the traversable vs. non-traversable areas? Use your position coordinates to understand where you are.
- BATTLE: Analyze the battle situation using both visual and state data. What moves are available? What's the strategy?
- DIALOGUE: What is the character telling you? How important is this information? Can you respond to the NPC?
- MENU: What menu are you in? What options are available? What should you select based on your current needs?

Combine visual observation with the state data to give a complete picture of the current situation.
"""

    description = vlm.get_query(frame, system_prompt + perception_prompt, "PERCEPTION")

    # Determine if slow thinking is needed based on visual scene and state changes
    scene_check_prompt = f"""
★★★ COMPREHENSIVE GAME STATE DATA ★★★

{state_context}

★★★ SLOW THINKING DECISION ★★★

Based on the current state and visual frame above:

Does this scene represent a significant change that requires planning? Consider:
- Entering/exiting battle
- Reaching a new map/location
- Encountering important NPCs or story events
- Significant changes in pokemon party or game state

Answer YES or NO.
"""
    scene_response = vlm.get_query(frame, scene_check_prompt, "PERCEPTION-SCENE_CHECK").strip().lower()
    # Match "yes" as a whole word only; a plain substring test would also
    # fire on words such as "eyes" or "yesterday" inside a NO answer.
    slow_thinking_needed = re.search(r"\byes\b", scene_response) is not None

    observation = {"description": description, "state_data": state_context}

    logger.info(f"[PERCEPTION] Slow thinking needed: {slow_thinking_needed}")
    return observation, slow_thinking_needed
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from utils.vlm import VLM
|
|
3
|
+
from utils.state_formatter import format_state_for_llm, format_state_summary
|
|
4
|
+
from agent.system_prompt import system_prompt
|
|
5
|
+
|
|
6
|
+
# Set up module logging
|
|
7
|
+
logger = logging.getLogger(__name__)
|
|
8
|
+
|
|
9
|
+
def planning_step(memory_context, current_plan, slow_thinking_needed, state_data, vlm):
    """
    Decide and update your high-level plan based on memory context, current state, and the need for slow thinking.

    Args:
        memory_context: Memory text embedded verbatim in both prompts.
        current_plan: Existing plan text, or a falsy value when there is none.
        slow_thinking_needed: When truthy, a new plan is always generated.
        state_data: Game state passed to ``format_state_for_llm`` and
            ``format_state_summary``.
        vlm: Object providing ``get_text_query(prompt, tag)``.

    Returns updated plan.
    """
    # Local import: the module's import block does not bring in ``re``.
    import re

    # Get formatted state context
    state_context = format_state_for_llm(state_data)
    state_summary = format_state_summary(state_data)

    logger.info("[PLANNING] Starting planning step")
    logger.info(f"[PLANNING] State: {state_summary}")
    logger.info(f"[PLANNING] Slow thinking needed: {slow_thinking_needed}")

    # Check if current plan is accomplished
    if current_plan:
        plan_check_prompt = f"""
★★★ COMPREHENSIVE GAME STATE DATA ★★★

{state_context}

★★★ PLAN ASSESSMENT TASK ★★★

You are the agent playing Pokemon Emerald. Assess your current situation and plan progress.

Current Plan: {current_plan}
Memory Context: {memory_context}

Considering your current location, pokemon party, money, traversability, and recent actions:
Have you accomplished your current plan? Answer YES or NO, and explain briefly.

Consider these factors:
- Did you reach your target location?
- Did you complete the intended battle/gym challenge?
- Did you acquire the needed pokemon/items?
- Are you stuck due to terrain or party status?
- Do you need to adapt due to wild encounters or water obstacles?
"""
        plan_status = vlm.get_text_query(system_prompt + plan_check_prompt, "PLANNING-ASSESSMENT")
        # The reply contains an explanation, so match "yes" as a whole word;
        # a substring test would also fire on "eyes", "yesterday", etc.
        if re.search(r"\byes\b", plan_status.lower()):
            current_plan = None
            logger.info("[PLANNING] Current plan marked as completed")

    # Generate new plan if needed
    if current_plan is None or slow_thinking_needed:
        planning_prompt = f"""
★★★ COMPREHENSIVE GAME STATE DATA ★★★

{state_context}

★★★ STRATEGIC PLANNING TASK ★★★

You are the agent playing Pokemon Emerald with a speedrunning mindset. Create an efficient strategic plan.

Memory Context: {memory_context}

Analyze your situation and create a strategic plan:

1. IMMEDIATE GOAL: What should you focus on right now? Consider:
- If in battle: What's your battle strategy based on pokemon HP/levels?
- If on map: Navigate efficiently using traversability data
- If in menu/dialogue: How to progress efficiently?
- Do you need to heal pokemon at Pokemon Center?
- Are there terrain obstacles (water, blocked paths) to navigate?

2. SHORT-TERM OBJECTIVES (next few actions):
- Specific steps to achieve your immediate goal
- Account for your current pokemon party health and levels
- Consider terrain: avoid/seek tall grass, navigate around obstacles
- Money management for items/healing

3. LONG-TERM STRATEGY:
- How does this fit into beating the game quickly?
- What gym leader or major milestone to target next?
- Pokemon catching/training priorities based on current party
- Route optimization considering terrain types

4. EFFICIENCY NOTES:
- How to minimize backtracking using map layout
- Shortcuts or sequence breaks considering terrain
- Wild encounter management (avoid/seek based on needs)

Format as a clear, actionable plan focusing on speed and efficiency.
"""
        current_plan = vlm.get_text_query(system_prompt + planning_prompt, "PLANNING-CREATION")
        logger.info("[PLANNING] New plan created")

    # Log a preview capped at 300 characters. str() keeps the log call from
    # raising if the VLM returned a non-string value (e.g. None).
    plan_text = str(current_plan)
    if len(plan_text) > 300:
        logger.info(f"[PLANNING] Final plan: {plan_text[:300]}...")
    else:
        logger.info(f"[PLANNING] Final plan: {current_plan}")
    return current_plan
|