synth-ai 0.2.13.dev1__py3-none-any.whl → 0.2.13.dev2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +12 -1
- examples/swe/task_app/grpo_swe_mini.py +55 -26
- examples/swe/task_app/hosted/rollout.py +40 -0
- examples/swe/task_app/hosted/test_service.py +5 -6
- examples/task_apps/TESTING.md +275 -0
- examples/task_apps/__init__.py +0 -0
- examples/task_apps/crafter/__init__.py +0 -0
- examples/task_apps/crafter/task_app/__init__.py +2 -0
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter.py +18 -13
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter_task_app.py +1 -1
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/policy.py +60 -4
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/policy_routes.py +25 -3
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/rollout.py +10 -0
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_service.py +5 -6
- examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/__init__.py +0 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/__init__.py +0 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/__init__.py +0 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
- examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
- examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
- examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
- examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
- examples/task_apps/enron/__init__.py +1 -0
- examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
- examples/task_apps/enron/task_app/README.md +14 -0
- examples/task_apps/enron/task_app/__init__.py +1 -0
- examples/task_apps/enron/task_app/grpo_enron.py +906 -0
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
- examples/task_apps/enron/tests/__init__.py +2 -0
- examples/task_apps/enron/tests/conftest.py +115 -0
- examples/task_apps/enron/tests/integration/__init__.py +2 -0
- examples/task_apps/enron/tests/integration/test_enron_eval.py +177 -0
- examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
- examples/task_apps/enron/tests/unit/__init__.py +2 -0
- examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
- examples/task_apps/math/__init__.py +0 -0
- examples/{rl/task_app → task_apps/math}/math_single_step.py +19 -10
- examples/task_apps/pokemon_battle/__init__.py +2 -0
- examples/task_apps/pokemon_battle/modal_app.py +104 -0
- examples/task_apps/pokemon_battle/task_app/README.md +68 -0
- examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
- examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
- examples/task_apps/pokemon_red/README.md +357 -0
- examples/task_apps/pokemon_red/__init__.py +3 -0
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +225 -0
- examples/task_apps/pokemon_red/pallet_town_rl_config.toml +73 -0
- examples/task_apps/pokemon_red/task_app.py +606 -0
- examples/task_apps/pokemon_red/test_pallet_town_rewards.py +191 -0
- examples/task_apps/sokoban/README.md +307 -0
- examples/task_apps/sokoban/__init__.py +3 -0
- examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
- examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
- examples/task_apps/sokoban/task_app.py +1058 -0
- examples/task_apps/sokoban/tests/__init__.py +2 -0
- examples/task_apps/sokoban/tests/conftest.py +113 -0
- examples/task_apps/sokoban/tests/integration/__init__.py +2 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
- examples/task_apps/sokoban/tests/unit/__init__.py +2 -0
- examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
- examples/task_apps/verilog/__init__.py +1 -0
- examples/task_apps/verilog/eval_groq_qwen32b.toml +20 -0
- examples/task_apps/verilog/task_app/README.md +12 -0
- examples/task_apps/verilog/task_app/__init__.py +1 -0
- examples/task_apps/verilog/task_app/grpo_verilog.py +931 -0
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
- examples/task_apps/verilog/tests/__init__.py +2 -0
- examples/task_apps/verilog/tests/conftest.py +115 -0
- examples/task_apps/verilog/tests/integration/__init__.py +2 -0
- examples/task_apps/verilog/tests/integration/test_verilog_eval.py +179 -0
- examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
- examples/task_apps/verilog/tests/unit/__init__.py +2 -0
- examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
- examples/vlm/crafter_openai_vlm_agent.py +4 -4
- examples/vlm/run_crafter_vlm_benchmark.py +4 -4
- examples/workflows/__init__.py +0 -0
- examples/workflows/math_rl/__init__.py +0 -0
- examples/workflows/math_rl/download_dataset.py +80 -0
- synth_ai/__init__.py +2 -2
- synth_ai/api/train/builders.py +25 -11
- synth_ai/api/train/cli.py +12 -6
- synth_ai/api/train/configs/__init__.py +10 -10
- synth_ai/api/train/configs/rl.py +5 -4
- synth_ai/api/train/configs/sft.py +4 -3
- synth_ai/api/train/env_resolver.py +5 -2
- synth_ai/api/train/supported_algos.py +10 -5
- synth_ai/api/train/utils.py +7 -4
- synth_ai/cli/__init__.py +7 -51
- synth_ai/cli/_storage.py +4 -3
- synth_ai/cli/_validate_task_app.py +11 -0
- synth_ai/cli/balance.py +4 -3
- synth_ai/cli/calc.py +2 -2
- synth_ai/cli/demo.py +14 -7
- synth_ai/cli/legacy_root_backup.py +1 -1
- synth_ai/cli/rl_demo.py +8 -7
- synth_ai/cli/root.py +0 -97
- synth_ai/cli/task_apps.py +1707 -186
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +28 -16
- synth_ai/environments/examples/enron/engine.py +7 -2
- synth_ai/environments/examples/enron/environment.py +68 -0
- synth_ai/environments/examples/red/engine.py +27 -0
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
- synth_ai/environments/examples/red/environment.py +60 -0
- synth_ai/environments/examples/sokoban/taskset.py +116 -0
- synth_ai/environments/examples/verilog/engine.py +30 -4
- synth_ai/evals/client.py +58 -61
- synth_ai/jobs/client.py +16 -4
- synth_ai/judge_schemas.py +16 -16
- synth_ai/py.typed +0 -0
- synth_ai/task/__init__.py +14 -5
- synth_ai/task/contracts.py +124 -38
- synth_ai/task/proxy.py +48 -56
- synth_ai/task/rubrics/__init__.py +53 -0
- synth_ai/task/rubrics/loaders.py +133 -0
- synth_ai/task/rubrics/models.py +57 -0
- synth_ai/task/rubrics/scoring.py +113 -0
- synth_ai/{rubrics/validators.py → task/rubrics/strict.py} +53 -30
- synth_ai/task/server.py +8 -7
- synth_ai/task/validators.py +269 -6
- synth_ai/tracing_v3/decorators.py +7 -3
- synth_ai/tracing_v3/replica_sync.py +4 -4
- synth_ai/tracing_v3/serialization.py +5 -5
- synth_ai/tracing_v3/trace_utils.py +317 -0
- synth_ai/tracing_v3/turso/native_manager.py +3 -3
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.13.dev2.dist-info}/METADATA +4 -1
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.13.dev2.dist-info}/RECORD +214 -101
- examples/agora_ex/README_MoE.md +0 -224
- examples/agora_ex/__init__.py +0 -7
- examples/agora_ex/agora_ex.py +0 -65
- examples/agora_ex/agora_ex_task_app.py +0 -590
- examples/agora_ex/configs/rl_lora_qwen3_moe_2xh200.toml +0 -121
- examples/agora_ex/reward_fn_grpo-human.py +0 -129
- examples/agora_ex/system_prompt_CURRENT.md +0 -63
- examples/agora_ex/task_app/agora_ex_task_app.py +0 -590
- examples/agora_ex/task_app/reward_fn_grpo-human.py +0 -129
- examples/agora_ex/task_app/system_prompt_CURRENT.md +0 -63
- synth_ai/rubrics/__init__.py +0 -22
- synth_ai/task/rubrics.py +0 -219
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/README.md +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/README.md +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/branching.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/environment_routes.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/app.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/environment.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/react_agent.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/shared.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/tools.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/hosted_app.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/openai_client.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/main.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/registry.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/volume.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_agents.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/utils.py +0 -0
- /examples/{rl/task_app → task_apps/math}/README.md +0 -0
- /examples/{rl/task_app → task_apps/math}/math_task_app.py +0 -0
- /examples/{rl → workflows/math_rl}/configs/eval_base_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/eval_rl_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen17.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/rl_from_ft_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/run_eval.py +0 -0
- /examples/{rl → workflows/math_rl}/run_rl_and_save.py +0 -0
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.13.dev2.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.13.dev2.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.13.dev2.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.13.dev2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,1502 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Simple Agent Module
|
|
3
|
+
|
|
4
|
+
Provides a streamlined approach for direct frame + state -> action processing,
|
|
5
|
+
with enhanced history tracking to prevent getting stuck in loops.
|
|
6
|
+
|
|
7
|
+
Key improvements over the original simple mode:
|
|
8
|
+
- Location-based stuck detection (tracks repeated actions at same coordinates)
|
|
9
|
+
- Context-aware history (overworld/battle/menu/dialogue awareness)
|
|
10
|
+
- Memory management to fit within LLM context limits
|
|
11
|
+
- Detailed history tracking with timestamps and game state summaries
|
|
12
|
+
- Smart context switching that helps agent avoid infinite loops
|
|
13
|
+
- Configurable history window sizes for different use cases
|
|
14
|
+
- Chain of thought reasoning with structured LLM responses
|
|
15
|
+
- Objectives system with automatic and manual completion tracking
|
|
16
|
+
- Dynamic goal setting and progress monitoring
|
|
17
|
+
|
|
18
|
+
The agent maintains objectives (go to location, battle trainer, etc.) that are
|
|
19
|
+
automatically tracked and marked complete when achieved. The LLM can also
|
|
20
|
+
manually complete objectives and create new ones dynamically through structured
|
|
21
|
+
commands. It uses chain of thought reasoning to make better decisions while
|
|
22
|
+
considering current objectives. All state including objectives is forwarded
|
|
23
|
+
to support external monitoring and debugging.
|
|
24
|
+
|
|
25
|
+
Configuration defaults (can be customized):
|
|
26
|
+
- 100 previous state/location entries (with context and reasoning)
|
|
27
|
+
- 50 recent button presses tracked
|
|
28
|
+
- 30 history entries shown to LLM in prompts
|
|
29
|
+
- 40 recent actions shown to LLM in prompts
|
|
30
|
+
- Automatic memory management to stay within LLM context limits
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
import logging
|
|
34
|
+
import os
|
|
35
|
+
import sys
|
|
36
|
+
from collections import deque
|
|
37
|
+
from dataclasses import dataclass, field
|
|
38
|
+
from datetime import datetime
|
|
39
|
+
from typing import List, Dict, Any, Optional, Tuple
|
|
40
|
+
import numpy as np
|
|
41
|
+
from PIL import Image
|
|
42
|
+
|
|
43
|
+
from utils.state_formatter import format_state_for_llm
|
|
44
|
+
|
|
45
|
+
logger = logging.getLogger(__name__)
|
|
46
|
+
|
|
47
|
+
# Configurable parameters for history tracking.
# These module-level defaults are read when a SimpleAgentState / SimpleAgent is
# created, and can be changed at runtime via configure_simple_agent_defaults().
DEFAULT_MAX_HISTORY_ENTRIES = 100  # Previous states/locations with context
DEFAULT_MAX_RECENT_ACTIONS = 50  # Recent button presses
DEFAULT_HISTORY_DISPLAY_COUNT = 30  # Number of history entries shown to LLM
DEFAULT_ACTIONS_DISPLAY_COUNT = 40  # Number of recent actions shown to LLM
|
|
52
|
+
|
|
53
|
+
def configure_simple_agent_defaults(max_history_entries: Optional[int] = None, max_recent_actions: Optional[int] = None,
                                    history_display_count: Optional[int] = None, actions_display_count: Optional[int] = None):
    """Configure default parameters for all new SimpleAgent instances.

    Every argument left as ``None`` keeps the current module-level default.

    Args:
        max_history_entries: New value for DEFAULT_MAX_HISTORY_ENTRIES.
        max_recent_actions: New value for DEFAULT_MAX_RECENT_ACTIONS.
        history_display_count: New value for DEFAULT_HISTORY_DISPLAY_COUNT.
        actions_display_count: New value for DEFAULT_ACTIONS_DISPLAY_COUNT.

    Raises:
        ValueError: If ``max_history_entries`` or ``max_recent_actions`` is
            negative.
    """
    global DEFAULT_MAX_HISTORY_ENTRIES, DEFAULT_MAX_RECENT_ACTIONS
    global DEFAULT_HISTORY_DISPLAY_COUNT, DEFAULT_ACTIONS_DISPLAY_COUNT

    # These two values are later passed to deque(maxlen=...), which rejects a
    # negative maxlen. Validate before touching any global so that a bad call
    # fails here, at the source, and never leaves a half-applied configuration.
    for label, value in (("max_history_entries", max_history_entries),
                         ("max_recent_actions", max_recent_actions)):
        if value is not None and value < 0:
            raise ValueError(f"{label} must be non-negative, got {value}")

    if max_history_entries is not None:
        DEFAULT_MAX_HISTORY_ENTRIES = max_history_entries
    if max_recent_actions is not None:
        DEFAULT_MAX_RECENT_ACTIONS = max_recent_actions
    if history_display_count is not None:
        DEFAULT_HISTORY_DISPLAY_COUNT = history_display_count
    if actions_display_count is not None:
        DEFAULT_ACTIONS_DISPLAY_COUNT = actions_display_count

    logger.info(
        "Updated SimpleAgent defaults: %s history, %s actions, display %s/%s",
        DEFAULT_MAX_HISTORY_ENTRIES,
        DEFAULT_MAX_RECENT_ACTIONS,
        DEFAULT_HISTORY_DISPLAY_COUNT,
        DEFAULT_ACTIONS_DISPLAY_COUNT,
    )
|
|
70
|
+
|
|
71
|
+
@dataclass
class Objective:
    """A single goal tracked by the agent.

    Only ``id``, ``description`` and ``objective_type`` are required; every
    other field has a default.
    """
    id: str
    description: str
    # Category tag, e.g. "location", "battle", "item", "dialogue", "custom".
    # The built-in storyline objectives in this file also use "system" and "pokemon".
    objective_type: str
    # Specific target (coords, trainer name, item name, etc.)
    target_value: Optional[Any] = field(default=None)
    completed: bool = field(default=False)
    # Timestamp taken when the instance is constructed.
    created_at: datetime = field(default_factory=datetime.now)
    completed_at: Optional[datetime] = field(default=None)
    progress_notes: str = field(default="")
    # True for main storyline objectives (auto-verified), False for agent sub-objectives
    storyline: bool = field(default=False)
    # Emulator milestone ID for storyline objectives
    milestone_id: Optional[str] = field(default=None)
|
|
84
|
+
|
|
85
|
+
@dataclass
class HistoryEntry:
    """One recorded step of the agent's history.

    All six fields are required and positional, in the order declared below.
    """
    timestamp: datetime
    player_coords: Optional[Tuple[int, int]]
    map_id: Optional[int]
    # Game context label such as "overworld", "battle", "menu", "dialogue"
    context: str
    action_taken: str
    game_state_summary: str
|
|
94
|
+
|
|
95
|
+
@dataclass
class SimpleAgentState:
    """Mutable bookkeeping for the simple agent: history, actions, objectives."""
    # The two deques default to None and are built in __post_init__, so that the
    # module-level size defaults are read at instantiation time rather than at
    # class-definition time.
    history: deque = None
    recent_actions: deque = None
    stuck_detection: Dict[str, int] = field(default_factory=dict)
    step_counter: int = 0
    objectives: List[Objective] = field(default_factory=list)
    objectives_updated: bool = False
    # coord_key -> [failed_directions]
    failed_movements: Dict[str, List[str]] = field(default_factory=dict)
    # coord_key -> interaction_notes
    npc_interactions: Dict[str, str] = field(default_factory=dict)

    def __post_init__(self):
        """Create any deque the caller did not supply, sized from the current defaults."""
        self.history = (
            deque(maxlen=DEFAULT_MAX_HISTORY_ENTRIES) if self.history is None else self.history
        )
        self.recent_actions = (
            deque(maxlen=DEFAULT_MAX_RECENT_ACTIONS) if self.recent_actions is None else self.recent_actions
        )
|
|
114
|
+
|
|
115
|
+
class SimpleAgent:
|
|
116
|
+
"""
|
|
117
|
+
Simple agent that processes frame + state -> action directly with history tracking
|
|
118
|
+
"""
|
|
119
|
+
|
|
120
|
+
def __init__(self, vlm, max_history_entries: Optional[int] = None, max_recent_actions: Optional[int] = None,
             history_display_count: Optional[int] = None, actions_display_count: Optional[int] = None):
    """Create the agent and seed its storyline objectives.

    Args:
        vlm: Model handle; stored as ``self.vlm`` and not otherwise used here.
        max_history_entries: Maximum length of the history deque.
        max_recent_actions: Maximum length of the recent-actions deque.
        history_display_count: Stored as ``self.history_display_count``.
        actions_display_count: Stored as ``self.actions_display_count``.

    Any size argument left as ``None`` falls back to the current module-level
    default. An explicit ``0`` is honoured rather than replaced by the default.
    """
    self.vlm = vlm

    # Compare against None (not truthiness) so that 0 is a usable setting,
    # matching the checks in configure_simple_agent_defaults().
    if max_history_entries is None:
        max_history_entries = DEFAULT_MAX_HISTORY_ENTRIES
    if max_recent_actions is None:
        max_recent_actions = DEFAULT_MAX_RECENT_ACTIONS
    if history_display_count is None:
        history_display_count = DEFAULT_HISTORY_DISPLAY_COUNT
    if actions_display_count is None:
        actions_display_count = DEFAULT_ACTIONS_DISPLAY_COUNT

    # Hand the sized deques to the state directly instead of letting
    # __post_init__ build default-sized ones that would be discarded at once.
    self.state = SimpleAgentState(
        history=deque(maxlen=max_history_entries),
        recent_actions=deque(maxlen=max_recent_actions),
    )

    # Display parameters for LLM prompts
    self.history_display_count = history_display_count
    self.actions_display_count = actions_display_count

    # Initialize storyline objectives for Emerald progression
    self._initialize_storyline_objectives()
|
|
140
|
+
|
|
141
|
+
def _initialize_storyline_objectives(self):
    """Append the fixed Pokémon Emerald storyline objectives to ``self.state.objectives``.

    Each objective is created uncompleted, flagged ``storyline=True`` and
    tagged with its emulator milestone ID (``None`` where there is none).
    """
    # Columns: id, description, objective_type, target_value, milestone_id
    storyline_table = (
        ("story_game_start", "Complete title sequence and begin the game",
         "system", "Game Running", "GAME_RUNNING"),
        ("story_littleroot_town", "Arrive in Littleroot Town and explore the area",
         "location", "Littleroot Town", "LITTLEROOT_TOWN"),
        ("story_route_101", "Travel north to Route 101 and encounter Prof. Birch",
         "location", "Route 101", "ROUTE_101"),
        ("story_starter_chosen", "Choose starter Pokémon and receive first party member",
         "pokemon", "Starter Pokémon", "STARTER_CHOSEN"),
        ("story_oldale_town", "Continue journey to Oldale Town",
         "location", "Oldale Town", "OLDALE_TOWN"),
        ("story_route_103", "Travel to Route 103 to meet rival",
         "location", "Route 103", "ROUTE_103"),
        ("story_route_102", "Return through Route 102 toward Petalburg City",
         "location", "Route 102", "ROUTE_102"),
        ("story_petalburg_city", "Navigate to Petalburg City and visit Dad's gym",
         "location", "Petalburg City", "PETALBURG_CITY"),
        ("story_route_104", "Travel north through Route 104 toward Petalburg Woods",
         "location", "Route 104", "ROUTE_104"),
        ("story_petalburg_woods", "Navigate through Petalburg Woods to help Devon researcher",
         "location", "Petalburg Woods", "PETALBURG_WOODS"),
        ("story_rustboro_city", "Arrive in Rustboro City and deliver Devon Goods",
         "location", "Rustboro City", "RUSTBORO_CITY"),
        # Gym entry doesn't have separate milestone
        ("story_rustboro_gym", "Enter the Rustboro Gym and prepare for Roxanne battle",
         "location", "Rustboro Gym", None),
        ("story_stone_badge", "Defeat Roxanne and earn the Stone Badge",
         "battle", "Stone Badge", "STONE_BADGE"),
    )

    for story_id, story_text, story_type, story_target, story_milestone in storyline_table:
        self.state.objectives.append(
            Objective(
                id=story_id,
                description=story_text,
                objective_type=story_type,
                target_value=story_target,
                completed=False,
                progress_notes="Storyline objective - verified by emulator milestones",
                storyline=True,
                milestone_id=story_milestone,
            )
        )

    logger.info(f"Initialized {len(storyline_table)} storyline objectives for Emerald progression")
|
|
252
|
+
|
|
253
|
+
def get_game_context(self, game_state: Dict[str, Any]) -> str:
    """Determine the current game context from a game-state dict.

    Checks are made in priority order: title, battle, dialogue, menu, and
    finally overworld as the fallback.

    Args:
        game_state: Dict read via its "player" and "game" sub-dicts. Missing
            keys and keys whose value is ``None`` are both treated as absent.

    Returns:
        One of "title", "battle", "dialogue", "menu", "overworld", or
        "unknown" if the state could not be inspected at all.
    """
    try:
        # `or {}` / `or ""` also covers keys that are present but None, which
        # would otherwise raise on .get()/.lower()/.strip() and be reported
        # as "unknown" instead of being classified.
        player = game_state.get("player") or {}
        game = game_state.get("game") or {}

        # Check if in title sequence first
        if player.get("location", "") == "TITLE_SEQUENCE":
            return "title"

        # Check game state for title/intro
        game_state_value = (game.get("game_state") or "").lower()
        if "title" in game_state_value or "intro" in game_state_value:
            return "title"

        # Check if player name is not set (indicates title sequence)
        player_name = (player.get("name") or "").strip()
        if not player_name or player_name == "????????":
            return "title"

        # Check if in battle
        if game.get("is_in_battle", False):
            logger.debug("Detected battle context")
            return "battle"

        # Check if dialogue is active
        dialogue_state = game.get("dialogue") or {}
        if dialogue_state.get("active", False) or (dialogue_state.get("text") or "").strip():
            return "dialogue"

        # Check if in menu (simplified detection)
        if player.get("in_menu", False):
            return "menu"

        # Default to overworld
        return "overworld"

    except Exception as e:
        # Best-effort: a malformed state must not crash the agent loop.
        logger.warning(f"Error determining game context: {e}")
        return "unknown"
|
|
294
|
+
|
|
295
|
+
def get_player_coords(self, game_state: Dict[str, Any]) -> Optional[Tuple[int, int]]:
    """Return the player's ``(x, y)`` from the game state, or ``None`` if unavailable.

    The nested ``player["position"]`` dict is tried first; if it is empty or
    lacks either coordinate, ``x``/``y`` directly on ``player`` are used.
    Any error while reading is logged and yields ``None``.
    """
    try:
        player_info = game_state.get("player", {})
        nested = player_info.get("position", {})

        # Standard format first, then the flat fallback on the player dict.
        sources = [nested, player_info] if nested else [player_info]
        for source in sources:
            col = source.get("x")
            row = source.get("y")
            if col is not None and row is not None:
                return (col, row)
    except Exception as e:
        logger.warning(f"Error getting player coords: {e}")
    return None
|
|
315
|
+
|
|
316
|
+
def get_map_id(self, game_state: Dict[str, Any]) -> Optional[int]:
    """Return ``game_state["map"]["id"]``, or ``None`` if it is absent or unreadable."""
    try:
        map_info = game_state.get("map", {})
        map_id = map_info.get("id")
    except Exception as e:
        logger.warning(f"Error getting map ID: {e}")
        return None
    return map_id
|
|
323
|
+
|
|
324
|
+
def add_objective(self, description: str, objective_type: str, target_value: Any = None) -> str:
    """Create a new objective, append it to the agent state, and return its ID.

    The ID has the form ``obj_<current objective count>_<unix timestamp>``.
    Also sets ``self.state.objectives_updated`` to ``True``.
    """
    tracked = self.state.objectives
    stamp = int(datetime.now().timestamp())
    new_id = f"obj_{len(tracked)}_{stamp}"

    tracked.append(
        Objective(
            id=new_id,
            description=description,
            objective_type=objective_type,
            target_value=target_value,
        )
    )
    self.state.objectives_updated = True
    logger.info(f"Added objective: {description}")
    return new_id
|
|
337
|
+
|
|
338
|
+
def complete_objective(self, obj_id: str, progress_notes: str = ""):
    """Mark the open objective with the given ID as completed.

    Storyline objectives are never completed here; the refusal is logged
    with a note that they are verified by emulator milestones.

    Args:
        obj_id: ID of the objective to complete.
        progress_notes: Free-text notes stored on the objective.

    Returns:
        True if the objective was completed by this call; False if no open
        objective has that ID or if it is a storyline objective.
    """
    match = next(
        (
            candidate
            for candidate in self.state.objectives
            if candidate.id == obj_id and not candidate.completed
        ),
        None,
    )
    if match is None:
        return False

    # Prevent manual completion of storyline objectives
    if match.storyline:
        logger.warning(f"Cannot manually complete storyline objective: {match.description}. These are verified by emulator milestones.")
        return False

    match.completed = True
    match.completed_at = datetime.now()
    match.progress_notes = progress_notes
    self.state.objectives_updated = True
    logger.info(f"Completed objective: {match.description}")
    return True
|
|
354
|
+
|
|
355
|
+
def get_active_objectives(self) -> List[Objective]:
    """Return the objectives not yet completed, in their stored order."""
    pending = []
    for objective in self.state.objectives:
        if objective.completed:
            continue
        pending.append(objective)
    return pending
|
|
358
|
+
|
|
359
|
+
def get_completed_objectives(self) -> List[Objective]:
    """Return the objectives already completed, in their stored order."""
    finished = []
    for objective in self.state.objectives:
        if objective.completed:
            finished.append(objective)
    return finished
|
|
362
|
+
|
|
363
|
+
def check_objective_completion(self, game_state: Dict[str, Any]) -> List[str]:
    """Complete active objectives whose condition is met by the game state.

    Conditions by objective type:
      - "location": the target is an (x, y) pair and the player is within
        2 tiles of it on both axes. Non-pair targets (e.g. location names)
        are ignored here.
      - "battle" / "dialogue": the current game context equals the type.
      - "map": the current map ID equals the target value.

    Args:
        game_state: Complete game state dictionary.

    Returns:
        IDs of the objectives that were actually marked completed by this
        call. Objectives that `complete_objective` refuses (it returns
        False for storyline objectives) are not reported.
    """
    completed_ids = []
    coords = self.get_player_coords(game_state)
    context = self.get_game_context(game_state)
    map_id = self.get_map_id(game_state)

    for obj in self.get_active_objectives():
        should_complete = False
        notes = ""

        if obj.objective_type == "location" and coords and obj.target_value:
            # Only dynamically added coordinate targets are checked here;
            # string targets (location names) are left to milestone
            # verification.
            if isinstance(obj.target_value, (tuple, list)) and len(obj.target_value) == 2:
                target_x, target_y = obj.target_value
                if abs(coords[0] - target_x) <= 2 and abs(coords[1] - target_y) <= 2:
                    should_complete = True
                    notes = f"Reached location ({coords[0]}, {coords[1]})"

        elif obj.objective_type == "battle" and context == "battle":
            # Objective completed when battle starts
            should_complete = True
            notes = "Entered battle"

        elif obj.objective_type == "dialogue" and context == "dialogue":
            # Objective completed when dialogue starts
            should_complete = True
            notes = "Started dialogue"

        elif (obj.objective_type == "map"
              and map_id is not None
              and obj.target_value is not None):
            # Compare against None explicitly: 0 is a legitimate map ID and
            # must not be treated as "no map".
            if map_id == obj.target_value:
                should_complete = True
                notes = f"Reached map {map_id}"

        # Only report the objective when the completion really happened.
        if should_complete and self.complete_objective(obj.id, notes):
            completed_ids.append(obj.id)

    return completed_ids
|
|
406
|
+
|
|
407
|
+
def check_storyline_milestones(self, game_state: Dict[str, Any]) -> List[str]:
    """Auto-complete storyline objectives whose emulator milestone is done.

    Args:
        game_state: Game state dictionary; its "milestones" entry is looked
            up by each objective's ``milestone_id`` and the entry's
            "completed" flag is read.

    Returns:
        IDs of the storyline objectives completed by this call (empty when
        the game state carries no milestone data).
    """
    milestone_table = game_state.get("milestones", {})
    newly_done = []
    if not milestone_table:
        # No milestone data available, nothing to verify against.
        return newly_done

    for objective in self.get_active_objectives():
        # Only storyline objectives linked to a milestone are eligible.
        eligible = objective.storyline and objective.milestone_id and not objective.completed
        if not eligible:
            continue

        entry = milestone_table.get(objective.milestone_id, {})
        if not entry.get("completed", False):
            continue

        objective.completed = True
        objective.completed_at = datetime.now()
        objective.progress_notes = f"Auto-completed by emulator milestone: {objective.milestone_id}"
        self.state.objectives_updated = True
        newly_done.append(objective.id)
        logger.info(f"Auto-completed storyline objective via milestone {objective.milestone_id}: {objective.description}")

    return newly_done
|
|
433
|
+
|
|
434
|
+
def detect_stuck_pattern(self, coords: Optional[Tuple[int, int]], context: str, game_state: Dict[str, Any] = None) -> bool:
    """Count a visit to this position/context and report whether it looks stuck.

    Each call that passes the guards below increments a counter keyed by
    ``"<x>_<y>_<context>"`` in ``self.state.stuck_detection``; the agent is
    reported as stuck once that counter reaches 8. This method only ever
    increments the counters.

    Args:
        coords: Current player (x, y), or None when unknown.
        context: Current game context string.
        game_state: Optional game state used to recognise the title/intro
            sequence.

    Returns:
        True when the counter for this position/context is 8 or more;
        False otherwise, including whenever a guard skips the check.
    """
    # Staying in place is expected in these contexts.
    if context in ("battle", "dialogue", "menu", "title"):
        logger.debug(f"Skipping stuck detection - context: {context}")
        return False

    # Need valid coordinates for stuck detection
    if not coords or coords[0] is None or coords[1] is None:
        return False

    if game_state:
        # Title sequence heuristics: missing/placeholder player name, a
        # title/intro game state, or the TITLE_SEQUENCE location marker.
        trainer_name = game_state.get("player", {}).get("name", "").strip()
        if trainer_name in ("", "????????"):
            return False

        phase = game_state.get("game", {}).get("game_state", "").lower()
        if any(marker in phase for marker in ("title", "intro")):
            return False

        if game_state.get("player", {}).get("location", "") == "TITLE_SEQUENCE":
            return False

    counters = self.state.stuck_detection
    key = f"{coords[0]}_{coords[1]}_{context}"
    visits = counters.get(key, 0) + 1
    counters[key] = visits
    return visits >= 8
|
|
467
|
+
|
|
468
|
+
def is_black_frame(self, frame) -> bool:
    """
    Check if the frame is mostly black (transition/loading screen).

    A frame counts as black when its mean pixel value is below 10, or when
    the mean is below 30 and the standard deviation is below 5 (a dim,
    nearly uniform frame).

    Args:
        frame: Object with a ``convert`` attribute (treated as a PIL Image)
            or a ``shape`` attribute (treated as a numpy array)

    Returns:
        bool: True if frame is mostly black, False otherwise (including
        unknown frame types and any error during analysis)
    """
    try:
        if hasattr(frame, 'convert'):
            # Already image-like, use as is.
            image = frame
        elif hasattr(frame, 'shape'):
            # Array input goes through PIL, as in the image path.
            image = Image.fromarray(frame)
        else:
            return False  # Unknown type, assume not black

        pixels = np.array(image)

        # Statistics are taken over every value in the array, so colour
        # channels (if any) are averaged together.
        mean_brightness = np.mean(pixels)
        std_dev = np.std(pixels)

        very_dark = mean_brightness < 10
        dim_and_flat = mean_brightness < 30 and std_dev < 5
        is_black = very_dark or dim_and_flat

        if is_black:
            logger.debug(f"Black frame detected: mean_brightness={mean_brightness:.2f}, std_dev={std_dev:.2f}")

        return is_black

    except Exception as e:
        logger.warning(f"Error checking for black frame: {e}")
        return False  # On error, assume not black to continue processing
|
|
514
|
+
|
|
515
|
+
def get_relevant_history_summary(self, current_context: str, coords: Optional[Tuple[int, int]]) -> str:
    """Render the most recent history entries as a numbered, one-per-line list.

    Args:
        current_context: Currently unused; accepted for future filtering.
        coords: Currently unused; accepted for future filtering.

    Returns:
        "No previous history." when the history is empty; otherwise one
        line per entry, taken from the last ``self.history_display_count``
        entries, in the form ``"<n>. <context> at (<x>,<y>): <action>"``
        with "(?)" standing in for unknown coordinates.
    """
    past = self.state.history
    if not past:
        return "No previous history."

    # Copy to a list first so the tail slice works on any history container.
    window = list(past)[-self.history_display_count:]

    rendered = []
    for number, entry in enumerate(window, start=1):
        if entry.player_coords:
            where = f"({entry.player_coords[0]},{entry.player_coords[1]})"
        else:
            where = "(?)"
        rendered.append(f"{number}. {entry.context} at {where}: {entry.action_taken}")

    return "\n".join(rendered)
|
|
532
|
+
|
|
533
|
+
def get_stuck_warning(self, coords: Optional[Tuple[int, int]], context: str, game_state: Dict[str, Any] = None) -> str:
    """Return prompt warning text when the agent looks stuck, else "".

    The title context never produces a warning and does not consult (or
    update) the stuck detector at all.

    Args:
        coords: Current player (x, y), or None.
        context: Current game context string.
        game_state: Optional game state forwarded to the stuck detector.

    Returns:
        A two-line warning plus tip, or an empty string.
    """
    if context == "title" or not self.detect_stuck_pattern(coords, context, game_state):
        return ""

    return (
        "\n⚠️ WARNING: You appear to be stuck at this location/context. Try a different approach!\n"
        "💡 TIP: If you try an action like RIGHT but coordinates don't change from (X,Y) to (X+1,Y), there's likely an obstacle. Check the map around player P for walls (#) or other barriers blocking your path."
    )
|
|
543
|
+
|
|
544
|
+
def create_game_state_summary(self, game_state: Dict[str, Any]) -> str:
    """Create a concise one-line summary of the current game state.

    The summary joins, with " | ", whichever of these parts are available:
    player coordinates, map ID, and context-specific info ("In battle", or
    the dialogue text when in dialogue).

    Args:
        game_state: Complete game state dictionary.

    Returns:
        The summary string, "Unknown state" when nothing could be
        extracted, or "Error reading state" if extraction raised.
    """
    try:
        game_info = game_state.get("game", {})

        summary_parts = []

        # Player location
        coords = self.get_player_coords(game_state)
        if coords:
            summary_parts.append(f"Player at ({coords[0]}, {coords[1]})")

        # Map info. Compare against None rather than truthiness so that a
        # map ID of 0 is still reported.
        map_id = self.get_map_id(game_state)
        if map_id is not None:
            summary_parts.append(f"Map {map_id}")

        # Context-specific info
        context = self.get_game_context(game_state)
        if context == "battle":
            summary_parts.append("In battle")
        elif context == "dialogue":
            dialogue_text = game_info.get("dialogue", {}).get("text", "")
            if dialogue_text:
                summary_parts.append(f"Dialogue: {dialogue_text}")

        return " | ".join(summary_parts) if summary_parts else "Unknown state"

    except Exception as e:
        logger.warning(f"Error creating game state summary: {e}")
        return "Error reading state"
|
|
575
|
+
|
|
576
|
+
def step(self, game_state: Dict[str, Any]) -> Dict[str, Any]:
    """
    Compatibility wrapper for clients that call agent.step(game_state)

    Args:
        game_state: Complete game state dictionary (should include 'frame')

    Returns:
        Dictionary with 'action' and 'reasoning'; the action is "WAIT"
        when the game state carries no frame, otherwise whatever
        process_step returns
    """
    frame = game_state.get('frame')
    if frame is not None:
        chosen = self.process_step(frame, game_state)
        return {"action": chosen, "reasoning": "Simple agent decision"}

    logger.error("🚫 No frame in game_state for SimpleAgent.step")
    return {"action": "WAIT", "reasoning": "No frame available"}
|
|
593
|
+
|
|
594
|
+
def process_step(self, frame, game_state: Dict[str, Any]) -> str:
    """
    Main processing step for simple mode with history tracking

    Validates the frame, builds the prompt (history, objectives, formatted
    state, movement memory, stuck warning), queries the VLM, parses the
    reply, and records the step in history.

    Args:
        frame: Current game frame; must expose `save` (PIL Image) or
            `shape` (numpy array)
        game_state: Complete game state dictionary

    Returns:
        "WAIT" when the frame is missing, invalid, black, or the VLM call
        fails; otherwise the parsed action(s) (normally a list of action
        strings). On an unexpected error the fallback is ["A"].
    """
    # CRITICAL: Validate frame before any VLM processing
    if frame is None:
        logger.error("🚫 CRITICAL: SimpleAgent.process_step called with None frame - cannot proceed")
        return "WAIT"

    # Validate frame is a proper image
    if not (hasattr(frame, 'save') or hasattr(frame, 'shape')):
        logger.error(f"🚫 CRITICAL: SimpleAgent.process_step called with invalid frame type {type(frame)} - cannot proceed")
        return "WAIT"

    # Additional PIL Image validation. Only unpack `size` when it really is
    # a (width, height) pair: numpy arrays also have a `size` attribute,
    # but it is a single int (element count) and unpacking it would raise.
    frame_size = getattr(frame, 'size', None)
    if isinstance(frame_size, (tuple, list)) and len(frame_size) == 2:
        width, height = frame_size
        if width <= 0 or height <= 0:
            logger.error(f"🚫 CRITICAL: SimpleAgent.process_step called with invalid frame size {width}x{height} - cannot proceed")
            return "WAIT"

    # Check for black frame (transition screen)
    if self.is_black_frame(frame):
        logger.info("⏳ Black frame detected (likely a transition), waiting for next frame...")
        return "WAIT"  # Return WAIT to skip this frame and wait for the next one

    try:
        # Increment step counter
        self.state.step_counter += 1

        # Get current state info
        coords = self.get_player_coords(game_state)
        context = self.get_game_context(game_state)
        map_id = self.get_map_id(game_state)

        # Format the current state for LLM (includes movement preview)
        formatted_state = format_state_for_llm(game_state)

        # Get movement memory for the current area
        movement_memory = ""
        if coords:
            movement_memory = self.get_area_movement_memory(coords)

        # Check for objective completion first
        self.check_objective_completion(game_state)

        # Check storyline milestones and auto-complete objectives
        self.check_storyline_milestones(game_state)

        # Get relevant history and stuck detection
        history_summary = self.get_relevant_history_summary(context, coords)
        stuck_warning = self.get_stuck_warning(coords, context, game_state)
        recent_actions_str = ', '.join(list(self.state.recent_actions)[-self.actions_display_count:]) if self.state.recent_actions else 'None'

        # Format objectives for LLM
        active_objectives = self.get_active_objectives()
        completed_objectives_list = self.get_completed_objectives()
        objectives_summary = self._format_objectives_for_llm(active_objectives, completed_objectives_list)

        # Build pathfinding rules section (only if not in title sequence)
        pathfinding_rules = ""
        if context != "title":
            pathfinding_rules = """
🚨 PATHFINDING RULES:
1. **SINGLE STEP FIRST**: Always prefer single actions (UP, DOWN, LEFT, RIGHT, A, B) unless you're 100% certain about multi-step paths
2. **CHECK EVERY STEP**: Before chaining movements, verify EACH step in your sequence using the MOVEMENT PREVIEW and map
3. **BLOCKED = STOP**: If ANY step shows BLOCKED in the movement preview, the entire sequence will fail
4. **NO BLIND CHAINS**: Never chain movements through areas you can't see or verify as walkable
5. **PERFORM PATHFINDING**: Find a path to a target location (X',Y') from the player position (X,Y) on the map. DO NOT TRAVERSE THROUGH OBSTACLES (#) -- it will not work.

💡 SMART MOVEMENT STRATEGY:
- Use MOVEMENT PREVIEW to see exactly what happens with each direction
- If your target requires multiple steps, plan ONE step at a time
- Only chain 2-3 moves if ALL intermediate tiles are confirmed WALKABLE
- When stuck, try a different direction rather than repeating the same blocked move

EXAMPLE - DON'T DO THIS:
❌ "I want to go right 5 tiles" → "RIGHT, RIGHT, RIGHT, RIGHT, RIGHT" (may hit wall on step 2!)

EXAMPLE - DO THIS INSTEAD:
✅ Check movement preview → "RIGHT shows (X+1,Y) WALKABLE" → "RIGHT" (single safe step)
✅ Next turn, check again → "RIGHT shows (X+2,Y) WALKABLE" → "RIGHT" (another safe step)

💡 SMART NAVIGATION:
- Check the VISUAL FRAME for NPCs (people/trainers) before moving - they're not always on the map!
- Review MOVEMENT MEMORY for locations where you've failed to move before
- Only explore areas marked with ? (these are confirmed explorable edges)
- Avoid areas surrounded by # (walls) - they're fully blocked
- Use doors (D), stairs (S), or walk around obstacles when pathfinding suggests it

💡 NPC & OBSTACLE HANDLING:
- If you see NPCs in the image, avoid walking into them or interact with A/B if needed
- If a movement fails (coordinates don't change), that location likely has an NPC or obstacle
- Use your MOVEMENT MEMORY to remember problem areas and plan around them
- NPCs can trigger battles or dialogue, which may be useful for objectives
"""

        # Create enhanced prompt with objectives, history context and chain of thought request
        prompt = f"""You are playing Pokemon Emerald. Progress quickly to the milestones by balancing exploration and exploitation of things you know.
Based on the current game frame and state information, think through your next move and choose the best button action.

RECENT ACTION HISTORY (last {self.actions_display_count} actions):
{recent_actions_str}

LOCATION/CONTEXT HISTORY (last {self.history_display_count} steps):
{history_summary}

CURRENT OBJECTIVES:
{objectives_summary}

CURRENT GAME STATE:
{formatted_state}

{movement_memory}

{stuck_warning}

Available actions: A, B, START, SELECT, UP, DOWN, LEFT, RIGHT

IMPORTANT: Please think step by step before choosing your action. Structure your response like this:

ANALYSIS:
[Analyze what you see in the frame and current game state - what's happening? where are you? what should you be doing?
IMPORTANT: Look carefully at the game image for NPCs (people, trainers) that might not be shown on the map. NPCs appear as sprite characters and can block movement or trigger battles/dialogue.]

OBJECTIVES:
[Review your current objectives. You have main storyline objectives (story_*) that track overall Emerald progression - these are automatically verified and you CANNOT manually complete them. You can create your own sub-objectives to help achieve the main goals. Do any need to be updated, added, or marked as complete?
- Add sub-objectives: ADD_OBJECTIVE: type:description:target_value (e.g., "ADD_OBJECTIVE: location:Find Pokemon Center in town:(15,20)" or "ADD_OBJECTIVE: item:Buy Pokeballs:5")
- Complete sub-objectives only: COMPLETE_OBJECTIVE: objective_id:notes (e.g., "COMPLETE_OBJECTIVE: my_sub_obj_123:Successfully bought Pokeballs")
- NOTE: Do NOT try to complete storyline objectives (story_*) - they auto-complete when milestones are reached]

PLAN:
[Think about your immediate goal - what do you want to accomplish in the next few actions? Consider your current objectives and recent history.
Check MOVEMENT MEMORY for areas you've had trouble with before and plan your route accordingly.]

REASONING:
[Explain why you're choosing this specific action. Reference the MOVEMENT PREVIEW and MOVEMENT MEMORY sections. Check the visual frame for NPCs before moving. If you see NPCs in the image, avoid walking into them. Consider any failed movements or known obstacles from your memory.]

ACTION:
[Your final action choice - PREFER SINGLE ACTIONS like 'RIGHT' or 'A'. Only use multiple actions like 'UP, UP, RIGHT' if you've verified each step is WALKABLE in the movement preview and map.]

{pathfinding_rules}

Context: {context} | Coords: {coords} """

        # Print complete prompt to terminal for debugging
        print("\n" + "="*120)
        print("🤖 SIMPLE AGENT PROMPT SENT TO VLM:")
        print("="*120)

        # Write the prompt directly to stdout to avoid terminal truncation
        sys.stdout.write(prompt)
        sys.stdout.write("\n")
        sys.stdout.flush()

        print("="*120)
        print("🤖 END OF SIMPLE AGENT PROMPT")
        print("="*120 + "\n")
        sys.stdout.flush()

        # Make VLM call - double-check frame validation before VLM.
        # Use an identity check, not truthiness: `bool()` of a
        # multi-element numpy array raises ValueError.
        if frame is not None and (hasattr(frame, 'save') or hasattr(frame, 'shape')):
            print("🔍 Making VLM call...")
            try:
                response = self.vlm.get_query(frame, prompt, "simple_mode")
                print(f"🔍 VLM response received: {response[:100]}..." if len(response) > 100 else f"🔍 VLM response: {response}")
            except Exception as e:
                print(f"❌ VLM call failed: {e}")
                return "WAIT"
        else:
            logger.error("🚫 CRITICAL: About to call VLM but frame validation failed - this should never happen!")
            return "WAIT"

        # Extract action(s) from structured response
        actions, reasoning = self._parse_structured_response(response, game_state)

        # Check for failed movement by comparing previous coordinates
        if len(self.state.history) > 0:
            prev_coords = self.state.history[-1].player_coords
            if prev_coords and coords:
                # If coordinates didn't change and we attempted a movement, record it as failed
                if (prev_coords == coords and
                        isinstance(actions, list) and len(actions) > 0 and
                        actions[0] in ['UP', 'DOWN', 'LEFT', 'RIGHT']):
                    self.record_failed_movement(coords, actions[0], "movement_blocked")
                elif (prev_coords == coords and
                        isinstance(actions, str) and
                        actions in ['UP', 'DOWN', 'LEFT', 'RIGHT']):
                    self.record_failed_movement(coords, actions, "movement_blocked")

        # Record this step in history with reasoning
        game_state_summary = self.create_game_state_summary(game_state)
        action_with_reasoning = f"{actions} | Reasoning: {reasoning}" if reasoning else str(actions)
        history_entry = HistoryEntry(
            timestamp=datetime.now(),
            player_coords=coords,
            map_id=map_id,
            context=context,
            action_taken=action_with_reasoning,
            game_state_summary=game_state_summary
        )
        self.state.history.append(history_entry)

        # Update recent actions
        if isinstance(actions, list):
            self.state.recent_actions.extend(actions)
        else:
            self.state.recent_actions.append(actions)

        # Decay stuck counters of every position other than the current one
        if coords:
            keys_to_reset = [k for k in self.state.stuck_detection.keys()
                             if not k.startswith(f"{coords[0]}_{coords[1]}")]
            for key in keys_to_reset:
                if self.state.stuck_detection[key] > 0:
                    self.state.stuck_detection[key] = max(0, self.state.stuck_detection[key] - 1)

        # Update server with agent step and metrics (for agent thinking display)
        self._update_server_metrics()

        return actions

    except Exception as e:
        logger.error(f"Error in simple agent processing: {e}")
        return ["A"]  # Default safe action as list
|
|
826
|
+
|
|
827
|
+
def _update_server_metrics(self):
    """Best-effort push of the cumulative LLM metrics to the local server.

    Posts ``{"metrics": ...}`` to ``http://localhost:8000/agent_step`` with
    a one-second timeout. Request failures are ignored (the server might
    not be running); a non-200 reply or any other error is logged as a
    warning. Nothing is raised to the caller.
    """
    try:
        import requests
        from utils.llm_logger import get_llm_logger

        # Get current LLM metrics
        payload = {"metrics": get_llm_logger().get_cumulative_metrics()}

        # Send metrics to server
        try:
            reply = requests.post(
                "http://localhost:8000/agent_step",
                json=payload,
                timeout=1,
            )
        except requests.exceptions.RequestException:
            # Silent fail - server might not be running or in different mode
            return

        if reply.status_code != 200:
            logger.warning(f"Failed to update server metrics: {reply.status_code}")

    except Exception as e:
        logger.warning(f"Error updating server metrics: {e}")
|
|
852
|
+
|
|
853
|
+
def _parse_actions(self, response: str, game_state: Dict[str, Any] = None) -> List[str]:
|
|
854
|
+
"""Parse action response from LLM into list of valid actions"""
|
|
855
|
+
response_upper = response.upper().strip()
|
|
856
|
+
valid_actions = ['A', 'B', 'START', 'SELECT', 'UP', 'DOWN', 'LEFT', 'RIGHT', 'WAIT']
|
|
857
|
+
|
|
858
|
+
# Parse multiple actions (could be comma or space separated)
|
|
859
|
+
actions_found = []
|
|
860
|
+
# Replace commas with spaces for consistent parsing
|
|
861
|
+
response_clean = response_upper.replace(',', ' ').replace('.', ' ')
|
|
862
|
+
tokens = response_clean.split()
|
|
863
|
+
|
|
864
|
+
for token in tokens:
|
|
865
|
+
if token in valid_actions:
|
|
866
|
+
actions_found.append(token)
|
|
867
|
+
if len(actions_found) >= 10: # Max 10 actions
|
|
868
|
+
break
|
|
869
|
+
|
|
870
|
+
# Validate movement sequences if we have game state
|
|
871
|
+
if game_state and len(actions_found) > 1:
|
|
872
|
+
# Check if this is a movement sequence
|
|
873
|
+
movement_actions = [a for a in actions_found if a in ['UP', 'DOWN', 'LEFT', 'RIGHT']]
|
|
874
|
+
if movement_actions:
|
|
875
|
+
# Validate the movement sequence
|
|
876
|
+
is_valid, reason = self.validate_movement_sequence(movement_actions, game_state)
|
|
877
|
+
if not is_valid:
|
|
878
|
+
logger.warning(f"Movement sequence validation failed: {reason}")
|
|
879
|
+
# Only take the first movement if sequence is invalid
|
|
880
|
+
if movement_actions:
|
|
881
|
+
actions_found = [movement_actions[0]]
|
|
882
|
+
logger.info(f"Reduced to single movement: {actions_found[0]}")
|
|
883
|
+
|
|
884
|
+
# If no valid actions found, use default
|
|
885
|
+
if not actions_found:
|
|
886
|
+
actions_found = ['A']
|
|
887
|
+
|
|
888
|
+
return actions_found
|
|
889
|
+
|
|
890
|
+
def _format_objectives_for_llm(self, active_objectives: List[Objective], completed_objectives: List[Objective]) -> str:
|
|
891
|
+
"""Format objectives for LLM consumption"""
|
|
892
|
+
lines = []
|
|
893
|
+
|
|
894
|
+
if active_objectives:
|
|
895
|
+
lines.append("🎯 ACTIVE OBJECTIVES:")
|
|
896
|
+
for i, obj in enumerate(active_objectives[:5], 1): # Show top 5 active
|
|
897
|
+
target_str = f" (Target: {obj.target_value})" if obj.target_value else ""
|
|
898
|
+
lines.append(f" {i}. [{obj.objective_type}] {obj.description}{target_str} [ID: {obj.id}]")
|
|
899
|
+
else:
|
|
900
|
+
lines.append("🎯 ACTIVE OBJECTIVES: None - Consider setting some goals!")
|
|
901
|
+
|
|
902
|
+
if completed_objectives:
|
|
903
|
+
recent_completed = completed_objectives[-3:] # Show last 3 completed
|
|
904
|
+
lines.append("✅ RECENTLY COMPLETED:")
|
|
905
|
+
for obj in recent_completed:
|
|
906
|
+
lines.append(f" ✓ [{obj.objective_type}] {obj.description}")
|
|
907
|
+
|
|
908
|
+
return "\n".join(lines)
|
|
909
|
+
|
|
910
|
+
def _parse_structured_response(self, response: str, game_state: Dict[str, Any] = None) -> Tuple[List[str], str]:
|
|
911
|
+
"""Parse structured chain-of-thought response and extract actions and reasoning"""
|
|
912
|
+
try:
|
|
913
|
+
# Extract sections from structured response
|
|
914
|
+
analysis = ""
|
|
915
|
+
objectives_section = ""
|
|
916
|
+
plan = ""
|
|
917
|
+
reasoning = ""
|
|
918
|
+
actions = []
|
|
919
|
+
|
|
920
|
+
# Split response into lines for processing
|
|
921
|
+
lines = response.split('\n')
|
|
922
|
+
current_section = None
|
|
923
|
+
|
|
924
|
+
for line in lines:
|
|
925
|
+
line = line.strip()
|
|
926
|
+
|
|
927
|
+
# Identify section headers
|
|
928
|
+
if line.upper().startswith('ANALYSIS:'):
|
|
929
|
+
current_section = 'analysis'
|
|
930
|
+
analysis = line[9:].strip() # Remove "ANALYSIS:" prefix
|
|
931
|
+
elif line.upper().startswith('OBJECTIVES:'):
|
|
932
|
+
current_section = 'objectives'
|
|
933
|
+
objectives_section = line[11:].strip() # Remove "OBJECTIVES:" prefix
|
|
934
|
+
elif line.upper().startswith('PLAN:'):
|
|
935
|
+
current_section = 'plan'
|
|
936
|
+
plan = line[5:].strip() # Remove "PLAN:" prefix
|
|
937
|
+
elif line.upper().startswith('REASONING:'):
|
|
938
|
+
current_section = 'reasoning'
|
|
939
|
+
reasoning = line[10:].strip() # Remove "REASONING:" prefix
|
|
940
|
+
elif line.upper().startswith('ACTION:'):
|
|
941
|
+
current_section = 'action'
|
|
942
|
+
# Extract actions from this line
|
|
943
|
+
action_text = line[7:].strip() # Remove "ACTION:" prefix
|
|
944
|
+
if action_text: # Only parse if there's content
|
|
945
|
+
actions = self._parse_actions(action_text, game_state)
|
|
946
|
+
elif line and current_section:
|
|
947
|
+
# Continue content of current section
|
|
948
|
+
if current_section == 'analysis':
|
|
949
|
+
analysis += " " + line
|
|
950
|
+
elif current_section == 'objectives':
|
|
951
|
+
objectives_section += " " + line
|
|
952
|
+
elif current_section == 'plan':
|
|
953
|
+
plan += " " + line
|
|
954
|
+
elif current_section == 'reasoning':
|
|
955
|
+
reasoning += " " + line
|
|
956
|
+
elif current_section == 'action':
|
|
957
|
+
# Additional action parsing from action section content
|
|
958
|
+
if line.strip(): # Only process non-empty lines
|
|
959
|
+
additional_actions = self._parse_actions(line, game_state)
|
|
960
|
+
actions.extend(additional_actions)
|
|
961
|
+
if len(actions) >= 10: # Max 10 actions
|
|
962
|
+
actions = actions[:10]
|
|
963
|
+
break
|
|
964
|
+
|
|
965
|
+
# Process objectives if mentioned
|
|
966
|
+
if objectives_section:
|
|
967
|
+
self._process_objectives_from_response(objectives_section)
|
|
968
|
+
|
|
969
|
+
# If no actions found in structured format, fall back to parsing entire response
|
|
970
|
+
if not actions:
|
|
971
|
+
actions = self._parse_actions(response, game_state)
|
|
972
|
+
|
|
973
|
+
# Create concise reasoning summary
|
|
974
|
+
reasoning_parts = []
|
|
975
|
+
if analysis:
|
|
976
|
+
reasoning_parts.append(f"Analysis: {analysis}")
|
|
977
|
+
if objectives_section:
|
|
978
|
+
reasoning_parts.append(f"Objectives: {objectives_section}")
|
|
979
|
+
if plan:
|
|
980
|
+
reasoning_parts.append(f"Plan: {plan}")
|
|
981
|
+
if reasoning:
|
|
982
|
+
reasoning_parts.append(f"Reasoning: {reasoning}")
|
|
983
|
+
|
|
984
|
+
full_reasoning = " | ".join(reasoning_parts) if reasoning_parts else "No reasoning provided"
|
|
985
|
+
|
|
986
|
+
return actions, full_reasoning
|
|
987
|
+
|
|
988
|
+
except Exception as e:
|
|
989
|
+
logger.warning(f"Error parsing structured response: {e}")
|
|
990
|
+
# Fall back to basic action parsing
|
|
991
|
+
return self._parse_actions(response, game_state), "Error parsing reasoning"
|
|
992
|
+
|
|
993
|
+
def _process_objectives_from_response(self, objectives_text: str):
|
|
994
|
+
"""Process objective management commands from LLM response"""
|
|
995
|
+
try:
|
|
996
|
+
# Look for ADD_OBJECTIVE and COMPLETE_OBJECTIVE commands
|
|
997
|
+
for line in objectives_text.split('\n'):
|
|
998
|
+
line = line.strip()
|
|
999
|
+
if line.upper().startswith('ADD_OBJECTIVE:'):
|
|
1000
|
+
# Parse format: ADD_OBJECTIVE: type:description:target_value
|
|
1001
|
+
content = line[14:].strip() # Remove "ADD_OBJECTIVE:" prefix
|
|
1002
|
+
parts = content.split(':', 2) # Split into max 3 parts
|
|
1003
|
+
|
|
1004
|
+
if len(parts) >= 2:
|
|
1005
|
+
obj_type = parts[0].strip()
|
|
1006
|
+
description = parts[1].strip()
|
|
1007
|
+
target_value = parts[2].strip() if len(parts) > 2 else None
|
|
1008
|
+
|
|
1009
|
+
# Parse target_value based on type
|
|
1010
|
+
parsed_target = self._parse_target_value(obj_type, target_value)
|
|
1011
|
+
|
|
1012
|
+
# Add the objective
|
|
1013
|
+
self.add_objective(description, obj_type, parsed_target)
|
|
1014
|
+
|
|
1015
|
+
elif line.upper().startswith('COMPLETE_OBJECTIVE:'):
|
|
1016
|
+
# Parse format: COMPLETE_OBJECTIVE: objective_id:notes
|
|
1017
|
+
content = line[19:].strip() # Remove "COMPLETE_OBJECTIVE:" prefix
|
|
1018
|
+
parts = content.split(':', 1) # Split into max 2 parts
|
|
1019
|
+
|
|
1020
|
+
if len(parts) >= 1:
|
|
1021
|
+
obj_id = parts[0].strip()
|
|
1022
|
+
notes = parts[1].strip() if len(parts) > 1 else "Manually completed by LLM"
|
|
1023
|
+
|
|
1024
|
+
# Complete the objective
|
|
1025
|
+
success = self.complete_objective(obj_id, notes)
|
|
1026
|
+
if success:
|
|
1027
|
+
logger.info(f"LLM manually completed objective: {obj_id}")
|
|
1028
|
+
else:
|
|
1029
|
+
logger.warning(f"LLM tried to complete non-existent or already completed objective: {obj_id}")
|
|
1030
|
+
|
|
1031
|
+
except Exception as e:
|
|
1032
|
+
logger.warning(f"Error processing objectives from response: {e}")
|
|
1033
|
+
|
|
1034
|
+
def _parse_target_value(self, obj_type: str, target_str: Optional[str]) -> Any:
|
|
1035
|
+
"""Parse target value based on objective type"""
|
|
1036
|
+
if not target_str:
|
|
1037
|
+
return None
|
|
1038
|
+
|
|
1039
|
+
try:
|
|
1040
|
+
if obj_type == "location":
|
|
1041
|
+
# Try to parse coordinates like "(15,20)" or "15,20"
|
|
1042
|
+
target_str = target_str.strip('()')
|
|
1043
|
+
if ',' in target_str:
|
|
1044
|
+
x, y = map(int, target_str.split(','))
|
|
1045
|
+
return (x, y)
|
|
1046
|
+
elif obj_type == "map":
|
|
1047
|
+
# Try to parse map ID as integer
|
|
1048
|
+
return int(target_str)
|
|
1049
|
+
else:
|
|
1050
|
+
# For other types, return as string
|
|
1051
|
+
return target_str
|
|
1052
|
+
except (ValueError, TypeError):
|
|
1053
|
+
# If parsing fails, return as string
|
|
1054
|
+
return target_str
|
|
1055
|
+
|
|
1056
|
+
def get_memory_usage_estimate(self) -> Dict[str, int]:
    """Return rough character counts for the agent's tracked state.

    Sizes are measured on ``str(entry)`` for history entries, on the raw
    action strings for recent actions, and on
    ``"<description> <target_value>"`` for objectives.
    """
    state = self.state

    history_size = 0
    for record in state.history:
        history_size += len(str(record))

    actions_size = 0
    for pressed in state.recent_actions:
        actions_size += len(pressed)

    objectives_size = 0
    for objective in state.objectives:
        objectives_size += len(f"{objective.description} {objective.target_value}")

    usage = {
        "history_entries": len(state.history),
        "history_chars": history_size,
        "recent_actions": len(state.recent_actions),
        "recent_actions_chars": actions_size,
        "objectives_count": len(state.objectives),
        "objectives_chars": objectives_size,
        "estimated_total_chars": history_size + actions_size + objectives_size,
    }
    return usage
|
|
1071
|
+
|
|
1072
|
+
def get_objectives_state(self) -> Dict[str, Any]:
    """Build a plain-dict snapshot of objectives for forwarding in game state.

    Returns:
        A dict with three keys:
        * "active": one dict per active objective (id, description, type,
          target, ISO-formatted created_at).
        * "completed": the same for the last five completed objectives, with
          completed_at (ISO string or None) and notes instead of created_at.
        * "updated": the current ``objectives_updated`` flag.
    """
    active_payload = []
    for objective in self.get_active_objectives():
        active_payload.append({
            "id": objective.id,
            "description": objective.description,
            "type": objective.objective_type,
            "target": objective.target_value,
            "created_at": objective.created_at.isoformat(),
        })

    completed_payload = []
    # Only the five most recently completed objectives are reported.
    for objective in self.get_completed_objectives()[-5:]:
        finished_at = None
        if objective.completed_at:
            finished_at = objective.completed_at.isoformat()
        completed_payload.append({
            "id": objective.id,
            "description": objective.description,
            "type": objective.objective_type,
            "target": objective.target_value,
            "completed_at": finished_at,
            "notes": objective.progress_notes,
        })

    return {
        "active": active_payload,
        "completed": completed_payload,
        "updated": self.state.objectives_updated,
    }
|
|
1098
|
+
|
|
1099
|
+
def trim_history_for_context(self, max_chars: int = 4000):
    """Drop the oldest history/actions until the size estimate fits ``max_chars``.

    History entries are discarded first, then recent actions. Neither
    collection is trimmed below its floor: at least 5 history entries (or
    half of ``history_display_count``) and at least 10 actions (or half of
    ``actions_display_count``), so the estimate may still exceed
    ``max_chars`` afterwards.
    """
    state = self.state
    history_floor = max(5, self.history_display_count // 2)
    actions_floor = max(10, self.actions_display_count // 2)

    def over_budget() -> bool:
        # Re-evaluated after every removal because the estimate shrinks.
        return self.get_memory_usage_estimate()["estimated_total_chars"] > max_chars

    while over_budget() and len(state.history) > history_floor:
        state.history.popleft()

    while len(state.recent_actions) > actions_floor and over_budget():
        state.recent_actions.popleft()
|
|
1110
|
+
|
|
1111
|
+
def reset_objectives_updated_flag(self):
    """Clear the "objectives changed" marker; call once the state was forwarded."""
    agent_state = self.state
    agent_state.objectives_updated = False
|
|
1114
|
+
|
|
1115
|
+
def configure_history_limits(self, max_history_entries: int = None, max_recent_actions: int = None,
                             history_display_count: int = None, actions_display_count: int = None):
    """Adjust history tracking parameters while the agent is running.

    Every argument is optional; a value of None leaves that setting alone.

    Args:
        max_history_entries: New capacity of the history deque.
        max_recent_actions: New capacity of the recent-actions deque.
        history_display_count: New value for ``self.history_display_count``.
        actions_display_count: New value for ``self.actions_display_count``.
    """
    state = self.state

    # A deque's maxlen cannot be changed in place, so each resized collection
    # is rebuilt from the existing items.
    if max_history_entries is not None:
        state.history = deque(state.history, maxlen=max_history_entries)

    if max_recent_actions is not None:
        state.recent_actions = deque(state.recent_actions, maxlen=max_recent_actions)

    if history_display_count is not None:
        self.history_display_count = history_display_count

    if actions_display_count is not None:
        self.actions_display_count = actions_display_count

    history, actions = state.history, state.recent_actions
    summary = (
        f"Updated history configuration: {len(history)}/{history.maxlen} history, "
        f"{len(actions)}/{actions.maxlen} actions, "
        f"display {self.history_display_count}/{self.actions_display_count}"
    )
    logger.info(summary)
|
|
1137
|
+
|
|
1138
|
+
def load_history_from_llm_checkpoint(self, checkpoint_file: str):
    """Load SimpleAgent history from an LLM checkpoint file.

    The checkpoint is read as JSON. Every item of its ``log_entries`` list
    whose ``type`` is "interaction" and whose ``interaction_type`` contains
    "simple_mode" is turned into a ``HistoryEntry`` (position, context,
    action and reasoning are recovered from the logged prompt/response text)
    and appended to ``self.state.history``. Recognised button presses are also
    appended to ``self.state.recent_actions``.

    Args:
        checkpoint_file: Path of the checkpoint to read.

    Returns:
        True when the checkpoint was processed (malformed entries are logged
        and skipped), False when the file does not exist or loading failed.
    """
    try:
        from utils.llm_logger import get_llm_logger
        import json
        # Imported locally, like the modules below, so this method does not
        # depend on a module-level ``import os``.
        import os
        import re
        from datetime import datetime

        if not os.path.exists(checkpoint_file):
            logger.info(f"No checkpoint file found: {checkpoint_file}")
            return False

        # Use LLM logger to restore cumulative metrics first
        restored_step_count = None
        llm_logger = get_llm_logger()
        if llm_logger:
            restored_step_count = llm_logger.load_checkpoint(checkpoint_file)
            if restored_step_count is not None:
                logger.info(f"✅ LLM logger restored checkpoint with {restored_step_count} steps")
                # Update SimpleAgent step counter to match LLM logger
                self.state.step_counter = restored_step_count

        with open(checkpoint_file, 'r', encoding='utf-8') as f:
            checkpoint_data = json.load(f)

        log_entries = checkpoint_data.get("log_entries", [])
        restored_count = 0

        # Loop invariants hoisted out of the per-entry work.
        coords_pattern = re.compile(r"Position: X=(\d+), Y=(\d+)")
        valid_buttons = ('UP', 'DOWN', 'LEFT', 'RIGHT', 'A', 'B', 'START', 'SELECT')

        for entry in log_entries:
            if entry.get("type") == "interaction" and "simple_mode" in entry.get("interaction_type", ""):
                try:
                    # Extract state info from prompt
                    prompt = entry.get("prompt", "")
                    response = entry.get("response", "")
                    timestamp_str = entry.get("timestamp", "")

                    # Parse coordinates from prompt
                    coords = None
                    coords_match = coords_pattern.search(prompt)
                    if coords_match:
                        coords = (int(coords_match.group(1)), int(coords_match.group(2)))

                    # Parse context from prompt
                    prompt_lower = prompt.lower()
                    context = "overworld"  # default
                    if "Game State: battle" in prompt:
                        context = "battle"
                    elif "DIALOGUE:" in prompt or "dialogue" in prompt_lower:
                        context = "dialogue"
                    elif "menu" in prompt_lower:
                        context = "menu"

                    # Extract action from response: first line after the last "ACTION:"
                    action_taken = "UNKNOWN"
                    if "ACTION:" in response:
                        action_section = response.split("ACTION:")[-1].strip()
                        action_taken = action_section.split('\n')[0].strip()

                    # Parse timestamp; fall back to "now" when it is missing
                    # or not a valid ISO-format string.
                    timestamp = datetime.now()
                    if timestamp_str:
                        try:
                            timestamp = datetime.fromisoformat(timestamp_str)
                        except (TypeError, ValueError):
                            pass

                    # Create simplified game state summary
                    game_state_summary = f"Position: {coords}" if coords else "Position unknown"
                    if coords:
                        game_state_summary += f" | Context: {context}"

                    # Add reasoning summary: text between the last "REASONING:"
                    # and the next "ACTION:" marker
                    reasoning = ""
                    if "REASONING:" in response:
                        reasoning = response.split("REASONING:")[-1].split("ACTION:")[0].strip()

                    action_with_reasoning = f"{action_taken} | Reasoning: {reasoning}" if reasoning else action_taken

                    # Create history entry
                    history_entry = HistoryEntry(
                        timestamp=timestamp,
                        player_coords=coords,
                        map_id=None,  # Not available in checkpoint
                        context=context,
                        action_taken=action_with_reasoning,
                        game_state_summary=game_state_summary
                    )

                    self.state.history.append(history_entry)

                    # Also add to recent actions if it's a valid action
                    if action_taken and action_taken not in ["UNKNOWN", "WAIT"]:
                        # Actions may be comma- and/or whitespace-separated
                        for action in action_taken.replace(',', ' ').split():
                            if action in valid_buttons:
                                self.state.recent_actions.append(action)

                    restored_count += 1

                except Exception as e:
                    logger.warning(f"Error parsing checkpoint entry: {e}")
                    continue

        # The step count restored by the LLM logger is kept when available.
        # The number of parsed entries is only a fallback, because it counts
        # just the "simple_mode" interactions that parsed cleanly.
        if restored_step_count is None:
            self.state.step_counter = restored_count

        logger.info(f"✅ Restored {restored_count} history entries from {checkpoint_file}")
        logger.info(f" History: {len(self.state.history)} entries")
        logger.info(f" Recent actions: {len(self.state.recent_actions)} actions")
        logger.info(f" Step counter: {self.state.step_counter}")

        return True

    except Exception as e:
        logger.error(f"❌ Failed to load history from checkpoint: {e}")
        import traceback
        traceback.print_exc()
        return False
|
|
1257
|
+
|
|
1258
|
+
def save_history_to_llm_checkpoint(self, checkpoint_file: str = None):
    """Persist the agent's progress through the LLM logger's checkpoint system.

    Args:
        checkpoint_file: Destination passed straight to the logger's
            ``save_checkpoint``. Per the original note, None makes the
            logger fall back to its cache folder - confirm in llm_logger.

    Returns:
        True when the checkpoint call completed, False when no LLM logger is
        available or any exception was raised (the error is logged).
    """
    try:
        from utils.llm_logger import get_llm_logger

        # The global logger instance owns log entries and cumulative metrics.
        llm_logger = get_llm_logger()
        if llm_logger is not None:
            llm_logger.save_checkpoint(checkpoint_file, agent_step_count=self.state.step_counter)

            report = (
                f"💾 Saved LLM checkpoint to {checkpoint_file}",
                f" Step counter: {self.state.step_counter}",
                f" History: {len(self.state.history)} entries",
                f" Recent actions: {len(self.state.recent_actions)} actions",
            )
            for message in report:
                logger.info(message)
            return True

        logger.warning("No LLM logger available for checkpoint saving")
        return False

    except Exception as e:
        logger.error(f"❌ Failed to save LLM checkpoint: {e}")
        import traceback
        traceback.print_exc()
        return False
|
|
1285
|
+
|
|
1286
|
+
def record_failed_movement(self, coords: Tuple[int, int], direction: str, reason: str = "blocked"):
    """Remember that moving in ``direction`` from ``coords`` did not work.

    Entries are stored per "x,y" key as "direction:reason" strings; an
    identical entry is recorded (and logged) only once.
    """
    coord_key = "{},{}".format(coords[0], coords[1])
    attempts = self.state.failed_movements.setdefault(coord_key, [])

    failed_entry = f"{direction}:{reason}"
    if failed_entry in attempts:
        return  # already known, nothing to add

    attempts.append(failed_entry)
    logger.info(f"Recorded failed movement: {coord_key} -> {direction} ({reason})")
|
|
1296
|
+
|
|
1297
|
+
def record_npc_interaction(self, coords: Tuple[int, int], interaction_type: str, notes: str = ""):
    """Store the latest NPC interaction seen at ``coords``.

    The value kept under the "x,y" key is "interaction_type: notes", or just
    the interaction type when no notes are given. A later call for the same
    coordinates replaces the earlier value.
    """
    coord_key = "{},{}".format(coords[0], coords[1])

    if notes:
        interaction_info = f"{interaction_type}: {notes}"
    else:
        interaction_info = interaction_type

    self.state.npc_interactions[coord_key] = interaction_info
    logger.info(f"Recorded NPC interaction: {coord_key} -> {interaction_info}")
|
|
1303
|
+
|
|
1304
|
+
def get_movement_memory(self, coords: Tuple[int, int]) -> str:
    """Describe what is remembered about one tile.

    Returns:
        The recorded failed moves and NPC interaction for ``coords`` joined
        with " | ", or an empty string when nothing is recorded there.
    """
    coord_key = "{},{}".format(coords[0], coords[1])
    state = self.state
    segments = []

    # Failed movement attempts come first ...
    if coord_key in state.failed_movements:
        segments.append("Failed moves: " + ", ".join(state.failed_movements[coord_key]))

    # ... followed by the NPC interaction, if any.
    if coord_key in state.npc_interactions:
        segments.append(f"NPC: {state.npc_interactions[coord_key]}")

    return " | ".join(segments)
|
|
1320
|
+
|
|
1321
|
+
def get_area_movement_memory(self, center_coords: Tuple[int, int], radius: int = 7) -> str:
    """Summarise remembered failures/NPCs on the tiles around a position.

    Every tile in the square of side ``2 * radius + 1`` around
    ``center_coords`` (the centre itself excluded) is checked via
    ``get_movement_memory``. At most the first five hits, in scan order, are
    reported under a header line.

    Returns:
        The multi-line summary, or an empty string when nothing is remembered.
    """
    cx, cy = center_coords
    offsets = range(-radius, radius + 1)

    nearby_memories = []
    for dx in offsets:
        for dy in offsets:
            if (dx, dy) == (0, 0):
                continue  # the player's own tile is not part of the area
            tile = (cx + dx, cy + dy)
            remembered = self.get_movement_memory(tile)
            if remembered:
                nearby_memories.append(f"({tile[0]},{tile[1]}): {remembered}")

    if not nearby_memories:
        return ""

    memory_lines = ["🧠 MOVEMENT MEMORY (nearby area):"]
    memory_lines.extend(f" {item}" for item in nearby_memories[:5])
    return "\n".join(memory_lines)
|
|
1344
|
+
|
|
1345
|
+
def analyze_movement_preview(self, game_state: Dict[str, Any]) -> Dict[str, Any]:
    """
    Analyze the movement preview data from game state to find valid moves.

    The game state is rendered with ``format_state_for_llm`` and the lines
    following a "MOVEMENT PREVIEW:" marker are parsed until the first blank
    line. Each preview line is expected to look like::

        UP : ( 15, 10) [.] WALKABLE - Optional description

    Args:
        game_state: Current game state, passed to ``format_state_for_llm``.

    Returns:
        Dict with 'walkable_directions', 'blocked_directions', and 'special_tiles'
        (direction -> 'door' / 'stairs' / 'grass' / 'ledge' for walkable tiles).
    """
    directions = ('UP', 'DOWN', 'LEFT', 'RIGHT')
    walkable_directions = []
    blocked_directions = []
    special_tiles = {}

    # Look for movement preview in the formatted state
    formatted_state = format_state_for_llm(game_state)

    in_movement_preview = False
    for line in formatted_state.split('\n'):
        if 'MOVEMENT PREVIEW:' in line:
            in_movement_preview = True
            continue

        if not in_movement_preview:
            continue

        stripped = line.strip()
        if not stripped:
            # Empty line typically ends the movement preview section
            in_movement_preview = False
            continue

        # Split on the FIRST colon only: the description may itself contain
        # colons, and everything after the direction label must stay
        # searchable for the status and special-tile markers.
        label, separator, rest = stripped.partition(':')
        if not separator:
            continue

        direction = label.strip()
        if direction not in directions:
            continue

        if 'WALKABLE' in rest:
            walkable_directions.append(direction)
            # Check for special tiles (first matching marker wins)
            if 'Door/Entrance' in rest:
                special_tiles[direction] = 'door'
            elif 'Stairs/Warp' in rest:
                special_tiles[direction] = 'stairs'
            elif 'Tall grass' in rest:
                special_tiles[direction] = 'grass'
            elif 'Jump ledge' in rest and 'can jump' in rest:
                special_tiles[direction] = 'ledge'
        elif 'BLOCKED' in rest:
            blocked_directions.append(direction)

    return {
        'walkable_directions': walkable_directions,
        'blocked_directions': blocked_directions,
        'special_tiles': special_tiles
    }
|
|
1398
|
+
|
|
1399
|
+
def validate_movement_sequence(self, movements: List[str], game_state: Dict[str, Any]) -> Tuple[bool, str]:
    """
    Decide whether a planned list of moves may be executed from the current state.

    Only the first move can be checked against the movement preview, so any
    sequence longer than one step is rejected even when its first step is fine.

    Args:
        movements: List of movement directions (case-insensitive)
        game_state: Current game state

    Returns:
        Tuple of (is_valid, reason)
    """
    if not movements:
        return True, "No movements to validate"

    # Analyze current movement options
    preview = self.analyze_movement_preview(game_state)
    walkable, blocked = preview['walkable_directions'], preview['blocked_directions']

    first_move = movements[0].upper()
    is_cardinal = first_move in ('UP', 'DOWN', 'LEFT', 'RIGHT')

    if first_move in blocked:
        verdict = (False, f"First movement {first_move} is BLOCKED")
    elif is_cardinal and first_move not in walkable:
        verdict = (False, f"First movement {first_move} is not confirmed WALKABLE")
    elif len(movements) > 1:
        # Nothing beyond the first step can be predicted, so chaining is discouraged.
        verdict = (False, "Cannot validate multi-step movements - use single steps instead")
    else:
        verdict = (True, "Movement validated")

    return verdict
|
|
1433
|
+
|
|
1434
|
+
def get_history_stats(self) -> Dict[str, int]:
    """Report sizes and limits of everything the agent is tracking."""
    state = self.state
    history, actions = state.history, state.recent_actions

    stats = {}
    stats["history_entries"] = len(history)
    stats["max_history_entries"] = history.maxlen
    stats["recent_actions"] = len(actions)
    stats["max_recent_actions"] = actions.maxlen
    stats["history_display_count"] = self.history_display_count
    stats["actions_display_count"] = self.actions_display_count
    stats["objectives_count"] = len(state.objectives)
    stats["step_counter"] = state.step_counter
    stats["failed_movements"] = len(state.failed_movements)
    stats["npc_interactions"] = len(state.npc_interactions)
    return stats
|
|
1448
|
+
|
|
1449
|
+
# Global simple agent instance for backward compatibility with existing multiprocess code
_global_simple_agent = None


def _load_checkpoint_if_requested(agent) -> None:
    """Restore ``agent`` history from the LLM checkpoint when LOAD_CHECKPOINT_MODE is "true".

    The checkpoint is looked up in the ".pokeagent_cache" folder first and in
    the working directory ("checkpoint_llm.txt") as a fallback. A missing
    file is only logged.
    """
    import os

    if os.environ.get("LOAD_CHECKPOINT_MODE") != "true":
        return

    # Check cache folder first, then fall back to old location
    legacy_file = "checkpoint_llm.txt"
    cache_dir = ".pokeagent_cache"
    checkpoint_file = os.path.join(cache_dir, legacy_file) if os.path.exists(cache_dir) else legacy_file
    if not os.path.exists(checkpoint_file) and os.path.exists(legacy_file):
        checkpoint_file = legacy_file

    if os.path.exists(checkpoint_file):
        logger.info(f"🔄 Loading SimpleAgent history from {checkpoint_file}")
        agent.load_history_from_llm_checkpoint(checkpoint_file)
    else:
        logger.info(f"⚠️ No checkpoint file found: {checkpoint_file}")


def get_simple_agent(vlm) -> SimpleAgent:
    """Get or create the global simple agent instance.

    A new ``SimpleAgent`` is created on first use and whenever ``vlm``
    differs from the one held by the current instance. Each new instance
    gets the same checkpoint handling (see ``_load_checkpoint_if_requested``).

    Args:
        vlm: Vision-language model handle passed to ``SimpleAgent``.

    Returns:
        The shared ``SimpleAgent`` bound to ``vlm``.
    """
    global _global_simple_agent

    if _global_simple_agent is None or _global_simple_agent.vlm != vlm:
        # First call, or the VLM changed: build a fresh instance.
        _global_simple_agent = SimpleAgent(vlm)
        _load_checkpoint_if_requested(_global_simple_agent)

    return _global_simple_agent
|
|
1489
|
+
|
|
1490
|
+
def simple_mode_processing_multiprocess(vlm, game_state, args=None):
    """Simple mode processing function for multiprocess mode (backward compatibility).

    Args:
        vlm: Vision-language model handle used to obtain the shared agent.
        game_state: Game state mapping; the frame is read from
            ``game_state["visual"]["screenshot"]``.
        args: Unused, kept so existing callers keep working.

    Returns:
        The result of ``agent.process_step(frame, game_state)``, or "WAIT"
        when no frame is available (missing keys or a None screenshot).
    """
    # args parameter kept for backward compatibility but not used
    _ = args  # Acknowledge unused parameter
    agent = get_simple_agent(vlm)

    # A state without a "visual"/"screenshot" entry is treated like a None
    # frame, so the guard below covers it instead of a KeyError/TypeError
    # escaping to the caller.
    try:
        frame = game_state["visual"]["screenshot"]
    except (KeyError, TypeError):
        frame = None

    # CRITICAL: Validate frame before processing
    if frame is None:
        logger.error("🚫 CRITICAL: simple_step called with None frame")
        return "WAIT"

    return agent.process_step(frame, game_state)
|