synth-ai 0.2.12__py3-none-any.whl → 0.2.13.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of synth-ai might be problematic.

Files changed (229)
  1. examples/multi_step/configs/crafter_rl_outcome.toml +74 -0
  2. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +186 -0
  3. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +83 -0
  4. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +78 -0
  5. examples/multi_step/crafter_rl_lora.md +51 -10
  6. examples/multi_step/sse_metrics_streaming_notes.md +357 -0
  7. examples/multi_step/task_app_config_notes.md +7 -1
  8. examples/swe/task_app/grpo_swe_mini.py +55 -26
  9. examples/swe/task_app/hosted/rollout.py +40 -0
  10. examples/swe/task_app/hosted/test_service.py +5 -6
  11. examples/task_apps/TESTING.md +275 -0
  12. examples/task_apps/__init__.py +0 -0
  13. examples/task_apps/crafter/__init__.py +0 -0
  14. examples/task_apps/crafter/task_app/__init__.py +2 -0
  15. examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter.py +21 -46
  16. examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter_task_app.py +1 -1
  17. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/policy.py +60 -4
  18. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/openai_client.py +109 -45
  19. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/policy_routes.py +67 -49
  20. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/rollout.py +242 -193
  21. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_service.py +5 -6
  22. examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
  23. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
  24. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
  25. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
  26. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
  27. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
  28. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
  29. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
  30. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
  31. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
  32. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
  33. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
  34. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
  35. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
  36. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
  37. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
  38. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
  39. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
  40. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
  41. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
  42. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/__init__.py +0 -0
  43. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
  44. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
  45. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
  46. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
  47. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/__init__.py +0 -0
  48. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
  49. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
  50. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
  51. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
  52. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
  53. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
  54. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
  55. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
  56. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
  57. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
  58. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
  59. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
  60. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
  61. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
  62. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/__init__.py +0 -0
  63. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
  64. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
  65. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
  66. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
  67. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
  68. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
  69. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
  70. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
  71. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
  72. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
  73. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
  74. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
  75. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
  76. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
  77. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
  78. examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
  79. examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
  80. examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
  81. examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
  82. examples/task_apps/enron/__init__.py +1 -0
  83. examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
  84. examples/task_apps/enron/task_app/README.md +14 -0
  85. examples/task_apps/enron/task_app/__init__.py +1 -0
  86. examples/task_apps/enron/task_app/grpo_enron.py +906 -0
  87. examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
  88. examples/task_apps/enron/tests/__init__.py +2 -0
  89. examples/task_apps/enron/tests/conftest.py +115 -0
  90. examples/task_apps/enron/tests/integration/__init__.py +2 -0
  91. examples/task_apps/enron/tests/integration/test_enron_eval.py +177 -0
  92. examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
  93. examples/task_apps/enron/tests/unit/__init__.py +2 -0
  94. examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
  95. examples/task_apps/math/__init__.py +0 -0
  96. examples/{rl/task_app → task_apps/math}/math_single_step.py +19 -10
  97. examples/task_apps/pokemon_battle/__init__.py +2 -0
  98. examples/task_apps/pokemon_battle/modal_app.py +104 -0
  99. examples/task_apps/pokemon_battle/task_app/README.md +68 -0
  100. examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
  101. examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
  102. examples/task_apps/pokemon_red/README.md +357 -0
  103. examples/task_apps/pokemon_red/__init__.py +3 -0
  104. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +225 -0
  105. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +73 -0
  106. examples/task_apps/pokemon_red/task_app.py +606 -0
  107. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +191 -0
  108. examples/task_apps/sokoban/README.md +307 -0
  109. examples/task_apps/sokoban/__init__.py +3 -0
  110. examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
  111. examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
  112. examples/task_apps/sokoban/task_app.py +1058 -0
  113. examples/task_apps/sokoban/tests/__init__.py +2 -0
  114. examples/task_apps/sokoban/tests/conftest.py +113 -0
  115. examples/task_apps/sokoban/tests/integration/__init__.py +2 -0
  116. examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
  117. examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
  118. examples/task_apps/sokoban/tests/unit/__init__.py +2 -0
  119. examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
  120. examples/task_apps/verilog/__init__.py +1 -0
  121. examples/task_apps/verilog/eval_groq_qwen32b.toml +20 -0
  122. examples/task_apps/verilog/task_app/README.md +12 -0
  123. examples/task_apps/verilog/task_app/__init__.py +1 -0
  124. examples/task_apps/verilog/task_app/grpo_verilog.py +931 -0
  125. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
  126. examples/task_apps/verilog/tests/__init__.py +2 -0
  127. examples/task_apps/verilog/tests/conftest.py +115 -0
  128. examples/task_apps/verilog/tests/integration/__init__.py +2 -0
  129. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +179 -0
  130. examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
  131. examples/task_apps/verilog/tests/unit/__init__.py +2 -0
  132. examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
  133. examples/vlm/crafter_openai_vlm_agent.py +4 -4
  134. examples/vlm/run_crafter_vlm_benchmark.py +4 -4
  135. examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +4 -2
  136. examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +4 -2
  137. examples/warming_up_to_rl/run_eval.py +127 -18
  138. examples/workflows/__init__.py +0 -0
  139. examples/workflows/math_rl/__init__.py +0 -0
  140. examples/workflows/math_rl/download_dataset.py +80 -0
  141. synth_ai/__init__.py +41 -1
  142. synth_ai/api/train/builders.py +73 -29
  143. synth_ai/api/train/cli.py +12 -6
  144. synth_ai/api/train/configs/__init__.py +44 -0
  145. synth_ai/api/train/configs/rl.py +134 -0
  146. synth_ai/api/train/configs/sft.py +95 -0
  147. synth_ai/api/train/configs/shared.py +24 -0
  148. synth_ai/api/train/env_resolver.py +5 -2
  149. synth_ai/api/train/supported_algos.py +10 -5
  150. synth_ai/api/train/utils.py +7 -4
  151. synth_ai/cli/__init__.py +7 -51
  152. synth_ai/cli/_storage.py +4 -3
  153. synth_ai/cli/_validate_task_app.py +11 -0
  154. synth_ai/cli/balance.py +4 -3
  155. synth_ai/cli/calc.py +2 -2
  156. synth_ai/cli/demo.py +49 -43
  157. synth_ai/cli/legacy_root_backup.py +1 -1
  158. synth_ai/cli/rl_demo.py +86 -106
  159. synth_ai/cli/root.py +0 -97
  160. synth_ai/cli/task_apps.py +1710 -186
  161. synth_ai/demos/core/cli.py +121 -159
  162. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +28 -16
  163. synth_ai/environments/examples/crafter_classic/environment.py +16 -0
  164. synth_ai/environments/examples/enron/engine.py +7 -2
  165. synth_ai/environments/examples/enron/environment.py +68 -0
  166. synth_ai/environments/examples/red/engine.py +27 -0
  167. synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
  168. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
  169. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
  170. synth_ai/environments/examples/red/environment.py +60 -0
  171. synth_ai/environments/examples/sokoban/taskset.py +116 -0
  172. synth_ai/environments/examples/verilog/engine.py +30 -4
  173. synth_ai/evals/__init__.py +15 -0
  174. synth_ai/evals/client.py +82 -0
  175. synth_ai/evals/types.py +42 -0
  176. synth_ai/jobs/client.py +16 -4
  177. synth_ai/judge_schemas.py +127 -0
  178. synth_ai/py.typed +0 -0
  179. synth_ai/task/__init__.py +14 -5
  180. synth_ai/task/contracts.py +124 -38
  181. synth_ai/task/proxy.py +48 -56
  182. synth_ai/task/rubrics/__init__.py +53 -0
  183. synth_ai/task/rubrics/loaders.py +133 -0
  184. synth_ai/task/rubrics/models.py +57 -0
  185. synth_ai/task/rubrics/scoring.py +113 -0
  186. synth_ai/task/rubrics/strict.py +149 -0
  187. synth_ai/task/server.py +8 -7
  188. synth_ai/task/validators.py +269 -6
  189. synth_ai/tracing_v3/decorators.py +7 -3
  190. synth_ai/tracing_v3/replica_sync.py +4 -4
  191. synth_ai/tracing_v3/serialization.py +130 -0
  192. synth_ai/tracing_v3/trace_utils.py +317 -0
  193. synth_ai/tracing_v3/turso/native_manager.py +3 -3
  194. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/METADATA +4 -1
  195. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/RECORD +228 -89
  196. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/entry_points.txt +0 -1
  197. synth_ai/task/rubrics.py +0 -219
  198. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/README.md +0 -0
  199. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/README.md +0 -0
  200. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/__init__.py +0 -0
  201. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/branching.py +0 -0
  202. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/environment_routes.py +0 -0
  203. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/__init__.py +0 -0
  204. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/__init__.py +0 -0
  205. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/app.py +0 -0
  206. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/environment.py +0 -0
  207. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/react_agent.py +0 -0
  208. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/shared.py +0 -0
  209. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/tools.py +0 -0
  210. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/hosted_app.py +0 -0
  211. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/__init__.py +0 -0
  212. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/main.py +0 -0
  213. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/registry.py +0 -0
  214. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/__init__.py +0 -0
  215. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/volume.py +0 -0
  216. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_agents.py +0 -0
  217. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/utils.py +0 -0
  218. /examples/{rl/task_app → task_apps/math}/README.md +0 -0
  219. /examples/{rl/task_app → task_apps/math}/math_task_app.py +0 -0
  220. /examples/{rl → workflows/math_rl}/configs/eval_base_qwen.toml +0 -0
  221. /examples/{rl → workflows/math_rl}/configs/eval_rl_qwen.toml +0 -0
  222. /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen.toml +0 -0
  223. /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen17.toml +0 -0
  224. /examples/{rl → workflows/math_rl}/configs/rl_from_ft_qwen.toml +0 -0
  225. /examples/{rl → workflows/math_rl}/run_eval.py +0 -0
  226. /examples/{rl → workflows/math_rl}/run_rl_and_save.py +0 -0
  227. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/WHEEL +0 -0
  228. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/licenses/LICENSE +0 -0
  229. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/top_level.txt +0 -0
examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py
@@ -0,0 +1,1502 @@
+ """
+ Simple Agent Module
+
+ Provides a streamlined approach for direct frame + state -> action processing,
+ with enhanced history tracking to prevent getting stuck in loops.
+
+ Key improvements over the original simple mode:
+ - Location-based stuck detection (tracks repeated actions at same coordinates)
+ - Context-aware history (overworld/battle/menu/dialogue awareness)
+ - Memory management to fit within LLM context limits
+ - Detailed history tracking with timestamps and game state summaries
+ - Smart context switching that helps agent avoid infinite loops
+ - Configurable history window sizes for different use cases
+ - Chain of thought reasoning with structured LLM responses
+ - Objectives system with automatic and manual completion tracking
+ - Dynamic goal setting and progress monitoring
+
+ The agent maintains objectives (go to location, battle trainer, etc.) that are
+ automatically tracked and marked complete when achieved. The LLM can also
+ manually complete objectives and create new ones dynamically through structured
+ commands. It uses chain of thought reasoning to make better decisions while
+ considering current objectives. All state including objectives is forwarded
+ to support external monitoring and debugging.
+
+ Configuration defaults (can be customized):
+ - 100 previous state/location entries (with context and reasoning)
+ - 50 recent button presses tracked
+ - 15 history entries shown to LLM in prompts
+ - 20 recent actions shown to LLM in prompts
+ - Automatic memory management to stay within LLM context limits
+ """
+
+ import logging
+ import os
+ import sys
+ from collections import deque
+ from dataclasses import dataclass, field
+ from datetime import datetime
+ from typing import List, Dict, Any, Optional, Tuple
+ import numpy as np
+ from PIL import Image
+
+ from utils.state_formatter import format_state_for_llm
+
+ logger = logging.getLogger(__name__)
+
+ # Configurable parameters for history tracking
+ DEFAULT_MAX_HISTORY_ENTRIES = 100 # Previous states/locations with context
+ DEFAULT_MAX_RECENT_ACTIONS = 50 # Recent button presses
+ DEFAULT_HISTORY_DISPLAY_COUNT = 30 # Number of history entries shown to LLM
+ DEFAULT_ACTIONS_DISPLAY_COUNT = 40 # Number of recent actions shown to LLM
+
+ def configure_simple_agent_defaults(max_history_entries: int = None, max_recent_actions: int = None,
+                                     history_display_count: int = None, actions_display_count: int = None):
+     """Configure default parameters for all new SimpleAgent instances"""
+     global DEFAULT_MAX_HISTORY_ENTRIES, DEFAULT_MAX_RECENT_ACTIONS
+     global DEFAULT_HISTORY_DISPLAY_COUNT, DEFAULT_ACTIONS_DISPLAY_COUNT
+
+     if max_history_entries is not None:
+         DEFAULT_MAX_HISTORY_ENTRIES = max_history_entries
+     if max_recent_actions is not None:
+         DEFAULT_MAX_RECENT_ACTIONS = max_recent_actions
+     if history_display_count is not None:
+         DEFAULT_HISTORY_DISPLAY_COUNT = history_display_count
+     if actions_display_count is not None:
+         DEFAULT_ACTIONS_DISPLAY_COUNT = actions_display_count
+
+     logger.info(f"Updated SimpleAgent defaults: {DEFAULT_MAX_HISTORY_ENTRIES} history, {DEFAULT_MAX_RECENT_ACTIONS} actions, "
+                 f"display {DEFAULT_HISTORY_DISPLAY_COUNT}/{DEFAULT_ACTIONS_DISPLAY_COUNT}")
+
+ @dataclass
+ class Objective:
+     """Single objective/goal for the agent"""
+     id: str
+     description: str
+     objective_type: str # "location", "battle", "item", "dialogue", "custom"
+     target_value: Optional[Any] = None # Specific target (coords, trainer name, item name, etc.)
+     completed: bool = False
+     created_at: datetime = field(default_factory=datetime.now)
+     completed_at: Optional[datetime] = None
+     progress_notes: str = ""
+     storyline: bool = False # True for main storyline objectives (auto-verified), False for agent sub-objectives
+     milestone_id: Optional[str] = None # Emulator milestone ID for storyline objectives
+
+ @dataclass
+ class HistoryEntry:
+     """Single entry in the agent's history"""
+     timestamp: datetime
+     player_coords: Optional[Tuple[int, int]]
+     map_id: Optional[int]
+     context: str # "overworld", "battle", "menu", "dialogue"
+     action_taken: str
+     game_state_summary: str
+
+ @dataclass
+ class SimpleAgentState:
+     """Maintains history and state for the simple agent"""
+     # Note: We don't use defaults here because they're captured at class definition time
+     history: deque = None
+     recent_actions: deque = None
+     stuck_detection: Dict[str, int] = field(default_factory=dict)
+     step_counter: int = 0
+     objectives: List[Objective] = field(default_factory=list)
+     objectives_updated: bool = False
+     failed_movements: Dict[str, List[str]] = field(default_factory=dict) # coord_key -> [failed_directions]
+     npc_interactions: Dict[str, str] = field(default_factory=dict) # coord_key -> interaction_notes
+
+     def __post_init__(self):
+         """Initialize deques with current default values"""
+         if self.history is None:
+             self.history = deque(maxlen=DEFAULT_MAX_HISTORY_ENTRIES)
+         if self.recent_actions is None:
+             self.recent_actions = deque(maxlen=DEFAULT_MAX_RECENT_ACTIONS)
+
+ class SimpleAgent:
+     """
+     Simple agent that processes frame + state -> action directly with history tracking
+     """
+
+     def __init__(self, vlm, max_history_entries: int = None, max_recent_actions: int = None,
+                  history_display_count: int = None, actions_display_count: int = None):
+         self.vlm = vlm
+
+         # Use current global defaults if not specified
+         max_history_entries = max_history_entries or DEFAULT_MAX_HISTORY_ENTRIES
+         max_recent_actions = max_recent_actions or DEFAULT_MAX_RECENT_ACTIONS
+         history_display_count = history_display_count or DEFAULT_HISTORY_DISPLAY_COUNT
+         actions_display_count = actions_display_count or DEFAULT_ACTIONS_DISPLAY_COUNT
+
+         self.state = SimpleAgentState()
+         self.state.history = deque(maxlen=max_history_entries)
+         self.state.recent_actions = deque(maxlen=max_recent_actions)
+
+         # Display parameters for LLM prompts
+         self.history_display_count = history_display_count
+         self.actions_display_count = actions_display_count
+
+         # Initialize storyline objectives for Emerald progression
+         self._initialize_storyline_objectives()
+
+     def _initialize_storyline_objectives(self):
+         """Initialize the main storyline objectives for Pokémon Emerald progression"""
+         storyline_objectives = [
+             {
+                 "id": "story_game_start",
+                 "description": "Complete title sequence and begin the game",
+                 "objective_type": "system",
+                 "target_value": "Game Running",
+                 "milestone_id": "GAME_RUNNING"
+             },
+             {
+                 "id": "story_littleroot_town",
+                 "description": "Arrive in Littleroot Town and explore the area",
+                 "objective_type": "location",
+                 "target_value": "Littleroot Town",
+                 "milestone_id": "LITTLEROOT_TOWN"
+             },
+             {
+                 "id": "story_route_101",
+                 "description": "Travel north to Route 101 and encounter Prof. Birch",
+                 "objective_type": "location",
+                 "target_value": "Route 101",
+                 "milestone_id": "ROUTE_101"
+             },
+             {
+                 "id": "story_starter_chosen",
+                 "description": "Choose starter Pokémon and receive first party member",
+                 "objective_type": "pokemon",
+                 "target_value": "Starter Pokémon",
+                 "milestone_id": "STARTER_CHOSEN"
+             },
+             {
+                 "id": "story_oldale_town",
+                 "description": "Continue journey to Oldale Town",
+                 "objective_type": "location",
+                 "target_value": "Oldale Town",
+                 "milestone_id": "OLDALE_TOWN"
+             },
+             {
+                 "id": "story_route_103",
+                 "description": "Travel to Route 103 to meet rival",
+                 "objective_type": "location",
+                 "target_value": "Route 103",
+                 "milestone_id": "ROUTE_103"
+             },
+             {
+                 "id": "story_route_102",
+                 "description": "Return through Route 102 toward Petalburg City",
+                 "objective_type": "location",
+                 "target_value": "Route 102",
+                 "milestone_id": "ROUTE_102"
+             },
+             {
+                 "id": "story_petalburg_city",
+                 "description": "Navigate to Petalburg City and visit Dad's gym",
+                 "objective_type": "location",
+                 "target_value": "Petalburg City",
+                 "milestone_id": "PETALBURG_CITY"
+             },
+             {
+                 "id": "story_route_104",
+                 "description": "Travel north through Route 104 toward Petalburg Woods",
+                 "objective_type": "location",
+                 "target_value": "Route 104",
+                 "milestone_id": "ROUTE_104"
+             },
+             {
+                 "id": "story_petalburg_woods",
+                 "description": "Navigate through Petalburg Woods to help Devon researcher",
+                 "objective_type": "location",
+                 "target_value": "Petalburg Woods",
+                 "milestone_id": "PETALBURG_WOODS"
+             },
+             {
+                 "id": "story_rustboro_city",
+                 "description": "Arrive in Rustboro City and deliver Devon Goods",
+                 "objective_type": "location",
+                 "target_value": "Rustboro City",
+                 "milestone_id": "RUSTBORO_CITY"
+             },
+             {
+                 "id": "story_rustboro_gym",
+                 "description": "Enter the Rustboro Gym and prepare for Roxanne battle",
+                 "objective_type": "location",
+                 "target_value": "Rustboro Gym",
+                 "milestone_id": None # Gym entry doesn't have separate milestone
+             },
+             {
+                 "id": "story_stone_badge",
+                 "description": "Defeat Roxanne and earn the Stone Badge",
+                 "objective_type": "battle",
+                 "target_value": "Stone Badge",
+                 "milestone_id": "STONE_BADGE"
+             }
+         ]
+
+         # Add storyline objectives to the state
+         for obj_data in storyline_objectives:
+             objective = Objective(
+                 id=obj_data["id"],
+                 description=obj_data["description"],
+                 objective_type=obj_data["objective_type"],
+                 target_value=obj_data["target_value"],
+                 completed=False,
+                 progress_notes="Storyline objective - verified by emulator milestones",
+                 storyline=True,
+                 milestone_id=obj_data["milestone_id"]
+             )
+             self.state.objectives.append(objective)
+
+         logger.info(f"Initialized {len(storyline_objectives)} storyline objectives for Emerald progression")
+
+     def get_game_context(self, game_state: Dict[str, Any]) -> str:
+         """Determine current game context (overworld, battle, menu, dialogue)"""
+         try:
+             # Check if in title sequence first
+             player_location = game_state.get("player", {}).get("location", "")
+             if player_location == "TITLE_SEQUENCE":
+                 return "title"
+
+             # Check game state for title/intro
+             game_state_value = game_state.get("game", {}).get("game_state", "").lower()
+             if "title" in game_state_value or "intro" in game_state_value:
+                 return "title"
+
+             # Check if player name is not set (indicates title sequence)
+             player_name = game_state.get("player", {}).get("name", "").strip()
+             if not player_name or player_name == "????????":
+                 return "title"
+
+             # Check if in battle
+             is_in_battle = game_state.get("game", {}).get("is_in_battle", False)
+             if is_in_battle:
+                 logger.debug(f"Detected battle context")
+                 return "battle"
+
+             # Check if dialogue is active
+             dialogue_state = game_state.get("game", {}).get("dialogue", {})
+             if dialogue_state.get("active", False) or dialogue_state.get("text", "").strip():
+                 return "dialogue"
+
+             # Check if in menu (simplified detection)
+             # Could be enhanced with more sophisticated menu detection
+             player_state = game_state.get("player", {})
+             if player_state.get("in_menu", False):
+                 return "menu"
+
+             # Default to overworld
+             return "overworld"
+
+         except Exception as e:
+             logger.warning(f"Error determining game context: {e}")
+             return "unknown"
+
+     def get_player_coords(self, game_state: Dict[str, Any]) -> Optional[Tuple[int, int]]:
+         """Extract player coordinates from game state"""
+         try:
+             player = game_state.get("player", {})
+             # Try position.x/y first (standard format)
+             position = player.get("position", {})
+             if position:
+                 x = position.get("x")
+                 y = position.get("y")
+                 if x is not None and y is not None:
+                     return (x, y)
+
+             # Fallback: try direct x/y on player
+             x = player.get("x")
+             y = player.get("y")
+             if x is not None and y is not None:
+                 return (x, y)
+         except Exception as e:
+             logger.warning(f"Error getting player coords: {e}")
+         return None
+
+     def get_map_id(self, game_state: Dict[str, Any]) -> Optional[int]:
+         """Extract map ID from game state"""
+         try:
+             return game_state.get("map", {}).get("id")
+         except Exception as e:
+             logger.warning(f"Error getting map ID: {e}")
+             return None
+
+     def add_objective(self, description: str, objective_type: str, target_value: Any = None) -> str:
+         """Add a new objective and return its ID"""
+         obj_id = f"obj_{len(self.state.objectives)}_{int(datetime.now().timestamp())}"
+         objective = Objective(
+             id=obj_id,
+             description=description,
+             objective_type=objective_type,
+             target_value=target_value
+         )
+         self.state.objectives.append(objective)
+         self.state.objectives_updated = True
+         logger.info(f"Added objective: {description}")
+         return obj_id
+
+     def complete_objective(self, obj_id: str, progress_notes: str = ""):
+         """Mark an objective as completed (storyline objectives cannot be manually completed)"""
+         for obj in self.state.objectives:
+             if obj.id == obj_id and not obj.completed:
+                 # Prevent manual completion of storyline objectives
+                 if obj.storyline:
+                     logger.warning(f"Cannot manually complete storyline objective: {obj.description}. These are verified by emulator milestones.")
+                     return False
+
+                 obj.completed = True
+                 obj.completed_at = datetime.now()
+                 obj.progress_notes = progress_notes
+                 self.state.objectives_updated = True
+                 logger.info(f"Completed objective: {obj.description}")
+                 return True
+         return False
+
+     def get_active_objectives(self) -> List[Objective]:
+         """Get list of uncompleted objectives"""
+         return [obj for obj in self.state.objectives if not obj.completed]
+
+     def get_completed_objectives(self) -> List[Objective]:
+         """Get list of completed objectives"""
+         return [obj for obj in self.state.objectives if obj.completed]
+
+     def check_objective_completion(self, game_state: Dict[str, Any]) -> List[str]:
+         """Check if any objectives should be marked as completed based on game state"""
+         completed_ids = []
+         coords = self.get_player_coords(game_state)
+         context = self.get_game_context(game_state)
+         map_id = self.get_map_id(game_state)
+
+         for obj in self.get_active_objectives():
+             should_complete = False
+             notes = ""
+
+             if obj.objective_type == "location" and coords and obj.target_value:
+                 # Check if player reached target location
+                 # Note: target_value is a string (location name) for storyline objectives
+                 # Location objectives are completed via milestone verification, not coordinate checking
+                 # This section is for dynamically added coordinate-based objectives
+                 if isinstance(obj.target_value, (tuple, list)) and len(obj.target_value) == 2:
+                     target_x, target_y = obj.target_value
+                     if abs(coords[0] - target_x) <= 2 and abs(coords[1] - target_y) <= 2:
+                         should_complete = True
+                         notes = f"Reached location ({coords[0]}, {coords[1]})"
+
+             elif obj.objective_type == "battle" and context == "battle":
+                 # Objective completed when battle starts
+                 should_complete = True
+                 notes = "Entered battle"
+
+             elif obj.objective_type == "dialogue" and context == "dialogue":
+                 # Objective completed when dialogue starts
+                 should_complete = True
+                 notes = "Started dialogue"
+
+             elif obj.objective_type == "map" and map_id and obj.target_value:
+                 # Check if player reached target map
+                 if map_id == obj.target_value:
+                     should_complete = True
+                     notes = f"Reached map {map_id}"
+
+             if should_complete:
+                 self.complete_objective(obj.id, notes)
+                 completed_ids.append(obj.id)
+
+         return completed_ids
+
+     def check_storyline_milestones(self, game_state: Dict[str, Any]) -> List[str]:
+         """Check emulator milestones and auto-complete corresponding storyline objectives"""
+         completed_ids = []
+
+         # Get milestones from the game state (if available)
+         milestones = game_state.get("milestones", {})
+         if not milestones:
+             # No milestone data available, skip checking
+             return completed_ids
+
+         for obj in self.get_active_objectives():
+             # Only check storyline objectives with milestone IDs
+             if obj.storyline and obj.milestone_id and not obj.completed:
+                 # Check if the corresponding emulator milestone is completed
+                 milestone_completed = milestones.get(obj.milestone_id, {}).get("completed", False)
+
+                 if milestone_completed:
+                     # Auto-complete the storyline objective
+                     obj.completed = True
+                     obj.completed_at = datetime.now()
+                     obj.progress_notes = f"Auto-completed by emulator milestone: {obj.milestone_id}"
+                     self.state.objectives_updated = True
+                     completed_ids.append(obj.id)
+                     logger.info(f"Auto-completed storyline objective via milestone {obj.milestone_id}: {obj.description}")
+
+         return completed_ids
+
+     def detect_stuck_pattern(self, coords: Optional[Tuple[int, int]], context: str, game_state: Dict[str, Any] = None) -> bool:
+         """Detect if the agent appears to be stuck in a location/context"""
+         # Don't trigger stuck detection during contexts where staying in place is expected
+         if context in ["battle", "dialogue", "menu", "title"]:
+             logger.debug(f"Skipping stuck detection - context: {context}")
+             return False
+
+         # Need valid coordinates for stuck detection
+         if not coords or coords[0] is None or coords[1] is None:
+             return False
+
+         # Check for title sequence if game state is available
+         if game_state:
+             # Check if in title sequence (no player name or invalid coordinates)
+             player_name = game_state.get("player", {}).get("name", "").strip()
+             if not player_name or player_name == "????????":
+                 return False
+
+             # Check if game state indicates title/intro
+             game_state_value = game_state.get("game", {}).get("game_state", "").lower()
+             if "title" in game_state_value or "intro" in game_state_value:
+                 return False
+
+             # Check location for title sequence
+             player_location = game_state.get("player", {}).get("location", "")
+             if player_location == "TITLE_SEQUENCE":
+                 return False
+
+         key = f"{coords[0]}_{coords[1]}_{context}"
+         self.state.stuck_detection[key] = self.state.stuck_detection.get(key, 0) + 1
+
+         # Consider stuck if we've been in the same location/context for 8+ consecutive steps
+         return self.state.stuck_detection[key] >= 8
+
+     def is_black_frame(self, frame) -> bool:
+         """
+         Check if the frame is mostly black (transition/loading screen).
+
+         Args:
+             frame: PIL Image or numpy array
+
+         Returns:
+             bool: True if frame is mostly black, False otherwise
+         """
+         try:
+
+             # Convert to PIL Image if needed
+             if hasattr(frame, 'convert'): # It's already a PIL Image
+                 img = frame
+             elif hasattr(frame, 'shape'): # It's a numpy array
+                 img = Image.fromarray(frame)
+             else:
+                 return False # Unknown type, assume not black
+
+             # Convert to numpy array for analysis
+             img_array = np.array(img)
+
+             # Calculate the mean brightness
+             # For RGB images, average across all channels
+             if len(img_array.shape) == 3:
+                 mean_brightness = np.mean(img_array)
+             else:
+                 mean_brightness = np.mean(img_array)
+
+             # Also check the standard deviation to catch completely uniform frames
+             std_dev = np.std(img_array)
+
+             # A frame is considered "black" if:
+             # 1. Mean brightness is very low (< 10 out of 255)
+             # 2. OR standard deviation is very low (< 5) indicating uniform color
+             is_black = mean_brightness < 10 or (mean_brightness < 30 and std_dev < 5)
+
+             if is_black:
+                 logger.debug(f"Black frame detected: mean_brightness={mean_brightness:.2f}, std_dev={std_dev:.2f}")
+
+             return is_black
+
+         except Exception as e:
+             logger.warning(f"Error checking for black frame: {e}")
+             return False # On error, assume not black to continue processing
+
+     def get_relevant_history_summary(self, current_context: str, coords: Optional[Tuple[int, int]]) -> str:
+         """Get a concise summary of relevant recent history"""
+         # current_context and coords could be used for more sophisticated filtering in the future
+         _ = current_context, coords # Acknowledge unused parameters for now
+         if not self.state.history:
+             return "No previous history."
+
+         # Get last N entries based on display count
+         recent_entries = list(self.state.history)[-self.history_display_count:]
+
+         # Format for LLM consumption
+         summary_lines = []
+         for i, entry in enumerate(recent_entries, 1):
+             coord_str = f"({entry.player_coords[0]},{entry.player_coords[1]})" if entry.player_coords else "(?)"
+             summary_lines.append(f"{i}. {entry.context} at {coord_str}: {entry.action_taken}")
+
+         return "\n".join(summary_lines)
+
+     def get_stuck_warning(self, coords: Optional[Tuple[int, int]], context: str, game_state: Dict[str, Any] = None) -> str:
+         """Generate warning text if stuck pattern detected"""
+         # Never show stuck warning in title sequence
+         if context == "title":
+             return ""
+
+         if self.detect_stuck_pattern(coords, context, game_state):
+             return "\n⚠️ WARNING: You appear to be stuck at this location/context. Try a different approach!\n" \
+                    "💡 TIP: If you try an action like RIGHT but coordinates don't change from (X,Y) to (X+1,Y), there's likely an obstacle. Check the map around player P for walls (#) or other barriers blocking your path."
+         return ""
+
+     def create_game_state_summary(self, game_state: Dict[str, Any]) -> str:
+         """Create a concise summary of the current game state"""
+         try:
+             game_info = game_state.get("game", {})
+
+             summary_parts = []
+
+             # Player location
+             coords = self.get_player_coords(game_state)
+             if coords:
+                 summary_parts.append(f"Player at ({coords[0]}, {coords[1]})")
+
+             # Map info
+             map_id = self.get_map_id(game_state)
+             if map_id:
+                 summary_parts.append(f"Map {map_id}")
+
+             # Context-specific info
+             context = self.get_game_context(game_state)
+             if context == "battle":
+                 summary_parts.append("In battle")
+             elif context == "dialogue":
+                 dialogue_text = game_info.get("dialogue", {}).get("text", "")
+                 if dialogue_text:
+                     summary_parts.append(f"Dialogue: {dialogue_text}")
+
+             return " | ".join(summary_parts) if summary_parts else "Unknown state"
+
+         except Exception as e:
+             logger.warning(f"Error creating game state summary: {e}")
+             return "Error reading state"
+
+     def step(self, game_state: Dict[str, Any]) -> Dict[str, Any]:
+         """
+         Compatibility method for client that expects agent.step(game_state)
+
+         Args:
+             game_state: Complete game state dictionary (should include 'frame')
+
+         Returns:
+             Dictionary with 'action' and optional 'reasoning'
+         """
+         frame = game_state.get('frame')
+         if frame is None:
+             logger.error("🚫 No frame in game_state for SimpleAgent.step")
+             return {"action": "WAIT", "reasoning": "No frame available"}
+
+         action = self.process_step(frame, game_state)
+         return {"action": action, "reasoning": "Simple agent decision"}
+
+     def process_step(self, frame, game_state: Dict[str, Any]) -> str:
+         """
+         Main processing step for simple mode with history tracking
+
+         Args:
+             frame: Current game frame (PIL Image or similar)
+             game_state: Complete game state dictionary
+
+         Returns:
+             Action string or list of actions
+         """
+         # CRITICAL: Validate frame before any VLM processing
+         if frame is None:
+             logger.error("🚫 CRITICAL: SimpleAgent.process_step called with None frame - cannot proceed")
+             return "WAIT"
+
+         # Validate frame is a proper image
+         if not (hasattr(frame, 'save') or hasattr(frame, 'shape')):
+             logger.error(f"🚫 CRITICAL: SimpleAgent.process_step called with invalid frame type {type(frame)} - cannot proceed")
+             return "WAIT"
+
+         # Additional PIL Image validation
+         if hasattr(frame, 'size'):
+             width, height = frame.size
+             if width <= 0 or height <= 0:
+                 logger.error(f"🚫 CRITICAL: SimpleAgent.process_step called with invalid frame size {width}x{height} - cannot proceed")
+                 return "WAIT"
+
+         # Check for black frame (transition screen)
+         if self.is_black_frame(frame):
+             logger.info("⏳ Black frame detected (likely a transition), waiting for next frame...")
+             return "WAIT" # Return WAIT to skip this frame and wait for the next one
+
+         try:
+             # Increment step counter
+             self.state.step_counter += 1
+
+             # Get current state info
+             coords = self.get_player_coords(game_state)
+             context = self.get_game_context(game_state)
+             map_id = self.get_map_id(game_state)
+
+             # Format the current state for LLM (includes movement preview)
+             formatted_state = format_state_for_llm(game_state)
+
+             # Get movement memory for the current area
+             movement_memory = ""
+             if coords:
+                 movement_memory = self.get_area_movement_memory(coords)
+
+             # Check for objective completion first
+             self.check_objective_completion(game_state)
+
+             # Check storyline milestones and auto-complete objectives
+             self.check_storyline_milestones(game_state)
+
+             # Get relevant history and stuck detection
+             history_summary = self.get_relevant_history_summary(context, coords)
+             stuck_warning = self.get_stuck_warning(coords, context, game_state)
+             recent_actions_str = ', '.join(list(self.state.recent_actions)[-self.actions_display_count:]) if self.state.recent_actions else 'None'
+
+             # Format objectives for LLM
+             active_objectives = self.get_active_objectives()
+             completed_objectives_list = self.get_completed_objectives()
+             objectives_summary = self._format_objectives_for_llm(active_objectives, completed_objectives_list)
+
+             # Build pathfinding rules section (only if not in title sequence)
+             pathfinding_rules = ""
+             if context != "title":
+                 pathfinding_rules = """
+ 🚨 PATHFINDING RULES:
+ 1. **SINGLE STEP FIRST**: Always prefer single actions (UP, DOWN, LEFT, RIGHT, A, B) unless you're 100% certain about multi-step paths
+ 2. **CHECK EVERY STEP**: Before chaining movements, verify EACH step in your sequence using the MOVEMENT PREVIEW and map
+ 3. **BLOCKED = STOP**: If ANY step shows BLOCKED in the movement preview, the entire sequence will fail
+ 4. **NO BLIND CHAINS**: Never chain movements through areas you can't see or verify as walkable
+ 5. **PERFORM PATHFINDING**: Find a path to a target location (X',Y') from the player position (X,Y) on the map. DO NOT TRAVERSE THROUGH OBSTACLES (#) -- it will not work.
+
+ 💡 SMART MOVEMENT STRATEGY:
+ - Use MOVEMENT PREVIEW to see exactly what happens with each direction
+ - If your target requires multiple steps, plan ONE step at a time
+ - Only chain 2-3 moves if ALL intermediate tiles are confirmed WALKABLE
+ - When stuck, try a different direction rather than repeating the same blocked move
+
+ EXAMPLE - DON'T DO THIS:
+ ❌ "I want to go right 5 tiles" → "RIGHT, RIGHT, RIGHT, RIGHT, RIGHT" (may hit wall on step 2!)
+
+ EXAMPLE - DO THIS INSTEAD:
+ ✅ Check movement preview → "RIGHT shows (X+1,Y) WALKABLE" → "RIGHT" (single safe step)
+ ✅ Next turn, check again → "RIGHT shows (X+2,Y) WALKABLE" → "RIGHT" (another safe step)
+
+ 💡 SMART NAVIGATION:
+ - Check the VISUAL FRAME for NPCs (people/trainers) before moving - they're not always on the map!
+ - Review MOVEMENT MEMORY for locations where you've failed to move before
+ - Only explore areas marked with ? (these are confirmed explorable edges)
+ - Avoid areas surrounded by # (walls) - they're fully blocked
+ - Use doors (D), stairs (S), or walk around obstacles when pathfinding suggests it
+
+ 💡 NPC & OBSTACLE HANDLING:
+ - If you see NPCs in the image, avoid walking into them or interact with A/B if needed
+ - If a movement fails (coordinates don't change), that location likely has an NPC or obstacle
+ - Use your MOVEMENT MEMORY to remember problem areas and plan around them
+ - NPCs can trigger battles or dialogue, which may be useful for objectives
+ """
+
+             # Create enhanced prompt with objectives, history context and chain of thought request
+             prompt = f"""You are playing Pokemon Emerald. Progress quickly to the milestones by balancing exploration and exploitation of things you know.
+ Based on the current game frame and state information, think through your next move and choose the best button action.
+
+ RECENT ACTION HISTORY (last {self.actions_display_count} actions):
+ {recent_actions_str}
+
+ LOCATION/CONTEXT HISTORY (last {self.history_display_count} steps):
+ {history_summary}
+
+ CURRENT OBJECTIVES:
+ {objectives_summary}
+
+ CURRENT GAME STATE:
+ {formatted_state}
+
+ {movement_memory}
+
+ {stuck_warning}
+
+ Available actions: A, B, START, SELECT, UP, DOWN, LEFT, RIGHT
+
+ IMPORTANT: Please think step by step before choosing your action. Structure your response like this:
+
+ ANALYSIS:
+ [Analyze what you see in the frame and current game state - what's happening? where are you? what should you be doing?
+ IMPORTANT: Look carefully at the game image for NPCs (people, trainers) that might not be shown on the map. NPCs appear as sprite characters and can block movement or trigger battles/dialogue.]
+
+ OBJECTIVES:
+ [Review your current objectives. You have main storyline objectives (story_*) that track overall Emerald progression - these are automatically verified and you CANNOT manually complete them. You can create your own sub-objectives to help achieve the main goals. Do any need to be updated, added, or marked as complete?
+ - Add sub-objectives: ADD_OBJECTIVE: type:description:target_value (e.g., "ADD_OBJECTIVE: location:Find Pokemon Center in town:(15,20)" or "ADD_OBJECTIVE: item:Buy Pokeballs:5")
+ - Complete sub-objectives only: COMPLETE_OBJECTIVE: objective_id:notes (e.g., "COMPLETE_OBJECTIVE: my_sub_obj_123:Successfully bought Pokeballs")
+ - NOTE: Do NOT try to complete storyline objectives (story_*) - they auto-complete when milestones are reached]
+
+ PLAN:
+ [Think about your immediate goal - what do you want to accomplish in the next few actions? Consider your current objectives and recent history.
+ Check MOVEMENT MEMORY for areas you've had trouble with before and plan your route accordingly.]
+
+ REASONING:
+ [Explain why you're choosing this specific action. Reference the MOVEMENT PREVIEW and MOVEMENT MEMORY sections. Check the visual frame for NPCs before moving. If you see NPCs in the image, avoid walking into them. Consider any failed movements or known obstacles from your memory.]
+
+ ACTION:
+ [Your final action choice - PREFER SINGLE ACTIONS like 'RIGHT' or 'A'. Only use multiple actions like 'UP, UP, RIGHT' if you've verified each step is WALKABLE in the movement preview and map.]
+
+ {pathfinding_rules}
+
+ Context: {context} | Coords: {coords} """
+
+             # Print complete prompt to terminal for debugging
+             print("\n" + "="*120)
+             print("🤖 SIMPLE AGENT PROMPT SENT TO VLM:")
+             print("="*120)
+
+             # Print prompt in chunks to avoid terminal truncation
+             sys.stdout.write(prompt)
+             sys.stdout.write("\n")
+             sys.stdout.flush()
+
+             print("="*120)
+             print("🤖 END OF SIMPLE AGENT PROMPT")
+             print("="*120 + "\n")
+             sys.stdout.flush()
+
+             # Make VLM call - double-check frame validation before VLM
+             if frame and (hasattr(frame, 'save') or hasattr(frame, 'shape')):
+                 print("🔍 Making VLM call...")
+                 try:
+                     response = self.vlm.get_query(frame, prompt, "simple_mode")
+                     print(f"🔍 VLM response received: {response[:100]}..." if len(response) > 100 else f"🔍 VLM response: {response}")
+                 except Exception as e:
+                     print(f"❌ VLM call failed: {e}")
+                     return "WAIT"
+             else:
+                 logger.error("🚫 CRITICAL: About to call VLM but frame validation failed - this should never happen!")
+                 return "WAIT"
+
+             # Extract action(s) from structured response
+             actions, reasoning = self._parse_structured_response(response, game_state)
+
+             # Check for failed movement by comparing previous coordinates
+             if len(self.state.history) > 0:
+                 prev_coords = self.state.history[-1].player_coords
+                 if prev_coords and coords:
+                     # If coordinates didn't change and we attempted a movement, record it as failed
+                     if (prev_coords == coords and
+                         isinstance(actions, list) and len(actions) > 0 and
+                         actions[0] in ['UP', 'DOWN', 'LEFT', 'RIGHT']):
+                         self.record_failed_movement(coords, actions[0], "movement_blocked")
+                     elif (prev_coords == coords and
+                           isinstance(actions, str) and
+                           actions in ['UP', 'DOWN', 'LEFT', 'RIGHT']):
+                         self.record_failed_movement(coords, actions, "movement_blocked")
+
+             # Record this step in history with reasoning
+             game_state_summary = self.create_game_state_summary(game_state)
+             action_with_reasoning = f"{actions} | Reasoning: {reasoning}" if reasoning else str(actions)
+             history_entry = HistoryEntry(
+                 timestamp=datetime.now(),
+                 player_coords=coords,
+                 map_id=map_id,
+                 context=context,
+                 action_taken=action_with_reasoning,
+                 game_state_summary=game_state_summary
+             )
+             self.state.history.append(history_entry)
+
+             # Update recent actions
+             if isinstance(actions, list):
+                 self.state.recent_actions.extend(actions)
+             else:
+                 self.state.recent_actions.append(actions)
+
+             # Reset stuck detection for other locations when we move
+             if coords:
+                 keys_to_reset = [k for k in self.state.stuck_detection.keys()
+                                  if not k.startswith(f"{coords[0]}_{coords[1]}")]
+                 for key in keys_to_reset:
+                     if self.state.stuck_detection[key] > 0:
+                         self.state.stuck_detection[key] = max(0, self.state.stuck_detection[key] - 1)
+
+             # Update server with agent step and metrics (for agent thinking display)
+             self._update_server_metrics()
+
+             return actions
+
+         except Exception as e:
+             logger.error(f"Error in simple agent processing: {e}")
+             return ["A"] # Default safe action as list
+
+     def _update_server_metrics(self):
+         """Update server with current agent step count and LLM metrics"""
+         try:
+             import requests
+             from utils.llm_logger import get_llm_logger
+
+             # Get current LLM metrics
+             llm_logger = get_llm_logger()
+             metrics = llm_logger.get_cumulative_metrics()
+
+             # Send metrics to server
+             try:
+                 response = requests.post(
+                     "http://localhost:8000/agent_step",
+                     json={"metrics": metrics},
+                     timeout=1
+                 )
+                 if response.status_code != 200:
+                     logger.warning(f"Failed to update server metrics: {response.status_code}")
+             except requests.exceptions.RequestException:
+                 # Silent fail - server might not be running or in different mode
+                 pass
+
+         except Exception as e:
+             logger.warning(f"Error updating server metrics: {e}")
+
+     def _parse_actions(self, response: str, game_state: Dict[str, Any] = None) -> List[str]:
+         """Parse action response from LLM into list of valid actions"""
+         response_upper = response.upper().strip()
+         valid_actions = ['A', 'B', 'START', 'SELECT', 'UP', 'DOWN', 'LEFT', 'RIGHT', 'WAIT']
+
+         # Parse multiple actions (could be comma or space separated)
+         actions_found = []
+         # Replace commas with spaces for consistent parsing
+         response_clean = response_upper.replace(',', ' ').replace('.', ' ')
+         tokens = response_clean.split()
+
+         for token in tokens:
+             if token in valid_actions:
+                 actions_found.append(token)
+                 if len(actions_found) >= 10: # Max 10 actions
+                     break
+
+         # Validate movement sequences if we have game state
+         if game_state and len(actions_found) > 1:
+             # Check if this is a movement sequence
+             movement_actions = [a for a in actions_found if a in ['UP', 'DOWN', 'LEFT', 'RIGHT']]
+             if movement_actions:
+                 # Validate the movement sequence
+                 is_valid, reason = self.validate_movement_sequence(movement_actions, game_state)
+                 if not is_valid:
+                     logger.warning(f"Movement sequence validation failed: {reason}")
+                     # Only take the first movement if sequence is invalid
+                     if movement_actions:
+                         actions_found = [movement_actions[0]]
+                         logger.info(f"Reduced to single movement: {actions_found[0]}")
+
+         # If no valid actions found, use default
+         if not actions_found:
+             actions_found = ['A']
+
+         return actions_found
+
+     def _format_objectives_for_llm(self, active_objectives: List[Objective], completed_objectives: List[Objective]) -> str:
+         """Format objectives for LLM consumption"""
+         lines = []
+
+         if active_objectives:
+             lines.append("🎯 ACTIVE OBJECTIVES:")
+             for i, obj in enumerate(active_objectives[:5], 1): # Show top 5 active
+                 target_str = f" (Target: {obj.target_value})" if obj.target_value else ""
+                 lines.append(f" {i}. [{obj.objective_type}] {obj.description}{target_str} [ID: {obj.id}]")
+         else:
+             lines.append("🎯 ACTIVE OBJECTIVES: None - Consider setting some goals!")
+
+         if completed_objectives:
+             recent_completed = completed_objectives[-3:] # Show last 3 completed
+             lines.append("✅ RECENTLY COMPLETED:")
+             for obj in recent_completed:
+                 lines.append(f" ✓ [{obj.objective_type}] {obj.description}")
+
+         return "\n".join(lines)
+
910
+ def _parse_structured_response(self, response: str, game_state: Dict[str, Any] = None) -> Tuple[List[str], str]:
911
+ """Parse structured chain-of-thought response and extract actions and reasoning"""
912
+ try:
913
+ # Extract sections from structured response
914
+ analysis = ""
915
+ objectives_section = ""
916
+ plan = ""
917
+ reasoning = ""
918
+ actions = []
919
+
920
+ # Split response into lines for processing
921
+ lines = response.split('\n')
922
+ current_section = None
923
+
924
+ for line in lines:
925
+ line = line.strip()
926
+
927
+ # Identify section headers
928
+ if line.upper().startswith('ANALYSIS:'):
929
+ current_section = 'analysis'
930
+ analysis = line[9:].strip() # Remove "ANALYSIS:" prefix
931
+ elif line.upper().startswith('OBJECTIVES:'):
932
+ current_section = 'objectives'
933
+ objectives_section = line[11:].strip() # Remove "OBJECTIVES:" prefix
934
+ elif line.upper().startswith('PLAN:'):
935
+ current_section = 'plan'
936
+ plan = line[5:].strip() # Remove "PLAN:" prefix
937
+ elif line.upper().startswith('REASONING:'):
938
+ current_section = 'reasoning'
939
+ reasoning = line[10:].strip() # Remove "REASONING:" prefix
940
+ elif line.upper().startswith('ACTION:'):
941
+ current_section = 'action'
942
+ # Extract actions from this line
943
+ action_text = line[7:].strip() # Remove "ACTION:" prefix
944
+ if action_text: # Only parse if there's content
945
+ actions = self._parse_actions(action_text, game_state)
946
+ elif line and current_section:
947
+ # Continue content of current section
948
+ if current_section == 'analysis':
949
+ analysis += " " + line
950
+ elif current_section == 'objectives':
951
+ objectives_section += " " + line
952
+ elif current_section == 'plan':
953
+ plan += " " + line
954
+ elif current_section == 'reasoning':
955
+ reasoning += " " + line
956
+ elif current_section == 'action':
957
+ # Additional action parsing from action section content
958
+ if line.strip(): # Only process non-empty lines
959
+ additional_actions = self._parse_actions(line, game_state)
960
+ actions.extend(additional_actions)
961
+ if len(actions) >= 10: # Max 10 actions
962
+ actions = actions[:10]
963
+ break
964
+
965
+ # Process objectives if mentioned
966
+ if objectives_section:
967
+ self._process_objectives_from_response(objectives_section)
968
+
969
+ # If no actions found in structured format, fall back to parsing entire response
970
+ if not actions:
971
+ actions = self._parse_actions(response, game_state)
972
+
973
+ # Create concise reasoning summary
974
+ reasoning_parts = []
975
+ if analysis:
976
+ reasoning_parts.append(f"Analysis: {analysis}")
977
+ if objectives_section:
978
+ reasoning_parts.append(f"Objectives: {objectives_section}")
979
+ if plan:
980
+ reasoning_parts.append(f"Plan: {plan}")
981
+ if reasoning:
982
+ reasoning_parts.append(f"Reasoning: {reasoning}")
983
+
984
+ full_reasoning = " | ".join(reasoning_parts) if reasoning_parts else "No reasoning provided"
985
+
986
+ return actions, full_reasoning
987
+
988
+ except Exception as e:
989
+ logger.warning(f"Error parsing structured response: {e}")
990
+ # Fall back to basic action parsing
991
+ return self._parse_actions(response, game_state), "Error parsing reasoning"
992
+
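As a worked example, a response in the ANALYSIS/OBJECTIVES/PLAN/REASONING/ACTION layout parses as sketched below; the text and coordinates are hypothetical and `agent` stands for an existing SimpleAgent instance:

sample = (
    "ANALYSIS: Standing one tile below an open doorway.\n"
    "OBJECTIVES: ADD_OBJECTIVE: location:Enter the building:(12,8)\n"
    "PLAN: Step through the door, then look for an NPC.\n"
    "REASONING: UP is listed as WALKABLE in the movement preview.\n"
    "ACTION: UP"
)
# agent._parse_structured_response(sample) would return
# (['UP'], "Analysis: ... | Objectives: ... | Plan: ... | Reasoning: ..."),
# and the OBJECTIVES line also registers a new "location" objective targeting (12, 8).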
993
+ def _process_objectives_from_response(self, objectives_text: str):
994
+ """Process objective management commands from LLM response"""
995
+ try:
996
+ # Look for ADD_OBJECTIVE and COMPLETE_OBJECTIVE commands
997
+ for line in objectives_text.split('\n'):
998
+ line = line.strip()
999
+ if line.upper().startswith('ADD_OBJECTIVE:'):
1000
+ # Parse format: ADD_OBJECTIVE: type:description:target_value
1001
+ content = line[14:].strip() # Remove "ADD_OBJECTIVE:" prefix
1002
+ parts = content.split(':', 2) # Split into max 3 parts
1003
+
1004
+ if len(parts) >= 2:
1005
+ obj_type = parts[0].strip()
1006
+ description = parts[1].strip()
1007
+ target_value = parts[2].strip() if len(parts) > 2 else None
1008
+
1009
+ # Parse target_value based on type
1010
+ parsed_target = self._parse_target_value(obj_type, target_value)
1011
+
1012
+ # Add the objective
1013
+ self.add_objective(description, obj_type, parsed_target)
1014
+
1015
+ elif line.upper().startswith('COMPLETE_OBJECTIVE:'):
1016
+ # Parse format: COMPLETE_OBJECTIVE: objective_id:notes
1017
+ content = line[19:].strip() # Remove "COMPLETE_OBJECTIVE:" prefix
1018
+ parts = content.split(':', 1) # Split into max 2 parts
1019
+
1020
+ if len(parts) >= 1:
1021
+ obj_id = parts[0].strip()
1022
+ notes = parts[1].strip() if len(parts) > 1 else "Manually completed by LLM"
1023
+
1024
+ # Complete the objective
1025
+ success = self.complete_objective(obj_id, notes)
1026
+ if success:
1027
+ logger.info(f"LLM manually completed objective: {obj_id}")
1028
+ else:
1029
+ logger.warning(f"LLM tried to complete non-existent or already completed objective: {obj_id}")
1030
+
1031
+ except Exception as e:
1032
+ logger.warning(f"Error processing objectives from response: {e}")
1033
+
1034
+ def _parse_target_value(self, obj_type: str, target_str: Optional[str]) -> Any:
1035
+ """Parse target value based on objective type"""
1036
+ if not target_str:
1037
+ return None
1038
+
1039
+ try:
1040
+ if obj_type == "location":
1041
+ # Try to parse coordinates like "(15,20)" or "15,20"
1042
+ target_str = target_str.strip('()')
1043
+ if ',' in target_str:
1044
+ x, y = map(int, target_str.split(','))
1045
+ return (x, y)
1046
+ elif obj_type == "map":
1047
+ # Try to parse map ID as integer
1048
+ return int(target_str)
1049
+ else:
1050
+ # For other types, return as string
1051
+ return target_str
1052
+ except (ValueError, TypeError):
1053
+ # If parsing fails, return as string
1054
+ return target_str
1055
+
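A quick sanity check of the mapping, assuming `agent` is an existing SimpleAgent instance (illustration only):

assert agent._parse_target_value("location", "(15,20)") == (15, 20)   # coordinate tuple
assert agent._parse_target_value("map", "3") == 3                     # integer map id
assert agent._parse_target_value("item", "Potion") == "Potion"        # plain string fallback
assert agent._parse_target_value("location", None) is None            # no target supplied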
1056
+ def get_memory_usage_estimate(self) -> Dict[str, int]:
1057
+ """Estimate current memory usage for context management"""
1058
+ history_chars = sum(len(str(entry)) for entry in self.state.history)
1059
+ recent_actions_chars = sum(len(action) for action in self.state.recent_actions)
1060
+ objectives_chars = sum(len(f"{obj.description} {obj.target_value}") for obj in self.state.objectives)
1061
+
1062
+ return {
1063
+ "history_entries": len(self.state.history),
1064
+ "history_chars": history_chars,
1065
+ "recent_actions": len(self.state.recent_actions),
1066
+ "recent_actions_chars": recent_actions_chars,
1067
+ "objectives_count": len(self.state.objectives),
1068
+ "objectives_chars": objectives_chars,
1069
+ "estimated_total_chars": history_chars + recent_actions_chars + objectives_chars
1070
+ }
1071
+
1072
+ def get_objectives_state(self) -> Dict[str, Any]:
1073
+ """Get objectives formatted for forwarding in game state"""
1074
+ return {
1075
+ "active": [
1076
+ {
1077
+ "id": obj.id,
1078
+ "description": obj.description,
1079
+ "type": obj.objective_type,
1080
+ "target": obj.target_value,
1081
+ "created_at": obj.created_at.isoformat()
1082
+ }
1083
+ for obj in self.get_active_objectives()
1084
+ ],
1085
+ "completed": [
1086
+ {
1087
+ "id": obj.id,
1088
+ "description": obj.description,
1089
+ "type": obj.objective_type,
1090
+ "target": obj.target_value,
1091
+ "completed_at": obj.completed_at.isoformat() if obj.completed_at else None,
1092
+ "notes": obj.progress_notes
1093
+ }
1094
+ for obj in self.get_completed_objectives()[-5:] # Last 5 completed
1095
+ ],
1096
+ "updated": self.state.objectives_updated
1097
+ }
1098
+
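The forwarded payload has roughly this shape; the values below are hypothetical, and the exact field types come from the Objective class defined earlier in this file:

{
    "active": [
        {"id": "obj_3", "description": "Enter the building", "type": "location",
         "target": (12, 8), "created_at": "2025-01-01T12:00:00"},
    ],
    "completed": [
        {"id": "obj_1", "description": "Talk to the first NPC", "type": "interaction",
         "target": None, "completed_at": "2025-01-01T11:58:30",
         "notes": "Manually completed by LLM"},
    ],
    "updated": True,
}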
1099
+ def trim_history_for_context(self, max_chars: int = 4000):
1100
+ """Trim history to fit within context limits"""
1101
+ # Preserve minimum history for context
1102
+ min_history = max(5, self.history_display_count // 2)
1103
+ min_actions = max(10, self.actions_display_count // 2)
1104
+
1105
+ while self.get_memory_usage_estimate()["estimated_total_chars"] > max_chars and len(self.state.history) > min_history:
1106
+ self.state.history.popleft()
1107
+
1108
+ while len(self.state.recent_actions) > min_actions and self.get_memory_usage_estimate()["estimated_total_chars"] > max_chars:
1109
+ self.state.recent_actions.popleft()
1110
+
1111
+ def reset_objectives_updated_flag(self):
1112
+ """Reset the objectives updated flag (call after forwarding state)"""
1113
+ self.state.objectives_updated = False
1114
+
1115
+ def configure_history_limits(self, max_history_entries: int = None, max_recent_actions: int = None,
1116
+ history_display_count: int = None, actions_display_count: int = None):
1117
+ """Configure history tracking parameters at runtime"""
1118
+ if max_history_entries is not None:
1119
+ # Create new deque with updated max length, preserving existing data
1120
+ existing_history = list(self.state.history)
1121
+ self.state.history = deque(existing_history, maxlen=max_history_entries)
1122
+
1123
+ if max_recent_actions is not None:
1124
+ # Create new deque with updated max length, preserving existing data
1125
+ existing_actions = list(self.state.recent_actions)
1126
+ self.state.recent_actions = deque(existing_actions, maxlen=max_recent_actions)
1127
+
1128
+ if history_display_count is not None:
1129
+ self.history_display_count = history_display_count
1130
+
1131
+ if actions_display_count is not None:
1132
+ self.actions_display_count = actions_display_count
1133
+
1134
+ logger.info(f"Updated history configuration: {len(self.state.history)}/{self.state.history.maxlen} history, "
1135
+ f"{len(self.state.recent_actions)}/{self.state.recent_actions.maxlen} actions, "
1136
+ f"display {self.history_display_count}/{self.actions_display_count}")
1137
+
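Example of tuning these limits at runtime (numbers arbitrary); note that shrinking a maxlen rebuilds the deque and silently drops the oldest entries:

agent.configure_history_limits(
    max_history_entries=50,      # deque keeps only the newest 50 entries
    max_recent_actions=30,
    history_display_count=10,    # how many entries are surfaced to the LLM prompt
    actions_display_count=15,
)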
1138
+ def load_history_from_llm_checkpoint(self, checkpoint_file: str):
1139
+ """Load SimpleAgent history from LLM checkpoint file"""
1140
+ try:
1141
+ from utils.llm_logger import get_llm_logger
1142
+ import json
1143
+ import re
1144
+ from datetime import datetime
1145
+
1146
+ if not os.path.exists(checkpoint_file):
1147
+ logger.info(f"No checkpoint file found: {checkpoint_file}")
1148
+ return False
1149
+
1150
+ # Use LLM logger to restore cumulative metrics first
1151
+ llm_logger = get_llm_logger()
1152
+ if llm_logger:
1153
+ restored_step_count = llm_logger.load_checkpoint(checkpoint_file)
1154
+ if restored_step_count is not None:
1155
+ logger.info(f"✅ LLM logger restored checkpoint with {restored_step_count} steps")
1156
+ # Update SimpleAgent step counter to match LLM logger
1157
+ self.state.step_counter = restored_step_count
1158
+
1159
+ with open(checkpoint_file, 'r') as f:
1160
+ checkpoint_data = json.load(f)
1161
+
1162
+ log_entries = checkpoint_data.get("log_entries", [])
1163
+ restored_count = 0
1164
+
1165
+ for entry in log_entries:
1166
+ if entry.get("type") == "interaction" and "simple_mode" in entry.get("interaction_type", ""):
1167
+ try:
1168
+ # Extract state info from prompt
1169
+ prompt = entry.get("prompt", "")
1170
+ response = entry.get("response", "")
1171
+ timestamp_str = entry.get("timestamp", "")
1172
+
1173
+ # Parse coordinates from prompt
1174
+ coords_match = re.search(r"Position: X=(\d+), Y=(\d+)", prompt)
1175
+ coords = None
1176
+ if coords_match:
1177
+ coords = (int(coords_match.group(1)), int(coords_match.group(2)))
1178
+
1179
+ # Parse context from prompt
1180
+ context = "overworld" # default
1181
+ if "Game State: battle" in prompt:
1182
+ context = "battle"
1183
+ elif "DIALOGUE:" in prompt or "dialogue" in prompt.lower():
1184
+ context = "dialogue"
1185
+ elif "menu" in prompt.lower():
1186
+ context = "menu"
1187
+
1188
+ # Extract action from response
1189
+ action_taken = "UNKNOWN"
1190
+ if "ACTION:" in response:
1191
+ action_section = response.split("ACTION:")[-1].strip()
1192
+ action_line = action_section.split('\n')[0].strip()
1193
+ action_taken = action_line
1194
+
1195
+ # Parse timestamp
1196
+ timestamp = datetime.now()
1197
+ if timestamp_str:
1198
+ try:
1199
+ timestamp = datetime.fromisoformat(timestamp_str)
1200
+                             except ValueError:  # keep the datetime.now() fallback for non-ISO timestamps
1201
+ pass
1202
+
1203
+ # Create simplified game state summary
1204
+ game_state_summary = f"Position: {coords}" if coords else "Position unknown"
1205
+ if coords:
1206
+ game_state_summary += f" | Context: {context}"
1207
+
1208
+ # Add reasoning summary
1209
+ reasoning = ""
1210
+ if "REASONING:" in response:
1211
+ reasoning_section = response.split("REASONING:")[-1].split("ACTION:")[0].strip()
1212
+ reasoning = reasoning_section
1213
+
1214
+ action_with_reasoning = f"{action_taken} | Reasoning: {reasoning}" if reasoning else action_taken
1215
+
1216
+ # Create history entry
1217
+ history_entry = HistoryEntry(
1218
+ timestamp=timestamp,
1219
+ player_coords=coords,
1220
+ map_id=None, # Not available in checkpoint
1221
+ context=context,
1222
+ action_taken=action_with_reasoning,
1223
+ game_state_summary=game_state_summary
1224
+ )
1225
+
1226
+ self.state.history.append(history_entry)
1227
+
1228
+ # Also add to recent actions if it's a valid action
1229
+ if action_taken and action_taken not in ["UNKNOWN", "WAIT"]:
1230
+ # Parse multiple actions if comma-separated
1231
+ actions = [a.strip() for a in action_taken.replace(',', ' ').split()]
1232
+ for action in actions:
1233
+ if action in ['UP', 'DOWN', 'LEFT', 'RIGHT', 'A', 'B', 'START', 'SELECT']:
1234
+ self.state.recent_actions.append(action)
1235
+
1236
+ restored_count += 1
1237
+
1238
+ except Exception as e:
1239
+ logger.warning(f"Error parsing checkpoint entry: {e}")
1240
+ continue
1241
+
1242
+ # Update step counter to match checkpoint
1243
+ self.state.step_counter = restored_count
1244
+
1245
+ logger.info(f"✅ Restored {restored_count} history entries from {checkpoint_file}")
1246
+ logger.info(f" History: {len(self.state.history)} entries")
1247
+ logger.info(f" Recent actions: {len(self.state.recent_actions)} actions")
1248
+ logger.info(f" Step counter: {self.state.step_counter}")
1249
+
1250
+ return True
1251
+
1252
+ except Exception as e:
1253
+ logger.error(f"❌ Failed to load history from checkpoint: {e}")
1254
+ import traceback
1255
+ traceback.print_exc()
1256
+ return False
1257
+
1258
+ def save_history_to_llm_checkpoint(self, checkpoint_file: str = None):
1259
+ """Save SimpleAgent history using LLM logger checkpoint system"""
1260
+ try:
1261
+ from utils.llm_logger import get_llm_logger
1262
+
1263
+ # Get the global LLM logger instance
1264
+ llm_logger = get_llm_logger()
1265
+ if llm_logger is None:
1266
+ logger.warning("No LLM logger available for checkpoint saving")
1267
+ return False
1268
+
1269
+ # Save checkpoint using LLM logger which includes cumulative metrics
1270
+ # The LLM logger will handle saving log_entries AND cumulative_metrics
1271
+ # If checkpoint_file is None, it will use the cache folder
1272
+ llm_logger.save_checkpoint(checkpoint_file, agent_step_count=self.state.step_counter)
1273
+
1274
+             logger.info(f"💾 Saved LLM checkpoint to {checkpoint_file or 'default cache folder'}")
1275
+ logger.info(f" Step counter: {self.state.step_counter}")
1276
+ logger.info(f" History: {len(self.state.history)} entries")
1277
+ logger.info(f" Recent actions: {len(self.state.recent_actions)} actions")
1278
+ return True
1279
+
1280
+ except Exception as e:
1281
+ logger.error(f"❌ Failed to save LLM checkpoint: {e}")
1282
+ import traceback
1283
+ traceback.print_exc()
1284
+ return False
1285
+
1286
+ def record_failed_movement(self, coords: Tuple[int, int], direction: str, reason: str = "blocked"):
1287
+ """Record a failed movement attempt for future reference"""
1288
+ coord_key = f"{coords[0]},{coords[1]}"
1289
+ if coord_key not in self.state.failed_movements:
1290
+ self.state.failed_movements[coord_key] = []
1291
+
1292
+ failed_entry = f"{direction}:{reason}"
1293
+ if failed_entry not in self.state.failed_movements[coord_key]:
1294
+ self.state.failed_movements[coord_key].append(failed_entry)
1295
+ logger.info(f"Recorded failed movement: {coord_key} -> {direction} ({reason})")
1296
+
1297
+ def record_npc_interaction(self, coords: Tuple[int, int], interaction_type: str, notes: str = ""):
1298
+ """Record an NPC interaction for future reference"""
1299
+ coord_key = f"{coords[0]},{coords[1]}"
1300
+ interaction_info = f"{interaction_type}: {notes}" if notes else interaction_type
1301
+ self.state.npc_interactions[coord_key] = interaction_info
1302
+ logger.info(f"Recorded NPC interaction: {coord_key} -> {interaction_info}")
1303
+
1304
+ def get_movement_memory(self, coords: Tuple[int, int]) -> str:
1305
+ """Get memory about failed movements and interactions at specific coordinates"""
1306
+ coord_key = f"{coords[0]},{coords[1]}"
1307
+ memory_parts = []
1308
+
1309
+ # Check for failed movements
1310
+ if coord_key in self.state.failed_movements:
1311
+ failed_list = self.state.failed_movements[coord_key]
1312
+ memory_parts.append(f"Failed moves: {', '.join(failed_list)}")
1313
+
1314
+ # Check for NPC interactions
1315
+ if coord_key in self.state.npc_interactions:
1316
+ interaction = self.state.npc_interactions[coord_key]
1317
+ memory_parts.append(f"NPC: {interaction}")
1318
+
1319
+ return " | ".join(memory_parts) if memory_parts else ""
1320
+
1321
+ def get_area_movement_memory(self, center_coords: Tuple[int, int], radius: int = 7) -> str:
1322
+ """Get movement memory for the area around the player"""
1323
+ cx, cy = center_coords
1324
+ memory_lines = []
1325
+
1326
+ # Check nearby coordinates for failed movements or NPC interactions
1327
+ nearby_memories = []
1328
+ for dx in range(-radius, radius + 1):
1329
+ for dy in range(-radius, radius + 1):
1330
+ if dx == 0 and dy == 0:
1331
+ continue # Skip current position
1332
+
1333
+ check_coords = (cx + dx, cy + dy)
1334
+ memory = self.get_movement_memory(check_coords)
1335
+ if memory:
1336
+ nearby_memories.append(f"({check_coords[0]},{check_coords[1]}): {memory}")
1337
+
1338
+ if nearby_memories:
1339
+ memory_lines.append("🧠 MOVEMENT MEMORY (nearby area):")
1340
+ for memory in nearby_memories[:5]: # Limit to 5 most relevant
1341
+ memory_lines.append(f" {memory}")
1342
+
1343
+ return "\n".join(memory_lines)
1344
+
1345
+ def analyze_movement_preview(self, game_state: Dict[str, Any]) -> Dict[str, Any]:
1346
+ """
1347
+ Analyze the movement preview data from game state to find valid moves.
1348
+
1349
+ Returns:
1350
+ Dict with 'walkable_directions', 'blocked_directions', and 'special_tiles'
1351
+ """
1352
+ walkable_directions = []
1353
+ blocked_directions = []
1354
+ special_tiles = {}
1355
+
1356
+ # Look for movement preview in the formatted state
1357
+ formatted_state = format_state_for_llm(game_state)
1358
+ lines = formatted_state.split('\n')
1359
+
1360
+ in_movement_preview = False
1361
+ for line in lines:
1362
+ if 'MOVEMENT PREVIEW:' in line:
1363
+ in_movement_preview = True
1364
+ continue
1365
+
1366
+ if in_movement_preview:
1367
+ # Parse movement preview lines
1368
+ # Format: " UP : ( 15, 10) [.] WALKABLE - Optional description"
1369
+ if line.strip() and ':' in line:
1370
+ parts = line.strip().split(':')
1371
+ if len(parts) >= 2:
1372
+ direction = parts[0].strip()
1373
+ rest = parts[1].strip()
1374
+
1375
+ if direction in ['UP', 'DOWN', 'LEFT', 'RIGHT']:
1376
+ if 'WALKABLE' in rest:
1377
+ walkable_directions.append(direction)
1378
+ # Check for special tiles
1379
+ if 'Door/Entrance' in rest:
1380
+ special_tiles[direction] = 'door'
1381
+ elif 'Stairs/Warp' in rest:
1382
+ special_tiles[direction] = 'stairs'
1383
+ elif 'Tall grass' in rest:
1384
+ special_tiles[direction] = 'grass'
1385
+ elif 'Jump ledge' in rest and 'can jump' in rest:
1386
+ special_tiles[direction] = 'ledge'
1387
+ elif 'BLOCKED' in rest:
1388
+ blocked_directions.append(direction)
1389
+ elif not line.strip():
1390
+ # Empty line typically ends the movement preview section
1391
+ in_movement_preview = False
1392
+
1393
+ return {
1394
+ 'walkable_directions': walkable_directions,
1395
+ 'blocked_directions': blocked_directions,
1396
+ 'special_tiles': special_tiles
1397
+ }
1398
+
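For orientation, the parser keys off MOVEMENT PREVIEW lines shaped like the inline comment above. Given the hypothetical block below, it would return the dict underneath:

# MOVEMENT PREVIEW:
#   UP    : ( 15, 10) [.] WALKABLE - Door/Entrance
#   DOWN  : ( 15, 12) [#] BLOCKED
#   LEFT  : ( 14, 11) [.] WALKABLE - Tall grass
#   RIGHT : ( 16, 11) [#] BLOCKED
#
# -> {'walkable_directions': ['UP', 'LEFT'],
#     'blocked_directions': ['DOWN', 'RIGHT'],
#     'special_tiles': {'UP': 'door', 'LEFT': 'grass'}}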
1399
+ def validate_movement_sequence(self, movements: List[str], game_state: Dict[str, Any]) -> Tuple[bool, str]:
1400
+ """
1401
+ Validate if a sequence of movements is valid based on current state.
1402
+
1403
+ Args:
1404
+ movements: List of movement directions
1405
+ game_state: Current game state
1406
+
1407
+ Returns:
1408
+ Tuple of (is_valid, reason)
1409
+ """
1410
+ if not movements:
1411
+ return True, "No movements to validate"
1412
+
1413
+ # Analyze current movement options
1414
+ movement_info = self.analyze_movement_preview(game_state)
1415
+ walkable = movement_info['walkable_directions']
1416
+ blocked = movement_info['blocked_directions']
1417
+
1418
+ # Check first movement
1419
+ first_move = movements[0].upper()
1420
+ if first_move in blocked:
1421
+ return False, f"First movement {first_move} is BLOCKED"
1422
+
1423
+ if first_move not in walkable and first_move in ['UP', 'DOWN', 'LEFT', 'RIGHT']:
1424
+ return False, f"First movement {first_move} is not confirmed WALKABLE"
1425
+
1426
+ # For multiple movements, only allow if we're very confident
1427
+ if len(movements) > 1:
1428
+             # Tile walkability can only be verified one step ahead,
1428
+             # so multi-step movement sequences are always rejected here.
1430
+ return False, "Cannot validate multi-step movements - use single steps instead"
1431
+
1432
+ return True, "Movement validated"
1433
+
1434
+ def get_history_stats(self) -> Dict[str, int]:
1435
+ """Get current history tracking statistics"""
1436
+ return {
1437
+ "history_entries": len(self.state.history),
1438
+ "max_history_entries": self.state.history.maxlen,
1439
+ "recent_actions": len(self.state.recent_actions),
1440
+ "max_recent_actions": self.state.recent_actions.maxlen,
1441
+ "history_display_count": self.history_display_count,
1442
+ "actions_display_count": self.actions_display_count,
1443
+ "objectives_count": len(self.state.objectives),
1444
+ "step_counter": self.state.step_counter,
1445
+ "failed_movements": len(self.state.failed_movements),
1446
+ "npc_interactions": len(self.state.npc_interactions)
1447
+ }
1448
+
1449
+ # Global simple agent instance for backward compatibility with existing multiprocess code
1450
+ _global_simple_agent = None
1451
+
1452
+ def get_simple_agent(vlm) -> SimpleAgent:
1453
+ """Get or create the global simple agent instance"""
1454
+ global _global_simple_agent
1455
+ if _global_simple_agent is None:
1456
+ _global_simple_agent = SimpleAgent(vlm)
1457
+
1458
+ # Check if we should load from checkpoint
1459
+ import os
1460
+ if os.environ.get("LOAD_CHECKPOINT_MODE") == "true":
1461
+ # Check cache folder first, then fall back to old location
1462
+ cache_dir = ".pokeagent_cache"
1463
+ checkpoint_file = os.path.join(cache_dir, "checkpoint_llm.txt") if os.path.exists(cache_dir) else "checkpoint_llm.txt"
1464
+ if not os.path.exists(checkpoint_file) and os.path.exists("checkpoint_llm.txt"):
1465
+ checkpoint_file = "checkpoint_llm.txt"
1466
+ if os.path.exists(checkpoint_file):
1467
+ logger.info(f"🔄 Loading SimpleAgent history from {checkpoint_file}")
1468
+ _global_simple_agent.load_history_from_llm_checkpoint(checkpoint_file)
1469
+ else:
1470
+ logger.info(f"⚠️ No checkpoint file found: {checkpoint_file}")
1471
+
1472
+ elif _global_simple_agent.vlm != vlm:
1473
+ # VLM changed, create new instance
1474
+ _global_simple_agent = SimpleAgent(vlm)
1475
+
1476
+ # Load checkpoint for new instance too if mode is set
1477
+ import os
1478
+ if os.environ.get("LOAD_CHECKPOINT_MODE") == "true":
1479
+ # Check cache folder first, then fall back to old location
1480
+ cache_dir = ".pokeagent_cache"
1481
+ checkpoint_file = os.path.join(cache_dir, "checkpoint_llm.txt") if os.path.exists(cache_dir) else "checkpoint_llm.txt"
1482
+ if not os.path.exists(checkpoint_file) and os.path.exists("checkpoint_llm.txt"):
1483
+ checkpoint_file = "checkpoint_llm.txt"
1484
+ if os.path.exists(checkpoint_file):
1485
+ logger.info(f"🔄 Loading SimpleAgent history from {checkpoint_file}")
1486
+ _global_simple_agent.load_history_from_llm_checkpoint(checkpoint_file)
1487
+
1488
+ return _global_simple_agent
1489
+
1490
+ def simple_mode_processing_multiprocess(vlm, game_state, args=None):
1491
+ """Simple mode processing function for multiprocess mode (backward compatibility)"""
1492
+ # args parameter kept for backward compatibility but not used
1493
+ _ = args # Acknowledge unused parameter
1494
+ agent = get_simple_agent(vlm)
1495
+ frame = game_state["visual"]["screenshot"]
1496
+
1497
+ # CRITICAL: Validate frame before processing
1498
+ if frame is None:
1499
+ logger.error("🚫 CRITICAL: simple_step called with None frame")
1500
+         return ["WAIT"]  # keep the return type consistent with process_step (list of actions)
1501
+
1502
+ return agent.process_step(frame, game_state)
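Typical wiring for the multiprocess entry point, assuming the caller already has a vlm backend and a populated game_state dict (both names are placeholders; the import path follows this package's layout):

from agent.simple import get_simple_agent, simple_mode_processing_multiprocess

agent = get_simple_agent(vlm)                                   # reuses the global instance
actions = simple_mode_processing_multiprocess(vlm, game_state)  # e.g. ['UP'] or ['A']
agent.save_history_to_llm_checkpoint()                          # persist to the cache folder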