synth-ai 0.1.9__py3-none-any.whl → 0.2.1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (266)
  1. synth_ai/__init__.py +28 -2
  2. synth_ai/core/system.py +4 -0
  3. synth_ai/environments/__init__.py +35 -0
  4. synth_ai/environments/environment/__init__.py +1 -0
  5. synth_ai/environments/environment/artifacts/__init__.py +1 -0
  6. synth_ai/environments/environment/artifacts/base.py +50 -0
  7. synth_ai/environments/environment/core.py +22 -0
  8. synth_ai/environments/environment/db/__init__.py +1 -0
  9. synth_ai/environments/environment/db/sqlite.py +45 -0
  10. synth_ai/environments/environment/registry.py +24 -0
  11. synth_ai/environments/environment/resources/sqlite.py +46 -0
  12. synth_ai/environments/environment/results.py +1 -0
  13. synth_ai/environments/environment/rewards/__init__.py +1 -0
  14. synth_ai/environments/environment/rewards/core.py +28 -0
  15. synth_ai/environments/environment/shared_engine.py +26 -0
  16. synth_ai/environments/environment/tools/__init__.py +34 -0
  17. synth_ai/environments/examples/__init__.py +1 -0
  18. synth_ai/environments/examples/crafter_classic/__init__.py +8 -0
  19. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +58 -0
  20. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +152 -0
  21. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +1194 -0
  22. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +51 -0
  23. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +872 -0
  24. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1412 -0
  25. synth_ai/environments/examples/crafter_classic/agent_demos/test_crafter_react_agent.py +1110 -0
  26. synth_ai/environments/examples/crafter_classic/config_logging.py +111 -0
  27. synth_ai/environments/examples/crafter_classic/engine.py +502 -0
  28. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +63 -0
  29. synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +5 -0
  30. synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +74 -0
  31. synth_ai/environments/examples/crafter_classic/environment.py +255 -0
  32. synth_ai/environments/examples/crafter_classic/taskset.py +228 -0
  33. synth_ai/environments/examples/enron/agent_demos/test_synth_react.py +535 -0
  34. synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +156 -0
  35. synth_ai/environments/examples/enron/art_helpers/local_email_db.py +280 -0
  36. synth_ai/environments/examples/enron/art_helpers/types_enron.py +24 -0
  37. synth_ai/environments/examples/enron/engine.py +291 -0
  38. synth_ai/environments/examples/enron/environment.py +165 -0
  39. synth_ai/environments/examples/enron/taskset.py +112 -0
  40. synth_ai/environments/examples/enron/units/keyword_stats.py +111 -0
  41. synth_ai/environments/examples/enron/units/test_email_index.py +8 -0
  42. synth_ai/environments/examples/minigrid/__init__.py +48 -0
  43. synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +1188 -0
  44. synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +47 -0
  45. synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +562 -0
  46. synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +220 -0
  47. synth_ai/environments/examples/minigrid/agent_demos/test_minigrid_react_agent.py +393 -0
  48. synth_ai/environments/examples/minigrid/engine.py +589 -0
  49. synth_ai/environments/examples/minigrid/environment.py +274 -0
  50. synth_ai/environments/examples/minigrid/environment_mapping.py +242 -0
  51. synth_ai/environments/examples/minigrid/puzzle_loader.py +416 -0
  52. synth_ai/environments/examples/minigrid/taskset.py +583 -0
  53. synth_ai/environments/examples/minigrid/units/test_action_behavior.py +226 -0
  54. synth_ai/environments/examples/minigrid/units/test_debug_messages.py +83 -0
  55. synth_ai/environments/examples/minigrid/units/test_exploration.py +120 -0
  56. synth_ai/environments/examples/minigrid/units/test_minigrid_engine.py +214 -0
  57. synth_ai/environments/examples/minigrid/units/test_minigrid_environment.py +238 -0
  58. synth_ai/environments/examples/minigrid/units/test_minigrid_environment_mapping.py +301 -0
  59. synth_ai/environments/examples/minigrid/units/test_minigrid_taskset.py +210 -0
  60. synth_ai/environments/examples/nethack/__init__.py +7 -0
  61. synth_ai/environments/examples/nethack/achievements.py +337 -0
  62. synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +981 -0
  63. synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +74 -0
  64. synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +832 -0
  65. synth_ai/environments/examples/nethack/agent_demos/test_nethack_react_agent.py +1112 -0
  66. synth_ai/environments/examples/nethack/engine.py +738 -0
  67. synth_ai/environments/examples/nethack/environment.py +255 -0
  68. synth_ai/environments/examples/nethack/helpers/__init__.py +42 -0
  69. synth_ai/environments/examples/nethack/helpers/action_mapping.py +301 -0
  70. synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +401 -0
  71. synth_ai/environments/examples/nethack/helpers/observation_utils.py +433 -0
  72. synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +201 -0
  73. synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +268 -0
  74. synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +308 -0
  75. synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +430 -0
  76. synth_ai/environments/examples/nethack/taskset.py +323 -0
  77. synth_ai/environments/examples/nethack/units/test_nethack_engine.py +277 -0
  78. synth_ai/environments/examples/nethack/units/test_nethack_environment.py +281 -0
  79. synth_ai/environments/examples/nethack/units/test_nethack_taskset.py +213 -0
  80. synth_ai/environments/examples/nethack/units/test_recording.py +307 -0
  81. synth_ai/environments/examples/red/__init__.py +7 -0
  82. synth_ai/environments/examples/red/agent_demos/__init__.py +1 -0
  83. synth_ai/environments/examples/red/agent_demos/test_synth_react.py +1471 -0
  84. synth_ai/environments/examples/red/config_logging.py +110 -0
  85. synth_ai/environments/examples/red/engine.py +693 -0
  86. synth_ai/environments/examples/red/engine_helpers/__init__.py +1 -0
  87. synth_ai/environments/examples/red/engine_helpers/memory_map.py +28 -0
  88. synth_ai/environments/examples/red/engine_helpers/reward_components.py +275 -0
  89. synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +142 -0
  90. synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +56 -0
  91. synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +283 -0
  92. synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +149 -0
  93. synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +137 -0
  94. synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +56 -0
  95. synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +330 -0
  96. synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +120 -0
  97. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +558 -0
  98. synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +312 -0
  99. synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +147 -0
  100. synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +246 -0
  101. synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +367 -0
  102. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +139 -0
  103. synth_ai/environments/examples/red/environment.py +235 -0
  104. synth_ai/environments/examples/red/taskset.py +77 -0
  105. synth_ai/environments/examples/red/test_fixes.py +125 -0
  106. synth_ai/environments/examples/red/test_fixes_mock.py +148 -0
  107. synth_ai/environments/examples/red/units/__init__.py +1 -0
  108. synth_ai/environments/examples/red/units/test_basic_functionality.py +97 -0
  109. synth_ai/environments/examples/red/units/test_button_press_requirements.py +217 -0
  110. synth_ai/environments/examples/red/units/test_engine.py +192 -0
  111. synth_ai/environments/examples/red/units/test_environment.py +455 -0
  112. synth_ai/environments/examples/red/units/test_exploration_strategy.py +227 -0
  113. synth_ai/environments/examples/red/units/test_integration.py +217 -0
  114. synth_ai/environments/examples/red/units/test_memory_extraction.py +111 -0
  115. synth_ai/environments/examples/red/units/test_menu_bug_reproduction.py +1100 -0
  116. synth_ai/environments/examples/red/units/test_movement_debug.py +255 -0
  117. synth_ai/environments/examples/red/units/test_pokemon_mcts_debug.py +163 -0
  118. synth_ai/environments/examples/red/units/test_pokemon_mcts_verbose.py +117 -0
  119. synth_ai/environments/examples/red/units/test_red_basic.py +145 -0
  120. synth_ai/environments/examples/red/units/test_red_comprehensive.py +323 -0
  121. synth_ai/environments/examples/red/units/test_retry_movement.py +195 -0
  122. synth_ai/environments/examples/red/units/test_reward_components.py +186 -0
  123. synth_ai/environments/examples/red/units/test_rom_integration.py +260 -0
  124. synth_ai/environments/examples/red/units/test_taskset.py +116 -0
  125. synth_ai/environments/examples/red/units/test_tree.py +448 -0
  126. synth_ai/environments/examples/sokoban/__init__.py +1 -0
  127. synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +900 -0
  128. synth_ai/environments/examples/sokoban/agent_demos/test_dspy_react.py +1 -0
  129. synth_ai/environments/examples/sokoban/agent_demos/test_sokoban_react_agent.py +498 -0
  130. synth_ai/environments/examples/sokoban/agent_demos/test_synth_lats.py +1 -0
  131. synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_locally.py +748 -0
  132. synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_service.py +296 -0
  133. synth_ai/environments/examples/sokoban/engine.py +675 -0
  134. synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +1 -0
  135. synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +656 -0
  136. synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +17 -0
  137. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +3 -0
  138. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +129 -0
  139. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +370 -0
  140. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +331 -0
  141. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +305 -0
  142. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +66 -0
  143. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +114 -0
  144. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +122 -0
  145. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +394 -0
  146. synth_ai/environments/examples/sokoban/environment.py +228 -0
  147. synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +438 -0
  148. synth_ai/environments/examples/sokoban/puzzle_loader.py +311 -0
  149. synth_ai/environments/examples/sokoban/taskset.py +425 -0
  150. synth_ai/environments/examples/sokoban/units/astar_common.py +94 -0
  151. synth_ai/environments/examples/sokoban/units/test_building_task_set.py +49 -0
  152. synth_ai/environments/examples/sokoban/units/test_false_positive.py +120 -0
  153. synth_ai/environments/examples/sokoban/units/test_simple_run_through_environment.py +119 -0
  154. synth_ai/environments/examples/sokoban/units/test_sokoban_environment.py +98 -0
  155. synth_ai/environments/examples/sokoban/units/test_tree.py +364 -0
  156. synth_ai/environments/examples/tictactoe/__init__.py +1 -0
  157. synth_ai/environments/examples/tictactoe/agent_demos/test_synth_react.py +266 -0
  158. synth_ai/environments/examples/tictactoe/agent_demos/test_tictactoe_react_agent.py +470 -0
  159. synth_ai/environments/examples/tictactoe/engine.py +368 -0
  160. synth_ai/environments/examples/tictactoe/environment.py +239 -0
  161. synth_ai/environments/examples/tictactoe/taskset.py +214 -0
  162. synth_ai/environments/examples/tictactoe/units/test_tictactoe_engine.py +393 -0
  163. synth_ai/environments/examples/tictactoe/units/test_tictactoe_environment.py +493 -0
  164. synth_ai/environments/examples/tictactoe/units/test_tictactoe_taskset.py +191 -0
  165. synth_ai/environments/examples/verilog/__init__.py +10 -0
  166. synth_ai/environments/examples/verilog/agent_demos/test_synth_react.py +520 -0
  167. synth_ai/environments/examples/verilog/engine.py +328 -0
  168. synth_ai/environments/examples/verilog/environment.py +349 -0
  169. synth_ai/environments/examples/verilog/taskset.py +418 -0
  170. synth_ai/environments/examples/verilog/units/test_verilog_engine.py +466 -0
  171. synth_ai/environments/examples/verilog/units/test_verilog_environment.py +585 -0
  172. synth_ai/environments/examples/verilog/units/test_verilog_integration.py +383 -0
  173. synth_ai/environments/examples/verilog/units/test_verilog_taskset.py +457 -0
  174. synth_ai/environments/reproducibility/core.py +42 -0
  175. synth_ai/environments/reproducibility/tree.py +364 -0
  176. synth_ai/environments/service/app.py +78 -0
  177. synth_ai/environments/service/core_routes.py +775 -0
  178. synth_ai/environments/service/external_registry.py +57 -0
  179. synth_ai/environments/service/registry.py +9 -0
  180. synth_ai/environments/stateful/__init__.py +1 -0
  181. synth_ai/environments/stateful/core.py +28 -0
  182. synth_ai/environments/stateful/engine.py +21 -0
  183. synth_ai/environments/stateful/state.py +7 -0
  184. synth_ai/environments/tasks/api.py +19 -0
  185. synth_ai/environments/tasks/core.py +78 -0
  186. synth_ai/environments/tasks/filters.py +39 -0
  187. synth_ai/environments/tasks/utils.py +89 -0
  188. synth_ai/environments/v0_observability/history.py +3 -0
  189. synth_ai/environments/v0_observability/log.py +2 -0
  190. synth_ai/lm/caching/constants.py +1 -0
  191. synth_ai/{zyk/lms → lm}/caching/ephemeral.py +4 -8
  192. synth_ai/{zyk/lms → lm}/caching/handler.py +15 -15
  193. synth_ai/{zyk/lms → lm}/caching/initialize.py +2 -4
  194. synth_ai/{zyk/lms → lm}/caching/persistent.py +4 -10
  195. synth_ai/{zyk/lms → lm}/config.py +2 -1
  196. synth_ai/{zyk/lms → lm}/constants.py +2 -2
  197. synth_ai/{zyk/lms → lm}/core/all.py +10 -10
  198. synth_ai/{zyk/lms → lm}/core/main.py +57 -33
  199. synth_ai/{zyk/lms → lm}/core/vendor_clients.py +12 -10
  200. synth_ai/lm/cost/monitor.py +1 -0
  201. synth_ai/lm/cost/statefulness.py +1 -0
  202. synth_ai/lm/provider_support/__init__.py +8 -0
  203. synth_ai/lm/provider_support/anthropic.py +945 -0
  204. synth_ai/lm/provider_support/openai.py +1115 -0
  205. synth_ai/lm/provider_support/suppress_logging.py +31 -0
  206. synth_ai/{zyk/lms → lm}/structured_outputs/handler.py +58 -80
  207. synth_ai/{zyk/lms → lm}/structured_outputs/inject.py +6 -20
  208. synth_ai/{zyk/lms → lm}/structured_outputs/rehabilitate.py +6 -12
  209. synth_ai/{zyk/lms → lm}/vendors/core/anthropic_api.py +21 -30
  210. synth_ai/{zyk/lms → lm}/vendors/core/gemini_api.py +37 -32
  211. synth_ai/{zyk/lms → lm}/vendors/core/mistral_api.py +19 -28
  212. synth_ai/{zyk/lms → lm}/vendors/core/openai_api.py +26 -36
  213. synth_ai/{zyk/lms → lm}/vendors/openai_standard.py +29 -33
  214. synth_ai/{zyk/lms → lm}/vendors/retries.py +1 -1
  215. synth_ai/lm/vendors/supported/__init__.py +0 -0
  216. synth_ai/{zyk/lms → lm}/vendors/supported/custom_endpoint.py +131 -118
  217. synth_ai/{zyk/lms → lm}/vendors/supported/deepseek.py +4 -8
  218. synth_ai/{zyk/lms → lm}/vendors/supported/grok.py +6 -8
  219. synth_ai/{zyk/lms → lm}/vendors/supported/groq.py +1 -1
  220. synth_ai/{zyk/lms → lm}/vendors/supported/ollama.py +2 -2
  221. synth_ai/{zyk/lms → lm}/vendors/supported/openrouter.py +18 -16
  222. synth_ai/{zyk/lms → lm}/vendors/supported/together.py +1 -1
  223. synth_ai/tracing/__init__.py +0 -0
  224. synth_ai/tracing/abstractions.py +224 -0
  225. synth_ai/tracing/base_client.py +91 -0
  226. synth_ai/tracing/client_manager.py +131 -0
  227. synth_ai/tracing/config.py +140 -0
  228. synth_ai/tracing/context.py +146 -0
  229. synth_ai/tracing/decorators.py +679 -0
  230. synth_ai/tracing/events/__init__.py +0 -0
  231. synth_ai/tracing/events/manage.py +147 -0
  232. synth_ai/tracing/events/scope.py +86 -0
  233. synth_ai/tracing/events/store.py +227 -0
  234. synth_ai/tracing/immediate_client.py +152 -0
  235. synth_ai/tracing/local.py +18 -0
  236. synth_ai/tracing/log_client_base.py +74 -0
  237. synth_ai/tracing/retry_queue.py +187 -0
  238. synth_ai/tracing/trackers.py +515 -0
  239. synth_ai/tracing/upload.py +504 -0
  240. synth_ai/tracing/utils.py +9 -0
  241. synth_ai/zyk/__init__.py +28 -2
  242. synth_ai-0.2.1.dev0.dist-info/METADATA +349 -0
  243. synth_ai-0.2.1.dev0.dist-info/RECORD +261 -0
  244. synth_ai/zyk/lms/caching/constants.py +0 -1
  245. synth_ai/zyk/lms/cost/monitor.py +0 -1
  246. synth_ai/zyk/lms/cost/statefulness.py +0 -1
  247. synth_ai-0.1.9.dist-info/METADATA +0 -37
  248. synth_ai-0.1.9.dist-info/RECORD +0 -50
  249. /synth_ai/{zyk/lms/__init__.py → environments/reproducibility/helpers.py} +0 -0
  250. /synth_ai/{zyk/lms/caching → lm}/__init__.py +0 -0
  251. /synth_ai/{zyk/lms/core → lm/caching}/__init__.py +0 -0
  252. /synth_ai/{zyk/lms → lm}/caching/dbs.py +0 -0
  253. /synth_ai/{zyk/lms/cost → lm/core}/__init__.py +0 -0
  254. /synth_ai/{zyk/lms → lm}/core/exceptions.py +0 -0
  255. /synth_ai/{zyk/lms/structured_outputs → lm/cost}/__init__.py +0 -0
  256. /synth_ai/{zyk/lms/vendors → lm/structured_outputs}/__init__.py +0 -0
  257. /synth_ai/{zyk/lms → lm}/tools/__init__.py +0 -0
  258. /synth_ai/{zyk/lms → lm}/tools/base.py +0 -0
  259. /synth_ai/{zyk/lms/vendors/core → lm/vendors}/__init__.py +0 -0
  260. /synth_ai/{zyk/lms → lm}/vendors/base.py +0 -0
  261. /synth_ai/{zyk/lms/vendors/local → lm/vendors/core}/__init__.py +0 -0
  262. /synth_ai/{zyk/lms/vendors/supported → lm/vendors/local}/__init__.py +0 -0
  263. /synth_ai/{zyk/lms → lm}/vendors/local/ollama.py +0 -0
  264. {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/WHEEL +0 -0
  265. {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/licenses/LICENSE +0 -0
  266. {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,455 @@
1
+ import pytest
2
+ from unittest.mock import Mock, patch, AsyncMock
3
+
4
+ # Add imports for the new dataclasses
5
+ from synth_ai.environments.examples.red.engine import (
6
+ GameWorldState,
7
+ PlayerProgressState,
8
+ GameSystemState,
9
+ PokemonData,
10
+ )
11
+ from synth_ai.environments.examples.red.environment import (
12
+ PokemonRedEnvironment,
13
+ PokemonRedPublicState,
14
+ PokemonRedPrivateState,
15
+ PressButtonTool,
16
+ PokemonRedObservationCallable,
17
+ )
18
+ from synth_ai.environments.environment.tools import EnvToolCall, ToolResult
19
+ from synth_ai.environments.examples.red.taskset import INSTANCE as DEFAULT_TASK
20
+
21
+
22
class TestPokemonRedEnvironment:
    """Test Pokemon Red environment wrapper.

    Tests that build a ``PokemonRedEnvironment`` first patch the engine class
    name looked up by the environment module, so ``env.engine`` is a mock.
    """

    # Dotted path of the engine class as seen from the environment module.
    # It has to name the same module the tests import PokemonRedEnvironment
    # from; a stale path makes ``patch`` fail to import its target.
    _ENGINE_TARGET = "synth_ai.environments.examples.red.environment.PokemonRedEngine"

    @pytest.fixture
    def mock_engine(self):
        """Create a mock engine whose reset returns a step-0 state pair."""
        engine = Mock()
        engine._reset_engine = AsyncMock(
            return_value=(
                PokemonRedPrivateState(
                    reward_last_step=0.0,
                    total_reward=0.0,
                    terminated=False,
                    truncated=False,
                    step_count=0,
                ),
                create_test_public_state(
                    map_id=3,
                    player_x=10,
                    player_y=8,
                    badges=0,
                    in_battle=False,
                    party_level=10,
                    party_hp_current=35,
                    party_hp_max=35,
                    party_xp=1000,
                    step_count=0,
                ),
            )
        )
        engine._step_engine = AsyncMock()
        engine._serialize_engine = AsyncMock()
        engine._create_states = Mock()
        return engine

    @patch(_ENGINE_TARGET)
    def test_environment_initialization(self, mock_engine_class, mock_engine):
        """Test environment construction wires up name, task, engine and tool."""
        mock_engine_class.return_value = mock_engine

        env = PokemonRedEnvironment()

        assert env.name == "PokemonRed"
        assert env.task_instance == DEFAULT_TASK
        assert env.engine == mock_engine
        assert isinstance(env._press_button_tool, PressButtonTool)

    @patch(_ENGINE_TARGET)
    @pytest.mark.asyncio
    async def test_initialize(self, mock_engine_class, mock_engine):
        """Test that initialize() resets the engine and returns an observation."""
        mock_engine_class.return_value = mock_engine

        env = PokemonRedEnvironment()
        obs = await env.initialize()

        mock_engine._reset_engine.assert_called_once()
        assert "position" in obs
        assert "badges_earned" in obs
        assert obs["badges_earned"] == 0
        assert obs["party_level"] == 10

    @patch(_ENGINE_TARGET)
    @pytest.mark.asyncio
    async def test_terminate(self, mock_engine_class, mock_engine):
        """Test environment termination"""
        mock_engine_class.return_value = mock_engine
        mock_engine._create_states.return_value = (
            PokemonRedPrivateState(
                reward_last_step=0.0,
                total_reward=10.5,
                terminated=True,
                truncated=False,
                step_count=42,
            ),
            create_test_public_state(
                map_id=3,
                player_x=10,
                player_y=8,
                badges=1,
                in_battle=False,
                party_level=12,
                party_hp_current=30,
                party_hp_max=35,
                party_xp=1500,
                step_count=42,
            ),
        )

        env = PokemonRedEnvironment()
        obs = await env.terminate()

        assert obs["terminated"] is True
        assert "message" in obs

    def test_validate_tool_calls_single_call(self):
        """Test tool call validation with single call"""
        with patch(self._ENGINE_TARGET):
            env = PokemonRedEnvironment()

            call = EnvToolCall(tool="press_button", args={"button": "A"})
            validated = env.validate_tool_calls(call)

            assert validated == call

    def test_validate_tool_calls_list(self):
        """Test tool call validation with list"""
        with patch(self._ENGINE_TARGET):
            env = PokemonRedEnvironment()

            call = EnvToolCall(tool="press_button", args={"button": "A"})
            validated = env.validate_tool_calls([call])

            assert validated == call

    def test_validate_tool_calls_nested_list(self):
        """Test tool call validation with nested list"""
        with patch(self._ENGINE_TARGET):
            env = PokemonRedEnvironment()

            call = EnvToolCall(tool="press_button", args={"button": "A"})
            validated = env.validate_tool_calls([[call]])

            assert validated == call

    def test_validate_tool_calls_invalid_tool(self):
        """Test tool call validation with invalid tool"""
        with patch(self._ENGINE_TARGET):
            env = PokemonRedEnvironment()

            call = EnvToolCall(tool="invalid_tool", args={})
            with pytest.raises(ValueError, match="Unknown tool: invalid_tool"):
                env.validate_tool_calls(call)

    def test_validate_tool_calls_empty_list(self):
        """Test tool call validation with empty list"""
        with patch(self._ENGINE_TARGET):
            env = PokemonRedEnvironment()

            with pytest.raises(ValueError, match="empty list"):
                env.validate_tool_calls([])

    def test_validate_tool_calls_wrong_type(self):
        """Test tool call validation with wrong type"""
        with patch(self._ENGINE_TARGET):
            env = PokemonRedEnvironment()

            with pytest.raises(TypeError):
                env.validate_tool_calls("not_a_call")

    @patch(_ENGINE_TARGET)
    @pytest.mark.asyncio
    async def test_step_successful(self, mock_engine_class, mock_engine):
        """Test successful step execution"""
        mock_engine_class.return_value = mock_engine

        # Mock successful tool execution
        tool_result = ToolResult(
            ok=True,
            payload={
                "private": PokemonRedPrivateState(
                    reward_last_step=0.1,
                    total_reward=0.1,
                    terminated=False,
                    truncated=False,
                    step_count=1,
                ),
                "public": create_test_public_state(
                    map_id=3,
                    player_x=11,
                    player_y=8,
                    badges=0,
                    in_battle=False,
                    party_level=10,
                    party_hp_current=35,
                    party_hp_max=35,
                    party_xp=1000,
                    step_count=1,
                ),
            },
        )

        env = PokemonRedEnvironment()
        # Replace the real tool so step() receives the canned result above.
        env._press_button_tool = AsyncMock(return_value=tool_result)

        call = EnvToolCall(tool="press_button", args={"button": "RIGHT"})
        obs = await env.step(call)

        assert obs["position"] == "Map03:(11,8)"
        assert obs["step_count"] == 1
        assert obs["total_reward"] == 0.1

    @patch(_ENGINE_TARGET)
    @pytest.mark.asyncio
    async def test_step_failed_tool(self, mock_engine_class, mock_engine):
        """Test step with failed tool execution"""
        mock_engine_class.return_value = mock_engine
        mock_engine._create_states.return_value = (
            PokemonRedPrivateState(
                reward_last_step=0.0,
                total_reward=0.0,
                terminated=False,
                truncated=False,
                step_count=0,
            ),
            create_test_public_state(
                map_id=3,
                player_x=10,
                player_y=8,
                badges=0,
                in_battle=False,
                party_level=10,
                party_hp_current=35,
                party_hp_max=35,
                party_xp=1000,
                step_count=0,
                error_info="Button press failed",
            ),
        )

        # Mock failed tool execution
        tool_result = ToolResult(ok=False, error="Invalid button", payload={"public": {}})

        env = PokemonRedEnvironment()
        env._press_button_tool = AsyncMock(return_value=tool_result)

        call = EnvToolCall(tool="press_button", args={"button": "INVALID"})
        obs = await env.step(call)

        # Should still return valid observation
        assert "position" in obs

    @patch(_ENGINE_TARGET)
    @pytest.mark.asyncio
    async def test_checkpoint(self, mock_engine_class, mock_engine):
        """Test environment checkpointing"""
        mock_engine_class.return_value = mock_engine
        mock_engine._serialize_engine.return_value = Mock(model_dump=lambda: {"test": "data"})
        mock_engine._create_states.return_value = (
            PokemonRedPrivateState(
                reward_last_step=0.0,
                total_reward=5.0,
                terminated=False,
                truncated=False,
                step_count=20,
            ),
            create_test_public_state(
                map_id=4,
                player_x=15,
                player_y=12,
                badges=1,
                in_battle=False,
                party_level=11,
                party_hp_current=40,
                party_hp_max=40,
                party_xp=1200,
                step_count=20,
            ),
        )

        env = PokemonRedEnvironment()
        obs = await env.checkpoint()

        assert "engine_snapshot_data" in obs
        assert obs["step_count"] == 20
        assert obs["total_reward"] == 5.0

    @pytest.mark.asyncio
    async def test_observation_callable(self):
        """Test observation callable functionality"""
        obs_callable = PokemonRedObservationCallable()

        priv_state = PokemonRedPrivateState(
            reward_last_step=0.1,
            total_reward=2.5,
            terminated=False,
            truncated=False,
            step_count=25,
        )

        pub_state = create_test_public_state(
            map_id=5,
            player_x=20,
            player_y=15,
            badges=3,  # 2 badges set
            in_battle=True,
            party_level=15,
            party_hp_current=25,
            party_hp_max=50,
            party_xp=5000,
            step_count=25,
            error_info="Test error",
        )

        obs = await obs_callable.get_observation(pub_state, priv_state)

        assert obs["position"] == "Map05:(20,15)"
        assert obs["badges_earned"] == 2  # bin(3).count('1')
        assert obs["badges_bitfield"] == 3
        assert obs["hp_status"] == "HP: 25/50 (50%)"
        assert obs["party_level"] == 15
        assert obs["in_battle"] is True
        assert obs["step_count"] == 25
        assert obs["total_reward"] == 2.5
        assert obs["error"] == "Test error"
327
+
328
+
329
class TestPressButtonTool:
    """Exercise ``PressButtonTool`` against a mocked engine."""

    @pytest.fixture
    def mock_engine(self):
        """Provide a mock engine whose ``_step_engine`` yields a step-1 state pair."""
        stub = Mock()
        private_state = PokemonRedPrivateState(
            reward_last_step=0.0,
            total_reward=0.0,
            terminated=False,
            truncated=False,
            step_count=1,
        )
        public_state = create_test_public_state(
            map_id=3,
            player_x=10,
            player_y=8,
            badges=0,
            in_battle=False,
            party_level=10,
            party_hp_current=35,
            party_hp_max=35,
            party_xp=1000,
            step_count=1,
        )
        stub._step_engine = AsyncMock(return_value=(private_state, public_state))
        return stub

    @pytest.mark.asyncio
    async def test_press_button_tool_success(self, mock_engine):
        """A well-formed call succeeds and forwards its args to the engine."""
        press = PressButtonTool(mock_engine)
        request = EnvToolCall(tool="press_button", args={"button": "A", "frames": 2})

        outcome = await press(request)

        assert outcome.ok is True
        assert "public" in outcome.payload
        assert "private" in outcome.payload
        mock_engine._step_engine.assert_called_once_with({"button": "A", "frames": 2})

    @pytest.mark.asyncio
    async def test_press_button_tool_invalid_args(self, mock_engine):
        """A call without the ``button`` argument reports a failure."""
        press = PressButtonTool(mock_engine)
        mock_engine._create_states.return_value = (Mock(), Mock())

        # No "button" key: only the frame count is supplied.
        request = EnvToolCall(tool="press_button", args={"frames": 1})
        outcome = await press(request)

        assert outcome.ok is False
        assert outcome.error is not None

    @pytest.mark.asyncio
    async def test_press_button_tool_engine_error(self, mock_engine):
        """An exception raised by the engine surfaces in the result's error."""
        press = PressButtonTool(mock_engine)
        mock_engine._step_engine.side_effect = Exception("Engine error")
        mock_engine._create_states.return_value = (Mock(), Mock())

        request = EnvToolCall(tool="press_button", args={"button": "A"})
        outcome = await press(request)

        assert outcome.ok is False
        assert "Engine error" in outcome.error
399
+
400
+
401
+ # Helper function to create properly structured PokemonRedPublicState
402
def create_test_public_state(
    map_id: int = 3,
    player_x: int = 10,
    player_y: int = 8,
    badges: int = 0,
    in_battle: bool = False,
    party_level: int = 10,
    party_hp_current: int = 35,
    party_hp_max: int = 35,
    party_xp: int = 1000,
    step_count: int = 0,
    error_info: str = None,
) -> PokemonRedPublicState:
    """Create a properly structured PokemonRedPublicState for testing.

    Args:
        map_id: Map identifier stored in the world state.
        player_x: Player x coordinate stored in the world state.
        player_y: Player y coordinate stored in the world state.
        badges: Badge bitfield; the badge count is derived from its set bits.
        in_battle: Battle flag stored in the system state.
        party_level: Level of the single party member; a value of 0 or less
            produces an empty party.
        party_hp_current: Current HP of the party member.
        party_hp_max: Maximum HP of the party member.
        party_xp: Experience points of the party member.
        step_count: Step counter stored in the progress state.
        error_info: Optional error text attached to the public state.

    Returns:
        A ``PokemonRedPublicState`` with an empty inventory and at most one
        party member.
    """
    # Create structured components
    world = GameWorldState(map_id=map_id, player_x=player_x, player_y=player_y)

    progress = PlayerProgressState(
        badges=badges,
        # ``badges`` is a bitfield (3 means two badges), so count its set bits
        # instead of reusing the raw value as the count.
        badge_count=bin(badges).count("1"),
        money=3000,
        step_count=step_count,
    )

    system = GameSystemState(
        in_battle=in_battle,
        battle_outcome=0,
        menu_state=1,
        text_box_active=False,
        warp_flag=207,
    )

    # Create party if stats are provided
    party = []
    if party_level > 0:
        pokemon = PokemonData(
            species_id=25,  # Pikachu
            level=party_level,
            hp_current=party_hp_current,
            hp_max=party_hp_max,
            xp=party_xp,
            # Guard against a zero max HP to avoid dividing by zero.
            hp_percentage=party_hp_current / party_hp_max * 100.0 if party_hp_max > 0 else 0.0,
        )
        party.append(pokemon)

    return PokemonRedPublicState(
        world=world,
        progress=progress,
        party=party,
        inventory=[],
        system=system,
        error_info=error_info,
    )
@@ -0,0 +1,227 @@
1
+ import pytest
2
+ import asyncio
3
+ import uuid
4
+
5
+ from synth_ai.environments.examples.red.environment import (
6
+ PokemonRedEnvironment,
7
+ PokemonRedPublicState,
8
+ PokemonRedPrivateState,
9
+ )
10
+ from synth_ai.environments.environment.shared_engine import (
11
+ GetObservationCallable,
12
+ InternalObservation,
13
+ )
14
+ from synth_ai.environments.examples.red.taskset import PokemonRedTaskInstance
15
+ from synth_ai.environments.tasks.core import Impetus, Intent, TaskInstanceMetadata
16
+ from synth_ai.environments.environment.tools import EnvToolCall
17
+
18
+
19
class PressButtonCall(EnvToolCall):
    """``EnvToolCall`` pre-configured to target the ``press_button`` tool."""

    def __init__(self, button: str, frames: int = 1):
        """Build the call.

        Args:
            button: Name of the button to press.
            frames: Value sent as the ``frames`` argument (defaults to 1).
        """
        call_args = {"button": button, "frames": frames}
        super().__init__(tool="press_button", args=call_args)
24
+
25
+
26
class ExplorationObservationCallable(GetObservationCallable):
    """Observation builder used by the exploration test.

    Bundles the public/private state together with a one-line text summary
    and whatever is currently stored in ``screen_buffer``.
    """

    def __init__(self):
        # No screen data is captured up front; the attribute is passed
        # through as-is in every observation.
        self.screen_buffer = None

    async def get_observation(
        self, pub: PokemonRedPublicState, priv: PokemonRedPrivateState
    ) -> InternalObservation:
        """Return the observation payload for the given states.

        Raises:
            RuntimeError: If either ``pub`` or ``priv`` is ``None``.
        """
        both_present = pub is not None and priv is not None
        if not both_present:
            raise RuntimeError("Missing public or private state in get_observation")

        summary = (
            f"Step: {pub.step_count}, Position: ({pub.player_x}, {pub.player_y}), Map: {pub.map_id}"
        )

        observation = {
            "public": pub,
            "private": priv,
            "formatted_obs": summary,
            "screen_buffer": self.screen_buffer,
        }
        return observation
48
+
49
+
50
def _partition_button_results(results):
    """Split per-button results into (effective, ineffective, errored) lists.

    The three groups are mutually exclusive: a button whose step returned an
    error is reported only as errored, never as ineffective.
    """
    effective, ineffective, errored = [], [], []
    for btn, outcome in results.items():
        if "error" in outcome:
            errored.append(btn)
        elif outcome.get("effective", False):
            effective.append(btn)
        else:
            ineffective.append(btn)
    return effective, ineffective, errored


async def test_exploration_when_stuck():
    """
    Test what happens when we try different buttons in the initial game state.

    Presses every button once, in sequence, against a freshly initialized
    environment and records which presses changed the observed public state
    (position, map, and - when present - ``party_level`` / ``badges``).
    A human-readable report is printed along the way.

    Returns:
        dict: Maps each button name to either ``{"error": ...}`` when the step
        reported an error, or a dict with the keys ``position_changed``,
        ``map_changed``, ``state_changes`` and ``effective``.

    NOTE(review): this coroutine is awaited by ``test_exploration_strategy``;
    because of its ``test_`` prefix pytest may also try to collect it
    directly - confirm that is intended.
    """
    print("\n" + "=" * 80)
    print("EXPLORATION STRATEGY TEST - FINDING AVAILABLE ACTIONS")
    print("=" * 80)

    # Create a task instance
    task_metadata = TaskInstanceMetadata()
    inst = PokemonRedTaskInstance(
        id=uuid.uuid4(),
        impetus=Impetus(instructions="Explore available actions in initial state."),
        intent=Intent(
            rubric={"goal": "Find working actions"},
            gold_trajectories=None,
            gold_state_diff={},
        ),
        metadata=task_metadata,
        is_reproducible=True,
        initial_engine_snapshot=None,
    )

    exploration_obs = ExplorationObservationCallable()
    env = PokemonRedEnvironment(inst, custom_step_obs=exploration_obs)

    try:
        # Initialize environment
        print("\n[DEBUG] Initializing environment...")
        obs_payload = await env.initialize()

        if "error" in obs_payload:
            pytest.fail(f"Environment initialization failed: {obs_payload['error']}")

        print("[DEBUG] Environment initialized successfully")

        # Get initial state
        initial_pub = obs_payload["public"]
        initial_position = (initial_pub.player_x, initial_pub.player_y)
        initial_map_id = initial_pub.map_id

        print(f"[DEBUG] Initial position: {initial_position}")
        print(f"[DEBUG] Initial map ID: {initial_map_id}")

        # Test all available buttons systematically
        buttons_to_test = ["A", "B", "UP", "DOWN", "LEFT", "RIGHT", "START", "SELECT"]

        results = {}

        for button in buttons_to_test:
            print(f"\n--- Testing {button} button ---")

            # Get state before button press
            before_pub = obs_payload["public"]
            before_position = (before_pub.player_x, before_pub.player_y)
            before_map = before_pub.map_id

            print(f"Before {button}: pos={before_position}, map={before_map}")

            # Press the button
            step_result = await env.step([[PressButtonCall(button)]])

            if "error" in step_result:
                print(f"[ERROR] {button} button failed: {step_result['error']}")
                results[button] = {"error": step_result["error"]}
                # Keep the previous observation as the baseline for the next button.
                continue

            # Check state after button press
            after_pub = step_result["public"]
            after_position = (after_pub.player_x, after_pub.player_y)
            after_map = after_pub.map_id

            print(f"After {button}: pos={after_position}, map={after_map}")

            # Analyze what changed
            position_changed = after_position != before_position
            map_changed = after_map != before_map

            state_changes = []
            if position_changed:
                state_changes.append(f"position: {before_position} -> {after_position}")
            if map_changed:
                state_changes.append(f"map: {before_map} -> {after_map}")

            # Optional attributes are compared only when both states expose them.
            for attr in ("party_level", "badges"):
                if hasattr(before_pub, attr) and hasattr(after_pub, attr):
                    old_value = getattr(before_pub, attr)
                    new_value = getattr(after_pub, attr)
                    if old_value != new_value:
                        state_changes.append(f"{attr}: {old_value} -> {new_value}")

            results[button] = {
                "position_changed": position_changed,
                "map_changed": map_changed,
                "state_changes": state_changes,
                "effective": len(state_changes) > 0,
            }

            if state_changes:
                print(f"[SUCCESS] {button} caused changes: {', '.join(state_changes)}")
            else:
                print(f"[NO EFFECT] {button} had no visible effect")

            # Update obs_payload for next test
            obs_payload = step_result

        # Analysis and recommendations
        print("\n" + "=" * 80)
        print("EXPLORATION RESULTS AND RECOMMENDATIONS")
        print("=" * 80)

        effective_buttons, ineffective_buttons, error_buttons = _partition_button_results(
            results
        )

        print(f"\n✅ EFFECTIVE BUTTONS ({len(effective_buttons)}): {', '.join(effective_buttons)}")
        for btn in effective_buttons:
            changes = results[btn]["state_changes"]
            print(f" {btn}: {', '.join(changes)}")

        print(
            f"\n❌ INEFFECTIVE BUTTONS ({len(ineffective_buttons)}): {', '.join(ineffective_buttons)}"
        )

        if error_buttons:
            print(f"\n🚫 ERROR BUTTONS ({len(error_buttons)}): {', '.join(error_buttons)}")

        # Recommendations
        print("\n💡 RECOMMENDATIONS:")
        if effective_buttons:
            print(f" - Agent should prioritize: {', '.join(effective_buttons[:3])}")
            print(" - These buttons cause state changes and may lead to progress")
        else:
            print(" - No buttons caused state changes in this initial position")
            print(" - May need to investigate game state or save file")

        if "LEFT" in effective_buttons or "RIGHT" in effective_buttons:
            print(" - Movement is working - agent should explore the area")

        if "A" not in effective_buttons:
            print(" - 'A' button ineffective at this position - agent needs to move first")

        return results

    except Exception as e:
        # Surface the failure in the captured output, then let it propagate.
        print(f"[ERROR] Test failed with exception: {e}")
        raise
214
+
215
+
216
@pytest.mark.asyncio
async def test_exploration_strategy():
    """Run the button-exploration diagnostic and sanity-check its result.

    The exploration helper prints its findings; this test only verifies that
    it ran to completion and produced a per-button result mapping, so it
    stays a diagnostic rather than a behavioral assertion about the game.
    """
    results = await test_exploration_when_stuck()

    # Every tested button gets an entry (either an error record or a change
    # summary), so an empty or non-dict result means the helper broke.
    assert isinstance(results, dict) and results, (
        "Exploration strategy test produced no per-button results"
    )
223
+
224
+
225
if __name__ == "__main__":
    # Script entry point: drive the async exploration test on a fresh
    # asyncio event loop so the diagnostic output can be viewed without
    # going through the pytest runner.
    asyncio.run(test_exploration_strategy())