synth-ai 0.1.9__py3-none-any.whl → 0.2.1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (266) hide show
  1. synth_ai/__init__.py +28 -2
  2. synth_ai/core/system.py +4 -0
  3. synth_ai/environments/__init__.py +35 -0
  4. synth_ai/environments/environment/__init__.py +1 -0
  5. synth_ai/environments/environment/artifacts/__init__.py +1 -0
  6. synth_ai/environments/environment/artifacts/base.py +50 -0
  7. synth_ai/environments/environment/core.py +22 -0
  8. synth_ai/environments/environment/db/__init__.py +1 -0
  9. synth_ai/environments/environment/db/sqlite.py +45 -0
  10. synth_ai/environments/environment/registry.py +24 -0
  11. synth_ai/environments/environment/resources/sqlite.py +46 -0
  12. synth_ai/environments/environment/results.py +1 -0
  13. synth_ai/environments/environment/rewards/__init__.py +1 -0
  14. synth_ai/environments/environment/rewards/core.py +28 -0
  15. synth_ai/environments/environment/shared_engine.py +26 -0
  16. synth_ai/environments/environment/tools/__init__.py +34 -0
  17. synth_ai/environments/examples/__init__.py +1 -0
  18. synth_ai/environments/examples/crafter_classic/__init__.py +8 -0
  19. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +58 -0
  20. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +152 -0
  21. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +1194 -0
  22. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +51 -0
  23. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +872 -0
  24. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1412 -0
  25. synth_ai/environments/examples/crafter_classic/agent_demos/test_crafter_react_agent.py +1110 -0
  26. synth_ai/environments/examples/crafter_classic/config_logging.py +111 -0
  27. synth_ai/environments/examples/crafter_classic/engine.py +502 -0
  28. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +63 -0
  29. synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +5 -0
  30. synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +74 -0
  31. synth_ai/environments/examples/crafter_classic/environment.py +255 -0
  32. synth_ai/environments/examples/crafter_classic/taskset.py +228 -0
  33. synth_ai/environments/examples/enron/agent_demos/test_synth_react.py +535 -0
  34. synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +156 -0
  35. synth_ai/environments/examples/enron/art_helpers/local_email_db.py +280 -0
  36. synth_ai/environments/examples/enron/art_helpers/types_enron.py +24 -0
  37. synth_ai/environments/examples/enron/engine.py +291 -0
  38. synth_ai/environments/examples/enron/environment.py +165 -0
  39. synth_ai/environments/examples/enron/taskset.py +112 -0
  40. synth_ai/environments/examples/enron/units/keyword_stats.py +111 -0
  41. synth_ai/environments/examples/enron/units/test_email_index.py +8 -0
  42. synth_ai/environments/examples/minigrid/__init__.py +48 -0
  43. synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +1188 -0
  44. synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +47 -0
  45. synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +562 -0
  46. synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +220 -0
  47. synth_ai/environments/examples/minigrid/agent_demos/test_minigrid_react_agent.py +393 -0
  48. synth_ai/environments/examples/minigrid/engine.py +589 -0
  49. synth_ai/environments/examples/minigrid/environment.py +274 -0
  50. synth_ai/environments/examples/minigrid/environment_mapping.py +242 -0
  51. synth_ai/environments/examples/minigrid/puzzle_loader.py +416 -0
  52. synth_ai/environments/examples/minigrid/taskset.py +583 -0
  53. synth_ai/environments/examples/minigrid/units/test_action_behavior.py +226 -0
  54. synth_ai/environments/examples/minigrid/units/test_debug_messages.py +83 -0
  55. synth_ai/environments/examples/minigrid/units/test_exploration.py +120 -0
  56. synth_ai/environments/examples/minigrid/units/test_minigrid_engine.py +214 -0
  57. synth_ai/environments/examples/minigrid/units/test_minigrid_environment.py +238 -0
  58. synth_ai/environments/examples/minigrid/units/test_minigrid_environment_mapping.py +301 -0
  59. synth_ai/environments/examples/minigrid/units/test_minigrid_taskset.py +210 -0
  60. synth_ai/environments/examples/nethack/__init__.py +7 -0
  61. synth_ai/environments/examples/nethack/achievements.py +337 -0
  62. synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +981 -0
  63. synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +74 -0
  64. synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +832 -0
  65. synth_ai/environments/examples/nethack/agent_demos/test_nethack_react_agent.py +1112 -0
  66. synth_ai/environments/examples/nethack/engine.py +738 -0
  67. synth_ai/environments/examples/nethack/environment.py +255 -0
  68. synth_ai/environments/examples/nethack/helpers/__init__.py +42 -0
  69. synth_ai/environments/examples/nethack/helpers/action_mapping.py +301 -0
  70. synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +401 -0
  71. synth_ai/environments/examples/nethack/helpers/observation_utils.py +433 -0
  72. synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +201 -0
  73. synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +268 -0
  74. synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +308 -0
  75. synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +430 -0
  76. synth_ai/environments/examples/nethack/taskset.py +323 -0
  77. synth_ai/environments/examples/nethack/units/test_nethack_engine.py +277 -0
  78. synth_ai/environments/examples/nethack/units/test_nethack_environment.py +281 -0
  79. synth_ai/environments/examples/nethack/units/test_nethack_taskset.py +213 -0
  80. synth_ai/environments/examples/nethack/units/test_recording.py +307 -0
  81. synth_ai/environments/examples/red/__init__.py +7 -0
  82. synth_ai/environments/examples/red/agent_demos/__init__.py +1 -0
  83. synth_ai/environments/examples/red/agent_demos/test_synth_react.py +1471 -0
  84. synth_ai/environments/examples/red/config_logging.py +110 -0
  85. synth_ai/environments/examples/red/engine.py +693 -0
  86. synth_ai/environments/examples/red/engine_helpers/__init__.py +1 -0
  87. synth_ai/environments/examples/red/engine_helpers/memory_map.py +28 -0
  88. synth_ai/environments/examples/red/engine_helpers/reward_components.py +275 -0
  89. synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +142 -0
  90. synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +56 -0
  91. synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +283 -0
  92. synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +149 -0
  93. synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +137 -0
  94. synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +56 -0
  95. synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +330 -0
  96. synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +120 -0
  97. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +558 -0
  98. synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +312 -0
  99. synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +147 -0
  100. synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +246 -0
  101. synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +367 -0
  102. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +139 -0
  103. synth_ai/environments/examples/red/environment.py +235 -0
  104. synth_ai/environments/examples/red/taskset.py +77 -0
  105. synth_ai/environments/examples/red/test_fixes.py +125 -0
  106. synth_ai/environments/examples/red/test_fixes_mock.py +148 -0
  107. synth_ai/environments/examples/red/units/__init__.py +1 -0
  108. synth_ai/environments/examples/red/units/test_basic_functionality.py +97 -0
  109. synth_ai/environments/examples/red/units/test_button_press_requirements.py +217 -0
  110. synth_ai/environments/examples/red/units/test_engine.py +192 -0
  111. synth_ai/environments/examples/red/units/test_environment.py +455 -0
  112. synth_ai/environments/examples/red/units/test_exploration_strategy.py +227 -0
  113. synth_ai/environments/examples/red/units/test_integration.py +217 -0
  114. synth_ai/environments/examples/red/units/test_memory_extraction.py +111 -0
  115. synth_ai/environments/examples/red/units/test_menu_bug_reproduction.py +1100 -0
  116. synth_ai/environments/examples/red/units/test_movement_debug.py +255 -0
  117. synth_ai/environments/examples/red/units/test_pokemon_mcts_debug.py +163 -0
  118. synth_ai/environments/examples/red/units/test_pokemon_mcts_verbose.py +117 -0
  119. synth_ai/environments/examples/red/units/test_red_basic.py +145 -0
  120. synth_ai/environments/examples/red/units/test_red_comprehensive.py +323 -0
  121. synth_ai/environments/examples/red/units/test_retry_movement.py +195 -0
  122. synth_ai/environments/examples/red/units/test_reward_components.py +186 -0
  123. synth_ai/environments/examples/red/units/test_rom_integration.py +260 -0
  124. synth_ai/environments/examples/red/units/test_taskset.py +116 -0
  125. synth_ai/environments/examples/red/units/test_tree.py +448 -0
  126. synth_ai/environments/examples/sokoban/__init__.py +1 -0
  127. synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +900 -0
  128. synth_ai/environments/examples/sokoban/agent_demos/test_dspy_react.py +1 -0
  129. synth_ai/environments/examples/sokoban/agent_demos/test_sokoban_react_agent.py +498 -0
  130. synth_ai/environments/examples/sokoban/agent_demos/test_synth_lats.py +1 -0
  131. synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_locally.py +748 -0
  132. synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_service.py +296 -0
  133. synth_ai/environments/examples/sokoban/engine.py +675 -0
  134. synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +1 -0
  135. synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +656 -0
  136. synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +17 -0
  137. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +3 -0
  138. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +129 -0
  139. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +370 -0
  140. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +331 -0
  141. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +305 -0
  142. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +66 -0
  143. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +114 -0
  144. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +122 -0
  145. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +394 -0
  146. synth_ai/environments/examples/sokoban/environment.py +228 -0
  147. synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +438 -0
  148. synth_ai/environments/examples/sokoban/puzzle_loader.py +311 -0
  149. synth_ai/environments/examples/sokoban/taskset.py +425 -0
  150. synth_ai/environments/examples/sokoban/units/astar_common.py +94 -0
  151. synth_ai/environments/examples/sokoban/units/test_building_task_set.py +49 -0
  152. synth_ai/environments/examples/sokoban/units/test_false_positive.py +120 -0
  153. synth_ai/environments/examples/sokoban/units/test_simple_run_through_environment.py +119 -0
  154. synth_ai/environments/examples/sokoban/units/test_sokoban_environment.py +98 -0
  155. synth_ai/environments/examples/sokoban/units/test_tree.py +364 -0
  156. synth_ai/environments/examples/tictactoe/__init__.py +1 -0
  157. synth_ai/environments/examples/tictactoe/agent_demos/test_synth_react.py +266 -0
  158. synth_ai/environments/examples/tictactoe/agent_demos/test_tictactoe_react_agent.py +470 -0
  159. synth_ai/environments/examples/tictactoe/engine.py +368 -0
  160. synth_ai/environments/examples/tictactoe/environment.py +239 -0
  161. synth_ai/environments/examples/tictactoe/taskset.py +214 -0
  162. synth_ai/environments/examples/tictactoe/units/test_tictactoe_engine.py +393 -0
  163. synth_ai/environments/examples/tictactoe/units/test_tictactoe_environment.py +493 -0
  164. synth_ai/environments/examples/tictactoe/units/test_tictactoe_taskset.py +191 -0
  165. synth_ai/environments/examples/verilog/__init__.py +10 -0
  166. synth_ai/environments/examples/verilog/agent_demos/test_synth_react.py +520 -0
  167. synth_ai/environments/examples/verilog/engine.py +328 -0
  168. synth_ai/environments/examples/verilog/environment.py +349 -0
  169. synth_ai/environments/examples/verilog/taskset.py +418 -0
  170. synth_ai/environments/examples/verilog/units/test_verilog_engine.py +466 -0
  171. synth_ai/environments/examples/verilog/units/test_verilog_environment.py +585 -0
  172. synth_ai/environments/examples/verilog/units/test_verilog_integration.py +383 -0
  173. synth_ai/environments/examples/verilog/units/test_verilog_taskset.py +457 -0
  174. synth_ai/environments/reproducibility/core.py +42 -0
  175. synth_ai/environments/reproducibility/tree.py +364 -0
  176. synth_ai/environments/service/app.py +78 -0
  177. synth_ai/environments/service/core_routes.py +775 -0
  178. synth_ai/environments/service/external_registry.py +57 -0
  179. synth_ai/environments/service/registry.py +9 -0
  180. synth_ai/environments/stateful/__init__.py +1 -0
  181. synth_ai/environments/stateful/core.py +28 -0
  182. synth_ai/environments/stateful/engine.py +21 -0
  183. synth_ai/environments/stateful/state.py +7 -0
  184. synth_ai/environments/tasks/api.py +19 -0
  185. synth_ai/environments/tasks/core.py +78 -0
  186. synth_ai/environments/tasks/filters.py +39 -0
  187. synth_ai/environments/tasks/utils.py +89 -0
  188. synth_ai/environments/v0_observability/history.py +3 -0
  189. synth_ai/environments/v0_observability/log.py +2 -0
  190. synth_ai/lm/caching/constants.py +1 -0
  191. synth_ai/{zyk/lms → lm}/caching/ephemeral.py +4 -8
  192. synth_ai/{zyk/lms → lm}/caching/handler.py +15 -15
  193. synth_ai/{zyk/lms → lm}/caching/initialize.py +2 -4
  194. synth_ai/{zyk/lms → lm}/caching/persistent.py +4 -10
  195. synth_ai/{zyk/lms → lm}/config.py +2 -1
  196. synth_ai/{zyk/lms → lm}/constants.py +2 -2
  197. synth_ai/{zyk/lms → lm}/core/all.py +10 -10
  198. synth_ai/{zyk/lms → lm}/core/main.py +57 -33
  199. synth_ai/{zyk/lms → lm}/core/vendor_clients.py +12 -10
  200. synth_ai/lm/cost/monitor.py +1 -0
  201. synth_ai/lm/cost/statefulness.py +1 -0
  202. synth_ai/lm/provider_support/__init__.py +8 -0
  203. synth_ai/lm/provider_support/anthropic.py +945 -0
  204. synth_ai/lm/provider_support/openai.py +1115 -0
  205. synth_ai/lm/provider_support/suppress_logging.py +31 -0
  206. synth_ai/{zyk/lms → lm}/structured_outputs/handler.py +58 -80
  207. synth_ai/{zyk/lms → lm}/structured_outputs/inject.py +6 -20
  208. synth_ai/{zyk/lms → lm}/structured_outputs/rehabilitate.py +6 -12
  209. synth_ai/{zyk/lms → lm}/vendors/core/anthropic_api.py +21 -30
  210. synth_ai/{zyk/lms → lm}/vendors/core/gemini_api.py +37 -32
  211. synth_ai/{zyk/lms → lm}/vendors/core/mistral_api.py +19 -28
  212. synth_ai/{zyk/lms → lm}/vendors/core/openai_api.py +26 -36
  213. synth_ai/{zyk/lms → lm}/vendors/openai_standard.py +29 -33
  214. synth_ai/{zyk/lms → lm}/vendors/retries.py +1 -1
  215. synth_ai/lm/vendors/supported/__init__.py +0 -0
  216. synth_ai/{zyk/lms → lm}/vendors/supported/custom_endpoint.py +131 -118
  217. synth_ai/{zyk/lms → lm}/vendors/supported/deepseek.py +4 -8
  218. synth_ai/{zyk/lms → lm}/vendors/supported/grok.py +6 -8
  219. synth_ai/{zyk/lms → lm}/vendors/supported/groq.py +1 -1
  220. synth_ai/{zyk/lms → lm}/vendors/supported/ollama.py +2 -2
  221. synth_ai/{zyk/lms → lm}/vendors/supported/openrouter.py +18 -16
  222. synth_ai/{zyk/lms → lm}/vendors/supported/together.py +1 -1
  223. synth_ai/tracing/__init__.py +0 -0
  224. synth_ai/tracing/abstractions.py +224 -0
  225. synth_ai/tracing/base_client.py +91 -0
  226. synth_ai/tracing/client_manager.py +131 -0
  227. synth_ai/tracing/config.py +140 -0
  228. synth_ai/tracing/context.py +146 -0
  229. synth_ai/tracing/decorators.py +679 -0
  230. synth_ai/tracing/events/__init__.py +0 -0
  231. synth_ai/tracing/events/manage.py +147 -0
  232. synth_ai/tracing/events/scope.py +86 -0
  233. synth_ai/tracing/events/store.py +227 -0
  234. synth_ai/tracing/immediate_client.py +152 -0
  235. synth_ai/tracing/local.py +18 -0
  236. synth_ai/tracing/log_client_base.py +74 -0
  237. synth_ai/tracing/retry_queue.py +187 -0
  238. synth_ai/tracing/trackers.py +515 -0
  239. synth_ai/tracing/upload.py +504 -0
  240. synth_ai/tracing/utils.py +9 -0
  241. synth_ai/zyk/__init__.py +28 -2
  242. synth_ai-0.2.1.dev0.dist-info/METADATA +349 -0
  243. synth_ai-0.2.1.dev0.dist-info/RECORD +261 -0
  244. synth_ai/zyk/lms/caching/constants.py +0 -1
  245. synth_ai/zyk/lms/cost/monitor.py +0 -1
  246. synth_ai/zyk/lms/cost/statefulness.py +0 -1
  247. synth_ai-0.1.9.dist-info/METADATA +0 -37
  248. synth_ai-0.1.9.dist-info/RECORD +0 -50
  249. /synth_ai/{zyk/lms/__init__.py → environments/reproducibility/helpers.py} +0 -0
  250. /synth_ai/{zyk/lms/caching → lm}/__init__.py +0 -0
  251. /synth_ai/{zyk/lms/core → lm/caching}/__init__.py +0 -0
  252. /synth_ai/{zyk/lms → lm}/caching/dbs.py +0 -0
  253. /synth_ai/{zyk/lms/cost → lm/core}/__init__.py +0 -0
  254. /synth_ai/{zyk/lms → lm}/core/exceptions.py +0 -0
  255. /synth_ai/{zyk/lms/structured_outputs → lm/cost}/__init__.py +0 -0
  256. /synth_ai/{zyk/lms/vendors → lm/structured_outputs}/__init__.py +0 -0
  257. /synth_ai/{zyk/lms → lm}/tools/__init__.py +0 -0
  258. /synth_ai/{zyk/lms → lm}/tools/base.py +0 -0
  259. /synth_ai/{zyk/lms/vendors/core → lm/vendors}/__init__.py +0 -0
  260. /synth_ai/{zyk/lms → lm}/vendors/base.py +0 -0
  261. /synth_ai/{zyk/lms/vendors/local → lm/vendors/core}/__init__.py +0 -0
  262. /synth_ai/{zyk/lms/vendors/supported → lm/vendors/local}/__init__.py +0 -0
  263. /synth_ai/{zyk/lms → lm}/vendors/local/ollama.py +0 -0
  264. {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/WHEEL +0 -0
  265. {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/licenses/LICENSE +0 -0
  266. {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,217 @@
1
+ import pytest
2
+ from synth_ai.environments.examples.red.environment import PokemonRedEnvironment
3
+ from synth_ai.environments.examples.red.taskset import INSTANCE as POKEMON_TASK
4
+ from synth_ai.environments.environment.tools import EnvToolCall
5
+
6
+
7
class TestPokemonRedIntegration:
    """Integration tests for Pokemon Red environment with REAL ROM.

    Every test builds its own ``PokemonRedEnvironment`` from ``POKEMON_TASK``
    and drives it through the async ``initialize`` / ``step`` / ``checkpoint``
    / ``terminate`` API, checking the shape of the returned observations.
    """

    @pytest.mark.asyncio
    async def test_full_workflow_real(self):
        """Test complete workflow from initialization to termination with REAL ROM"""
        # Initialize environment with real ROM
        env = PokemonRedEnvironment(POKEMON_TASK)

        # Test initialization
        obs = await env.initialize()
        assert "position" in obs
        assert "badges_earned" in obs
        # Note: badges_earned might be 0 or could have some initial value from ROM
        assert isinstance(obs["badges_earned"], int)

        # Test series of actions
        actions = [
            EnvToolCall(tool="press_button", args={"button": "RIGHT", "frames": 1}),
            EnvToolCall(tool="press_button", args={"button": "UP", "frames": 2}),
            EnvToolCall(tool="press_button", args={"button": "A", "frames": 1}),
        ]

        for action in actions:
            obs = await env.step(action)
            assert "step_count" in obs
            assert "total_reward" in obs
            assert isinstance(obs["step_count"], int)
            assert isinstance(obs["total_reward"], float)

        # Test checkpointing
        checkpoint_obs = await env.checkpoint()
        assert "engine_snapshot_data" in checkpoint_obs

        # Test termination
        final_obs = await env.terminate()
        assert final_obs["terminated"] is True

    @pytest.mark.asyncio
    async def test_button_sequence_real(self):
        """Test sequence of different button presses with real ROM"""
        env = PokemonRedEnvironment(POKEMON_TASK)
        await env.initialize()

        # Test all basic buttons
        buttons = ["A", "B", "UP", "DOWN", "LEFT", "RIGHT", "START", "SELECT"]

        for i, button in enumerate(buttons):
            action = EnvToolCall(tool="press_button", args={"button": button, "frames": 1})
            obs = await env.step(action)

            # The step counter is expected to advance by exactly one per press.
            assert obs["step_count"] == i + 1
            assert "position" in obs
            assert "badges_earned" in obs
            assert "hp_status" in obs
            assert "party_level" in obs

    @pytest.mark.asyncio
    async def test_multiple_frame_actions_real(self):
        """Test actions with multiple frames using real ROM"""
        env = PokemonRedEnvironment(POKEMON_TASK)
        await env.initialize()

        # Test holding buttons for multiple frames
        action = EnvToolCall(tool="press_button", args={"button": "RIGHT", "frames": 5})
        obs = await env.step(action)

        assert obs["step_count"] == 1  # Should count as one step
        assert "position" in obs

        # Test another multi-frame action
        action = EnvToolCall(tool="press_button", args={"button": "A", "frames": 3})
        obs = await env.step(action)

        assert obs["step_count"] == 2

    @pytest.mark.asyncio
    async def test_state_consistency_real(self):
        """Test that game state remains consistent across steps with real ROM"""
        env = PokemonRedEnvironment(POKEMON_TASK)

        obs1 = await env.initialize()
        # The initial observation must already expose the fields that are
        # re-checked after every step below. (Previously these were read into
        # locals that were never used; the check is now explicit.)
        for key in ("position", "badges_earned", "hp_status"):
            assert key in obs1, f"Missing key in initial observation: {key}"

        # Take some actions
        for _ in range(5):
            action = EnvToolCall(tool="press_button", args={"button": "A", "frames": 1})
            obs = await env.step(action)

            # State should remain valid even if unchanged
            assert "position" in obs
            assert "badges_earned" in obs
            assert "hp_status" in obs
            assert isinstance(obs["badges_earned"], int)

    @pytest.mark.asyncio
    async def test_reward_accumulation_real(self):
        """Test that rewards accumulate properly with real ROM"""
        env = PokemonRedEnvironment(POKEMON_TASK)

        obs = await env.initialize()
        initial_reward = obs["total_reward"]

        # Take several steps and track reward changes
        for i in range(3):
            action = EnvToolCall(tool="press_button", args={"button": "DOWN", "frames": 1})
            obs = await env.step(action)

            # Reward should change (likely negative step penalty)
            assert obs["total_reward"] != initial_reward
            assert isinstance(obs["total_reward"], float)
            assert obs["step_count"] == i + 1

    @pytest.mark.asyncio
    async def test_checkpointing_real(self):
        """Test checkpointing functionality with real ROM"""
        env = PokemonRedEnvironment(POKEMON_TASK)

        # Initialize and take some steps
        await env.initialize()
        action = EnvToolCall(tool="press_button", args={"button": "RIGHT", "frames": 1})
        await env.step(action)

        # Create checkpoint
        checkpoint_obs = await env.checkpoint()

        assert "engine_snapshot_data" in checkpoint_obs
        snapshot = checkpoint_obs["engine_snapshot_data"]
        assert "state_data" in snapshot
        assert "total_reward" in snapshot
        assert "step_count" in snapshot
        assert isinstance(snapshot["total_reward"], float)
        assert isinstance(snapshot["step_count"], int)

    @pytest.mark.asyncio
    async def test_invalid_button_handling_real(self):
        """Test handling of invalid buttons with real ROM"""
        env = PokemonRedEnvironment(POKEMON_TASK)
        await env.initialize()

        # Try invalid button
        action = EnvToolCall(tool="press_button", args={"button": "INVALID", "frames": 1})

        # Should handle gracefully and return valid observation
        obs = await env.step(action)
        assert "position" in obs
        assert "step_count" in obs

    @pytest.mark.asyncio
    async def test_observation_format_real(self):
        """Test that observations have expected format with real ROM"""
        env = PokemonRedEnvironment(POKEMON_TASK)

        obs = await env.initialize()

        # Check required observation keys (based on actual observation format)
        required_keys = [
            "position",
            "badges_earned",
            "badges_bitfield",
            "hp_status",
            "party_level",
            "party_xp",
            "in_battle",
            "step_count",
            "reward_last_step",
            "total_reward",
            "terminated",
        ]

        for key in required_keys:
            assert key in obs, f"Missing key: {key}"

        # Check types
        assert isinstance(obs["position"], str)
        assert isinstance(obs["badges_earned"], int)
        assert isinstance(obs["badges_bitfield"], int)
        assert isinstance(obs["hp_status"], str)
        assert isinstance(obs["party_level"], int)
        assert isinstance(obs["party_xp"], int)
        assert isinstance(obs["in_battle"], bool)
        assert isinstance(obs["step_count"], int)
        assert isinstance(obs["reward_last_step"], float)
        assert isinstance(obs["total_reward"], float)
        assert isinstance(obs["terminated"], bool)

    @pytest.mark.asyncio
    async def test_rom_memory_integration_real(self):
        """Test that we can access and read ROM memory consistently"""
        env = PokemonRedEnvironment(POKEMON_TASK)
        await env.initialize()

        # Should be able to access engine and emulator
        assert env.engine is not None
        assert env.engine.emulator is not None
        assert hasattr(env.engine.emulator, "memory")

        # Address read throughout this test; the variable names below treat it
        # as the badge-flags byte.
        badge_flags_addr = 0xD356

        # Memory reads should be consistent
        memory = env.engine.emulator.memory
        badge_flags1 = memory[badge_flags_addr]
        badge_flags2 = memory[badge_flags_addr]
        assert badge_flags1 == badge_flags2  # Should be deterministic

        # After taking an action, memory should still be accessible
        action = EnvToolCall(tool="press_button", args={"button": "A", "frames": 1})
        await env.step(action)

        badge_flags3 = memory[badge_flags_addr]
        assert isinstance(badge_flags3, int)  # Should still be valid
@@ -0,0 +1,111 @@
1
+ from synth_ai.environments.examples.red.engine_helpers.state_extraction import (
2
+ extract_game_state,
3
+ get_badge_count,
4
+ format_position,
5
+ format_hp_status,
6
+ get_byte,
7
+ get_word,
8
+ get_3byte_int,
9
+ )
10
+ from synth_ai.environments.examples.red.engine_helpers.memory_map import *
11
+
12
+
13
class TestMemoryExtraction:
    """Unit tests for the raw-memory helpers in ``state_extraction``."""

    def test_get_byte(self):
        """Each in-range offset yields its byte; an out-of-range offset yields 0."""
        buf = bytearray([0x00, 0x42, 0xFF, 0x80])
        in_range = ((0, 0x00), (1, 0x42), (2, 0xFF), (3, 0x80))
        for offset, expected in in_range:
            assert get_byte(buf, offset) == expected

        # Reading past the end of the buffer is expected to return 0.
        assert get_byte(buf, 100) == 0

    def test_get_word(self):
        """Two consecutive bytes are combined low-byte-first into a 16-bit value."""
        buf = bytearray([0x34, 0x12, 0xFF, 0x00])
        for offset, expected in ((0, 0x1234), (2, 0x00FF)):
            assert get_word(buf, offset) == expected

    def test_get_3byte_int(self):
        """Three consecutive bytes are combined low-byte-first (used for XP values)."""
        buf = bytearray([0x56, 0x34, 0x12, 0x00])
        result = get_3byte_int(buf, 0)
        assert result == 0x123456

    def test_extract_game_state(self):
        """A populated 64KB buffer is decoded into the expected state fields."""
        # Mock Game Boy memory, zero-filled.
        ram = bytearray(0x10000)

        # (address, value) pairs written in order; multi-byte fields are
        # stored low byte first.
        writes = [
            (MAP_ID, 0x03),  # Pewter City
            (PLAYER_X, 10),
            (PLAYER_Y, 8),
            (BADGE_FLAGS, 0x01),  # Boulder Badge
            (IN_BATTLE_FLAG, 0),  # not in battle
            (PARTY_COUNT, 1),  # one Pokemon in party
            (PARTY_LEVELS, 12),
            (PARTY_HP_CURRENT, 35),
            (PARTY_HP_CURRENT + 1, 0),
            (PARTY_HP_MAX, 42),
            (PARTY_HP_MAX + 1, 0),
            (PARTY_XP, 0x40),
            (PARTY_XP + 1, 0x42),
            (PARTY_XP + 2, 0x0F),
        ]
        for address, value in writes:
            ram[address] = value

        state = extract_game_state(ram)

        expected_fields = [
            ("map_id", 0x03),
            ("player_x", 10),
            ("player_y", 8),
            ("badges", 0x01),
            ("in_battle", False),
            ("party_level", 12),
            ("party_hp_current", 35),
            ("party_hp_max", 42),
            ("party_xp", 0x0F4240),  # 1000000 in decimal
        ]
        for field, expected in expected_fields:
            assert state[field] == expected

    def test_get_badge_count(self):
        """The badge count equals the number of set bits in the bitfield."""
        cases = {
            0x00: 0,  # no badges
            0x01: 1,  # Boulder Badge
            0x03: 2,  # Boulder + Cascade
            0xFF: 8,  # all badges
            0x55: 4,  # every other badge
        }
        for bitfield, expected in cases.items():
            assert get_badge_count(bitfield) == expected

    def test_format_position(self):
        """Positions render as ``Map<hex id>:(x,y)``."""
        cases = [
            ((10, 8, 3), "Map03:(10,8)"),
            ((0, 0, 255), "MapFF:(0,0)"),
        ]
        for args, expected in cases:
            assert format_position(*args) == expected

    def test_format_hp_status(self):
        """HP renders as current/max with a percentage, or Unknown when max is 0."""
        cases = [
            ((35, 50), "HP: 35/50 (70%)"),
            ((0, 35), "HP: 0/35 (0%)"),
            ((35, 35), "HP: 35/35 (100%)"),
            ((10, 0), "HP: Unknown"),
        ]
        for args, expected in cases:
            assert format_hp_status(*args) == expected

    def test_memory_addresses_valid(self):
        """Every listed memory-map constant lies within 0x8000-0xFFFF."""
        lowest, highest = 0x8000, 0xFFFF
        candidates = (
            BADGE_FLAGS,
            MAP_ID,
            PLAYER_X,
            PLAYER_Y,
            IN_BATTLE_FLAG,
            BATTLE_OUTCOME,
            PARTY_LEVELS,
            PARTY_HP_CURRENT,
            PARTY_HP_MAX,
            PARTY_XP,
            INVENTORY_COUNT,
            INVENTORY_START,
            MENU_STATE,
            WARP_FLAG,
        )

        for addr in candidates:
            assert lowest <= addr <= highest, f"Address {hex(addr)} outside Game Boy RAM range"