synth-ai 0.1.9__py3-none-any.whl → 0.2.1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (266) hide show
  1. synth_ai/__init__.py +28 -2
  2. synth_ai/core/system.py +4 -0
  3. synth_ai/environments/__init__.py +35 -0
  4. synth_ai/environments/environment/__init__.py +1 -0
  5. synth_ai/environments/environment/artifacts/__init__.py +1 -0
  6. synth_ai/environments/environment/artifacts/base.py +50 -0
  7. synth_ai/environments/environment/core.py +22 -0
  8. synth_ai/environments/environment/db/__init__.py +1 -0
  9. synth_ai/environments/environment/db/sqlite.py +45 -0
  10. synth_ai/environments/environment/registry.py +24 -0
  11. synth_ai/environments/environment/resources/sqlite.py +46 -0
  12. synth_ai/environments/environment/results.py +1 -0
  13. synth_ai/environments/environment/rewards/__init__.py +1 -0
  14. synth_ai/environments/environment/rewards/core.py +28 -0
  15. synth_ai/environments/environment/shared_engine.py +26 -0
  16. synth_ai/environments/environment/tools/__init__.py +34 -0
  17. synth_ai/environments/examples/__init__.py +1 -0
  18. synth_ai/environments/examples/crafter_classic/__init__.py +8 -0
  19. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +58 -0
  20. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +152 -0
  21. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +1194 -0
  22. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +51 -0
  23. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +872 -0
  24. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1412 -0
  25. synth_ai/environments/examples/crafter_classic/agent_demos/test_crafter_react_agent.py +1110 -0
  26. synth_ai/environments/examples/crafter_classic/config_logging.py +111 -0
  27. synth_ai/environments/examples/crafter_classic/engine.py +502 -0
  28. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +63 -0
  29. synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +5 -0
  30. synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +74 -0
  31. synth_ai/environments/examples/crafter_classic/environment.py +255 -0
  32. synth_ai/environments/examples/crafter_classic/taskset.py +228 -0
  33. synth_ai/environments/examples/enron/agent_demos/test_synth_react.py +535 -0
  34. synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +156 -0
  35. synth_ai/environments/examples/enron/art_helpers/local_email_db.py +280 -0
  36. synth_ai/environments/examples/enron/art_helpers/types_enron.py +24 -0
  37. synth_ai/environments/examples/enron/engine.py +291 -0
  38. synth_ai/environments/examples/enron/environment.py +165 -0
  39. synth_ai/environments/examples/enron/taskset.py +112 -0
  40. synth_ai/environments/examples/enron/units/keyword_stats.py +111 -0
  41. synth_ai/environments/examples/enron/units/test_email_index.py +8 -0
  42. synth_ai/environments/examples/minigrid/__init__.py +48 -0
  43. synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +1188 -0
  44. synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +47 -0
  45. synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +562 -0
  46. synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +220 -0
  47. synth_ai/environments/examples/minigrid/agent_demos/test_minigrid_react_agent.py +393 -0
  48. synth_ai/environments/examples/minigrid/engine.py +589 -0
  49. synth_ai/environments/examples/minigrid/environment.py +274 -0
  50. synth_ai/environments/examples/minigrid/environment_mapping.py +242 -0
  51. synth_ai/environments/examples/minigrid/puzzle_loader.py +416 -0
  52. synth_ai/environments/examples/minigrid/taskset.py +583 -0
  53. synth_ai/environments/examples/minigrid/units/test_action_behavior.py +226 -0
  54. synth_ai/environments/examples/minigrid/units/test_debug_messages.py +83 -0
  55. synth_ai/environments/examples/minigrid/units/test_exploration.py +120 -0
  56. synth_ai/environments/examples/minigrid/units/test_minigrid_engine.py +214 -0
  57. synth_ai/environments/examples/minigrid/units/test_minigrid_environment.py +238 -0
  58. synth_ai/environments/examples/minigrid/units/test_minigrid_environment_mapping.py +301 -0
  59. synth_ai/environments/examples/minigrid/units/test_minigrid_taskset.py +210 -0
  60. synth_ai/environments/examples/nethack/__init__.py +7 -0
  61. synth_ai/environments/examples/nethack/achievements.py +337 -0
  62. synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +981 -0
  63. synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +74 -0
  64. synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +832 -0
  65. synth_ai/environments/examples/nethack/agent_demos/test_nethack_react_agent.py +1112 -0
  66. synth_ai/environments/examples/nethack/engine.py +738 -0
  67. synth_ai/environments/examples/nethack/environment.py +255 -0
  68. synth_ai/environments/examples/nethack/helpers/__init__.py +42 -0
  69. synth_ai/environments/examples/nethack/helpers/action_mapping.py +301 -0
  70. synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +401 -0
  71. synth_ai/environments/examples/nethack/helpers/observation_utils.py +433 -0
  72. synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +201 -0
  73. synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +268 -0
  74. synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +308 -0
  75. synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +430 -0
  76. synth_ai/environments/examples/nethack/taskset.py +323 -0
  77. synth_ai/environments/examples/nethack/units/test_nethack_engine.py +277 -0
  78. synth_ai/environments/examples/nethack/units/test_nethack_environment.py +281 -0
  79. synth_ai/environments/examples/nethack/units/test_nethack_taskset.py +213 -0
  80. synth_ai/environments/examples/nethack/units/test_recording.py +307 -0
  81. synth_ai/environments/examples/red/__init__.py +7 -0
  82. synth_ai/environments/examples/red/agent_demos/__init__.py +1 -0
  83. synth_ai/environments/examples/red/agent_demos/test_synth_react.py +1471 -0
  84. synth_ai/environments/examples/red/config_logging.py +110 -0
  85. synth_ai/environments/examples/red/engine.py +693 -0
  86. synth_ai/environments/examples/red/engine_helpers/__init__.py +1 -0
  87. synth_ai/environments/examples/red/engine_helpers/memory_map.py +28 -0
  88. synth_ai/environments/examples/red/engine_helpers/reward_components.py +275 -0
  89. synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +142 -0
  90. synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +56 -0
  91. synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +283 -0
  92. synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +149 -0
  93. synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +137 -0
  94. synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +56 -0
  95. synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +330 -0
  96. synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +120 -0
  97. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +558 -0
  98. synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +312 -0
  99. synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +147 -0
  100. synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +246 -0
  101. synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +367 -0
  102. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +139 -0
  103. synth_ai/environments/examples/red/environment.py +235 -0
  104. synth_ai/environments/examples/red/taskset.py +77 -0
  105. synth_ai/environments/examples/red/test_fixes.py +125 -0
  106. synth_ai/environments/examples/red/test_fixes_mock.py +148 -0
  107. synth_ai/environments/examples/red/units/__init__.py +1 -0
  108. synth_ai/environments/examples/red/units/test_basic_functionality.py +97 -0
  109. synth_ai/environments/examples/red/units/test_button_press_requirements.py +217 -0
  110. synth_ai/environments/examples/red/units/test_engine.py +192 -0
  111. synth_ai/environments/examples/red/units/test_environment.py +455 -0
  112. synth_ai/environments/examples/red/units/test_exploration_strategy.py +227 -0
  113. synth_ai/environments/examples/red/units/test_integration.py +217 -0
  114. synth_ai/environments/examples/red/units/test_memory_extraction.py +111 -0
  115. synth_ai/environments/examples/red/units/test_menu_bug_reproduction.py +1100 -0
  116. synth_ai/environments/examples/red/units/test_movement_debug.py +255 -0
  117. synth_ai/environments/examples/red/units/test_pokemon_mcts_debug.py +163 -0
  118. synth_ai/environments/examples/red/units/test_pokemon_mcts_verbose.py +117 -0
  119. synth_ai/environments/examples/red/units/test_red_basic.py +145 -0
  120. synth_ai/environments/examples/red/units/test_red_comprehensive.py +323 -0
  121. synth_ai/environments/examples/red/units/test_retry_movement.py +195 -0
  122. synth_ai/environments/examples/red/units/test_reward_components.py +186 -0
  123. synth_ai/environments/examples/red/units/test_rom_integration.py +260 -0
  124. synth_ai/environments/examples/red/units/test_taskset.py +116 -0
  125. synth_ai/environments/examples/red/units/test_tree.py +448 -0
  126. synth_ai/environments/examples/sokoban/__init__.py +1 -0
  127. synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +900 -0
  128. synth_ai/environments/examples/sokoban/agent_demos/test_dspy_react.py +1 -0
  129. synth_ai/environments/examples/sokoban/agent_demos/test_sokoban_react_agent.py +498 -0
  130. synth_ai/environments/examples/sokoban/agent_demos/test_synth_lats.py +1 -0
  131. synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_locally.py +748 -0
  132. synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_service.py +296 -0
  133. synth_ai/environments/examples/sokoban/engine.py +675 -0
  134. synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +1 -0
  135. synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +656 -0
  136. synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +17 -0
  137. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +3 -0
  138. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +129 -0
  139. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +370 -0
  140. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +331 -0
  141. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +305 -0
  142. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +66 -0
  143. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +114 -0
  144. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +122 -0
  145. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +394 -0
  146. synth_ai/environments/examples/sokoban/environment.py +228 -0
  147. synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +438 -0
  148. synth_ai/environments/examples/sokoban/puzzle_loader.py +311 -0
  149. synth_ai/environments/examples/sokoban/taskset.py +425 -0
  150. synth_ai/environments/examples/sokoban/units/astar_common.py +94 -0
  151. synth_ai/environments/examples/sokoban/units/test_building_task_set.py +49 -0
  152. synth_ai/environments/examples/sokoban/units/test_false_positive.py +120 -0
  153. synth_ai/environments/examples/sokoban/units/test_simple_run_through_environment.py +119 -0
  154. synth_ai/environments/examples/sokoban/units/test_sokoban_environment.py +98 -0
  155. synth_ai/environments/examples/sokoban/units/test_tree.py +364 -0
  156. synth_ai/environments/examples/tictactoe/__init__.py +1 -0
  157. synth_ai/environments/examples/tictactoe/agent_demos/test_synth_react.py +266 -0
  158. synth_ai/environments/examples/tictactoe/agent_demos/test_tictactoe_react_agent.py +470 -0
  159. synth_ai/environments/examples/tictactoe/engine.py +368 -0
  160. synth_ai/environments/examples/tictactoe/environment.py +239 -0
  161. synth_ai/environments/examples/tictactoe/taskset.py +214 -0
  162. synth_ai/environments/examples/tictactoe/units/test_tictactoe_engine.py +393 -0
  163. synth_ai/environments/examples/tictactoe/units/test_tictactoe_environment.py +493 -0
  164. synth_ai/environments/examples/tictactoe/units/test_tictactoe_taskset.py +191 -0
  165. synth_ai/environments/examples/verilog/__init__.py +10 -0
  166. synth_ai/environments/examples/verilog/agent_demos/test_synth_react.py +520 -0
  167. synth_ai/environments/examples/verilog/engine.py +328 -0
  168. synth_ai/environments/examples/verilog/environment.py +349 -0
  169. synth_ai/environments/examples/verilog/taskset.py +418 -0
  170. synth_ai/environments/examples/verilog/units/test_verilog_engine.py +466 -0
  171. synth_ai/environments/examples/verilog/units/test_verilog_environment.py +585 -0
  172. synth_ai/environments/examples/verilog/units/test_verilog_integration.py +383 -0
  173. synth_ai/environments/examples/verilog/units/test_verilog_taskset.py +457 -0
  174. synth_ai/environments/reproducibility/core.py +42 -0
  175. synth_ai/environments/reproducibility/tree.py +364 -0
  176. synth_ai/environments/service/app.py +78 -0
  177. synth_ai/environments/service/core_routes.py +775 -0
  178. synth_ai/environments/service/external_registry.py +57 -0
  179. synth_ai/environments/service/registry.py +9 -0
  180. synth_ai/environments/stateful/__init__.py +1 -0
  181. synth_ai/environments/stateful/core.py +28 -0
  182. synth_ai/environments/stateful/engine.py +21 -0
  183. synth_ai/environments/stateful/state.py +7 -0
  184. synth_ai/environments/tasks/api.py +19 -0
  185. synth_ai/environments/tasks/core.py +78 -0
  186. synth_ai/environments/tasks/filters.py +39 -0
  187. synth_ai/environments/tasks/utils.py +89 -0
  188. synth_ai/environments/v0_observability/history.py +3 -0
  189. synth_ai/environments/v0_observability/log.py +2 -0
  190. synth_ai/lm/caching/constants.py +1 -0
  191. synth_ai/{zyk/lms → lm}/caching/ephemeral.py +4 -8
  192. synth_ai/{zyk/lms → lm}/caching/handler.py +15 -15
  193. synth_ai/{zyk/lms → lm}/caching/initialize.py +2 -4
  194. synth_ai/{zyk/lms → lm}/caching/persistent.py +4 -10
  195. synth_ai/{zyk/lms → lm}/config.py +2 -1
  196. synth_ai/{zyk/lms → lm}/constants.py +2 -2
  197. synth_ai/{zyk/lms → lm}/core/all.py +10 -10
  198. synth_ai/{zyk/lms → lm}/core/main.py +57 -33
  199. synth_ai/{zyk/lms → lm}/core/vendor_clients.py +12 -10
  200. synth_ai/lm/cost/monitor.py +1 -0
  201. synth_ai/lm/cost/statefulness.py +1 -0
  202. synth_ai/lm/provider_support/__init__.py +8 -0
  203. synth_ai/lm/provider_support/anthropic.py +945 -0
  204. synth_ai/lm/provider_support/openai.py +1115 -0
  205. synth_ai/lm/provider_support/suppress_logging.py +31 -0
  206. synth_ai/{zyk/lms → lm}/structured_outputs/handler.py +58 -80
  207. synth_ai/{zyk/lms → lm}/structured_outputs/inject.py +6 -20
  208. synth_ai/{zyk/lms → lm}/structured_outputs/rehabilitate.py +6 -12
  209. synth_ai/{zyk/lms → lm}/vendors/core/anthropic_api.py +21 -30
  210. synth_ai/{zyk/lms → lm}/vendors/core/gemini_api.py +37 -32
  211. synth_ai/{zyk/lms → lm}/vendors/core/mistral_api.py +19 -28
  212. synth_ai/{zyk/lms → lm}/vendors/core/openai_api.py +26 -36
  213. synth_ai/{zyk/lms → lm}/vendors/openai_standard.py +29 -33
  214. synth_ai/{zyk/lms → lm}/vendors/retries.py +1 -1
  215. synth_ai/lm/vendors/supported/__init__.py +0 -0
  216. synth_ai/{zyk/lms → lm}/vendors/supported/custom_endpoint.py +131 -118
  217. synth_ai/{zyk/lms → lm}/vendors/supported/deepseek.py +4 -8
  218. synth_ai/{zyk/lms → lm}/vendors/supported/grok.py +6 -8
  219. synth_ai/{zyk/lms → lm}/vendors/supported/groq.py +1 -1
  220. synth_ai/{zyk/lms → lm}/vendors/supported/ollama.py +2 -2
  221. synth_ai/{zyk/lms → lm}/vendors/supported/openrouter.py +18 -16
  222. synth_ai/{zyk/lms → lm}/vendors/supported/together.py +1 -1
  223. synth_ai/tracing/__init__.py +0 -0
  224. synth_ai/tracing/abstractions.py +224 -0
  225. synth_ai/tracing/base_client.py +91 -0
  226. synth_ai/tracing/client_manager.py +131 -0
  227. synth_ai/tracing/config.py +140 -0
  228. synth_ai/tracing/context.py +146 -0
  229. synth_ai/tracing/decorators.py +679 -0
  230. synth_ai/tracing/events/__init__.py +0 -0
  231. synth_ai/tracing/events/manage.py +147 -0
  232. synth_ai/tracing/events/scope.py +86 -0
  233. synth_ai/tracing/events/store.py +227 -0
  234. synth_ai/tracing/immediate_client.py +152 -0
  235. synth_ai/tracing/local.py +18 -0
  236. synth_ai/tracing/log_client_base.py +74 -0
  237. synth_ai/tracing/retry_queue.py +187 -0
  238. synth_ai/tracing/trackers.py +515 -0
  239. synth_ai/tracing/upload.py +504 -0
  240. synth_ai/tracing/utils.py +9 -0
  241. synth_ai/zyk/__init__.py +28 -2
  242. synth_ai-0.2.1.dev0.dist-info/METADATA +349 -0
  243. synth_ai-0.2.1.dev0.dist-info/RECORD +261 -0
  244. synth_ai/zyk/lms/caching/constants.py +0 -1
  245. synth_ai/zyk/lms/cost/monitor.py +0 -1
  246. synth_ai/zyk/lms/cost/statefulness.py +0 -1
  247. synth_ai-0.1.9.dist-info/METADATA +0 -37
  248. synth_ai-0.1.9.dist-info/RECORD +0 -50
  249. /synth_ai/{zyk/lms/__init__.py → environments/reproducibility/helpers.py} +0 -0
  250. /synth_ai/{zyk/lms/caching → lm}/__init__.py +0 -0
  251. /synth_ai/{zyk/lms/core → lm/caching}/__init__.py +0 -0
  252. /synth_ai/{zyk/lms → lm}/caching/dbs.py +0 -0
  253. /synth_ai/{zyk/lms/cost → lm/core}/__init__.py +0 -0
  254. /synth_ai/{zyk/lms → lm}/core/exceptions.py +0 -0
  255. /synth_ai/{zyk/lms/structured_outputs → lm/cost}/__init__.py +0 -0
  256. /synth_ai/{zyk/lms/vendors → lm/structured_outputs}/__init__.py +0 -0
  257. /synth_ai/{zyk/lms → lm}/tools/__init__.py +0 -0
  258. /synth_ai/{zyk/lms → lm}/tools/base.py +0 -0
  259. /synth_ai/{zyk/lms/vendors/core → lm/vendors}/__init__.py +0 -0
  260. /synth_ai/{zyk/lms → lm}/vendors/base.py +0 -0
  261. /synth_ai/{zyk/lms/vendors/local → lm/vendors/core}/__init__.py +0 -0
  262. /synth_ai/{zyk/lms/vendors/supported → lm/vendors/local}/__init__.py +0 -0
  263. /synth_ai/{zyk/lms → lm}/vendors/local/ollama.py +0 -0
  264. {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/WHEEL +0 -0
  265. {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/licenses/LICENSE +0 -0
  266. {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,217 @@
1
+ import pytest
2
+ import asyncio
3
+ import uuid
4
+
5
+ from synth_ai.environments.examples.red.environment import (
6
+ PokemonRedEnvironment,
7
+ PokemonRedPublicState,
8
+ PokemonRedPrivateState,
9
+ )
10
+ from synth_ai.environments.environment.shared_engine import (
11
+ GetObservationCallable,
12
+ InternalObservation,
13
+ )
14
+ from synth_ai.environments.examples.red.taskset import PokemonRedTaskInstance
15
+ from synth_ai.environments.tasks.core import Impetus, Intent, TaskInstanceMetadata
16
+ from synth_ai.environments.environment.tools import EnvToolCall
17
+
18
+
19
class PressButtonCall(EnvToolCall):
    """Convenience wrapper that builds a ``press_button`` tool call."""

    def __init__(self, button: str, frames: int = 1):
        # Assemble the argument payload first, then hand it to the base class.
        call_args = {"button": button, "frames": frames}
        super().__init__(tool="press_button", args=call_args)
24
+
25
+
26
class ButtonTestObservationCallable(GetObservationCallable):
    """Observation callable used by the systematic button-press tests.

    Besides packaging the public and private state, it makes a best-effort
    attempt to copy the emulator's screen buffer and keeps the most recent
    successful copy on ``self.screen_buffer``.
    """

    def __init__(self):
        # Most recently captured screen; stays None until a capture succeeds.
        self.screen_buffer = None

    async def get_observation(
        self, pub: PokemonRedPublicState, priv: PokemonRedPrivateState
    ) -> InternalObservation:
        """Build the observation dict for one step.

        Args:
            pub: Public state; must not be None.
            priv: Private state; must not be None.

        Returns:
            A dict with the keys ``public``, ``private``, ``formatted_obs``
            and ``screen_buffer`` (the last captured screen, possibly None).

        Raises:
            RuntimeError: If either ``pub`` or ``priv`` is None.
        """
        if pub is None or priv is None:
            raise RuntimeError("Missing public or private state in get_observation")

        # Extract screen buffer. This is deliberately best-effort: any failure
        # is reported and the previously captured buffer (if any) is reused.
        try:
            import inspect

            # Walk up the call stack looking for a ``self`` that exposes an
            # ``engine`` attribute; that object is treated as the environment.
            frame = inspect.currentframe()
            env = None
            try:
                while frame:
                    candidate = frame.f_locals.get("self")
                    if candidate is not None and hasattr(candidate, "engine"):
                        env = candidate
                        break
                    frame = frame.f_back
            finally:
                # Drop the frame reference explicitly so no reference cycle
                # through this function's own locals is left behind.
                del frame

            engine = getattr(env, "engine", None) if env else None
            emulator = getattr(engine, "emulator", None) if engine else None
            if emulator and hasattr(emulator, "screen"):
                self.screen_buffer = emulator.screen.ndarray.copy()
        except Exception as e:
            print(f"[DEBUG] Failed to extract screen buffer: {e}")

        formatted_obs = (
            f"Step: {pub.step_count}, Position: ({pub.player_x}, {pub.player_y}), Map: {pub.map_id}"
        )

        return {
            "public": pub,
            "private": priv,
            "formatted_obs": formatted_obs,
            "screen_buffer": self.screen_buffer,
        }
69
+
70
+
71
# Largest number of consecutive presses tried for each button.
_MAX_PRESSES = 5


async def _run_press_trial(button, press_count):
    """Press ``button`` ``press_count`` times in a freshly created environment.

    Returns:
        The outcome dict for the trial, ``{"error": <message>}`` if the trial
        raised, or ``None`` when environment initialization reported an error
        (in which case nothing is recorded for the trial).
    """
    # Create fresh environment for each test
    task_metadata = TaskInstanceMetadata()
    inst = PokemonRedTaskInstance(
        id=uuid.uuid4(),
        impetus=Impetus(instructions=f"Test {button} button with {press_count} presses."),
        intent=Intent(
            rubric={"goal": f"Test {button}"},
            gold_trajectories=None,
            gold_state_diff={},
        ),
        metadata=task_metadata,
        is_reproducible=True,
        initial_engine_snapshot=None,
    )

    test_obs = ButtonTestObservationCallable()
    env = PokemonRedEnvironment(inst, custom_step_obs=test_obs)

    try:
        # Initialize
        obs_payload = await env.initialize()
        if "error" in obs_payload:
            print(f"[ERROR] Init failed: {obs_payload['error']}")
            return None

        initial_pub = obs_payload["public"]
        initial_position = (initial_pub.player_x, initial_pub.player_y)
        initial_map_id = initial_pub.map_id

        print(f" Initial state: pos={initial_position}, map={initial_map_id}")

        # Press button the specified number of times; if a step fails, the
        # state reached so far is still reported.
        final_position = initial_position
        final_map_id = initial_map_id

        for press_num in range(press_count):
            step_result = await env.step([[PressButtonCall(button)]])
            if "error" in step_result:
                print(f" [ERROR] Step {press_num + 1} failed: {step_result['error']}")
                break

            new_pub = step_result["public"]
            final_position = (new_pub.player_x, new_pub.player_y)
            final_map_id = new_pub.map_id

        # Analyze results
        position_changed = final_position != initial_position
        map_changed = final_map_id != initial_map_id

        result = {
            "initial_position": initial_position,
            "final_position": final_position,
            "initial_map": initial_map_id,
            "final_map": final_map_id,
            "position_changed": position_changed,
            "map_changed": map_changed,
            "effective": position_changed or map_changed,
        }

        print(f" Result: pos={final_position}, map={final_map_id}")
        print(f" Effect: {'YES' if result['effective'] else 'NO'}")
        return result

    except Exception as e:
        # Keep the sweep going: record the failure instead of aborting.
        print(f" [ERROR] Test failed: {e}")
        return {"error": str(e)}


def _min_effective_presses(button_results):
    """Return the smallest press count whose trial was effective, else None."""
    for press_count in range(1, _MAX_PRESSES + 1):
        outcome = button_results.get(press_count)
        # Missing trials and errored trials carry no usable verdict.
        if not isinstance(outcome, dict) or "error" in outcome:
            continue
        if outcome.get("effective", False):
            return press_count
    return None


def _print_recommendations(results):
    """Print the per-button summary and recommendation section."""
    print("\n" + "=" * 80)
    print("ANALYSIS AND RECOMMENDATIONS")
    print("=" * 80)

    for button, button_results in results.items():
        print(f"\n{button} BUTTON:")

        # Find minimum presses for reliable effect
        min_effective_presses = _min_effective_presses(button_results)

        if min_effective_presses is not None:
            print(f" ✓ Minimum effective presses: {min_effective_presses}")
            print(f" ✓ Recommendation: Use {min_effective_presses} presses for {button}")
        else:
            print(f" ✗ No effective movement detected with up to {_MAX_PRESSES} presses")

        # Show detailed results
        for press_count, result in button_results.items():
            if isinstance(result, dict) and "error" not in result:
                effect_str = "EFFECTIVE" if result.get("effective") else "no effect"
                print(f" {press_count} press(es): {effect_str}")


async def test_single_vs_multiple_presses():
    """
    Test how many button presses are needed for reliable movement in different directions.

    Each button is tried with 1 to ``_MAX_PRESSES`` presses, each time in a
    fresh environment. A trial counts as effective when the player position
    or the map id differs from the initial state.

    Returns:
        A dict mapping button name to ``{press_count: outcome}``, where an
        outcome is either the result dict of the trial or ``{"error": ...}``.
    """
    print("\n" + "=" * 80)
    print("SYSTEMATIC BUTTON PRESS REQUIREMENT ANALYSIS")
    print("=" * 80)

    # Test different buttons and press counts
    test_scenarios = [
        ("LEFT", "movement"),
        ("RIGHT", "movement"),
        ("UP", "movement"),
        ("DOWN", "movement"),
        ("A", "interaction"),
        ("B", "cancel/back"),
    ]

    results = {}

    for button, action_type in test_scenarios:
        print(f"\n{'=' * 60}")
        print(f"TESTING {button} BUTTON ({action_type})")
        print(f"{'=' * 60}")

        # Test with different numbers of presses (1 to _MAX_PRESSES)
        button_results = {}

        for press_count in range(1, _MAX_PRESSES + 1):
            print(f"\nTesting {press_count} press(es) of {button}...")

            outcome = await _run_press_trial(button, press_count)
            if outcome is not None:
                button_results[press_count] = outcome

        results[button] = button_results

    # Analysis and recommendations
    _print_recommendations(results)

    return results
204
+
205
+
206
@pytest.mark.asyncio
async def test_button_press_requirements():
    """Run the button-press analysis and check that it produced results.

    This is primarily a diagnostic: the useful information is in the printed
    output. The assertion only verifies that the analysis returned a
    non-empty mapping; it does not require any button to be effective.
    """
    results = await test_single_vs_multiple_presses()

    assert results, "Button press analysis produced no per-button results"
213
+
214
+
215
if __name__ == "__main__":
    # Script entry point: drive the async diagnostic without the pytest runner.
    diagnostic = test_button_press_requirements()
    asyncio.run(diagnostic)
@@ -0,0 +1,192 @@
1
+ import pytest
2
+ from synth_ai.environments.examples.red.engine import (
3
+ PokemonRedEngine,
4
+ BUTTON_MAP,
5
+ PokemonRedEngineSnapshot,
6
+ )
7
+ from synth_ai.environments.examples.red.taskset import (
8
+ INSTANCE as DEFAULT_TASK,
9
+ )
10
+
11
+
12
class TestPokemonRedEngine:
    """Test Pokemon Red engine functionality with REAL ROM"""

    @pytest.fixture
    def task_instance(self):
        """Provide the default task instance exported by the taskset module."""
        return DEFAULT_TASK

    def test_button_map_completeness(self):
        """Test that all expected buttons are mapped"""
        expected_buttons = ["A", "B", "UP", "DOWN", "LEFT", "RIGHT", "START", "SELECT"]
        # Collect the missing names so a failure says which button is absent.
        missing = [button for button in expected_buttons if button not in BUTTON_MAP]
        assert not missing, f"Buttons missing from BUTTON_MAP: {missing}"

        # Each button should map to a non-empty string (PyBoy event name)
        for mapped in BUTTON_MAP.values():
            assert isinstance(mapped, str)
            assert mapped

    def test_engine_initialization(self, task_instance):
        """Test engine initialization with REAL ROM"""
        engine = PokemonRedEngine(task_instance)

        assert engine.task_instance == task_instance
        assert engine._total_reward == 0.0
        assert engine._step_count == 0
        assert engine._previous_state is None
        assert engine.emulator is not None  # Should have real PyBoy instance

    def test_rom_path_resolution(self, task_instance):
        """Test ROM path resolution logic"""
        engine = PokemonRedEngine(task_instance)
        rom_path = engine._get_rom_path()

        # Should find the actual ROM file
        assert rom_path.exists()
        assert rom_path.name == "pokemon_red.gb"

    @pytest.mark.asyncio
    async def test_press_button_real(self, task_instance):
        """Test button press functionality with real ROM"""
        engine = PokemonRedEngine(task_instance)

        # Test valid button press - should not raise exception
        engine._press_button("A", 1)

        # Test multiple frames
        engine._press_button("RIGHT", 3)

    @pytest.mark.asyncio
    async def test_press_button_invalid(self, task_instance):
        """Test invalid button press"""
        engine = PokemonRedEngine(task_instance)

        with pytest.raises(ValueError, match="Invalid button: INVALID"):
            engine._press_button("INVALID")

    @pytest.mark.asyncio
    async def test_extract_current_state_real(self, task_instance):
        """Test state extraction from real emulator"""
        engine = PokemonRedEngine(task_instance)
        state = engine._extract_current_state()

        # Should return a dictionary with expected keys (from actual state extraction)
        expected_keys = [
            "map_id",
            "player_x",
            "player_y",
            "badges",
            "party_hp_current",
            "party_hp_max",
            "party_level",
            "party_xp",
            "in_battle",
            "battle_outcome",
            "inventory_count",
            "menu_state",
            "warp_flag",
        ]
        # Report every missing key at once instead of stopping at the first.
        missing = [key for key in expected_keys if key not in state]
        assert not missing, f"State is missing expected keys: {missing}"

        # Values should be correct types
        assert isinstance(state["map_id"], int)
        assert isinstance(state["player_x"], int)
        assert isinstance(state["player_y"], int)
        assert isinstance(state["badges"], int)
        assert isinstance(state["in_battle"], bool)

    @pytest.mark.asyncio
    async def test_reset_engine_real(self, task_instance):
        """Test engine reset with real ROM"""
        engine = PokemonRedEngine(task_instance)

        priv, pub = await engine._reset_engine()

        assert engine._total_reward == 0.0
        assert engine._step_count == 0
        assert priv.reward_last_step == 0.0
        assert priv.total_reward == 0.0
        assert not priv.terminated

        # Public state should have real values
        assert isinstance(pub.map_id, int)
        assert isinstance(pub.player_x, int)
        assert isinstance(pub.player_y, int)

    @pytest.mark.asyncio
    async def test_step_engine_real(self, task_instance):
        """Test engine step execution with real ROM"""
        engine = PokemonRedEngine(task_instance)
        await engine._reset_engine()

        action = {"button": "A", "frames": 1}
        priv, pub = await engine._step_engine(action)

        assert engine._step_count == 1
        assert priv.step_count == 1
        assert isinstance(priv.reward_last_step, float)
        assert priv.total_reward == engine._total_reward

        # Should have actual game state
        assert isinstance(pub.map_id, int)
        assert isinstance(pub.badges, int)
        assert isinstance(pub.party_hp_current, int)

    @pytest.mark.asyncio
    async def test_button_sequence_real(self, task_instance):
        """Test a sequence of button presses with real ROM"""
        engine = PokemonRedEngine(task_instance)
        await engine._reset_engine()

        # Try a sequence of different buttons
        buttons = ["A", "B", "UP", "DOWN", "LEFT", "RIGHT", "START", "SELECT"]

        # Start counting at 1 so the index equals the expected step count.
        for expected_steps, button in enumerate(buttons, start=1):
            action = {"button": button, "frames": 1}
            priv, pub = await engine._step_engine(action)

            assert engine._step_count == expected_steps
            assert priv.step_count == expected_steps

            # Game state should remain consistent
            assert isinstance(pub.map_id, int)
            assert isinstance(pub.player_x, int)
            assert isinstance(pub.player_y, int)

    @pytest.mark.asyncio
    async def test_serialization_real(self, task_instance):
        """Test engine serialization with real ROM"""
        engine = PokemonRedEngine(task_instance)
        await engine._reset_engine()

        # Take a few steps to change state
        await engine._step_engine({"button": "A", "frames": 1})
        await engine._step_engine({"button": "RIGHT", "frames": 1})

        snapshot = await engine._serialize_engine()

        assert isinstance(snapshot, PokemonRedEngineSnapshot)
        assert snapshot.total_reward == engine._total_reward
        assert snapshot.step_count == engine._step_count
        assert "_save_state_bytes" in snapshot.state_data

    @pytest.mark.asyncio
    async def test_rom_memory_access(self, task_instance):
        """Test that we can actually read ROM memory"""
        engine = PokemonRedEngine(task_instance)

        # Should be able to access memory
        assert engine.emulator is not None
        assert hasattr(engine.emulator, "memory")

        # Try reading some memory locations
        badge_flags = engine.emulator.memory[0xD356]
        player_x = engine.emulator.memory[0xD362]
        player_y = engine.emulator.memory[0xD361]

        # Should be valid integers (even if zero initially)
        assert isinstance(badge_flags, int)
        assert isinstance(player_x, int)
        assert isinstance(player_y, int)