synth-ai 0.1.9__py3-none-any.whl → 0.2.1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (266) hide show
  1. synth_ai/__init__.py +28 -2
  2. synth_ai/core/system.py +4 -0
  3. synth_ai/environments/__init__.py +35 -0
  4. synth_ai/environments/environment/__init__.py +1 -0
  5. synth_ai/environments/environment/artifacts/__init__.py +1 -0
  6. synth_ai/environments/environment/artifacts/base.py +50 -0
  7. synth_ai/environments/environment/core.py +22 -0
  8. synth_ai/environments/environment/db/__init__.py +1 -0
  9. synth_ai/environments/environment/db/sqlite.py +45 -0
  10. synth_ai/environments/environment/registry.py +24 -0
  11. synth_ai/environments/environment/resources/sqlite.py +46 -0
  12. synth_ai/environments/environment/results.py +1 -0
  13. synth_ai/environments/environment/rewards/__init__.py +1 -0
  14. synth_ai/environments/environment/rewards/core.py +28 -0
  15. synth_ai/environments/environment/shared_engine.py +26 -0
  16. synth_ai/environments/environment/tools/__init__.py +34 -0
  17. synth_ai/environments/examples/__init__.py +1 -0
  18. synth_ai/environments/examples/crafter_classic/__init__.py +8 -0
  19. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +58 -0
  20. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +152 -0
  21. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +1194 -0
  22. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +51 -0
  23. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +872 -0
  24. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1412 -0
  25. synth_ai/environments/examples/crafter_classic/agent_demos/test_crafter_react_agent.py +1110 -0
  26. synth_ai/environments/examples/crafter_classic/config_logging.py +111 -0
  27. synth_ai/environments/examples/crafter_classic/engine.py +502 -0
  28. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +63 -0
  29. synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +5 -0
  30. synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +74 -0
  31. synth_ai/environments/examples/crafter_classic/environment.py +255 -0
  32. synth_ai/environments/examples/crafter_classic/taskset.py +228 -0
  33. synth_ai/environments/examples/enron/agent_demos/test_synth_react.py +535 -0
  34. synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +156 -0
  35. synth_ai/environments/examples/enron/art_helpers/local_email_db.py +280 -0
  36. synth_ai/environments/examples/enron/art_helpers/types_enron.py +24 -0
  37. synth_ai/environments/examples/enron/engine.py +291 -0
  38. synth_ai/environments/examples/enron/environment.py +165 -0
  39. synth_ai/environments/examples/enron/taskset.py +112 -0
  40. synth_ai/environments/examples/enron/units/keyword_stats.py +111 -0
  41. synth_ai/environments/examples/enron/units/test_email_index.py +8 -0
  42. synth_ai/environments/examples/minigrid/__init__.py +48 -0
  43. synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +1188 -0
  44. synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +47 -0
  45. synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +562 -0
  46. synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +220 -0
  47. synth_ai/environments/examples/minigrid/agent_demos/test_minigrid_react_agent.py +393 -0
  48. synth_ai/environments/examples/minigrid/engine.py +589 -0
  49. synth_ai/environments/examples/minigrid/environment.py +274 -0
  50. synth_ai/environments/examples/minigrid/environment_mapping.py +242 -0
  51. synth_ai/environments/examples/minigrid/puzzle_loader.py +416 -0
  52. synth_ai/environments/examples/minigrid/taskset.py +583 -0
  53. synth_ai/environments/examples/minigrid/units/test_action_behavior.py +226 -0
  54. synth_ai/environments/examples/minigrid/units/test_debug_messages.py +83 -0
  55. synth_ai/environments/examples/minigrid/units/test_exploration.py +120 -0
  56. synth_ai/environments/examples/minigrid/units/test_minigrid_engine.py +214 -0
  57. synth_ai/environments/examples/minigrid/units/test_minigrid_environment.py +238 -0
  58. synth_ai/environments/examples/minigrid/units/test_minigrid_environment_mapping.py +301 -0
  59. synth_ai/environments/examples/minigrid/units/test_minigrid_taskset.py +210 -0
  60. synth_ai/environments/examples/nethack/__init__.py +7 -0
  61. synth_ai/environments/examples/nethack/achievements.py +337 -0
  62. synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +981 -0
  63. synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +74 -0
  64. synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +832 -0
  65. synth_ai/environments/examples/nethack/agent_demos/test_nethack_react_agent.py +1112 -0
  66. synth_ai/environments/examples/nethack/engine.py +738 -0
  67. synth_ai/environments/examples/nethack/environment.py +255 -0
  68. synth_ai/environments/examples/nethack/helpers/__init__.py +42 -0
  69. synth_ai/environments/examples/nethack/helpers/action_mapping.py +301 -0
  70. synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +401 -0
  71. synth_ai/environments/examples/nethack/helpers/observation_utils.py +433 -0
  72. synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +201 -0
  73. synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +268 -0
  74. synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +308 -0
  75. synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +430 -0
  76. synth_ai/environments/examples/nethack/taskset.py +323 -0
  77. synth_ai/environments/examples/nethack/units/test_nethack_engine.py +277 -0
  78. synth_ai/environments/examples/nethack/units/test_nethack_environment.py +281 -0
  79. synth_ai/environments/examples/nethack/units/test_nethack_taskset.py +213 -0
  80. synth_ai/environments/examples/nethack/units/test_recording.py +307 -0
  81. synth_ai/environments/examples/red/__init__.py +7 -0
  82. synth_ai/environments/examples/red/agent_demos/__init__.py +1 -0
  83. synth_ai/environments/examples/red/agent_demos/test_synth_react.py +1471 -0
  84. synth_ai/environments/examples/red/config_logging.py +110 -0
  85. synth_ai/environments/examples/red/engine.py +693 -0
  86. synth_ai/environments/examples/red/engine_helpers/__init__.py +1 -0
  87. synth_ai/environments/examples/red/engine_helpers/memory_map.py +28 -0
  88. synth_ai/environments/examples/red/engine_helpers/reward_components.py +275 -0
  89. synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +142 -0
  90. synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +56 -0
  91. synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +283 -0
  92. synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +149 -0
  93. synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +137 -0
  94. synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +56 -0
  95. synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +330 -0
  96. synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +120 -0
  97. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +558 -0
  98. synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +312 -0
  99. synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +147 -0
  100. synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +246 -0
  101. synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +367 -0
  102. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +139 -0
  103. synth_ai/environments/examples/red/environment.py +235 -0
  104. synth_ai/environments/examples/red/taskset.py +77 -0
  105. synth_ai/environments/examples/red/test_fixes.py +125 -0
  106. synth_ai/environments/examples/red/test_fixes_mock.py +148 -0
  107. synth_ai/environments/examples/red/units/__init__.py +1 -0
  108. synth_ai/environments/examples/red/units/test_basic_functionality.py +97 -0
  109. synth_ai/environments/examples/red/units/test_button_press_requirements.py +217 -0
  110. synth_ai/environments/examples/red/units/test_engine.py +192 -0
  111. synth_ai/environments/examples/red/units/test_environment.py +455 -0
  112. synth_ai/environments/examples/red/units/test_exploration_strategy.py +227 -0
  113. synth_ai/environments/examples/red/units/test_integration.py +217 -0
  114. synth_ai/environments/examples/red/units/test_memory_extraction.py +111 -0
  115. synth_ai/environments/examples/red/units/test_menu_bug_reproduction.py +1100 -0
  116. synth_ai/environments/examples/red/units/test_movement_debug.py +255 -0
  117. synth_ai/environments/examples/red/units/test_pokemon_mcts_debug.py +163 -0
  118. synth_ai/environments/examples/red/units/test_pokemon_mcts_verbose.py +117 -0
  119. synth_ai/environments/examples/red/units/test_red_basic.py +145 -0
  120. synth_ai/environments/examples/red/units/test_red_comprehensive.py +323 -0
  121. synth_ai/environments/examples/red/units/test_retry_movement.py +195 -0
  122. synth_ai/environments/examples/red/units/test_reward_components.py +186 -0
  123. synth_ai/environments/examples/red/units/test_rom_integration.py +260 -0
  124. synth_ai/environments/examples/red/units/test_taskset.py +116 -0
  125. synth_ai/environments/examples/red/units/test_tree.py +448 -0
  126. synth_ai/environments/examples/sokoban/__init__.py +1 -0
  127. synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +900 -0
  128. synth_ai/environments/examples/sokoban/agent_demos/test_dspy_react.py +1 -0
  129. synth_ai/environments/examples/sokoban/agent_demos/test_sokoban_react_agent.py +498 -0
  130. synth_ai/environments/examples/sokoban/agent_demos/test_synth_lats.py +1 -0
  131. synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_locally.py +748 -0
  132. synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_service.py +296 -0
  133. synth_ai/environments/examples/sokoban/engine.py +675 -0
  134. synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +1 -0
  135. synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +656 -0
  136. synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +17 -0
  137. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +3 -0
  138. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +129 -0
  139. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +370 -0
  140. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +331 -0
  141. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +305 -0
  142. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +66 -0
  143. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +114 -0
  144. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +122 -0
  145. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +394 -0
  146. synth_ai/environments/examples/sokoban/environment.py +228 -0
  147. synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +438 -0
  148. synth_ai/environments/examples/sokoban/puzzle_loader.py +311 -0
  149. synth_ai/environments/examples/sokoban/taskset.py +425 -0
  150. synth_ai/environments/examples/sokoban/units/astar_common.py +94 -0
  151. synth_ai/environments/examples/sokoban/units/test_building_task_set.py +49 -0
  152. synth_ai/environments/examples/sokoban/units/test_false_positive.py +120 -0
  153. synth_ai/environments/examples/sokoban/units/test_simple_run_through_environment.py +119 -0
  154. synth_ai/environments/examples/sokoban/units/test_sokoban_environment.py +98 -0
  155. synth_ai/environments/examples/sokoban/units/test_tree.py +364 -0
  156. synth_ai/environments/examples/tictactoe/__init__.py +1 -0
  157. synth_ai/environments/examples/tictactoe/agent_demos/test_synth_react.py +266 -0
  158. synth_ai/environments/examples/tictactoe/agent_demos/test_tictactoe_react_agent.py +470 -0
  159. synth_ai/environments/examples/tictactoe/engine.py +368 -0
  160. synth_ai/environments/examples/tictactoe/environment.py +239 -0
  161. synth_ai/environments/examples/tictactoe/taskset.py +214 -0
  162. synth_ai/environments/examples/tictactoe/units/test_tictactoe_engine.py +393 -0
  163. synth_ai/environments/examples/tictactoe/units/test_tictactoe_environment.py +493 -0
  164. synth_ai/environments/examples/tictactoe/units/test_tictactoe_taskset.py +191 -0
  165. synth_ai/environments/examples/verilog/__init__.py +10 -0
  166. synth_ai/environments/examples/verilog/agent_demos/test_synth_react.py +520 -0
  167. synth_ai/environments/examples/verilog/engine.py +328 -0
  168. synth_ai/environments/examples/verilog/environment.py +349 -0
  169. synth_ai/environments/examples/verilog/taskset.py +418 -0
  170. synth_ai/environments/examples/verilog/units/test_verilog_engine.py +466 -0
  171. synth_ai/environments/examples/verilog/units/test_verilog_environment.py +585 -0
  172. synth_ai/environments/examples/verilog/units/test_verilog_integration.py +383 -0
  173. synth_ai/environments/examples/verilog/units/test_verilog_taskset.py +457 -0
  174. synth_ai/environments/reproducibility/core.py +42 -0
  175. synth_ai/environments/reproducibility/tree.py +364 -0
  176. synth_ai/environments/service/app.py +78 -0
  177. synth_ai/environments/service/core_routes.py +775 -0
  178. synth_ai/environments/service/external_registry.py +57 -0
  179. synth_ai/environments/service/registry.py +9 -0
  180. synth_ai/environments/stateful/__init__.py +1 -0
  181. synth_ai/environments/stateful/core.py +28 -0
  182. synth_ai/environments/stateful/engine.py +21 -0
  183. synth_ai/environments/stateful/state.py +7 -0
  184. synth_ai/environments/tasks/api.py +19 -0
  185. synth_ai/environments/tasks/core.py +78 -0
  186. synth_ai/environments/tasks/filters.py +39 -0
  187. synth_ai/environments/tasks/utils.py +89 -0
  188. synth_ai/environments/v0_observability/history.py +3 -0
  189. synth_ai/environments/v0_observability/log.py +2 -0
  190. synth_ai/lm/caching/constants.py +1 -0
  191. synth_ai/{zyk/lms → lm}/caching/ephemeral.py +4 -8
  192. synth_ai/{zyk/lms → lm}/caching/handler.py +15 -15
  193. synth_ai/{zyk/lms → lm}/caching/initialize.py +2 -4
  194. synth_ai/{zyk/lms → lm}/caching/persistent.py +4 -10
  195. synth_ai/{zyk/lms → lm}/config.py +2 -1
  196. synth_ai/{zyk/lms → lm}/constants.py +2 -2
  197. synth_ai/{zyk/lms → lm}/core/all.py +10 -10
  198. synth_ai/{zyk/lms → lm}/core/main.py +57 -33
  199. synth_ai/{zyk/lms → lm}/core/vendor_clients.py +12 -10
  200. synth_ai/lm/cost/monitor.py +1 -0
  201. synth_ai/lm/cost/statefulness.py +1 -0
  202. synth_ai/lm/provider_support/__init__.py +8 -0
  203. synth_ai/lm/provider_support/anthropic.py +945 -0
  204. synth_ai/lm/provider_support/openai.py +1115 -0
  205. synth_ai/lm/provider_support/suppress_logging.py +31 -0
  206. synth_ai/{zyk/lms → lm}/structured_outputs/handler.py +58 -80
  207. synth_ai/{zyk/lms → lm}/structured_outputs/inject.py +6 -20
  208. synth_ai/{zyk/lms → lm}/structured_outputs/rehabilitate.py +6 -12
  209. synth_ai/{zyk/lms → lm}/vendors/core/anthropic_api.py +21 -30
  210. synth_ai/{zyk/lms → lm}/vendors/core/gemini_api.py +37 -32
  211. synth_ai/{zyk/lms → lm}/vendors/core/mistral_api.py +19 -28
  212. synth_ai/{zyk/lms → lm}/vendors/core/openai_api.py +26 -36
  213. synth_ai/{zyk/lms → lm}/vendors/openai_standard.py +29 -33
  214. synth_ai/{zyk/lms → lm}/vendors/retries.py +1 -1
  215. synth_ai/lm/vendors/supported/__init__.py +0 -0
  216. synth_ai/{zyk/lms → lm}/vendors/supported/custom_endpoint.py +131 -118
  217. synth_ai/{zyk/lms → lm}/vendors/supported/deepseek.py +4 -8
  218. synth_ai/{zyk/lms → lm}/vendors/supported/grok.py +6 -8
  219. synth_ai/{zyk/lms → lm}/vendors/supported/groq.py +1 -1
  220. synth_ai/{zyk/lms → lm}/vendors/supported/ollama.py +2 -2
  221. synth_ai/{zyk/lms → lm}/vendors/supported/openrouter.py +18 -16
  222. synth_ai/{zyk/lms → lm}/vendors/supported/together.py +1 -1
  223. synth_ai/tracing/__init__.py +0 -0
  224. synth_ai/tracing/abstractions.py +224 -0
  225. synth_ai/tracing/base_client.py +91 -0
  226. synth_ai/tracing/client_manager.py +131 -0
  227. synth_ai/tracing/config.py +140 -0
  228. synth_ai/tracing/context.py +146 -0
  229. synth_ai/tracing/decorators.py +679 -0
  230. synth_ai/tracing/events/__init__.py +0 -0
  231. synth_ai/tracing/events/manage.py +147 -0
  232. synth_ai/tracing/events/scope.py +86 -0
  233. synth_ai/tracing/events/store.py +227 -0
  234. synth_ai/tracing/immediate_client.py +152 -0
  235. synth_ai/tracing/local.py +18 -0
  236. synth_ai/tracing/log_client_base.py +74 -0
  237. synth_ai/tracing/retry_queue.py +187 -0
  238. synth_ai/tracing/trackers.py +515 -0
  239. synth_ai/tracing/upload.py +504 -0
  240. synth_ai/tracing/utils.py +9 -0
  241. synth_ai/zyk/__init__.py +28 -2
  242. synth_ai-0.2.1.dev0.dist-info/METADATA +349 -0
  243. synth_ai-0.2.1.dev0.dist-info/RECORD +261 -0
  244. synth_ai/zyk/lms/caching/constants.py +0 -1
  245. synth_ai/zyk/lms/cost/monitor.py +0 -1
  246. synth_ai/zyk/lms/cost/statefulness.py +0 -1
  247. synth_ai-0.1.9.dist-info/METADATA +0 -37
  248. synth_ai-0.1.9.dist-info/RECORD +0 -50
  249. /synth_ai/{zyk/lms/__init__.py → environments/reproducibility/helpers.py} +0 -0
  250. /synth_ai/{zyk/lms/caching → lm}/__init__.py +0 -0
  251. /synth_ai/{zyk/lms/core → lm/caching}/__init__.py +0 -0
  252. /synth_ai/{zyk/lms → lm}/caching/dbs.py +0 -0
  253. /synth_ai/{zyk/lms/cost → lm/core}/__init__.py +0 -0
  254. /synth_ai/{zyk/lms → lm}/core/exceptions.py +0 -0
  255. /synth_ai/{zyk/lms/structured_outputs → lm/cost}/__init__.py +0 -0
  256. /synth_ai/{zyk/lms/vendors → lm/structured_outputs}/__init__.py +0 -0
  257. /synth_ai/{zyk/lms → lm}/tools/__init__.py +0 -0
  258. /synth_ai/{zyk/lms → lm}/tools/base.py +0 -0
  259. /synth_ai/{zyk/lms/vendors/core → lm/vendors}/__init__.py +0 -0
  260. /synth_ai/{zyk/lms → lm}/vendors/base.py +0 -0
  261. /synth_ai/{zyk/lms/vendors/local → lm/vendors/core}/__init__.py +0 -0
  262. /synth_ai/{zyk/lms/vendors/supported → lm/vendors/local}/__init__.py +0 -0
  263. /synth_ai/{zyk/lms → lm}/vendors/local/ollama.py +0 -0
  264. {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/WHEEL +0 -0
  265. {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/licenses/LICENSE +0 -0
  266. {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,226 @@
1
+ """Test actual action behavior to debug left/right turn issues."""
2
+
3
+ import pytest
4
+ from synth_ai.environments.examples.minigrid.environment import MiniGridEnvironment
5
+ from synth_ai.environments.examples.minigrid.taskset import DEFAULT_MINIGRID_TASK
6
+
7
+
8
@pytest.mark.asyncio
async def test_initial_state():
    """Check the pose reported by ``initialize()`` for the default task.

    The text observation must place the agent at (1, 1) facing →.
    """
    environment = MiniGridEnvironment(DEFAULT_MINIGRID_TASK)
    initial_obs = await environment.initialize()
    rendered = initial_obs["observation"]

    # Position and heading are both read from the same text observation.
    assert "Agent Position: (1, 1)" in rendered
    assert "Agent Direction: →" in rendered
    print("✓ Initial state verified: position (1,1), direction →")
18
+
19
+
20
@pytest.mark.asyncio
async def test_right_turn_action():
    """Send one 'right' action and require the agent to end up facing ↓.

    Any other heading, or an observation with no recognised heading marker,
    fails the final assertion.
    """
    environment = MiniGridEnvironment(DEFAULT_MINIGRID_TASK)
    await environment.initialize()

    step_obs = await environment.step(
        {"tool": "minigrid_act", "args": {"action": "right"}}
    )
    rendered = step_obs["observation"]

    # (marker, label, is_expected) rows, probed top to bottom; first hit wins.
    headings = (
        ("Agent Direction: ↓", "↓ (down)", True),
        ("Agent Direction: ↑", "↑ (up)", False),
        ("Agent Direction: ←", "← (left)", False),
        ("Agent Direction: →", "→ (right)", False),
    )
    actual_dir, expected = next(
        ((label, ok) for marker, label, ok in headings if marker in rendered),
        ("unknown", False),
    )

    print(f"RIGHT action result: {actual_dir}")
    print("Expected: ↓ (down) for clockwise turn")
    print(f"✓ RIGHT turn working correctly: {expected}")

    assert expected, f"RIGHT turn failed: expected ↓ (down), got {actual_dir}"
53
+
54
+
55
@pytest.mark.asyncio
async def test_left_turn_action():
    """Send one 'left' action and require the agent to end up facing ↑.

    Any other heading, or an observation with no recognised heading marker,
    fails the final assertion.
    """
    environment = MiniGridEnvironment(DEFAULT_MINIGRID_TASK)
    await environment.initialize()

    step_obs = await environment.step(
        {"tool": "minigrid_act", "args": {"action": "left"}}
    )
    rendered = step_obs["observation"]

    # (marker, label, is_expected) rows, probed top to bottom; first hit wins.
    headings = (
        ("Agent Direction: ↑", "↑ (up)", True),
        ("Agent Direction: ↓", "↓ (down)", False),
        ("Agent Direction: ←", "← (left)", False),
        ("Agent Direction: →", "→ (right)", False),
    )
    actual_dir, expected = next(
        ((label, ok) for marker, label, ok in headings if marker in rendered),
        ("unknown", False),
    )

    print(f"LEFT action result: {actual_dir}")
    print("Expected: ↑ (up) for counter-clockwise turn")
    print(f"✓ LEFT turn working correctly: {expected}")

    assert expected, f"LEFT turn failed: expected ↑ (up), got {actual_dir}"
88
+
89
+
90
@pytest.mark.asyncio
async def test_full_rotation_sequence():
    """Turn right four times and verify the headings cycle clockwise.

    The recorded sequence must be → ↓ ← ↑ →, i.e. the agent returns to its
    starting heading after a full rotation.
    """
    environment = MiniGridEnvironment(DEFAULT_MINIGRID_TASK)
    await environment.initialize()

    # Only a starting heading of → is recorded; any other start leaves the
    # list one entry short, which fails the final comparison.
    snapshot = await environment.checkpoint()
    directions = ["→"] if "Agent Direction: →" in snapshot["observation"] else []

    # Arrows are probed in this order after every turn; first match is kept.
    arrows = ("↓", "←", "↑", "→")
    for _ in range(4):
        step_obs = await environment.step(
            {"tool": "minigrid_act", "args": {"action": "right"}}
        )
        rendered = step_obs["observation"]
        seen = next(
            (arrow for arrow in arrows if f"Agent Direction: {arrow}" in rendered),
            None,
        )
        if seen is not None:
            directions.append(seen)

    print(f"Full rotation sequence: {' -> '.join(directions)}")
    print("Expected clockwise: → -> ↓ -> ← -> ↑ -> →")

    expected_sequence = ["→", "↓", "←", "↑", "→"]
    assert directions == expected_sequence, f"Rotation sequence wrong: {directions}"
124
+
125
+
126
@pytest.mark.asyncio
async def test_forward_movement():
    """Walk forward from the start until the agent is blocked by a wall.

    Expects two successful steps, (1,1) -> (2,1) -> (3,1), and then a third
    step that leaves the agent at (3,1) with a ``blocked_by_wall`` result.
    """
    environment = MiniGridEnvironment(DEFAULT_MINIGRID_TASK)
    await environment.initialize()

    forward_call = {"tool": "minigrid_act", "args": {"action": "forward"}}

    # First step while facing the initial direction.
    after_first = await environment.step(forward_call)
    assert "Agent Position: (2, 1)" in after_first["observation"]
    print("✓ Forward movement verified: (1,1) -> (2,1)")

    # Second step.
    after_second = await environment.step(forward_call)
    assert "Agent Position: (3, 1)" in after_second["observation"]
    print("✓ Forward movement verified: (2,1) -> (3,1)")

    # Third step: the agent must stay put and report the wall.
    after_blocked = await environment.step(forward_call)
    assert "Agent Position: (3, 1)" in after_blocked["observation"]
    assert after_blocked.get("last_action_result") == "blocked_by_wall"
    print("✓ Wall blocking verified: stayed at (3,1)")
150
+
151
+
152
@pytest.mark.asyncio
async def test_turn_then_move_sequence():
    """Move to (3,1), turn right, then try to walk down to the goal.

    Only the pose at (3,1) is asserted. Everything after the right turn is
    reported via ``print`` and the return value.

    Returns:
        True when the agent is observed at (3, 3) after turning and moving
        forward twice, otherwise False. The ``__main__`` runner in this
        module reads this value for its summary line.
    """
    environment = MiniGridEnvironment(DEFAULT_MINIGRID_TASK)
    await environment.initialize()

    forward_call = {"tool": "minigrid_act", "args": {"action": "forward"}}
    await environment.step(forward_call)  # (1,1) -> (2,1)
    await environment.step(forward_call)  # (2,1) -> (3,1)

    # Confirm the pose before turning.
    snapshot = await environment.checkpoint()
    assert "Agent Position: (3, 1)" in snapshot["observation"]
    assert "Agent Direction: →" in snapshot["observation"]
    print("✓ At position (3,1) facing right")

    # Turn right; the agent should now face down toward the goal.
    right_call = {"tool": "minigrid_act", "args": {"action": "right"}}
    turned_obs = await environment.step(right_call)
    rendered = turned_obs["observation"]

    if "Agent Direction: ↓" in rendered:
        direction_after_right, facing_goal = "↓ (down)", True
    elif "Agent Direction: ↑" in rendered:
        direction_after_right, facing_goal = "↑ (up)", False
    else:
        direction_after_right, facing_goal = "other", False

    print(f"After RIGHT turn at (3,1): facing {direction_after_right}")
    print("Goal is at (3,3), so agent should face ↓ (down)")
    print(f"✓ Facing toward goal: {facing_goal}")

    # Guard clauses: give up as soon as one stage does not pan out.
    if not facing_goal:
        return False

    moved_obs = await environment.step(forward_call)
    if "Agent Position: (3, 2)" not in moved_obs["observation"]:
        return False
    print("✓ Successfully moved toward goal: (3,1) -> (3,2)")

    # Final step onto the goal cell.
    goal_obs = await environment.step(forward_call)
    if "Agent Position: (3, 3)" not in goal_obs["observation"]:
        return False
    print("✓ SUCCESS: Reached goal at (3,3)!")
    return True
201
+
202
+
203
if __name__ == "__main__":
    import asyncio
    import traceback

    async def run_tests():
        """Run every test coroutine in order and print a short summary.

        Any exception (including a failed assertion) is caught, reported and
        followed by its traceback, so the script itself never raises.
        """
        print("=== TESTING MINIGRID ACTION BEHAVIOR ===")

        try:
            # These checks only assert; their return values are not used.
            for check in (
                test_initial_state,
                test_right_turn_action,
                test_left_turn_action,
                test_full_rotation_sequence,
                test_forward_movement,
            ):
                await check()

            # The last check reports whether the goal was reached.
            success = await test_turn_then_move_sequence()

            print("\n=== SUMMARY ===")
            print(f"Goal reached successfully: {success}")

        except Exception as e:
            print(f"❌ Test failed: {e}")
            traceback.print_exc()

    asyncio.run(run_tests())
@@ -0,0 +1,83 @@
1
+ """Test debug message functionality in MiniGrid."""
2
+
3
+ import pytest
4
+ import asyncio
5
+ from synth_ai.environments.examples.minigrid.environment import MiniGridEnvironment
6
+ from synth_ai.environments.examples.minigrid.taskset import DEFAULT_MINIGRID_TASK
7
+
8
+
9
@pytest.mark.asyncio
async def test_debug_messages_on_movement():
    """Check the debug fields returned after a successful forward step."""
    environment = MiniGridEnvironment(DEFAULT_MINIGRID_TASK)
    await environment.initialize()

    step_obs = await environment.step(
        {"tool": "minigrid_act", "args": {"action": "forward"}}
    )

    # All three debug keys must be present before their values are checked.
    assert "debug_message" in step_obs
    assert "last_action" in step_obs
    assert "last_action_result" in step_obs

    assert step_obs["last_action"] == "forward"
    assert step_obs["last_action_result"] == "moved"
    assert "Moved forward" in step_obs["debug_message"]
25
+
26
+
27
@pytest.mark.asyncio
async def test_debug_messages_on_blocked_movement():
    """Check the debug fields reported when a forward step is blocked."""
    environment = MiniGridEnvironment(DEFAULT_MINIGRID_TASK)
    await environment.initialize()

    # Three forward steps in a row: the first two walk to the edge, and the
    # third is the one expected to be blocked. Only its observation is kept.
    step_obs = None
    for _ in range(3):
        step_obs = await environment.step(
            {"tool": "minigrid_act", "args": {"action": "forward"}}
        )

    blocked_results = ("blocked_by_wall", "blocked_by_boundary")
    assert step_obs["last_action_result"] in blocked_results
    assert "blocked" in step_obs["debug_message"].lower()
45
+
46
+
47
@pytest.mark.asyncio
async def test_debug_messages_on_turn():
    """Check the debug fields reported after a left turn and a right turn."""
    environment = MiniGridEnvironment(DEFAULT_MINIGRID_TASK)
    await environment.initialize()

    # Left first, then right, on the same environment instance.
    for side in ("left", "right"):
        step_obs = await environment.step(
            {"tool": "minigrid_act", "args": {"action": side}}
        )

        assert step_obs["last_action"] == side
        assert step_obs["last_action_result"] == "turned"
        assert f"Turned {side}" in step_obs["debug_message"]
68
+
69
+
70
@pytest.mark.asyncio
async def test_debug_messages_in_observation_text():
    """Check that debug information is embedded in the text observation."""
    environment = MiniGridEnvironment(DEFAULT_MINIGRID_TASK)
    await environment.initialize()

    step_obs = await environment.step(
        {"tool": "minigrid_act", "args": {"action": "forward"}}
    )
    observation_text = step_obs["observation"]

    assert "Debug:" in observation_text
    # Either the text carries the result line, or the structured result
    # already says the step succeeded.
    result_in_text = "Last action result:" in observation_text
    assert result_in_text or step_obs["last_action_result"] == "moved"
@@ -0,0 +1,120 @@
1
+ """Test exploration mechanics in MiniGrid."""
2
+
3
+ import pytest
4
+ import asyncio
5
+ from synth_ai.environments.examples.minigrid.environment import MiniGridEnvironment
6
+ from synth_ai.environments.examples.minigrid.taskset import DEFAULT_MINIGRID_TASK
7
+
8
+
9
@pytest.mark.asyncio
async def test_goal_not_always_visible():
    """Document that the goal may or may not appear in the initial grid view."""
    environment = MiniGridEnvironment(DEFAULT_MINIGRID_TASK)
    first_obs = await environment.initialize()

    # Collect only the rows between the "Grid:" header and the "Legend:"
    # header, so a 'G' in the legend is not mistaken for a goal cell.
    inside_grid = False
    grid_rows = []
    for text_line in first_obs["observation"].split("\n"):
        if "Grid:" in text_line:
            inside_grid = True
            continue
        if "Legend:" in text_line:
            break
        if inside_grid and text_line.strip():
            grid_rows.append(text_line)

    # Whether the goal is visible depends on the seed/layout, so either
    # outcome is accepted; this test only records that behavior.
    goal_seen = "G" in "\n".join(grid_rows)
    assert isinstance(goal_seen, bool)  # Can be True or False
36
+
37
+
38
@pytest.mark.asyncio
async def test_limited_visibility():
    """Check that the initial view marks unseen cells and spans five grid rows."""
    environment = MiniGridEnvironment(DEFAULT_MINIGRID_TASK)
    first_obs = await environment.initialize()
    rendered = first_obs["observation"]

    # '?' symbols in the rendering indicate areas the agent has not seen.
    assert "?" in rendered

    # Gather the non-empty rows printed between "Grid:" and "Legend:".
    visible_rows = []
    collecting = False
    for text_line in rendered.split("\n"):
        if "Grid:" in text_line:
            collecting = True
            continue
        if "Legend:" in text_line:
            break
        if collecting and text_line.strip():
            visible_rows.append(text_line)

    # In the 5x5 environment the agent's view is five rows tall.
    assert len(visible_rows) == 5
63
+
64
+
65
@pytest.mark.asyncio
async def test_exploration_reveals_new_areas():
    """Check that one forward step changes the observation and the position line."""
    environment = MiniGridEnvironment(DEFAULT_MINIGRID_TASK)
    before = await environment.initialize()

    # Step forward once to reach a new cell.
    after = await environment.step(
        {"tool": "minigrid_act", "args": {"action": "forward"}}
    )

    # The rendered text must differ once the agent has moved.
    assert before["observation"] != after["observation"]

    # Compare the first "Agent Position:" line of each observation
    # (None when an observation has no such line).
    position_before = next(
        (row for row in before["observation"].split("\n") if "Agent Position:" in row),
        None,
    )
    position_after = next(
        (row for row in after["observation"].split("\n") if "Agent Position:" in row),
        None,
    )

    assert position_before != position_after
93
+
94
+
95
@pytest.mark.asyncio
async def test_complete_exploration_finds_goal():
    """Test that a known action sequence reaches the goal of the default task.

    The test passes as soon as a step reports ``terminated`` with a positive
    ``total_reward``; it fails explicitly if the whole path is consumed
    without the episode terminating.
    """
    env = MiniGridEnvironment(DEFAULT_MINIGRID_TASK)
    await env.initialize()

    # Known solution path for the default task
    solution_path = [
        "forward",  # Move right to (2,1)
        "forward",  # Move right to (3,1)
        "right",  # Turn to face down
        "forward",  # Move down to (3,2)
        "forward",  # Move down to (3,3) - goal
    ]

    for action in solution_path:
        tool_call = {"tool": "minigrid_act", "args": {"action": action}}
        obs = await env.step(tool_call)

        if obs.get("terminated", False):
            # Should have found the goal
            assert obs.get("total_reward", 0) > 0
            return

    # Use pytest.fail rather than `assert False`: assert statements are
    # removed under `python -O`, which would let this test pass silently.
    pytest.fail("Failed to reach goal with known solution path")
@@ -0,0 +1,214 @@
1
+ """Unit tests for MiniGrid engine."""
2
+
3
+ import asyncio
4
+ import pytest
5
+ import numpy as np
6
+
7
+ from synth_ai.environments.examples.minigrid.engine import (
8
+ MiniGridEngine,
9
+ MiniGridPublicState,
10
+ MiniGridPrivateState,
11
+ MiniGridStepPenaltyComponent,
12
+ )
13
+ from synth_ai.environments.examples.minigrid.taskset import DEFAULT_MINIGRID_TASK
14
+
15
+
16
@pytest.mark.asyncio
async def test_engine_initialization():
    """Check the engine's attributes before and after its first reset."""
    minigrid_engine = MiniGridEngine(DEFAULT_MINIGRID_TASK)

    # Before any reset the engine exposes its configuration and is not
    # yet flagged as initialized.
    assert minigrid_engine.env_name == "MiniGrid-Empty-5x5-v0"
    assert minigrid_engine.seed == 42
    assert minigrid_engine.total_reward == 0.0
    assert not minigrid_engine._initialized

    private_state, public_state = await minigrid_engine._reset_engine()

    # The reset flips the initialized flag and returns a fresh state pair.
    assert minigrid_engine._initialized
    assert isinstance(private_state, MiniGridPrivateState)
    assert isinstance(public_state, MiniGridPublicState)

    # Private state starts with no termination and no accumulated reward.
    assert private_state.terminated is False
    assert private_state.truncated is False
    assert private_state.total_reward == 0.0

    # Public state describes the untouched starting grid.
    assert public_state.grid_array.shape == (5, 5, 3)
    assert public_state.agent_pos == (1, 1)  # Default starting position
    assert public_state.step_count == 0
    assert public_state.mission == "get to the green goal square"
41
+
42
+
43
@pytest.mark.asyncio
async def test_engine_step():
    """Test engine step functionality.

    Takes one forward step and one left turn, checking the returned state
    types, the step counter, the per-step penalty, and the direction update.
    """
    engine = MiniGridEngine(DEFAULT_MINIGRID_TASK)
    await engine._reset_engine()

    # Test moving forward
    priv, pub = await engine._step_engine(2)  # Forward action

    # Check step results
    assert isinstance(priv, MiniGridPrivateState)
    assert isinstance(pub, MiniGridPublicState)
    assert pub.step_count == 1
    # Rewards are floats; compare with a tolerance instead of exact equality.
    assert priv.reward_last == pytest.approx(-0.01)  # Step penalty
    assert priv.total_reward == pytest.approx(-0.01)

    # Test turning
    initial_dir = pub.agent_dir
    priv, pub = await engine._step_engine(0)  # Turn left
    # The test expects a left turn to decrement the direction index modulo 4.
    assert pub.agent_dir == (initial_dir - 1) % 4
    assert pub.step_count == 2
65
+
66
+
67
@pytest.mark.asyncio
async def test_invalid_actions():
    """Ensure the engine rejects unsupported action values with ValueError."""
    minigrid_engine = MiniGridEngine(DEFAULT_MINIGRID_TASK)
    await minigrid_engine._reset_engine()

    # A negative index, the value 7, and a string must all be refused
    # with the same error message.
    for bad_action in (-1, 7, "forward"):
        with pytest.raises(ValueError, match="Invalid action"):
            await minigrid_engine._step_engine(bad_action)
82
+
83
+
84
@pytest.mark.asyncio
async def test_grid_to_array():
    """Validate the array produced by the engine's grid conversion."""
    minigrid_engine = MiniGridEngine(DEFAULT_MINIGRID_TASK)
    await minigrid_engine._reset_engine()

    encoded_grid = minigrid_engine._grid_to_array()

    # Basic container, shape and dtype expectations.
    assert isinstance(encoded_grid, np.ndarray)
    assert encoded_grid.shape == (5, 5, 3)
    assert encoded_grid.dtype == np.uint8

    # The array is indexed with the second position component first.
    position = minigrid_engine.env.unwrapped.agent_pos
    first_component = position[0]
    second_component = position[1]
    agent_channels = encoded_grid[second_component, first_component]
    assert agent_channels[0] == 9  # Agent object type
101
+
102
+
103
@pytest.mark.asyncio
async def test_state_diff():
    """Confirm that state diffs report the fields changed by one forward step."""
    minigrid_engine = MiniGridEngine(DEFAULT_MINIGRID_TASK)
    before_priv, before_pub = await minigrid_engine._reset_engine()

    # Advance once with the forward action.
    after_priv, after_pub = await minigrid_engine._step_engine(2)

    # Public diff: the step counter must be reported with its new value.
    public_changes = after_pub.diff(before_pub)
    assert "step_count" in public_changes
    assert public_changes["step_count"] == 1
    # agent_pos only has to be reported when the step actually moved the agent.
    position_changed = before_pub.agent_pos != after_pub.agent_pos
    if position_changed:
        assert "agent_pos" in public_changes

    # Private diff: both reward fields must be reported.
    private_changes = after_priv.diff(before_priv)
    for field_name in ("reward_last", "total_reward"):
        assert field_name in private_changes
123
+
124
+
125
@pytest.mark.asyncio
async def test_reward_components():
    """Test that the step-penalty reward component scores a step as -0.01."""
    component = MiniGridStepPenaltyComponent()

    # Create a dummy state; MiniGridPublicState is already imported at
    # module level, so no function-scope import is needed here.
    state = MiniGridPublicState(
        grid_array=np.zeros((5, 5, 3)),
        agent_pos=(1, 1),
        agent_dir=0,
        step_count=1,
        max_steps=100,
        mission="test",
    )

    # Test penalty (float result, so compare with a tolerance)
    reward = await component.score(state, 2)
    assert reward == pytest.approx(-0.01)
145
+
146
+
147
@pytest.mark.asyncio
async def test_serialization():
    """Serialize an engine after a couple of steps and inspect the snapshot."""
    minigrid_engine = MiniGridEngine(DEFAULT_MINIGRID_TASK)
    await minigrid_engine._reset_engine()

    # Advance the engine so the snapshot is taken after some activity.
    for action_code in (2, 1):
        await minigrid_engine._step_engine(action_code)

    serialized = await minigrid_engine._serialize_engine()
    captured = serialized.engine_snapshot

    # The snapshot must carry the engine configuration and reward total.
    assert captured["env_name"] == "MiniGrid-Empty-5x5-v0"
    assert captured["seed"] == 42
    assert captured["initialized"] is True
    assert "total_reward" in captured
165
+
166
+
167
@pytest.mark.asyncio
async def test_get_available_actions():
    """Check the size of the action list and spot-check a few of its entries."""
    available = MiniGridEngine(DEFAULT_MINIGRID_TASK).get_available_actions()

    assert len(available) == 7

    # Each checked index must hold an (index, label) pair.
    expected_entries = {
        0: (0, "turn left"),
        2: (2, "move forward"),
        3: (3, "pickup"),
    }
    for index, entry in expected_entries.items():
        assert available[index] == entry
177
+
178
+
179
@pytest.mark.asyncio
async def test_different_environments():
    """Run the engine against a DoorKey task instead of the default task."""
    from synth_ai.environments.examples.minigrid.taskset import (
        MiniGridTaskInstance,
        MiniGridTaskInstanceMetadata,
    )
    from synth_ai.environments.tasks.api import Impetus, Intent
    from uuid import uuid4

    # Describe the DoorKey variant first, then wrap it in a task instance.
    doorkey_metadata = MiniGridTaskInstanceMetadata(
        env_name="MiniGrid-DoorKey-5x5-v0",
        grid_size=(5, 5),
        difficulty="medium",
        has_key=True,
        has_door=True,
    )
    doorkey_task = MiniGridTaskInstance(
        id=uuid4(),
        impetus=Impetus(instructions="Test"),
        intent=Intent(rubric={"goal": "Test"}, gold_trajectories=None, gold_state_diff={}),
        metadata=doorkey_metadata,
        is_reproducible=True,
        initial_engine_snapshot=None,
    )

    doorkey_engine = MiniGridEngine(doorkey_task)
    _, public_state = await doorkey_engine._reset_engine()

    # The public state should reflect the DoorKey mission and grid size.
    assert public_state.mission == "open the door then get to the goal"
    assert public_state.grid_array.shape == (5, 5, 3)
211
+
212
+
213
if __name__ == "__main__":
    # pytest.main() is a synchronous call that returns an exit code, not a
    # coroutine, so wrapping it in asyncio.run() raises ValueError once the
    # tests finish. Propagate the exit code to the shell instead.
    raise SystemExit(pytest.main([__file__, "-v"]))