synth-ai 0.1.9__py3-none-any.whl → 0.2.1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (266)
  1. synth_ai/__init__.py +28 -2
  2. synth_ai/core/system.py +4 -0
  3. synth_ai/environments/__init__.py +35 -0
  4. synth_ai/environments/environment/__init__.py +1 -0
  5. synth_ai/environments/environment/artifacts/__init__.py +1 -0
  6. synth_ai/environments/environment/artifacts/base.py +50 -0
  7. synth_ai/environments/environment/core.py +22 -0
  8. synth_ai/environments/environment/db/__init__.py +1 -0
  9. synth_ai/environments/environment/db/sqlite.py +45 -0
  10. synth_ai/environments/environment/registry.py +24 -0
  11. synth_ai/environments/environment/resources/sqlite.py +46 -0
  12. synth_ai/environments/environment/results.py +1 -0
  13. synth_ai/environments/environment/rewards/__init__.py +1 -0
  14. synth_ai/environments/environment/rewards/core.py +28 -0
  15. synth_ai/environments/environment/shared_engine.py +26 -0
  16. synth_ai/environments/environment/tools/__init__.py +34 -0
  17. synth_ai/environments/examples/__init__.py +1 -0
  18. synth_ai/environments/examples/crafter_classic/__init__.py +8 -0
  19. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +58 -0
  20. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +152 -0
  21. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +1194 -0
  22. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +51 -0
  23. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +872 -0
  24. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1412 -0
  25. synth_ai/environments/examples/crafter_classic/agent_demos/test_crafter_react_agent.py +1110 -0
  26. synth_ai/environments/examples/crafter_classic/config_logging.py +111 -0
  27. synth_ai/environments/examples/crafter_classic/engine.py +502 -0
  28. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +63 -0
  29. synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +5 -0
  30. synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +74 -0
  31. synth_ai/environments/examples/crafter_classic/environment.py +255 -0
  32. synth_ai/environments/examples/crafter_classic/taskset.py +228 -0
  33. synth_ai/environments/examples/enron/agent_demos/test_synth_react.py +535 -0
  34. synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +156 -0
  35. synth_ai/environments/examples/enron/art_helpers/local_email_db.py +280 -0
  36. synth_ai/environments/examples/enron/art_helpers/types_enron.py +24 -0
  37. synth_ai/environments/examples/enron/engine.py +291 -0
  38. synth_ai/environments/examples/enron/environment.py +165 -0
  39. synth_ai/environments/examples/enron/taskset.py +112 -0
  40. synth_ai/environments/examples/enron/units/keyword_stats.py +111 -0
  41. synth_ai/environments/examples/enron/units/test_email_index.py +8 -0
  42. synth_ai/environments/examples/minigrid/__init__.py +48 -0
  43. synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +1188 -0
  44. synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +47 -0
  45. synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +562 -0
  46. synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +220 -0
  47. synth_ai/environments/examples/minigrid/agent_demos/test_minigrid_react_agent.py +393 -0
  48. synth_ai/environments/examples/minigrid/engine.py +589 -0
  49. synth_ai/environments/examples/minigrid/environment.py +274 -0
  50. synth_ai/environments/examples/minigrid/environment_mapping.py +242 -0
  51. synth_ai/environments/examples/minigrid/puzzle_loader.py +416 -0
  52. synth_ai/environments/examples/minigrid/taskset.py +583 -0
  53. synth_ai/environments/examples/minigrid/units/test_action_behavior.py +226 -0
  54. synth_ai/environments/examples/minigrid/units/test_debug_messages.py +83 -0
  55. synth_ai/environments/examples/minigrid/units/test_exploration.py +120 -0
  56. synth_ai/environments/examples/minigrid/units/test_minigrid_engine.py +214 -0
  57. synth_ai/environments/examples/minigrid/units/test_minigrid_environment.py +238 -0
  58. synth_ai/environments/examples/minigrid/units/test_minigrid_environment_mapping.py +301 -0
  59. synth_ai/environments/examples/minigrid/units/test_minigrid_taskset.py +210 -0
  60. synth_ai/environments/examples/nethack/__init__.py +7 -0
  61. synth_ai/environments/examples/nethack/achievements.py +337 -0
  62. synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +981 -0
  63. synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +74 -0
  64. synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +832 -0
  65. synth_ai/environments/examples/nethack/agent_demos/test_nethack_react_agent.py +1112 -0
  66. synth_ai/environments/examples/nethack/engine.py +738 -0
  67. synth_ai/environments/examples/nethack/environment.py +255 -0
  68. synth_ai/environments/examples/nethack/helpers/__init__.py +42 -0
  69. synth_ai/environments/examples/nethack/helpers/action_mapping.py +301 -0
  70. synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +401 -0
  71. synth_ai/environments/examples/nethack/helpers/observation_utils.py +433 -0
  72. synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +201 -0
  73. synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +268 -0
  74. synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +308 -0
  75. synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +430 -0
  76. synth_ai/environments/examples/nethack/taskset.py +323 -0
  77. synth_ai/environments/examples/nethack/units/test_nethack_engine.py +277 -0
  78. synth_ai/environments/examples/nethack/units/test_nethack_environment.py +281 -0
  79. synth_ai/environments/examples/nethack/units/test_nethack_taskset.py +213 -0
  80. synth_ai/environments/examples/nethack/units/test_recording.py +307 -0
  81. synth_ai/environments/examples/red/__init__.py +7 -0
  82. synth_ai/environments/examples/red/agent_demos/__init__.py +1 -0
  83. synth_ai/environments/examples/red/agent_demos/test_synth_react.py +1471 -0
  84. synth_ai/environments/examples/red/config_logging.py +110 -0
  85. synth_ai/environments/examples/red/engine.py +693 -0
  86. synth_ai/environments/examples/red/engine_helpers/__init__.py +1 -0
  87. synth_ai/environments/examples/red/engine_helpers/memory_map.py +28 -0
  88. synth_ai/environments/examples/red/engine_helpers/reward_components.py +275 -0
  89. synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +142 -0
  90. synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +56 -0
  91. synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +283 -0
  92. synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +149 -0
  93. synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +137 -0
  94. synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +56 -0
  95. synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +330 -0
  96. synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +120 -0
  97. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +558 -0
  98. synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +312 -0
  99. synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +147 -0
  100. synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +246 -0
  101. synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +367 -0
  102. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +139 -0
  103. synth_ai/environments/examples/red/environment.py +235 -0
  104. synth_ai/environments/examples/red/taskset.py +77 -0
  105. synth_ai/environments/examples/red/test_fixes.py +125 -0
  106. synth_ai/environments/examples/red/test_fixes_mock.py +148 -0
  107. synth_ai/environments/examples/red/units/__init__.py +1 -0
  108. synth_ai/environments/examples/red/units/test_basic_functionality.py +97 -0
  109. synth_ai/environments/examples/red/units/test_button_press_requirements.py +217 -0
  110. synth_ai/environments/examples/red/units/test_engine.py +192 -0
  111. synth_ai/environments/examples/red/units/test_environment.py +455 -0
  112. synth_ai/environments/examples/red/units/test_exploration_strategy.py +227 -0
  113. synth_ai/environments/examples/red/units/test_integration.py +217 -0
  114. synth_ai/environments/examples/red/units/test_memory_extraction.py +111 -0
  115. synth_ai/environments/examples/red/units/test_menu_bug_reproduction.py +1100 -0
  116. synth_ai/environments/examples/red/units/test_movement_debug.py +255 -0
  117. synth_ai/environments/examples/red/units/test_pokemon_mcts_debug.py +163 -0
  118. synth_ai/environments/examples/red/units/test_pokemon_mcts_verbose.py +117 -0
  119. synth_ai/environments/examples/red/units/test_red_basic.py +145 -0
  120. synth_ai/environments/examples/red/units/test_red_comprehensive.py +323 -0
  121. synth_ai/environments/examples/red/units/test_retry_movement.py +195 -0
  122. synth_ai/environments/examples/red/units/test_reward_components.py +186 -0
  123. synth_ai/environments/examples/red/units/test_rom_integration.py +260 -0
  124. synth_ai/environments/examples/red/units/test_taskset.py +116 -0
  125. synth_ai/environments/examples/red/units/test_tree.py +448 -0
  126. synth_ai/environments/examples/sokoban/__init__.py +1 -0
  127. synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +900 -0
  128. synth_ai/environments/examples/sokoban/agent_demos/test_dspy_react.py +1 -0
  129. synth_ai/environments/examples/sokoban/agent_demos/test_sokoban_react_agent.py +498 -0
  130. synth_ai/environments/examples/sokoban/agent_demos/test_synth_lats.py +1 -0
  131. synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_locally.py +748 -0
  132. synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_service.py +296 -0
  133. synth_ai/environments/examples/sokoban/engine.py +675 -0
  134. synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +1 -0
  135. synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +656 -0
  136. synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +17 -0
  137. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +3 -0
  138. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +129 -0
  139. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +370 -0
  140. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +331 -0
  141. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +305 -0
  142. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +66 -0
  143. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +114 -0
  144. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +122 -0
  145. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +394 -0
  146. synth_ai/environments/examples/sokoban/environment.py +228 -0
  147. synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +438 -0
  148. synth_ai/environments/examples/sokoban/puzzle_loader.py +311 -0
  149. synth_ai/environments/examples/sokoban/taskset.py +425 -0
  150. synth_ai/environments/examples/sokoban/units/astar_common.py +94 -0
  151. synth_ai/environments/examples/sokoban/units/test_building_task_set.py +49 -0
  152. synth_ai/environments/examples/sokoban/units/test_false_positive.py +120 -0
  153. synth_ai/environments/examples/sokoban/units/test_simple_run_through_environment.py +119 -0
  154. synth_ai/environments/examples/sokoban/units/test_sokoban_environment.py +98 -0
  155. synth_ai/environments/examples/sokoban/units/test_tree.py +364 -0
  156. synth_ai/environments/examples/tictactoe/__init__.py +1 -0
  157. synth_ai/environments/examples/tictactoe/agent_demos/test_synth_react.py +266 -0
  158. synth_ai/environments/examples/tictactoe/agent_demos/test_tictactoe_react_agent.py +470 -0
  159. synth_ai/environments/examples/tictactoe/engine.py +368 -0
  160. synth_ai/environments/examples/tictactoe/environment.py +239 -0
  161. synth_ai/environments/examples/tictactoe/taskset.py +214 -0
  162. synth_ai/environments/examples/tictactoe/units/test_tictactoe_engine.py +393 -0
  163. synth_ai/environments/examples/tictactoe/units/test_tictactoe_environment.py +493 -0
  164. synth_ai/environments/examples/tictactoe/units/test_tictactoe_taskset.py +191 -0
  165. synth_ai/environments/examples/verilog/__init__.py +10 -0
  166. synth_ai/environments/examples/verilog/agent_demos/test_synth_react.py +520 -0
  167. synth_ai/environments/examples/verilog/engine.py +328 -0
  168. synth_ai/environments/examples/verilog/environment.py +349 -0
  169. synth_ai/environments/examples/verilog/taskset.py +418 -0
  170. synth_ai/environments/examples/verilog/units/test_verilog_engine.py +466 -0
  171. synth_ai/environments/examples/verilog/units/test_verilog_environment.py +585 -0
  172. synth_ai/environments/examples/verilog/units/test_verilog_integration.py +383 -0
  173. synth_ai/environments/examples/verilog/units/test_verilog_taskset.py +457 -0
  174. synth_ai/environments/reproducibility/core.py +42 -0
  175. synth_ai/environments/reproducibility/tree.py +364 -0
  176. synth_ai/environments/service/app.py +78 -0
  177. synth_ai/environments/service/core_routes.py +775 -0
  178. synth_ai/environments/service/external_registry.py +57 -0
  179. synth_ai/environments/service/registry.py +9 -0
  180. synth_ai/environments/stateful/__init__.py +1 -0
  181. synth_ai/environments/stateful/core.py +28 -0
  182. synth_ai/environments/stateful/engine.py +21 -0
  183. synth_ai/environments/stateful/state.py +7 -0
  184. synth_ai/environments/tasks/api.py +19 -0
  185. synth_ai/environments/tasks/core.py +78 -0
  186. synth_ai/environments/tasks/filters.py +39 -0
  187. synth_ai/environments/tasks/utils.py +89 -0
  188. synth_ai/environments/v0_observability/history.py +3 -0
  189. synth_ai/environments/v0_observability/log.py +2 -0
  190. synth_ai/lm/caching/constants.py +1 -0
  191. synth_ai/{zyk/lms → lm}/caching/ephemeral.py +4 -8
  192. synth_ai/{zyk/lms → lm}/caching/handler.py +15 -15
  193. synth_ai/{zyk/lms → lm}/caching/initialize.py +2 -4
  194. synth_ai/{zyk/lms → lm}/caching/persistent.py +4 -10
  195. synth_ai/{zyk/lms → lm}/config.py +2 -1
  196. synth_ai/{zyk/lms → lm}/constants.py +2 -2
  197. synth_ai/{zyk/lms → lm}/core/all.py +10 -10
  198. synth_ai/{zyk/lms → lm}/core/main.py +57 -33
  199. synth_ai/{zyk/lms → lm}/core/vendor_clients.py +12 -10
  200. synth_ai/lm/cost/monitor.py +1 -0
  201. synth_ai/lm/cost/statefulness.py +1 -0
  202. synth_ai/lm/provider_support/__init__.py +8 -0
  203. synth_ai/lm/provider_support/anthropic.py +945 -0
  204. synth_ai/lm/provider_support/openai.py +1115 -0
  205. synth_ai/lm/provider_support/suppress_logging.py +31 -0
  206. synth_ai/{zyk/lms → lm}/structured_outputs/handler.py +58 -80
  207. synth_ai/{zyk/lms → lm}/structured_outputs/inject.py +6 -20
  208. synth_ai/{zyk/lms → lm}/structured_outputs/rehabilitate.py +6 -12
  209. synth_ai/{zyk/lms → lm}/vendors/core/anthropic_api.py +21 -30
  210. synth_ai/{zyk/lms → lm}/vendors/core/gemini_api.py +37 -32
  211. synth_ai/{zyk/lms → lm}/vendors/core/mistral_api.py +19 -28
  212. synth_ai/{zyk/lms → lm}/vendors/core/openai_api.py +26 -36
  213. synth_ai/{zyk/lms → lm}/vendors/openai_standard.py +29 -33
  214. synth_ai/{zyk/lms → lm}/vendors/retries.py +1 -1
  215. synth_ai/lm/vendors/supported/__init__.py +0 -0
  216. synth_ai/{zyk/lms → lm}/vendors/supported/custom_endpoint.py +131 -118
  217. synth_ai/{zyk/lms → lm}/vendors/supported/deepseek.py +4 -8
  218. synth_ai/{zyk/lms → lm}/vendors/supported/grok.py +6 -8
  219. synth_ai/{zyk/lms → lm}/vendors/supported/groq.py +1 -1
  220. synth_ai/{zyk/lms → lm}/vendors/supported/ollama.py +2 -2
  221. synth_ai/{zyk/lms → lm}/vendors/supported/openrouter.py +18 -16
  222. synth_ai/{zyk/lms → lm}/vendors/supported/together.py +1 -1
  223. synth_ai/tracing/__init__.py +0 -0
  224. synth_ai/tracing/abstractions.py +224 -0
  225. synth_ai/tracing/base_client.py +91 -0
  226. synth_ai/tracing/client_manager.py +131 -0
  227. synth_ai/tracing/config.py +140 -0
  228. synth_ai/tracing/context.py +146 -0
  229. synth_ai/tracing/decorators.py +679 -0
  230. synth_ai/tracing/events/__init__.py +0 -0
  231. synth_ai/tracing/events/manage.py +147 -0
  232. synth_ai/tracing/events/scope.py +86 -0
  233. synth_ai/tracing/events/store.py +227 -0
  234. synth_ai/tracing/immediate_client.py +152 -0
  235. synth_ai/tracing/local.py +18 -0
  236. synth_ai/tracing/log_client_base.py +74 -0
  237. synth_ai/tracing/retry_queue.py +187 -0
  238. synth_ai/tracing/trackers.py +515 -0
  239. synth_ai/tracing/upload.py +504 -0
  240. synth_ai/tracing/utils.py +9 -0
  241. synth_ai/zyk/__init__.py +28 -2
  242. synth_ai-0.2.1.dev0.dist-info/METADATA +349 -0
  243. synth_ai-0.2.1.dev0.dist-info/RECORD +261 -0
  244. synth_ai/zyk/lms/caching/constants.py +0 -1
  245. synth_ai/zyk/lms/cost/monitor.py +0 -1
  246. synth_ai/zyk/lms/cost/statefulness.py +0 -1
  247. synth_ai-0.1.9.dist-info/METADATA +0 -37
  248. synth_ai-0.1.9.dist-info/RECORD +0 -50
  249. /synth_ai/{zyk/lms/__init__.py → environments/reproducibility/helpers.py} +0 -0
  250. /synth_ai/{zyk/lms/caching → lm}/__init__.py +0 -0
  251. /synth_ai/{zyk/lms/core → lm/caching}/__init__.py +0 -0
  252. /synth_ai/{zyk/lms → lm}/caching/dbs.py +0 -0
  253. /synth_ai/{zyk/lms/cost → lm/core}/__init__.py +0 -0
  254. /synth_ai/{zyk/lms → lm}/core/exceptions.py +0 -0
  255. /synth_ai/{zyk/lms/structured_outputs → lm/cost}/__init__.py +0 -0
  256. /synth_ai/{zyk/lms/vendors → lm/structured_outputs}/__init__.py +0 -0
  257. /synth_ai/{zyk/lms → lm}/tools/__init__.py +0 -0
  258. /synth_ai/{zyk/lms → lm}/tools/base.py +0 -0
  259. /synth_ai/{zyk/lms/vendors/core → lm/vendors}/__init__.py +0 -0
  260. /synth_ai/{zyk/lms → lm}/vendors/base.py +0 -0
  261. /synth_ai/{zyk/lms/vendors/local → lm/vendors/core}/__init__.py +0 -0
  262. /synth_ai/{zyk/lms/vendors/supported → lm/vendors/local}/__init__.py +0 -0
  263. /synth_ai/{zyk/lms → lm}/vendors/local/ollama.py +0 -0
  264. {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/WHEEL +0 -0
  265. {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/licenses/LICENSE +0 -0
  266. {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,238 @@
1
+ """Unit tests for MiniGrid environment."""
2
+
3
+ import asyncio
4
+ import pytest
5
+ import json
6
+
7
+ from synth_ai.environments.examples.minigrid.environment import (
8
+ MiniGridEnvironment,
9
+ MiniGridInteractTool,
10
+ )
11
+ from synth_ai.environments.examples.minigrid.taskset import DEFAULT_MINIGRID_TASK
12
+ from synth_ai.environments.environment.tools import EnvToolCall
13
+
14
+
15
@pytest.mark.asyncio
async def test_environment_initialization():
    """Verify a fresh environment's metadata and its first observation."""
    environment = MiniGridEnvironment(DEFAULT_MINIGRID_TASK)

    # These attributes are checked before initialize() is awaited.
    assert environment.name == "MiniGridEnvironment"
    assert environment.task_instance == DEFAULT_MINIGRID_TASK

    initial_obs = await environment.initialize()

    # The initial observation is a dict carrying these three keys.
    assert isinstance(initial_obs, dict)
    for key in ("observation", "terminated", "total_reward"):
        assert key in initial_obs
    assert initial_obs["terminated"] is False
    assert initial_obs["total_reward"] == 0.0

    # The rendered text must contain each of these section headers.
    rendered = initial_obs["observation"]
    for header in ("Mission:", "Grid:", "Legend:"):
        assert header in rendered
40
+
41
+
42
@pytest.mark.asyncio
async def test_environment_step():
    """Check that stepping returns an observation carrying the step penalty.

    Both a forward move and a right turn are expected to report a
    ``reward_last`` of -0.01.
    """
    env = MiniGridEnvironment(DEFAULT_MINIGRID_TASK)
    await env.initialize()

    # Test forward action
    tool_call = {"name": "minigrid_act", "args": {"action": "forward"}}
    obs = await env.step(tool_call)

    assert isinstance(obs, dict)
    assert "observation" in obs
    assert "reward_last" in obs
    # Compare with a tolerance: exact float equality breaks as soon as the
    # reward is produced by arithmetic rather than copied from a literal.
    assert obs["reward_last"] == pytest.approx(-0.01)  # Step penalty

    # Test turn action
    tool_call = {"name": "minigrid_act", "args": {"action": "right"}}
    obs = await env.step(tool_call)
    assert obs["reward_last"] == pytest.approx(-0.01)
61
+
62
+
63
@pytest.mark.asyncio
async def test_tool_validation():
    """Exercise ``validate_tool_calls`` with each accepted input shape."""
    env = MiniGridEnvironment(DEFAULT_MINIGRID_TASK)

    # (raw tool call, expected normalized args), in the order they are checked.
    accepted_shapes = [
        # Format 1: dict with "name" and "args"
        (
            {"name": "minigrid_act", "args": {"action": "forward"}},
            {"action": "forward"},
        ),
        # Format 2: dict with "tool" and "args"
        (
            {"tool": "minigrid_act", "args": {"action": "left"}},
            {"action": "left"},
        ),
        # Format 3: list of tool calls
        (
            [{"name": "minigrid_act", "args": {"action": "right"}}],
            {"action": "right"},
        ),
        # Format 4: "input" field instead of "args"
        (
            {"name": "minigrid_act", "input": {"action": "pickup"}},
            {"action": "pickup"},
        ),
        # Format 5: "input" given as a JSON string
        (
            {"name": "minigrid_act", "input": '{"action": "drop"}'},
            {"action": "drop"},
        ),
    ]
    for raw_call, expected_args in accepted_shapes:
        validated = env.validate_tool_calls(raw_call)
        assert validated.tool == "minigrid_act"
        assert validated.args == expected_args

    # An unrecognized tool name must be rejected.
    unknown = {"name": "invalid_tool", "args": {"action": "forward"}}
    with pytest.raises(ValueError, match="Unknown tool"):
        env.validate_tool_calls(unknown)
97
+
98
+
99
@pytest.mark.asyncio
async def test_invalid_actions():
    """An unrecognized action name must surface an error in the observation."""
    env = MiniGridEnvironment(DEFAULT_MINIGRID_TASK)
    await env.initialize()

    bad_call = {"name": "minigrid_act", "args": {"action": "invalid_action"}}
    result = await env.step(bad_call)

    # The step is expected to return an observation with an "error" entry
    # rather than raise.
    assert "error" in result
    assert "Invalid action" in result["error"]
112
+
113
+
114
@pytest.mark.asyncio
async def test_checkpoint_and_terminate():
    """Check the keys reported by ``checkpoint()`` and ``terminate()``."""
    env = MiniGridEnvironment(DEFAULT_MINIGRID_TASK)
    await env.initialize()

    # Advance the episode by two steps before taking the checkpoint.
    for action in ("forward", "right"):
        await env.step({"name": "minigrid_act", "args": {"action": action}})

    snapshot = await env.checkpoint()
    assert isinstance(snapshot, dict)
    for key in ("mission", "total_steps", "total_reward"):
        assert key in snapshot

    closing = await env.terminate()
    assert isinstance(closing, dict)
    for key in ("mission", "final_position"):
        assert key in closing
136
+
137
+
138
@pytest.mark.asyncio
async def test_interact_tool():
    """Call ``MiniGridInteractTool`` directly with a valid and an invalid action."""
    from synth_ai.environments.examples.minigrid.engine import MiniGridEngine

    engine = MiniGridEngine(DEFAULT_MINIGRID_TASK)
    await engine._reset_engine()
    interact = MiniGridInteractTool(engine)

    # Valid action: the result is OK and the payload carries these keys.
    good_call = EnvToolCall(tool="minigrid_act", args={"action": "forward"})
    good_result = await interact(good_call)
    assert good_result.ok is True
    for key in ("message", "public_state", "private_state"):
        assert key in good_result.payload

    # Invalid action: the result is not OK and the error names the problem.
    bad_call = EnvToolCall(tool="minigrid_act", args={"action": "invalid"})
    bad_result = await interact(bad_call)
    assert bad_result.ok is False
    assert "Invalid action" in bad_result.error
163
+
164
+
165
@pytest.mark.asyncio
async def test_observation_callables():
    """Check the keys produced by the step and checkpoint observation callables."""
    from synth_ai.environments.examples.minigrid.engine import (
        MiniGridObservationCallable,
        MiniGridCheckpointObservationCallable,
    )

    env = MiniGridEnvironment(DEFAULT_MINIGRID_TASK)
    await env.initialize()

    # The engine returns (private, public); get_observation takes (public, private).
    priv, pub = env.engine.get_current_states_for_observation()

    # Per-step observation
    step_obs = await MiniGridObservationCallable().get_observation(pub, priv)
    for key in ("observation", "terminated", "reward_last"):
        assert key in step_obs

    # Checkpoint observation
    ckpt_obs = await MiniGridCheckpointObservationCallable().get_observation(pub, priv)
    for key in ("mission", "final_position", "total_steps"):
        assert key in ckpt_obs
194
+
195
+
196
@pytest.mark.asyncio
async def test_full_episode():
    """Run a short scripted episode from initialization through termination."""
    env = MiniGridEnvironment(DEFAULT_MINIGRID_TASK)

    obs = await env.initialize()
    assert obs["terminated"] is False

    # Play the scripted actions, stopping early if the episode ends.
    scripted_actions = ("forward", "forward", "right", "forward", "forward")
    for action_name in scripted_actions:
        obs = await env.step({"name": "minigrid_act", "args": {"action": action_name}})
        if obs["terminated"]:
            break

    # Accumulated step penalties should leave the total negative.
    # NOTE(review): assumes this action sequence never ends with a net positive
    # reward (e.g. by reaching the goal) — TODO confirm against the default task.
    assert obs["total_reward"] < 0

    summary = await env.terminate()
    assert "success" in summary
218
+
219
+
220
@pytest.mark.asyncio
async def test_serialization():
    """Check the structure returned by ``_serialize_engine`` after one step."""
    env = MiniGridEnvironment(DEFAULT_MINIGRID_TASK)
    await env.initialize()

    # Take one step so the snapshot is not of the initial state.
    await env.step({"name": "minigrid_act", "args": {"action": "forward"}})

    snapshot = await env._serialize_engine()

    for key in ("task_instance_dict", "engine_snapshot"):
        assert key in snapshot
    assert snapshot["engine_snapshot"]["env_name"] == "MiniGrid-Empty-5x5-v0"
235
+
236
+
237
if __name__ == "__main__":
    # pytest.main() is synchronous and returns an exit code, not a coroutine,
    # so it must not be passed to asyncio.run(). Propagate the exit code so a
    # failing run yields a non-zero process status.
    raise SystemExit(pytest.main([__file__, "-v"]))
@@ -0,0 +1,301 @@
1
+ """Unit tests for MiniGrid environment mapping functionality."""
2
+
3
+ import pytest
4
+ from synth_ai.environments.examples.minigrid.environment_mapping import (
5
+ get_environment_from_seed,
6
+ get_difficulty_from_seed,
7
+ get_minigrid_environment,
8
+ get_environment_by_difficulty,
9
+ get_curriculum_environment,
10
+ validate_environment_name,
11
+ get_all_environments,
12
+ get_environments_by_difficulty,
13
+ ENVIRONMENT_MAPPING,
14
+ DIFFICULTY_MAPPING,
15
+ )
16
+
17
+
18
class TestEnvironmentMapping:
    """Tests for the seed, difficulty, and curriculum lookups of the MiniGrid mapping."""

    def test_seed_to_environment_mapping(self):
        """Test basic seed to environment mapping."""
        # Specific known mappings
        assert get_environment_from_seed(0) == "MiniGrid-Empty-5x5-v0"
        assert get_environment_from_seed(1) == "MiniGrid-Empty-6x6-v0"
        assert get_environment_from_seed(7) == "MiniGrid-DoorKey-5x5-v0"
        assert get_environment_from_seed(42) == "MiniGrid-KeyCorridorS6R3-v0"
        assert get_environment_from_seed(59) == "MiniGrid-Empty-16x16-v0"

        # Seeds past the end of the table wrap around (modulo 60)
        assert get_environment_from_seed(60) == get_environment_from_seed(0)
        assert get_environment_from_seed(61) == get_environment_from_seed(1)
        assert get_environment_from_seed(120) == get_environment_from_seed(0)

        # Negative seeds (hash behavior, not simple modulo): only validity is checked
        assert get_environment_from_seed(-1) in ENVIRONMENT_MAPPING.values()
        assert get_environment_from_seed(-5) in ENVIRONMENT_MAPPING.values()

    def test_difficulty_mapping(self):
        """Test difficulty level mapping at both ends of every seed range."""
        boundary_cases = [
            (0, "ultra-easy"),  # ultra-easy (0-4)
            (4, "ultra-easy"),
            (5, "easy"),  # easy (5-14)
            (14, "easy"),
            (15, "medium"),  # medium (15-29)
            (29, "medium"),
            (30, "hard"),  # hard (30-44)
            (44, "hard"),
            (45, "ultra-hard"),  # ultra-hard (45-54)
            (54, "ultra-hard"),
            (55, "specialized"),  # specialized (55-59)
            (59, "specialized"),
        ]
        for seed, expected in boundary_cases:
            assert get_difficulty_from_seed(seed) == expected, f"seed={seed}"

    def test_combined_mapping(self):
        """Test the combined environment and difficulty mapping."""
        test_seeds = [0, 7, 15, 30, 45, 55, 42, 1337]

        for seed in test_seeds:
            env_name = get_environment_from_seed(seed)
            difficulty = get_difficulty_from_seed(seed)

            combined_env, combined_diff = get_minigrid_environment(seed)

            # The combined helper must agree with the two individual lookups.
            assert env_name == combined_env
            assert difficulty == combined_diff

    def test_environment_by_difficulty(self):
        """Test selecting environments by difficulty level."""
        for difficulty in DIFFICULTY_MAPPING:
            # Two different seeds must each yield a known environment name.
            # Distinctness between them is not required and not checked here.
            for seed in (0, 1):
                env_name = get_environment_by_difficulty(difficulty, seed)
                assert env_name in ENVIRONMENT_MAPPING.values()

    def test_environment_by_difficulty_invalid(self):
        """Test invalid difficulty levels."""
        with pytest.raises(ValueError, match="Unknown difficulty"):
            get_environment_by_difficulty("invalid", 0)

        with pytest.raises(ValueError, match="Unknown difficulty"):
            get_environment_by_difficulty("super-easy", 0)

    def test_curriculum_environment(self):
        """Test curriculum-based environment selection."""
        progress_cases = [
            (0.0, "ultra-easy"),  # early stage (0.0-0.2)
            (0.1, "ultra-easy"),
            (0.3, "easy"),  # beginning (0.2-0.4)
            (0.5, "medium"),  # intermediate (0.4-0.6)
            (0.7, "hard"),  # advanced (0.6-0.8)
            (0.85, "ultra-hard"),  # expert (0.8-0.9)
            (0.95, "specialized"),  # master (0.9-1.0)
        ]
        for progress, expected in progress_cases:
            # Only the difficulty half of the returned pair is checked.
            _, difficulty = get_curriculum_environment(progress, 0)
            assert difficulty == expected, f"progress={progress}"

    def test_environment_validation(self):
        """Test environment name validation."""
        # Valid environments
        assert validate_environment_name("MiniGrid-Empty-5x5-v0") is True
        assert validate_environment_name("MiniGrid-FourRooms-v0") is True
        assert validate_environment_name("MiniGrid-DoorKey-5x5-v0") is True

        # Invalid environments
        assert validate_environment_name("MiniGrid-NonExistent-v0") is False
        assert validate_environment_name("InvalidEnv") is False
        assert validate_environment_name("") is False

    def test_get_all_environments(self):
        """Test getting all environments."""
        all_envs = get_all_environments()

        assert len(all_envs) == 60
        assert "MiniGrid-Empty-5x5-v0" in all_envs
        assert "MiniGrid-FourRooms-v0" in all_envs
        assert "MiniGrid-Empty-16x16-v0" in all_envs

        # Check no duplicates
        assert len(all_envs) == len(set(all_envs))

    def test_get_environments_by_difficulty(self):
        """Test getting environments by difficulty level."""
        expected_counts = {
            "ultra-easy": 5,  # Seeds 0-4
            "easy": 10,  # Seeds 5-14
            "medium": 15,  # Seeds 15-29
            "hard": 15,  # Seeds 30-44
            "ultra-hard": 10,  # Seeds 45-54
            "specialized": 5,  # Seeds 55-59
        }
        for difficulty, count in expected_counts.items():
            assert len(get_environments_by_difficulty(difficulty)) == count, difficulty

        # Invalid difficulty
        with pytest.raises(ValueError, match="Unknown difficulty"):
            get_environments_by_difficulty("invalid")

    def test_hash_seed_parameter(self):
        """Test that both hash_seed modes return valid, repeatable results."""
        for use_hash in (True, False):
            env_name = get_environment_from_seed(100, hash_seed=use_hash)
            difficulty = get_difficulty_from_seed(100, hash_seed=use_hash)

            # The two modes are not required to disagree for any particular
            # seed, so only validity and repeatability are asserted.
            assert env_name in ENVIRONMENT_MAPPING.values()
            assert difficulty in DIFFICULTY_MAPPING

            assert get_environment_from_seed(100, hash_seed=use_hash) == env_name
            assert get_difficulty_from_seed(100, hash_seed=use_hash) == difficulty

    def test_environment_mapping_completeness(self):
        """Test that the environment mapping is complete."""
        # All 60 environments are mapped
        assert len(ENVIRONMENT_MAPPING) == 60

        # All indices 0-59 are present
        for i in range(60):
            assert i in ENVIRONMENT_MAPPING

        # All environments are unique
        env_names = list(ENVIRONMENT_MAPPING.values())
        assert len(env_names) == len(set(env_names))

    def test_difficulty_mapping_completeness(self):
        """Test that the difficulty mapping is complete."""
        # All difficulty levels are mapped
        expected_difficulties = {
            "ultra-easy",
            "easy",
            "medium",
            "hard",
            "ultra-hard",
            "specialized",
        }
        assert set(DIFFICULTY_MAPPING) == expected_difficulties

        # Ranges are correct
        assert DIFFICULTY_MAPPING["ultra-easy"] == (0, 4)
        assert DIFFICULTY_MAPPING["easy"] == (5, 14)
        assert DIFFICULTY_MAPPING["medium"] == (15, 29)
        assert DIFFICULTY_MAPPING["hard"] == (30, 44)
        assert DIFFICULTY_MAPPING["ultra-hard"] == (45, 54)
        assert DIFFICULTY_MAPPING["specialized"] == (55, 59)

        # Ranges are contiguous and complete: each one starts right after the
        # previous one ends, beginning at 0 and finishing at 59.
        current_end = -1
        for start, end in sorted(DIFFICULTY_MAPPING.values()):
            assert start == current_end + 1
            current_end = end

        assert current_end == 59

    def test_reproducibility(self):
        """Test that seed mapping is reproducible."""
        test_seeds = [0, 42, 1337, -5, 999]

        for seed in test_seeds:
            # Multiple calls should return same result
            env1 = get_environment_from_seed(seed)
            env2 = get_environment_from_seed(seed)
            assert env1 == env2

            diff1 = get_difficulty_from_seed(seed)
            diff2 = get_difficulty_from_seed(seed)
            assert diff1 == diff2

    def test_extreme_seeds(self):
        """Test with extreme seed values."""
        extreme_seeds = [
            0,
            1,
            -1,
            999999,
            -999999,
            2**31 - 1,
            -(2**31),  # 32-bit int limits
            2**63 - 1,
            -(2**63),  # 64-bit int limits
        ]

        for seed in extreme_seeds:
            env_name = get_environment_from_seed(seed)
            difficulty = get_difficulty_from_seed(seed)

            assert env_name in ENVIRONMENT_MAPPING.values()
            assert difficulty in DIFFICULTY_MAPPING

    def test_comprehensive_mapping_coverage(self):
        """Test that every mapped environment name follows the MiniGrid naming scheme."""
        # Only the naming convention is enforced. Matching names against
        # feature keywords was never a strict requirement, so no such check
        # is made here.
        for env_name in ENVIRONMENT_MAPPING.values():
            assert isinstance(env_name, str)
            assert env_name.startswith("MiniGrid-")
            assert env_name.endswith("-v0")