synth-ai 0.1.9__py3-none-any.whl → 0.2.1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (266) hide show
  1. synth_ai/__init__.py +28 -2
  2. synth_ai/core/system.py +4 -0
  3. synth_ai/environments/__init__.py +35 -0
  4. synth_ai/environments/environment/__init__.py +1 -0
  5. synth_ai/environments/environment/artifacts/__init__.py +1 -0
  6. synth_ai/environments/environment/artifacts/base.py +50 -0
  7. synth_ai/environments/environment/core.py +22 -0
  8. synth_ai/environments/environment/db/__init__.py +1 -0
  9. synth_ai/environments/environment/db/sqlite.py +45 -0
  10. synth_ai/environments/environment/registry.py +24 -0
  11. synth_ai/environments/environment/resources/sqlite.py +46 -0
  12. synth_ai/environments/environment/results.py +1 -0
  13. synth_ai/environments/environment/rewards/__init__.py +1 -0
  14. synth_ai/environments/environment/rewards/core.py +28 -0
  15. synth_ai/environments/environment/shared_engine.py +26 -0
  16. synth_ai/environments/environment/tools/__init__.py +34 -0
  17. synth_ai/environments/examples/__init__.py +1 -0
  18. synth_ai/environments/examples/crafter_classic/__init__.py +8 -0
  19. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +58 -0
  20. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +152 -0
  21. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +1194 -0
  22. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +51 -0
  23. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +872 -0
  24. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1412 -0
  25. synth_ai/environments/examples/crafter_classic/agent_demos/test_crafter_react_agent.py +1110 -0
  26. synth_ai/environments/examples/crafter_classic/config_logging.py +111 -0
  27. synth_ai/environments/examples/crafter_classic/engine.py +502 -0
  28. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +63 -0
  29. synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +5 -0
  30. synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +74 -0
  31. synth_ai/environments/examples/crafter_classic/environment.py +255 -0
  32. synth_ai/environments/examples/crafter_classic/taskset.py +228 -0
  33. synth_ai/environments/examples/enron/agent_demos/test_synth_react.py +535 -0
  34. synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +156 -0
  35. synth_ai/environments/examples/enron/art_helpers/local_email_db.py +280 -0
  36. synth_ai/environments/examples/enron/art_helpers/types_enron.py +24 -0
  37. synth_ai/environments/examples/enron/engine.py +291 -0
  38. synth_ai/environments/examples/enron/environment.py +165 -0
  39. synth_ai/environments/examples/enron/taskset.py +112 -0
  40. synth_ai/environments/examples/enron/units/keyword_stats.py +111 -0
  41. synth_ai/environments/examples/enron/units/test_email_index.py +8 -0
  42. synth_ai/environments/examples/minigrid/__init__.py +48 -0
  43. synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +1188 -0
  44. synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +47 -0
  45. synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +562 -0
  46. synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +220 -0
  47. synth_ai/environments/examples/minigrid/agent_demos/test_minigrid_react_agent.py +393 -0
  48. synth_ai/environments/examples/minigrid/engine.py +589 -0
  49. synth_ai/environments/examples/minigrid/environment.py +274 -0
  50. synth_ai/environments/examples/minigrid/environment_mapping.py +242 -0
  51. synth_ai/environments/examples/minigrid/puzzle_loader.py +416 -0
  52. synth_ai/environments/examples/minigrid/taskset.py +583 -0
  53. synth_ai/environments/examples/minigrid/units/test_action_behavior.py +226 -0
  54. synth_ai/environments/examples/minigrid/units/test_debug_messages.py +83 -0
  55. synth_ai/environments/examples/minigrid/units/test_exploration.py +120 -0
  56. synth_ai/environments/examples/minigrid/units/test_minigrid_engine.py +214 -0
  57. synth_ai/environments/examples/minigrid/units/test_minigrid_environment.py +238 -0
  58. synth_ai/environments/examples/minigrid/units/test_minigrid_environment_mapping.py +301 -0
  59. synth_ai/environments/examples/minigrid/units/test_minigrid_taskset.py +210 -0
  60. synth_ai/environments/examples/nethack/__init__.py +7 -0
  61. synth_ai/environments/examples/nethack/achievements.py +337 -0
  62. synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +981 -0
  63. synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +74 -0
  64. synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +832 -0
  65. synth_ai/environments/examples/nethack/agent_demos/test_nethack_react_agent.py +1112 -0
  66. synth_ai/environments/examples/nethack/engine.py +738 -0
  67. synth_ai/environments/examples/nethack/environment.py +255 -0
  68. synth_ai/environments/examples/nethack/helpers/__init__.py +42 -0
  69. synth_ai/environments/examples/nethack/helpers/action_mapping.py +301 -0
  70. synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +401 -0
  71. synth_ai/environments/examples/nethack/helpers/observation_utils.py +433 -0
  72. synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +201 -0
  73. synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +268 -0
  74. synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +308 -0
  75. synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +430 -0
  76. synth_ai/environments/examples/nethack/taskset.py +323 -0
  77. synth_ai/environments/examples/nethack/units/test_nethack_engine.py +277 -0
  78. synth_ai/environments/examples/nethack/units/test_nethack_environment.py +281 -0
  79. synth_ai/environments/examples/nethack/units/test_nethack_taskset.py +213 -0
  80. synth_ai/environments/examples/nethack/units/test_recording.py +307 -0
  81. synth_ai/environments/examples/red/__init__.py +7 -0
  82. synth_ai/environments/examples/red/agent_demos/__init__.py +1 -0
  83. synth_ai/environments/examples/red/agent_demos/test_synth_react.py +1471 -0
  84. synth_ai/environments/examples/red/config_logging.py +110 -0
  85. synth_ai/environments/examples/red/engine.py +693 -0
  86. synth_ai/environments/examples/red/engine_helpers/__init__.py +1 -0
  87. synth_ai/environments/examples/red/engine_helpers/memory_map.py +28 -0
  88. synth_ai/environments/examples/red/engine_helpers/reward_components.py +275 -0
  89. synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +142 -0
  90. synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +56 -0
  91. synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +283 -0
  92. synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +149 -0
  93. synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +137 -0
  94. synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +56 -0
  95. synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +330 -0
  96. synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +120 -0
  97. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +558 -0
  98. synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +312 -0
  99. synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +147 -0
  100. synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +246 -0
  101. synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +367 -0
  102. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +139 -0
  103. synth_ai/environments/examples/red/environment.py +235 -0
  104. synth_ai/environments/examples/red/taskset.py +77 -0
  105. synth_ai/environments/examples/red/test_fixes.py +125 -0
  106. synth_ai/environments/examples/red/test_fixes_mock.py +148 -0
  107. synth_ai/environments/examples/red/units/__init__.py +1 -0
  108. synth_ai/environments/examples/red/units/test_basic_functionality.py +97 -0
  109. synth_ai/environments/examples/red/units/test_button_press_requirements.py +217 -0
  110. synth_ai/environments/examples/red/units/test_engine.py +192 -0
  111. synth_ai/environments/examples/red/units/test_environment.py +455 -0
  112. synth_ai/environments/examples/red/units/test_exploration_strategy.py +227 -0
  113. synth_ai/environments/examples/red/units/test_integration.py +217 -0
  114. synth_ai/environments/examples/red/units/test_memory_extraction.py +111 -0
  115. synth_ai/environments/examples/red/units/test_menu_bug_reproduction.py +1100 -0
  116. synth_ai/environments/examples/red/units/test_movement_debug.py +255 -0
  117. synth_ai/environments/examples/red/units/test_pokemon_mcts_debug.py +163 -0
  118. synth_ai/environments/examples/red/units/test_pokemon_mcts_verbose.py +117 -0
  119. synth_ai/environments/examples/red/units/test_red_basic.py +145 -0
  120. synth_ai/environments/examples/red/units/test_red_comprehensive.py +323 -0
  121. synth_ai/environments/examples/red/units/test_retry_movement.py +195 -0
  122. synth_ai/environments/examples/red/units/test_reward_components.py +186 -0
  123. synth_ai/environments/examples/red/units/test_rom_integration.py +260 -0
  124. synth_ai/environments/examples/red/units/test_taskset.py +116 -0
  125. synth_ai/environments/examples/red/units/test_tree.py +448 -0
  126. synth_ai/environments/examples/sokoban/__init__.py +1 -0
  127. synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +900 -0
  128. synth_ai/environments/examples/sokoban/agent_demos/test_dspy_react.py +1 -0
  129. synth_ai/environments/examples/sokoban/agent_demos/test_sokoban_react_agent.py +498 -0
  130. synth_ai/environments/examples/sokoban/agent_demos/test_synth_lats.py +1 -0
  131. synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_locally.py +748 -0
  132. synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_service.py +296 -0
  133. synth_ai/environments/examples/sokoban/engine.py +675 -0
  134. synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +1 -0
  135. synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +656 -0
  136. synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +17 -0
  137. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +3 -0
  138. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +129 -0
  139. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +370 -0
  140. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +331 -0
  141. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +305 -0
  142. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +66 -0
  143. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +114 -0
  144. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +122 -0
  145. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +394 -0
  146. synth_ai/environments/examples/sokoban/environment.py +228 -0
  147. synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +438 -0
  148. synth_ai/environments/examples/sokoban/puzzle_loader.py +311 -0
  149. synth_ai/environments/examples/sokoban/taskset.py +425 -0
  150. synth_ai/environments/examples/sokoban/units/astar_common.py +94 -0
  151. synth_ai/environments/examples/sokoban/units/test_building_task_set.py +49 -0
  152. synth_ai/environments/examples/sokoban/units/test_false_positive.py +120 -0
  153. synth_ai/environments/examples/sokoban/units/test_simple_run_through_environment.py +119 -0
  154. synth_ai/environments/examples/sokoban/units/test_sokoban_environment.py +98 -0
  155. synth_ai/environments/examples/sokoban/units/test_tree.py +364 -0
  156. synth_ai/environments/examples/tictactoe/__init__.py +1 -0
  157. synth_ai/environments/examples/tictactoe/agent_demos/test_synth_react.py +266 -0
  158. synth_ai/environments/examples/tictactoe/agent_demos/test_tictactoe_react_agent.py +470 -0
  159. synth_ai/environments/examples/tictactoe/engine.py +368 -0
  160. synth_ai/environments/examples/tictactoe/environment.py +239 -0
  161. synth_ai/environments/examples/tictactoe/taskset.py +214 -0
  162. synth_ai/environments/examples/tictactoe/units/test_tictactoe_engine.py +393 -0
  163. synth_ai/environments/examples/tictactoe/units/test_tictactoe_environment.py +493 -0
  164. synth_ai/environments/examples/tictactoe/units/test_tictactoe_taskset.py +191 -0
  165. synth_ai/environments/examples/verilog/__init__.py +10 -0
  166. synth_ai/environments/examples/verilog/agent_demos/test_synth_react.py +520 -0
  167. synth_ai/environments/examples/verilog/engine.py +328 -0
  168. synth_ai/environments/examples/verilog/environment.py +349 -0
  169. synth_ai/environments/examples/verilog/taskset.py +418 -0
  170. synth_ai/environments/examples/verilog/units/test_verilog_engine.py +466 -0
  171. synth_ai/environments/examples/verilog/units/test_verilog_environment.py +585 -0
  172. synth_ai/environments/examples/verilog/units/test_verilog_integration.py +383 -0
  173. synth_ai/environments/examples/verilog/units/test_verilog_taskset.py +457 -0
  174. synth_ai/environments/reproducibility/core.py +42 -0
  175. synth_ai/environments/reproducibility/tree.py +364 -0
  176. synth_ai/environments/service/app.py +78 -0
  177. synth_ai/environments/service/core_routes.py +775 -0
  178. synth_ai/environments/service/external_registry.py +57 -0
  179. synth_ai/environments/service/registry.py +9 -0
  180. synth_ai/environments/stateful/__init__.py +1 -0
  181. synth_ai/environments/stateful/core.py +28 -0
  182. synth_ai/environments/stateful/engine.py +21 -0
  183. synth_ai/environments/stateful/state.py +7 -0
  184. synth_ai/environments/tasks/api.py +19 -0
  185. synth_ai/environments/tasks/core.py +78 -0
  186. synth_ai/environments/tasks/filters.py +39 -0
  187. synth_ai/environments/tasks/utils.py +89 -0
  188. synth_ai/environments/v0_observability/history.py +3 -0
  189. synth_ai/environments/v0_observability/log.py +2 -0
  190. synth_ai/lm/caching/constants.py +1 -0
  191. synth_ai/{zyk/lms → lm}/caching/ephemeral.py +4 -8
  192. synth_ai/{zyk/lms → lm}/caching/handler.py +15 -15
  193. synth_ai/{zyk/lms → lm}/caching/initialize.py +2 -4
  194. synth_ai/{zyk/lms → lm}/caching/persistent.py +4 -10
  195. synth_ai/{zyk/lms → lm}/config.py +2 -1
  196. synth_ai/{zyk/lms → lm}/constants.py +2 -2
  197. synth_ai/{zyk/lms → lm}/core/all.py +10 -10
  198. synth_ai/{zyk/lms → lm}/core/main.py +57 -33
  199. synth_ai/{zyk/lms → lm}/core/vendor_clients.py +12 -10
  200. synth_ai/lm/cost/monitor.py +1 -0
  201. synth_ai/lm/cost/statefulness.py +1 -0
  202. synth_ai/lm/provider_support/__init__.py +8 -0
  203. synth_ai/lm/provider_support/anthropic.py +945 -0
  204. synth_ai/lm/provider_support/openai.py +1115 -0
  205. synth_ai/lm/provider_support/suppress_logging.py +31 -0
  206. synth_ai/{zyk/lms → lm}/structured_outputs/handler.py +58 -80
  207. synth_ai/{zyk/lms → lm}/structured_outputs/inject.py +6 -20
  208. synth_ai/{zyk/lms → lm}/structured_outputs/rehabilitate.py +6 -12
  209. synth_ai/{zyk/lms → lm}/vendors/core/anthropic_api.py +21 -30
  210. synth_ai/{zyk/lms → lm}/vendors/core/gemini_api.py +37 -32
  211. synth_ai/{zyk/lms → lm}/vendors/core/mistral_api.py +19 -28
  212. synth_ai/{zyk/lms → lm}/vendors/core/openai_api.py +26 -36
  213. synth_ai/{zyk/lms → lm}/vendors/openai_standard.py +29 -33
  214. synth_ai/{zyk/lms → lm}/vendors/retries.py +1 -1
  215. synth_ai/lm/vendors/supported/__init__.py +0 -0
  216. synth_ai/{zyk/lms → lm}/vendors/supported/custom_endpoint.py +131 -118
  217. synth_ai/{zyk/lms → lm}/vendors/supported/deepseek.py +4 -8
  218. synth_ai/{zyk/lms → lm}/vendors/supported/grok.py +6 -8
  219. synth_ai/{zyk/lms → lm}/vendors/supported/groq.py +1 -1
  220. synth_ai/{zyk/lms → lm}/vendors/supported/ollama.py +2 -2
  221. synth_ai/{zyk/lms → lm}/vendors/supported/openrouter.py +18 -16
  222. synth_ai/{zyk/lms → lm}/vendors/supported/together.py +1 -1
  223. synth_ai/tracing/__init__.py +0 -0
  224. synth_ai/tracing/abstractions.py +224 -0
  225. synth_ai/tracing/base_client.py +91 -0
  226. synth_ai/tracing/client_manager.py +131 -0
  227. synth_ai/tracing/config.py +140 -0
  228. synth_ai/tracing/context.py +146 -0
  229. synth_ai/tracing/decorators.py +679 -0
  230. synth_ai/tracing/events/__init__.py +0 -0
  231. synth_ai/tracing/events/manage.py +147 -0
  232. synth_ai/tracing/events/scope.py +86 -0
  233. synth_ai/tracing/events/store.py +227 -0
  234. synth_ai/tracing/immediate_client.py +152 -0
  235. synth_ai/tracing/local.py +18 -0
  236. synth_ai/tracing/log_client_base.py +74 -0
  237. synth_ai/tracing/retry_queue.py +187 -0
  238. synth_ai/tracing/trackers.py +515 -0
  239. synth_ai/tracing/upload.py +504 -0
  240. synth_ai/tracing/utils.py +9 -0
  241. synth_ai/zyk/__init__.py +28 -2
  242. synth_ai-0.2.1.dev0.dist-info/METADATA +349 -0
  243. synth_ai-0.2.1.dev0.dist-info/RECORD +261 -0
  244. synth_ai/zyk/lms/caching/constants.py +0 -1
  245. synth_ai/zyk/lms/cost/monitor.py +0 -1
  246. synth_ai/zyk/lms/cost/statefulness.py +0 -1
  247. synth_ai-0.1.9.dist-info/METADATA +0 -37
  248. synth_ai-0.1.9.dist-info/RECORD +0 -50
  249. /synth_ai/{zyk/lms/__init__.py → environments/reproducibility/helpers.py} +0 -0
  250. /synth_ai/{zyk/lms/caching → lm}/__init__.py +0 -0
  251. /synth_ai/{zyk/lms/core → lm/caching}/__init__.py +0 -0
  252. /synth_ai/{zyk/lms → lm}/caching/dbs.py +0 -0
  253. /synth_ai/{zyk/lms/cost → lm/core}/__init__.py +0 -0
  254. /synth_ai/{zyk/lms → lm}/core/exceptions.py +0 -0
  255. /synth_ai/{zyk/lms/structured_outputs → lm/cost}/__init__.py +0 -0
  256. /synth_ai/{zyk/lms/vendors → lm/structured_outputs}/__init__.py +0 -0
  257. /synth_ai/{zyk/lms → lm}/tools/__init__.py +0 -0
  258. /synth_ai/{zyk/lms → lm}/tools/base.py +0 -0
  259. /synth_ai/{zyk/lms/vendors/core → lm/vendors}/__init__.py +0 -0
  260. /synth_ai/{zyk/lms → lm}/vendors/base.py +0 -0
  261. /synth_ai/{zyk/lms/vendors/local → lm/vendors/core}/__init__.py +0 -0
  262. /synth_ai/{zyk/lms/vendors/supported → lm/vendors/local}/__init__.py +0 -0
  263. /synth_ai/{zyk/lms → lm}/vendors/local/ollama.py +0 -0
  264. {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/WHEEL +0 -0
  265. {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/licenses/LICENSE +0 -0
  266. {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,119 @@
1
+ """
2
+ test_simple_run_through_environment.py – A*‑style search on the underlying
3
+ SokobanEngine (via ENGINE_ASTAR), followed by a replay of the resulting plan
4
+ through the *SokobanEnvironment* API (deserialize/step).
5
+ """
6
+
7
+ import asyncio
8
+ from typing import List, Dict, Any
9
+ from uuid import uuid4
10
+
11
+ import pytest
12
+
13
+ # ––––– app imports ––––– #
14
+ from synth_ai.environments.examples.sokoban.environment import SokobanEnvironment # <- your wrapper
15
+ from synth_ai.environments.examples.sokoban.engine import (
16
+ SokobanEngineSnapshot,
17
+ ) # same snapshot type
18
+ from synth_ai.environments.environment.tools import EnvToolCall # call interface
19
+
20
+ from synth_ai.environments.examples.sokoban.taskset import (
21
+ SokobanTaskInstanceMetadata,
22
+ SokobanTaskInstance,
23
+ )
24
+ from synth_ai.environments.tasks.core import Impetus, Intent
25
+
26
+ # shared A* / heuristic utilities
27
+ from synth_ai.environments.examples.sokoban.units.astar_common import (
28
+ ENGINE_ASTAR,
29
+ solved,
30
+ ) # Use ENGINE_ASTAR
31
+
32
+
33
+ # ---------------- test fixture snapshot ---------------------------------- #
34
+ # solvable in exactly two actions: push-right, push-up
35
# Engine snapshot for a toy 4x4 level.  Positions in the inline comments are
# (row, col) indices into the nested lists.
SIMPLE_SNAPSHOT: Dict[str, Any] = dict(
    dim_room=[4, 4],
    room_fixed=[
        [0, 0, 0, 0],
        [0, 1, 2, 1],  # target at (1,2)
        [0, 1, 1, 1],
        [0, 0, 0, 0],
    ],
    room_state=[
        [0, 0, 0, 0],
        [0, 1, 1, 1],
        [0, 1, 4, 1],  # box at (2,2)
        [0, 5, 1, 1],  # player at (3,1)
    ],
    boxes_on_target=0,
    max_steps=10,
    num_boxes=1,
)
53
+
54
+
55
+ # helper: tiny wrapper so we don't depend on full EnvToolCall implementation
56
class Move(EnvToolCall):  # type: ignore[misc]
    """Bare-bones tool call that carries nothing but a Sokoban action id.

    The base-class initializer is deliberately not invoked, so the test does
    not depend on the full ``EnvToolCall`` implementation.
    """

    def __init__(self, action: int) -> None:
        # Only attribute set on the instance: the integer action to perform.
        self.action = action
59
+
60
+
61
+ # replay helper --------------------------------------------------------- #
62
async def replay(env: SokobanEnvironment, start: SokobanEngineSnapshot, plan: List[int]) -> bool:
    """Restore ``start`` into a fresh environment, apply ``plan`` and report success.

    Args:
        env: Not used by this helper; the replay runs on the environment
            rebuilt from ``start``.
        start: Engine snapshot to restore before replaying.
        plan: Action ids, applied in order, one ``Move`` per step.

    Returns:
        The result of ``solved`` for the restored environment's engine after
        the last action has been applied.
    """
    # NOTE(review): test_tree.py passes a task instance as a second argument
    # to `_deserialize_engine` — confirm the one-argument form is supported.
    restored = await SokobanEnvironment._deserialize_engine(start)
    for action in plan:
        await restored.step([[Move(action)]])
    return solved(restored.engine)
68
+
69
+
70
+ # ----------------------------- test -------------------------------------- #
71
@pytest.mark.asyncio
async def test_environment_solve_and_replay():
    """Plan on the engine with A*, then confirm the plan by replaying it."""
    # Smallest task instance able to wrap SIMPLE_SNAPSHOT.
    metadata = SokobanTaskInstanceMetadata(
        difficulty="easy",
        num_boxes=1,
        dim_room=(4, 4),
        max_steps=10,
        shortest_path_length=-1,
        seed=-1,
        generation_params="unit‑test",
    )
    task = SokobanTaskInstance(
        id=uuid4(),
        impetus=Impetus(instructions="solve"),
        intent=Intent(rubric={}, gold_trajectories=None, gold_state_diff={}),
        metadata=metadata,
        is_reproducible=True,
        initial_engine_snapshot=SIMPLE_SNAPSHOT,
    )

    env = SokobanEnvironment(task)
    await env.initialize()

    # Skip image rendering inside gym-sokoban to keep the search fast.
    env.engine.package_sokoban_env.observation_mode = "raw"

    # Captured before the search so the replay starts from the initial state.
    start_snapshot = await env._serialize_engine()

    # The search is handed the engine itself, with a tight node budget.
    plan = await ENGINE_ASTAR(env.engine, max_nodes=200)
    assert plan, "Environment A* failed to find a plan"
    assert len(plan) == 2  # expect the 2-move solution

    # Re-run the plan from the captured snapshot and check it solves the level.
    solved_on_replay = await replay(env, start_snapshot, plan)
    assert solved_on_replay, "Plan did not solve the puzzle upon replay"
    summary = f"Test passed: Plan {plan} (length {len(plan)}) replayed successfully and solved the puzzle."
    print(summary)
115
+
116
+
117
if __name__ == "__main__":
    # Allow running this test module directly, without pytest.
    asyncio.run(test_environment_solve_and_replay())
@@ -0,0 +1,98 @@
1
+ """
2
+ test_sokoban_environment.py – A*‑style search and replay, but through the
3
+ *SokobanEnvironment* API (initialize/step/checkpoint) rather than talking to
4
+ SokobanEngine directly.
5
+ """
6
+
7
+ import asyncio
8
+ from typing import List, Dict, Any
9
+ from uuid import uuid4
10
+
11
+ import pytest
12
+
13
+ # ––––– app imports ––––– #
14
+ from synth_ai.environments.examples.sokoban.environment import SokobanEnvironment
15
+ from synth_ai.environments.examples.sokoban.engine import SokobanEngineSnapshot
16
+ from synth_ai.environments.environment.tools import EnvToolCall
17
+
18
+ # shared A* / heuristic utilities
19
+ from synth_ai.environments.examples.sokoban.units.astar_common import astar, solved
20
+
21
+ from synth_ai.environments.examples.sokoban.taskset import (
22
+ SokobanTaskInstanceMetadata,
23
+ SokobanTaskInstance,
24
+ )
25
+ from synth_ai.environments.tasks.core import Impetus, Intent
26
+
27
+
28
+ # ---------------- test fixture snapshot ---------------------------------- #
29
# Toy 4x4 level, solvable in exactly two actions: the player steps right to
# (3,2), directly below the box, and then pushes the box up from (2,2) onto
# the target at (1,2).  Positions are (row, col) indices into the nested
# lists.  Assuming gym-sokoban's tile encoding (0 = wall, 1 = floor,
# 2 = target, 4 = box, 5 = player), a box needs a reachable free cell behind
# it to be pushed; a box enclosed by walls on three sides can never move, and
# the two-move assertion in the test below would then be unsatisfiable.
SIMPLE_SNAPSHOT: Dict[str, Any] = {
    "dim_room": [4, 4],
    "room_fixed": [
        [0, 0, 0, 0],
        [0, 1, 2, 1],  # target at (1,2)
        [0, 1, 1, 1],
        [0, 0, 0, 0],
    ],
    "room_state": [
        [0, 0, 0, 0],
        [0, 1, 1, 1],
        [0, 1, 4, 1],  # box at (2,2)
        [0, 5, 1, 1],  # player at (3,1)
    ],
    "boxes_on_target": 0,
    "max_steps": 10,
    "num_boxes": 1,
}
37
+
38
+
39
+ # helper: tiny wrapper so we don't depend on full EnvToolCall implementation
40
class Move(EnvToolCall):  # type: ignore[misc]
    """Bare-bones tool call that carries nothing but a Sokoban action id.

    The base-class initializer is deliberately not invoked, so the test does
    not depend on the full ``EnvToolCall`` implementation.
    """

    def __init__(self, action: int) -> None:
        # Only attribute set on the instance: the integer action to perform.
        self.action = action
43
+
44
+
45
async def replay(env: SokobanEnvironment, start: SokobanEngineSnapshot, plan: List[int]) -> bool:
    """Restore ``start`` into a fresh environment, apply ``plan`` and report success.

    Args:
        env: Ignored on entry; the replay runs on the environment rebuilt
            from ``start``.
        start: Engine snapshot to restore before replaying.
        plan: Action ids, applied in order, one ``Move`` per step.

    Returns:
        The result of ``solved`` for the restored environment after the last
        action has been applied.
    """
    restored = await SokobanEnvironment._deserialize_engine(start)
    for action in plan:
        await restored.step([[Move(action)]])
    # NOTE(review): the environment itself is handed to `solved` here —
    # confirm that `astar_common.solved` accepts an environment and not only
    # an engine.
    return solved(restored)
50
+
51
+
52
+ # ----------------------------- test -------------------------------------- #
53
@pytest.mark.asyncio
async def test_environment_solve_and_replay():
    """Search for a plan through the environment API, then replay it."""
    # Smallest task instance able to wrap SIMPLE_SNAPSHOT.
    metadata = SokobanTaskInstanceMetadata(
        difficulty="easy",
        num_boxes=1,
        dim_room=(4, 4),
        max_steps=10,
        shortest_path_length=-1,
        seed=-1,
        generation_params="unit‑test",
    )
    task = SokobanTaskInstance(
        id=uuid4(),
        impetus=Impetus(instructions="solve"),
        intent=Intent(rubric={}, gold_trajectories=None, gold_state_diff={}),
        metadata=metadata,
        is_reproducible=True,
        initial_engine_snapshot=SIMPLE_SNAPSHOT,
    )

    env = SokobanEnvironment(task)
    await env.initialize()

    # Skip image rendering inside gym-sokoban to keep the search fast.
    env.engine.package_sokoban_env.observation_mode = "raw"

    # Captured before the search so the replay starts from the initial state.
    start_snapshot = await env._serialize_engine()

    def step_with_move(target_env, action_id):
        # Wrap the raw action id in a Move and hand back whatever
        # target_env.step returns (it is not awaited here).
        return target_env.step([[Move(action_id)]])

    plan = await astar(
        root_obj=env,
        step_fn=step_with_move,
        deserialize_fn=SokobanEnvironment._deserialize_engine,
        max_nodes=500,  # circuit-breaker
    )
    assert plan, "Environment A* failed to find a plan"
    assert len(plan) == 2  # expect the 2-move solution

    # Re-run the plan from the captured snapshot and check it solves the level.
    replay_ok = await replay(env, start_snapshot, plan)
    assert replay_ok
94
+
95
+
96
if __name__ == "__main__":
    # Allow running this test module directly, without pytest.
    asyncio.run(test_environment_solve_and_replay())
@@ -0,0 +1,364 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ mcts_sokoban_env_example.py
4
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~
5
+ Tiny Monte-Carlo-Tree-Search demo that
6
+ • wraps a 4×4 toy Sokoban level in `SokobanEnvironment`
7
+ • stores every state in a FilesystemSnapshotStore
8
+ • expands / rolls-out with a TrajectoryTreeStore
9
+ • returns the most visited root-child as the "plan"
10
+
11
+ Run with pytest: pytest synth_ai/environments/examples/sokoban/units/test_tree.py
12
+ """
13
+
14
+ import asyncio
15
+ import gzip
16
+ import pickle
17
+ import time
18
+ import logging
19
+ import uuid
20
+ from pathlib import Path
21
+
22
+ import numpy as np
23
+ import pytest
24
+
25
+ from synth_ai.environments.reproducibility.tree import FilesystemSnapshotStore, TrajectoryTreeStore
26
+ from synth_ai.environments.examples.sokoban.taskset import (
27
+ SokobanTaskInstance,
28
+ SokobanTaskInstanceMetadata,
29
+ )
30
+
31
+ # from examples.sokoban.engine import SokobanEngineSnapshot # only a type
32
+ from synth_ai.environments.tasks.core import Impetus, Intent
33
+ from synth_ai.environments.examples.sokoban.environment import SokobanEnvironment
34
+ from synth_ai.environments.examples.sokoban.units.astar_common import ENGINE_ASTAR # A* helper
35
+ from gym_sokoban.envs.sokoban_env import ACTION_LOOKUP # Added for full action set
36
+
37
+ logging.basicConfig(level=logging.DEBUG, format="%(message)s")
38
+ LOG = logging.getLogger("mcts-debug")
39
+
40
+ # ─────────────────────────── toy level ──────────────────────────────── #
41
+
42
# Engine snapshot for the toy 4x4 level used throughout this module.
# Positions in the inline comments are (row, col) indices into the nested lists.
SNAP = dict(
    dim_room=[4, 4],
    room_fixed=[
        [0, 0, 0, 0],
        [0, 1, 2, 1],  # target at (1,2)
        [0, 1, 1, 1],
        [0, 0, 0, 0],
    ],
    room_state=[
        [0, 0, 0, 0],
        [0, 1, 1, 1],
        [0, 1, 4, 1],  # box at (2,2)
        [0, 5, 1, 1],  # player at (3,1)
    ],
    boxes_on_target=0,
    max_steps=10,
    num_boxes=1,
)
60
+
61
+
62
+ # ─────────────────────────── env wrapper ────────────────────────────── #
63
+ # (import placed here to avoid circulars; uses the code you pasted)
64
+
65
+ # ─────────────────────────── helpers ─────────────────────────────────── #
66
+
67
+
68
def solved(env: "SokobanEnvironment") -> bool:
    """Return ``True`` when every target tile is covered by a box.

    Compares the wrapped gym-sokoban env's ``boxes_on_target`` counter with
    the number of cells in ``room_fixed`` equal to 2 (the target marker).

    Args:
        env: Environment whose ``engine.package_sokoban_env`` is inspected.

    Returns:
        A built-in ``bool``.  The NumPy comparison result is converted
        explicitly so the value matches the annotation and behaves like a
        plain boolean for identity checks and serialization.
    """
    sokoban = env.engine.package_sokoban_env
    target_count = np.sum(sokoban.room_fixed == 2)
    return bool(sokoban.boxes_on_target == target_count)
74
+
75
+
76
async def debug_basic_actions():
    """Build the toy level and log what each individual action does to it.

    The initial state is logged first.  Then, for every action id in
    ``ACTION_LOOKUP``, the environment is serialized, restored into a
    separate environment, and stepped once with that action; the player and
    box positions are logged before and after.  An exception raised while
    trying an action is logged and does not stop the loop.

    Returns:
        The environment created and initialized at the top of the function.
    """

    def box_cells(sok):
        # Coordinates of every box, whether on a target (3) or not (4).
        return np.argwhere((sok.room_state == 3) | (sok.room_state == 4))

    task = SokobanTaskInstance(
        id=uuid.uuid4(),
        impetus=Impetus(instructions="solve"),
        intent=Intent(rubric={}, gold_trajectories=None, gold_state_diff={}),
        metadata=SokobanTaskInstanceMetadata(
            difficulty="easy",
            num_boxes=1,
            dim_room=(4, 4),
            max_steps=10,
            shortest_path_length=-1,
            seed=0,
            generation_params="debug",
        ),
        is_reproducible=True,
        initial_engine_snapshot=SNAP,
    )
    env = SokobanEnvironment(task)
    await env.initialize()

    sok = env.engine.package_sokoban_env
    LOG.debug("Initial state:")
    LOG.debug(f"player @ {sok.player_position}")
    LOG.debug(f"room_state:\n{sok.room_state}")
    LOG.debug(f"room_fixed:\n{sok.room_fixed}")
    LOG.debug(f"boxes_on_target AFTER INIT: {sok.boxes_on_target}")
    LOG.debug(f"Actual count of boxes on target (value 3): {np.sum(sok.room_state == 3)}")
    LOG.debug(f"Boxes at value 4: {np.sum(sok.room_state == 4)}")
    LOG.debug(f"Targets at value 2: {np.sum(sok.room_fixed == 2)}")

    # Try each action individually, each time on a freshly restored copy.
    for action_id in range(len(ACTION_LOOKUP)):
        LOG.debug(f"\nTrying action {action_id} ({ACTION_LOOKUP[action_id]}):")
        snapshot = await env._serialize_engine()
        try:
            trial_env = await SokobanEnvironment._deserialize_engine(snapshot, task)
            trial = trial_env.engine.package_sokoban_env
            LOG.debug(f" Before: player @ {trial.player_position}, boxes @ {box_cells(trial)}")
            await trial_env.engine._step_engine(action_id)
            # Re-read the wrapped env after the step before logging again.
            trial = trial_env.engine.package_sokoban_env
            LOG.debug(f" After: player @ {trial.player_position}, boxes @ {box_cells(trial)}")
            LOG.debug(
                f" Action {action_id} succeeded, boxes_on_target: {trial.boxes_on_target}"
            )
        except Exception as exc:
            LOG.debug(f" Action {action_id} failed: {exc}")

    return env
128
+
129
+
130
+ # ╰────────────────────────────────────────────────────────────────────╯ #
131
+
132
+
133
+ # ───────── greedy search that *writes/reads* via TrajectoryTreeStore ─────────
134
def _q_from_astar_path(path) -> float:
    """Convert an A* result for a child state into a Q-value.

    * ``None`` (search failed / gave up)  -> 0.0
    * empty path (child already solved)   -> 1.0, the best possible value
    * otherwise                           -> 1 / (2 + len(path)), so a shorter
      total plan (the action itself plus the A* path) scores higher.
    """
    if path is None:
        return 0.0
    if len(path) == 0:
        return 1.0
    total_len = 1 + len(path)  # 1 = the step that reached the child
    return 1.0 / (1 + total_len)


async def _env_from_snapshot_blob(blob, task_instance):
    """Rebuild a ``SokobanEnvironment`` from a gzip-compressed pickle blob.

    NOTE(security): ``pickle.loads`` must only ever see blobs written by this
    process' own snapshot store; never feed it externally supplied data.
    """
    return await SokobanEnvironment._deserialize_engine(
        pickle.loads(gzip.decompress(blob)), task_instance
    )


async def greedy_tree_mcts_plan(
    tree: TrajectoryTreeStore,
    root_id: str,
    task_instance: SokobanTaskInstance,
    *,
    rollouts_per_action: int = 50,
    max_depth: int = 30,
    timeout_s: float | None = None,
) -> tuple[list[int], list[dict[int, float]]]:
    """Greedily build a plan by scoring every action with an A* rollout.

    At each depth every action index in ``ACTION_LOOKUP`` is expanded into a
    child snapshot (or an existing child is reused), A* is run from that child,
    and the action with the highest Q-value is appended to the plan. The walk
    stops when the chosen child is solved, when no action could be evaluated,
    when ``max_depth`` is reached, or when the time budget runs out.

    Args:
        tree: Snapshot tree that children are written to and read from.
        root_id: Id of the node to start planning from.
        task_instance: Task instance used when deserializing snapshots.
        rollouts_per_action: Unused by this implementation; retained so that
            existing callers passing it keep working.
        max_depth: Maximum number of actions in the returned plan.
        timeout_s: Optional wall-clock budget in seconds; ``None`` disables it.

    Returns:
        ``(plan, q_hist)`` where ``plan`` is the list of chosen action indices
        and ``q_hist`` holds, per depth, the ``{action: Q}`` mapping evaluated.
    """
    start = time.monotonic()
    plan, q_hist, node_id = [], [], root_id

    def out_of_time() -> bool:
        return timeout_s is not None and time.monotonic() - start >= timeout_s

    for depth in range(max_depth):
        LOG.debug(f"\n--- depth {depth} --- node={node_id[:6]}")
        if out_of_time():
            break  # time budget exhausted

        env_blob = tree.load_snapshot_blob(node_id)
        env = await _env_from_snapshot_blob(env_blob, task_instance)
        sokoban = env.engine.package_sokoban_env
        LOG.debug(
            f"player @ {sokoban.player_position} boxes @ {np.argwhere((sokoban.room_state == 3) | (sokoban.room_state == 4))}"
        )

        # Index the existing children by their edge action once per depth
        # instead of rescanning them for every action. ``setdefault`` keeps
        # the first child seen for an action, matching a first-match scan.
        child_by_action: dict[int, str] = {}
        for cid in tree.get_children(node_id) or ():
            child_by_action.setdefault(tree.graph[node_id][cid]["action"], cid)

        q_vals: dict[int, float] = {}
        # Enumerate every Sokoban action listed in ACTION_LOOKUP.
        for a in range(len(ACTION_LOOKUP)):
            if out_of_time():
                break  # time budget exhausted in inner loop

            child_id = child_by_action.get(a)
            if child_id is None:  # expand once
                action_type_log = f"expand a={a}"
                # Step a fresh copy of the parent so the parent state stays clean.
                tmp_env_for_step = await _env_from_snapshot_blob(env_blob, task_instance)
                try:
                    await tmp_env_for_step.engine._step_engine(a)
                except Exception:  # engine rejects the action: treat as illegal
                    LOG.debug(f"illegal expand a={a}, skipping")
                    continue
                cid_blob = gzip.compress(pickle.dumps(await tmp_env_for_step._serialize_engine()))
                child_id = tree.add_child(
                    node_id,
                    cid_blob,
                    action=a,
                    reward=0.0,
                    terminated=solved(tmp_env_for_step),
                    info={},
                )
                if child_id is None:
                    # Store did not hand back a node id; nothing to score.
                    continue
                child_by_action[a] = child_id
            else:
                action_type_log = f"reuse a={a}"

            # Deterministic rollout: A* from the child snapshot, run on the
            # engine rather than the environment wrapper.
            child_env = await _env_from_snapshot_blob(
                tree.load_snapshot_blob(child_id), task_instance
            )
            path = await ENGINE_ASTAR(child_env.engine, max_nodes=1_000)

            q_value_for_a = _q_from_astar_path(path)
            q_vals[a] = q_value_for_a

            if path is None:
                path_str = "No solution found / A* failed"
            elif len(path) == 0:
                path_str = "✓ (already solved)"
            else:
                path_str = str(path)
            LOG.debug(f"{action_type_log}, Q={q_value_for_a:.4f}, Path={path_str}")

        if not q_vals:  # nothing evaluated: timed out or every action illegal
            break
        LOG.debug(f"Q={q_vals}")

        q_hist.append(q_vals)

        # Every key in q_vals has a child recorded in child_by_action, so the
        # best action can be followed directly. Ties resolve to the lowest
        # action index because q_vals is filled in ascending action order.
        best_a = max(q_vals, key=q_vals.get)
        plan.append(best_a)

        node_id = child_by_action[best_a]
        LOG.debug(f"best={best_a} → new node={node_id[:6]}")

        # Stop as soon as the chosen child is a solved state.
        child_env = await _env_from_snapshot_blob(
            tree.load_snapshot_blob(node_id), task_instance
        )
        if solved(child_env):
            LOG.debug("Child node is terminal. Ending plan.")
            break

    return plan, q_hist
291
+
292
+
293
+ # ───────────────────── pytest driver (add this AFTER helpers) ─────────────
294
@pytest.mark.asyncio
async def test_mcts_sokoban_run(tmp_path: Path) -> None:
    """End-to-end check: the greedy tree planner must solve the SNAP level.

    Builds an environment from ``SNAP``, stores its snapshot as the tree root,
    runs ``greedy_tree_mcts_plan`` and then replays the returned plan from the
    root snapshot, asserting the puzzle ends up solved.
    """
    # 1) build an env around the tiny 4×4 level
    inst = SokobanTaskInstance(
        id=uuid.uuid4(),
        impetus=Impetus(instructions="solve"),
        intent=Intent(rubric={}, gold_trajectories=None, gold_state_diff={}),
        metadata=SokobanTaskInstanceMetadata(
            difficulty="easy",
            num_boxes=1,
            dim_room=(4, 4),
            max_steps=10,
            shortest_path_length=-1,
            seed=0,
            generation_params="demo",
        ),
        is_reproducible=True,
        initial_engine_snapshot=SNAP,
    )
    env = SokobanEnvironment(inst)
    await env.initialize()

    # 2) root snapshot → tree
    snap_store_path = tmp_path / "mcts_snaps"
    tree = TrajectoryTreeStore(FilesystemSnapshotStore(snap_store_path))
    root_blob = gzip.compress(pickle.dumps(await env._serialize_engine()))
    root_id = tree.add_root(root_blob)

    async def env_at_root():
        # Fresh, independent environment rebuilt from the root snapshot.
        return await SokobanEnvironment._deserialize_engine(
            pickle.loads(gzip.decompress(root_blob)), inst
        )

    # Diagnostic: run A* directly on the root state
    diag_env = await env_at_root()
    LOG.debug("Diagnostic A* on initial state with max_nodes=5000:")
    diag_path = await ENGINE_ASTAR(diag_env.engine, max_nodes=5000)
    # An empty path means "already solved"; only None means the search failed.
    LOG.debug(
        f"Diagnostic A* path from root: {diag_path if diag_path is not None else 'No solution found'}"
    )

    # 3) greedy tree search
    plan, q_hist = await greedy_tree_mcts_plan(
        tree,
        root_id,
        inst,
        rollouts_per_action=50,
        max_depth=30,
        timeout_s=30.0,
    )
    print("plan:", plan)
    print("q-history:", q_hist)
    assert plan, "empty plan"

    # 4) verify the plan solves the puzzle by replaying it from the root
    checker_env = await env_at_root()
    for a in plan:
        await checker_env.engine._step_engine(a)
    assert solved(checker_env), "plan did not solve the puzzle"
350
+
351
+
352
# Script entry point: lets this module be run directly, without pytest.
if __name__ == "__main__":
    import tempfile

    async def _debug_then_run_test():
        # Run the action-debugging helper first so its output precedes the test.
        await debug_basic_actions()

        # The test expects a scratch directory (pytest's tmp_path); supply a
        # temporary one that is removed once the run finishes.
        with tempfile.TemporaryDirectory() as scratch_dir:
            scratch_path = Path(scratch_dir)
            await test_mcts_sokoban_run(scratch_path)

    asyncio.run(_debug_then_run_test())
@@ -0,0 +1 @@
1
+ # TicTacToe Environment Module