synth-ai 0.1.9__py3-none-any.whl → 0.2.1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (266)
  1. synth_ai/__init__.py +28 -2
  2. synth_ai/core/system.py +4 -0
  3. synth_ai/environments/__init__.py +35 -0
  4. synth_ai/environments/environment/__init__.py +1 -0
  5. synth_ai/environments/environment/artifacts/__init__.py +1 -0
  6. synth_ai/environments/environment/artifacts/base.py +50 -0
  7. synth_ai/environments/environment/core.py +22 -0
  8. synth_ai/environments/environment/db/__init__.py +1 -0
  9. synth_ai/environments/environment/db/sqlite.py +45 -0
  10. synth_ai/environments/environment/registry.py +24 -0
  11. synth_ai/environments/environment/resources/sqlite.py +46 -0
  12. synth_ai/environments/environment/results.py +1 -0
  13. synth_ai/environments/environment/rewards/__init__.py +1 -0
  14. synth_ai/environments/environment/rewards/core.py +28 -0
  15. synth_ai/environments/environment/shared_engine.py +26 -0
  16. synth_ai/environments/environment/tools/__init__.py +34 -0
  17. synth_ai/environments/examples/__init__.py +1 -0
  18. synth_ai/environments/examples/crafter_classic/__init__.py +8 -0
  19. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +58 -0
  20. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +152 -0
  21. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +1194 -0
  22. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +51 -0
  23. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +872 -0
  24. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1412 -0
  25. synth_ai/environments/examples/crafter_classic/agent_demos/test_crafter_react_agent.py +1110 -0
  26. synth_ai/environments/examples/crafter_classic/config_logging.py +111 -0
  27. synth_ai/environments/examples/crafter_classic/engine.py +502 -0
  28. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +63 -0
  29. synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +5 -0
  30. synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +74 -0
  31. synth_ai/environments/examples/crafter_classic/environment.py +255 -0
  32. synth_ai/environments/examples/crafter_classic/taskset.py +228 -0
  33. synth_ai/environments/examples/enron/agent_demos/test_synth_react.py +535 -0
  34. synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +156 -0
  35. synth_ai/environments/examples/enron/art_helpers/local_email_db.py +280 -0
  36. synth_ai/environments/examples/enron/art_helpers/types_enron.py +24 -0
  37. synth_ai/environments/examples/enron/engine.py +291 -0
  38. synth_ai/environments/examples/enron/environment.py +165 -0
  39. synth_ai/environments/examples/enron/taskset.py +112 -0
  40. synth_ai/environments/examples/enron/units/keyword_stats.py +111 -0
  41. synth_ai/environments/examples/enron/units/test_email_index.py +8 -0
  42. synth_ai/environments/examples/minigrid/__init__.py +48 -0
  43. synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +1188 -0
  44. synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +47 -0
  45. synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +562 -0
  46. synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +220 -0
  47. synth_ai/environments/examples/minigrid/agent_demos/test_minigrid_react_agent.py +393 -0
  48. synth_ai/environments/examples/minigrid/engine.py +589 -0
  49. synth_ai/environments/examples/minigrid/environment.py +274 -0
  50. synth_ai/environments/examples/minigrid/environment_mapping.py +242 -0
  51. synth_ai/environments/examples/minigrid/puzzle_loader.py +416 -0
  52. synth_ai/environments/examples/minigrid/taskset.py +583 -0
  53. synth_ai/environments/examples/minigrid/units/test_action_behavior.py +226 -0
  54. synth_ai/environments/examples/minigrid/units/test_debug_messages.py +83 -0
  55. synth_ai/environments/examples/minigrid/units/test_exploration.py +120 -0
  56. synth_ai/environments/examples/minigrid/units/test_minigrid_engine.py +214 -0
  57. synth_ai/environments/examples/minigrid/units/test_minigrid_environment.py +238 -0
  58. synth_ai/environments/examples/minigrid/units/test_minigrid_environment_mapping.py +301 -0
  59. synth_ai/environments/examples/minigrid/units/test_minigrid_taskset.py +210 -0
  60. synth_ai/environments/examples/nethack/__init__.py +7 -0
  61. synth_ai/environments/examples/nethack/achievements.py +337 -0
  62. synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +981 -0
  63. synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +74 -0
  64. synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +832 -0
  65. synth_ai/environments/examples/nethack/agent_demos/test_nethack_react_agent.py +1112 -0
  66. synth_ai/environments/examples/nethack/engine.py +738 -0
  67. synth_ai/environments/examples/nethack/environment.py +255 -0
  68. synth_ai/environments/examples/nethack/helpers/__init__.py +42 -0
  69. synth_ai/environments/examples/nethack/helpers/action_mapping.py +301 -0
  70. synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +401 -0
  71. synth_ai/environments/examples/nethack/helpers/observation_utils.py +433 -0
  72. synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +201 -0
  73. synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +268 -0
  74. synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +308 -0
  75. synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +430 -0
  76. synth_ai/environments/examples/nethack/taskset.py +323 -0
  77. synth_ai/environments/examples/nethack/units/test_nethack_engine.py +277 -0
  78. synth_ai/environments/examples/nethack/units/test_nethack_environment.py +281 -0
  79. synth_ai/environments/examples/nethack/units/test_nethack_taskset.py +213 -0
  80. synth_ai/environments/examples/nethack/units/test_recording.py +307 -0
  81. synth_ai/environments/examples/red/__init__.py +7 -0
  82. synth_ai/environments/examples/red/agent_demos/__init__.py +1 -0
  83. synth_ai/environments/examples/red/agent_demos/test_synth_react.py +1471 -0
  84. synth_ai/environments/examples/red/config_logging.py +110 -0
  85. synth_ai/environments/examples/red/engine.py +693 -0
  86. synth_ai/environments/examples/red/engine_helpers/__init__.py +1 -0
  87. synth_ai/environments/examples/red/engine_helpers/memory_map.py +28 -0
  88. synth_ai/environments/examples/red/engine_helpers/reward_components.py +275 -0
  89. synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +142 -0
  90. synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +56 -0
  91. synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +283 -0
  92. synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +149 -0
  93. synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +137 -0
  94. synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +56 -0
  95. synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +330 -0
  96. synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +120 -0
  97. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +558 -0
  98. synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +312 -0
  99. synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +147 -0
  100. synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +246 -0
  101. synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +367 -0
  102. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +139 -0
  103. synth_ai/environments/examples/red/environment.py +235 -0
  104. synth_ai/environments/examples/red/taskset.py +77 -0
  105. synth_ai/environments/examples/red/test_fixes.py +125 -0
  106. synth_ai/environments/examples/red/test_fixes_mock.py +148 -0
  107. synth_ai/environments/examples/red/units/__init__.py +1 -0
  108. synth_ai/environments/examples/red/units/test_basic_functionality.py +97 -0
  109. synth_ai/environments/examples/red/units/test_button_press_requirements.py +217 -0
  110. synth_ai/environments/examples/red/units/test_engine.py +192 -0
  111. synth_ai/environments/examples/red/units/test_environment.py +455 -0
  112. synth_ai/environments/examples/red/units/test_exploration_strategy.py +227 -0
  113. synth_ai/environments/examples/red/units/test_integration.py +217 -0
  114. synth_ai/environments/examples/red/units/test_memory_extraction.py +111 -0
  115. synth_ai/environments/examples/red/units/test_menu_bug_reproduction.py +1100 -0
  116. synth_ai/environments/examples/red/units/test_movement_debug.py +255 -0
  117. synth_ai/environments/examples/red/units/test_pokemon_mcts_debug.py +163 -0
  118. synth_ai/environments/examples/red/units/test_pokemon_mcts_verbose.py +117 -0
  119. synth_ai/environments/examples/red/units/test_red_basic.py +145 -0
  120. synth_ai/environments/examples/red/units/test_red_comprehensive.py +323 -0
  121. synth_ai/environments/examples/red/units/test_retry_movement.py +195 -0
  122. synth_ai/environments/examples/red/units/test_reward_components.py +186 -0
  123. synth_ai/environments/examples/red/units/test_rom_integration.py +260 -0
  124. synth_ai/environments/examples/red/units/test_taskset.py +116 -0
  125. synth_ai/environments/examples/red/units/test_tree.py +448 -0
  126. synth_ai/environments/examples/sokoban/__init__.py +1 -0
  127. synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +900 -0
  128. synth_ai/environments/examples/sokoban/agent_demos/test_dspy_react.py +1 -0
  129. synth_ai/environments/examples/sokoban/agent_demos/test_sokoban_react_agent.py +498 -0
  130. synth_ai/environments/examples/sokoban/agent_demos/test_synth_lats.py +1 -0
  131. synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_locally.py +748 -0
  132. synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_service.py +296 -0
  133. synth_ai/environments/examples/sokoban/engine.py +675 -0
  134. synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +1 -0
  135. synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +656 -0
  136. synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +17 -0
  137. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +3 -0
  138. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +129 -0
  139. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +370 -0
  140. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +331 -0
  141. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +305 -0
  142. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +66 -0
  143. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +114 -0
  144. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +122 -0
  145. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +394 -0
  146. synth_ai/environments/examples/sokoban/environment.py +228 -0
  147. synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +438 -0
  148. synth_ai/environments/examples/sokoban/puzzle_loader.py +311 -0
  149. synth_ai/environments/examples/sokoban/taskset.py +425 -0
  150. synth_ai/environments/examples/sokoban/units/astar_common.py +94 -0
  151. synth_ai/environments/examples/sokoban/units/test_building_task_set.py +49 -0
  152. synth_ai/environments/examples/sokoban/units/test_false_positive.py +120 -0
  153. synth_ai/environments/examples/sokoban/units/test_simple_run_through_environment.py +119 -0
  154. synth_ai/environments/examples/sokoban/units/test_sokoban_environment.py +98 -0
  155. synth_ai/environments/examples/sokoban/units/test_tree.py +364 -0
  156. synth_ai/environments/examples/tictactoe/__init__.py +1 -0
  157. synth_ai/environments/examples/tictactoe/agent_demos/test_synth_react.py +266 -0
  158. synth_ai/environments/examples/tictactoe/agent_demos/test_tictactoe_react_agent.py +470 -0
  159. synth_ai/environments/examples/tictactoe/engine.py +368 -0
  160. synth_ai/environments/examples/tictactoe/environment.py +239 -0
  161. synth_ai/environments/examples/tictactoe/taskset.py +214 -0
  162. synth_ai/environments/examples/tictactoe/units/test_tictactoe_engine.py +393 -0
  163. synth_ai/environments/examples/tictactoe/units/test_tictactoe_environment.py +493 -0
  164. synth_ai/environments/examples/tictactoe/units/test_tictactoe_taskset.py +191 -0
  165. synth_ai/environments/examples/verilog/__init__.py +10 -0
  166. synth_ai/environments/examples/verilog/agent_demos/test_synth_react.py +520 -0
  167. synth_ai/environments/examples/verilog/engine.py +328 -0
  168. synth_ai/environments/examples/verilog/environment.py +349 -0
  169. synth_ai/environments/examples/verilog/taskset.py +418 -0
  170. synth_ai/environments/examples/verilog/units/test_verilog_engine.py +466 -0
  171. synth_ai/environments/examples/verilog/units/test_verilog_environment.py +585 -0
  172. synth_ai/environments/examples/verilog/units/test_verilog_integration.py +383 -0
  173. synth_ai/environments/examples/verilog/units/test_verilog_taskset.py +457 -0
  174. synth_ai/environments/reproducibility/core.py +42 -0
  175. synth_ai/environments/reproducibility/tree.py +364 -0
  176. synth_ai/environments/service/app.py +78 -0
  177. synth_ai/environments/service/core_routes.py +775 -0
  178. synth_ai/environments/service/external_registry.py +57 -0
  179. synth_ai/environments/service/registry.py +9 -0
  180. synth_ai/environments/stateful/__init__.py +1 -0
  181. synth_ai/environments/stateful/core.py +28 -0
  182. synth_ai/environments/stateful/engine.py +21 -0
  183. synth_ai/environments/stateful/state.py +7 -0
  184. synth_ai/environments/tasks/api.py +19 -0
  185. synth_ai/environments/tasks/core.py +78 -0
  186. synth_ai/environments/tasks/filters.py +39 -0
  187. synth_ai/environments/tasks/utils.py +89 -0
  188. synth_ai/environments/v0_observability/history.py +3 -0
  189. synth_ai/environments/v0_observability/log.py +2 -0
  190. synth_ai/lm/caching/constants.py +1 -0
  191. synth_ai/{zyk/lms → lm}/caching/ephemeral.py +4 -8
  192. synth_ai/{zyk/lms → lm}/caching/handler.py +15 -15
  193. synth_ai/{zyk/lms → lm}/caching/initialize.py +2 -4
  194. synth_ai/{zyk/lms → lm}/caching/persistent.py +4 -10
  195. synth_ai/{zyk/lms → lm}/config.py +2 -1
  196. synth_ai/{zyk/lms → lm}/constants.py +2 -2
  197. synth_ai/{zyk/lms → lm}/core/all.py +10 -10
  198. synth_ai/{zyk/lms → lm}/core/main.py +57 -33
  199. synth_ai/{zyk/lms → lm}/core/vendor_clients.py +12 -10
  200. synth_ai/lm/cost/monitor.py +1 -0
  201. synth_ai/lm/cost/statefulness.py +1 -0
  202. synth_ai/lm/provider_support/__init__.py +8 -0
  203. synth_ai/lm/provider_support/anthropic.py +945 -0
  204. synth_ai/lm/provider_support/openai.py +1115 -0
  205. synth_ai/lm/provider_support/suppress_logging.py +31 -0
  206. synth_ai/{zyk/lms → lm}/structured_outputs/handler.py +58 -80
  207. synth_ai/{zyk/lms → lm}/structured_outputs/inject.py +6 -20
  208. synth_ai/{zyk/lms → lm}/structured_outputs/rehabilitate.py +6 -12
  209. synth_ai/{zyk/lms → lm}/vendors/core/anthropic_api.py +21 -30
  210. synth_ai/{zyk/lms → lm}/vendors/core/gemini_api.py +37 -32
  211. synth_ai/{zyk/lms → lm}/vendors/core/mistral_api.py +19 -28
  212. synth_ai/{zyk/lms → lm}/vendors/core/openai_api.py +26 -36
  213. synth_ai/{zyk/lms → lm}/vendors/openai_standard.py +29 -33
  214. synth_ai/{zyk/lms → lm}/vendors/retries.py +1 -1
  215. synth_ai/lm/vendors/supported/__init__.py +0 -0
  216. synth_ai/{zyk/lms → lm}/vendors/supported/custom_endpoint.py +131 -118
  217. synth_ai/{zyk/lms → lm}/vendors/supported/deepseek.py +4 -8
  218. synth_ai/{zyk/lms → lm}/vendors/supported/grok.py +6 -8
  219. synth_ai/{zyk/lms → lm}/vendors/supported/groq.py +1 -1
  220. synth_ai/{zyk/lms → lm}/vendors/supported/ollama.py +2 -2
  221. synth_ai/{zyk/lms → lm}/vendors/supported/openrouter.py +18 -16
  222. synth_ai/{zyk/lms → lm}/vendors/supported/together.py +1 -1
  223. synth_ai/tracing/__init__.py +0 -0
  224. synth_ai/tracing/abstractions.py +224 -0
  225. synth_ai/tracing/base_client.py +91 -0
  226. synth_ai/tracing/client_manager.py +131 -0
  227. synth_ai/tracing/config.py +140 -0
  228. synth_ai/tracing/context.py +146 -0
  229. synth_ai/tracing/decorators.py +679 -0
  230. synth_ai/tracing/events/__init__.py +0 -0
  231. synth_ai/tracing/events/manage.py +147 -0
  232. synth_ai/tracing/events/scope.py +86 -0
  233. synth_ai/tracing/events/store.py +227 -0
  234. synth_ai/tracing/immediate_client.py +152 -0
  235. synth_ai/tracing/local.py +18 -0
  236. synth_ai/tracing/log_client_base.py +74 -0
  237. synth_ai/tracing/retry_queue.py +187 -0
  238. synth_ai/tracing/trackers.py +515 -0
  239. synth_ai/tracing/upload.py +504 -0
  240. synth_ai/tracing/utils.py +9 -0
  241. synth_ai/zyk/__init__.py +28 -2
  242. synth_ai-0.2.1.dev0.dist-info/METADATA +349 -0
  243. synth_ai-0.2.1.dev0.dist-info/RECORD +261 -0
  244. synth_ai/zyk/lms/caching/constants.py +0 -1
  245. synth_ai/zyk/lms/cost/monitor.py +0 -1
  246. synth_ai/zyk/lms/cost/statefulness.py +0 -1
  247. synth_ai-0.1.9.dist-info/METADATA +0 -37
  248. synth_ai-0.1.9.dist-info/RECORD +0 -50
  249. /synth_ai/{zyk/lms/__init__.py → environments/reproducibility/helpers.py} +0 -0
  250. /synth_ai/{zyk/lms/caching → lm}/__init__.py +0 -0
  251. /synth_ai/{zyk/lms/core → lm/caching}/__init__.py +0 -0
  252. /synth_ai/{zyk/lms → lm}/caching/dbs.py +0 -0
  253. /synth_ai/{zyk/lms/cost → lm/core}/__init__.py +0 -0
  254. /synth_ai/{zyk/lms → lm}/core/exceptions.py +0 -0
  255. /synth_ai/{zyk/lms/structured_outputs → lm/cost}/__init__.py +0 -0
  256. /synth_ai/{zyk/lms/vendors → lm/structured_outputs}/__init__.py +0 -0
  257. /synth_ai/{zyk/lms → lm}/tools/__init__.py +0 -0
  258. /synth_ai/{zyk/lms → lm}/tools/base.py +0 -0
  259. /synth_ai/{zyk/lms/vendors/core → lm/vendors}/__init__.py +0 -0
  260. /synth_ai/{zyk/lms → lm}/vendors/base.py +0 -0
  261. /synth_ai/{zyk/lms/vendors/local → lm/vendors/core}/__init__.py +0 -0
  262. /synth_ai/{zyk/lms/vendors/supported → lm/vendors/local}/__init__.py +0 -0
  263. /synth_ai/{zyk/lms → lm}/vendors/local/ollama.py +0 -0
  264. {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/WHEEL +0 -0
  265. {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/licenses/LICENSE +0 -0
  266. {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,331 @@
1
+ import random
2
+ import numpy as np
3
+ import marshal
4
+
5
+
6
def generate_room(
    dim=(13, 13), p_change_directions=0.35, num_steps=25, num_boxes=3, tries=4, second_player=False
):
    """
    Generates a Sokoban room, represented by an integer matrix. The elements are encoded as follows:
        wall = 0
        empty space = 1
        box target = 2
        box not on target = 3
        box on target = 4
        player = 5

    NOTE(review): TYPE_LOOKUP at the bottom of this module inverts codes 3 and 4
    (3 = "box on target", 4 = "box not on target") relative to the lines above --
    confirm which encoding the consuming environment relies on.

    :param dim: (rows, cols) size of the generated room
    :param p_change_directions: probability of turning at each random-walk step
    :param num_steps: number of random-walk steps used to carve the topology
    :param num_boxes: number of boxes (and targets) to place
    :param tries: independent generation attempts before giving up
    :param second_player: also place a second player marker
    :return: (room_structure, room_state, box_mapping) -- static layout, initial
        state, and a dict mapping each box target to its box's final position
    :raises RuntimeWarning: if the final attempt still scored 0
    """
    room_state = np.zeros(shape=dim)
    room_structure = np.zeros(shape=dim)

    # Sometimes rooms with a score == 0 are the only possibility;
    # in that case we retry with a freshly generated room.
    for t in range(tries):
        room = room_topology_generation(dim, p_change_directions, num_steps)
        room = place_boxes_and_player(room, num_boxes=num_boxes, second_player=second_player)

        # Room structure represents all not-movable parts of the room:
        # the player marker (5) is erased back to floor (1).
        room_structure = np.copy(room)
        room_structure[room_structure == 5] = 1

        # Room state represents the current state including movable parts.
        # Boxes are initially placed directly on their targets (both coded 2),
        # so every target cell becomes a box-on-target cell here.
        room_state = room.copy()
        room_state[room_state == 2] = 4

        # Reverse playing pulls the boxes off their targets; a positive score
        # indicates a usable (solvable, non-trivial) level was found.
        room_state, score, box_mapping = reverse_playing(room_state, room_structure)
        # Normalize any box left coded 3 by reverse playing back to 4.
        room_state[room_state == 3] = 4

        if score > 0:
            break

    if score == 0:
        raise RuntimeWarning("Generated Model with score == 0")

    return room_structure, room_state, box_mapping
50
+
51
+
52
def room_topology_generation(dim=(10, 10), p_change_directions=0.35, num_steps=15):
    """
    Generate a room topology (walls and empty floors) via a masked random walk.

    :param dim: (rows, cols) of the level grid
    :param p_change_directions: probability of turning at each step
    :param num_steps: number of random-walk steps
    :return: 2d int array, 1 = floor and 0 = wall, with a solid border
    """
    rows, cols = dim

    # Each mask marks the cells (relative to the walker, which sits at the
    # centre of the 3x3 stamp) that are turned into floor when stamped.
    masks = [
        [[0, 0, 0], [1, 1, 1], [0, 0, 0]],
        [[0, 1, 0], [0, 1, 0], [0, 1, 0]],
        [[0, 0, 0], [1, 1, 0], [0, 1, 0]],
        [[0, 0, 0], [1, 1, 0], [1, 1, 0]],
        [[0, 0, 0], [0, 1, 1], [0, 1, 0]],
    ]

    # Candidate headings for the walk.
    directions = [(1, 0), (0, 1), (-1, 0), (0, -1)]
    heading = random.sample(directions, 1)[0]

    # Random interior starting cell for the walker.
    walker = np.array([random.randint(1, rows - 1), random.randint(1, cols - 1)])

    grid = np.zeros(dim, dtype=int)

    for _ in range(num_steps):
        # Occasionally pick a fresh heading.
        if random.random() < p_change_directions:
            heading = random.sample(directions, 1)[0]

        # Advance, clamping to the interior so the 3x3 stamp stays in bounds.
        walker = walker + heading
        walker[0] = max(min(walker[0], rows - 2), 1)
        walker[1] = max(min(walker[1], cols - 2), 1)

        # Stamp a random mask centred on the walker.
        stamp = random.sample(masks, 1)[0]
        corner = walker - 1
        grid[corner[0] : corner[0] + 3, corner[1] : corner[1] + 3] += stamp

    # Collapse accumulated stamp counts to binary floor cells, then force a
    # one-cell wall border all the way around.
    grid[grid > 0] = 1
    grid[:, [0, cols - 1]] = 0
    grid[[0, rows - 1], :] = 0

    return grid
103
+
104
+
105
def place_boxes_and_player(room, num_boxes, second_player):
    """
    Place the player (and optionally a second player) plus the boxes onto the
    floor cells of a room.

    :param room: 2d array with 1 marking free floor cells; mutated in place
    :param num_boxes: number of boxes to place (each written as code 2)
    :param second_player: whether to place an additional player marker
    :return: the room array with players (5) and boxes (2) placed
    :raises RuntimeError: if the room has too few free floor cells
    """
    # Collect every free floor cell.
    floor_cells = np.where(room == 1)
    free_count = floor_cells[0].shape[0]
    players_needed = 2 if second_player else 1

    if free_count <= num_boxes + players_needed:
        raise RuntimeError(
            "Not enough free spots (#{}) to place {} player and {} boxes.".format(
                free_count, players_needed, num_boxes
            )
        )

    # Drop the player on a random floor cell.
    pick = np.random.randint(free_count)
    spot = floor_cells[0][pick], floor_cells[1][pick]
    room[spot] = 5

    if second_player:
        # NOTE: the candidate list is not refreshed here, so the second
        # player can land on (and overwrite) the first player's cell.
        pick = np.random.randint(free_count)
        spot = floor_cells[0][pick], floor_cells[1][pick]
        room[spot] = 5

    # Place each box on a freshly recomputed free floor cell.
    for _ in range(num_boxes):
        floor_cells = np.where(room == 1)
        free_count = floor_cells[0].shape[0]

        pick = np.random.randint(free_count)
        spot = floor_cells[0][pick], floor_cells[1][pick]
        room[spot] = 2

    return room
145
+
146
+
147
# Global variables used for reverse playing. They are (re)initialized by
# reverse_playing() and mutated by depth_first_search() during the search.
explored_states = set()  # marshal-dumped room states already visited
num_boxes = 0  # number of boxes in the room currently being reverse-played
best_room_score = -1  # best score found so far during the search
best_room = None  # room state that achieved best_room_score
best_box_mapping = None  # box mapping belonging to best_room
153
+
154
+
155
def reverse_playing(room_state, room_structure, search_depth=100):
    """
    Play Sokoban in reverse: the player may move and *pull* boxes. This
    yields a solvable level whose boxes do not start on their targets.

    :param room_state: initial state with every box sitting on its target
    :param room_structure: static layout (walls, floors, targets)
    :param search_depth: accepted for interface compatibility; not used
    :return: (best_room, best_room_score, best_box_mapping)
    """
    global explored_states, num_boxes, best_room_score, best_room, best_box_mapping

    # box_mapping records, for every box target, where its box currently is.
    # It starts as the identity mapping since boxes begin on their targets.
    target_rows, target_cols = np.where(room_structure == 2)
    num_boxes = len(target_rows)
    box_mapping = {}
    for r, c in zip(target_rows, target_cols):
        box_mapping[(r, c)] = (r, c)

    # Reset the module-level search state, then explore.
    explored_states = set()
    best_room_score = -1
    best_box_mapping = box_mapping
    depth_first_search(
        room_state, room_structure, box_mapping, box_swaps=0, last_pull=(-1, -1), ttl=300
    )

    return best_room, best_room_score, best_box_mapping
184
+
185
+
186
def depth_first_search(
    room_state, room_structure, box_mapping, box_swaps=0, last_pull=(-1, -1), ttl=300
):
    """
    Recursively search reverse-play states, recording the best-scoring room in
    the module-level globals. Recursion stops when the ttl reaches 0 or once
    300,000 states have been explored.

    :param room_state: current room state array
    :param room_structure: static layout (walls, floors, targets)
    :param box_mapping: maps each box target to its box's current position
    :param box_swaps: times the pulled box changed along this search path
    :param last_pull: box-target key of the most recently pulled box
    :param ttl: remaining recursion budget for this path
    :return: None; results are written to the best_* module globals
    """
    global explored_states, num_boxes, best_room_score, best_room, best_box_mapping

    ttl -= 1
    if ttl <= 0 or len(explored_states) >= 300000:
        return

    # Hashable fingerprint of the current state for duplicate detection.
    state_tohash = marshal.dumps(room_state)

    # Only search this state if it has not yet been explored.
    if not (state_tohash in explored_states):
        # Score = (#times the pulled box changed) * (total box displacement).
        room_score = box_swaps * box_displacement_score(box_mapping)
        # Zero the score unless every box has been pulled off its target,
        # i.e. all num_boxes target cells (code 2) are visible again.
        if np.where(room_state == 2)[0].shape[0] != num_boxes:
            room_score = 0

        if room_score > best_room_score:
            best_room = room_state
            best_room_score = room_score
            best_box_mapping = box_mapping

        explored_states.add(state_tohash)

        for action in ACTION_LOOKUP.keys():
            # The state and box mapping need to be copied so that every
            # action starts from the same state.
            room_state_next = room_state.copy()
            box_mapping_next = box_mapping.copy()

            room_state_next, box_mapping_next, last_pull_next = reverse_move(
                room_state_next, room_structure, box_mapping_next, last_pull, action
            )

            box_swaps_next = box_swaps
            if last_pull_next != last_pull:
                box_swaps_next += 1

            # NOTE(review): the recursion passes `last_pull` (not
            # `last_pull_next`) and the already-decremented `ttl`; this
            # matches the vendored upstream -- confirm before "fixing",
            # since changing it alters which levels get generated.
            depth_first_search(
                room_state_next, room_structure, box_mapping_next, box_swaps_next, last_pull, ttl
            )
240
+
241
+
242
def reverse_move(room_state, room_structure, box_mapping, last_pull, action):
    """
    Perform one reverse action. Actions in the range [0, 3] are the push
    actions, which during reverse play are executed as *pulls*; actions
    greater than 3 are simple move actions.

    :param room_state: current state array; mutated in place
    :param room_structure: static layout used to restore vacated cells
    :param box_mapping: maps each box target to its box's current position
    :param last_pull: box-target key of the most recently pulled box
    :param action: index into ACTION_LOOKUP / CHANGE_COORDINATES
    :return: (room_state, box_mapping, last_pull), updated
    """
    player_position = np.where(room_state == 5)
    player_position = np.array([player_position[0][0], player_position[1][0]])

    # action % 4 folds the move actions onto the same four deltas as the
    # push/pull actions.
    change = CHANGE_COORDINATES[action % 4]
    next_position = player_position + change

    # Check if next position is an empty floor (1) or an empty box target (2).
    if room_state[next_position[0], next_position[1]] in [1, 2]:
        # Move player, independent of pull or move action; the vacated cell
        # is restored from the static structure.
        room_state[player_position[0], player_position[1]] = room_structure[
            player_position[0], player_position[1]
        ]
        room_state[next_position[0], next_position[1]] = 5

        # In addition, try to pull a box if the action is a pull action.
        if action < 4:
            # Cell behind the player's *old* position, opposite the move
            # direction. (tuple += ndarray yields an ndarray here.)
            possible_box_location = change[0] * -1, change[1] * -1
            possible_box_location += player_position

            if room_state[possible_box_location[0], possible_box_location[1]] in [3, 4]:
                # Perform pull of the adjacent box into the old player cell.
                room_state[player_position[0], player_position[1]] = 3
                room_state[possible_box_location[0], possible_box_location[1]] = room_structure[
                    possible_box_location[0], possible_box_location[1]
                ]

                # Update the box mapping: find which target's box sat at the
                # pulled-from cell and move it to the player's old position.
                for k in box_mapping.keys():
                    if box_mapping[k] == (possible_box_location[0], possible_box_location[1]):
                        box_mapping[k] = (player_position[0], player_position[1])
                        last_pull = k

    return room_state, box_mapping, last_pull
286
+
287
+
288
def box_displacement_score(box_mapping):
    """
    Sum of Manhattan distances between every box and its origin box target.

    :param box_mapping: dict mapping each box-target coordinate to the
        current coordinate of its box
    :return: total Manhattan displacement over all boxes (0 for an empty map)
    """
    score = 0
    for target, location in box_mapping.items():
        # Manhattan distance between where the box is and where it started.
        score += np.sum(np.abs(np.asarray(location) - np.asarray(target)))
    return score
304
+
305
+
306
# Human-readable names for the integer cell codes used in room arrays.
# NOTE(review): codes 3 and 4 here are inverted relative to the docstring of
# generate_room() above -- confirm which table the consuming environment uses.
TYPE_LOOKUP = {
    0: "wall",
    1: "empty space",
    2: "box target",
    3: "box on target",
    4: "box not on target",
    5: "player",
}

# Action indices: [0, 3] push in a direction, [4, 7] just move.
ACTION_LOOKUP = {
    0: "push up",
    1: "push down",
    2: "push left",
    3: "push right",
    4: "move up",
    5: "move down",
    6: "move left",
    7: "move right",
}

# Moves are mapped to (row, col) coordinate changes as follows
# 0: Move up
# 1: Move down
# 2: Move left
# 3: Move right
CHANGE_COORDINATES = {0: (-1, 0), 1: (1, 0), 2: (0, -1), 3: (0, 1)}
@@ -0,0 +1,305 @@
1
+ import gym
2
+ from gym.utils import seeding
3
+ from gym.spaces.discrete import Discrete
4
+ from gym.spaces import Box
5
+ from .room_utils import generate_room
6
+ from .render_utils import room_to_rgb, room_to_tiny_world_rgb
7
+ import numpy as np
8
+
9
+
10
class SokobanEnv(gym.Env):
    """Gym environment for the Sokoban puzzle.

    The player pushes boxes onto target squares inside a procedurally
    generated room.  ``room_state`` (mutable) and ``room_fixed`` (static
    floor plan) are 2-D numpy arrays whose cell codes are: 0 wall,
    1 empty, 2 box target, 3 box on target, 4 box off target, 5 player.

    Rewards: a small per-step penalty, +1 / -1 for pushing a box onto /
    off a target, and a finishing bonus once every box is on a target.
    """

    # Both keys are provided: classic gym reads "render.modes" while newer
    # gym/gymnasium tooling looks up "render_modes".
    metadata = {
        "render.modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array", "raw"],
        "render_modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array", "raw"],
    }

    def __init__(
        self, dim_room=(10, 10), max_steps=120, num_boxes=4, num_gen_steps=None, reset=True
    ):
        """Configure the environment.

        :param dim_room: (rows, cols) of the generated room.
        :param max_steps: episode length limit.
        :param num_boxes: number of boxes (and matching targets) to place.
        :param num_gen_steps: depth of the room generator's reverse play;
            defaults to a heuristic proportional to the room perimeter.
        :param reset: when True, generate an initial room immediately.
        """
        # General Configuration
        self.dim_room = dim_room
        if num_gen_steps is None:  # fixed: identity comparison (was `== None`)
            self.num_gen_steps = int(1.7 * (dim_room[0] + dim_room[1]))
        else:
            self.num_gen_steps = num_gen_steps

        self.num_boxes = num_boxes
        self.boxes_on_target = 0

        # Penalties and Rewards
        self.penalty_for_step = -0.1
        self.penalty_box_off_target = -1
        self.reward_box_on_target = 1
        self.reward_finished = 10
        self.reward_last = 0  # reward produced by the most recent step()

        # Other Settings
        self.viewer = None
        self.max_steps = max_steps
        self.action_space = Discrete(len(ACTION_LOOKUP))
        screen_height, screen_width = (dim_room[0] * 16, dim_room[1] * 16)
        self.observation_space = Box(
            low=0, high=255, shape=(screen_height, screen_width, 3), dtype=np.uint8
        )

        if reset:
            # Initialize Room
            _ = self.reset()

    def seed(self, seed=None):
        """Seed the environment RNG; returns the list of used seeds."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action, observation_mode="rgb_array"):
        """Apply one action using the classic 4-tuple gym API.

        Action 0 is a no-op, 1-4 are pushes, 5-8 are plain moves
        (see ACTION_LOOKUP at module level).

        :param action: integer key of ACTION_LOOKUP.
        :param observation_mode: rendering used for the returned observation.
        :return: (observation, reward, done, info)
        """
        assert action in ACTION_LOOKUP
        assert observation_mode in ["rgb_array", "tiny_rgb_array", "raw"]

        self.num_env_steps += 1

        # Filled in by _push() when a box actually moves; subclasses use
        # these to track box identity between steps.
        self.new_box_position = None
        self.old_box_position = None

        moved_box = False

        if action == 0:
            moved_player = False

        # All push actions are in the range of [1, 4]  (comment fixed; was "[0, 3]")
        elif action < 5:
            moved_player, moved_box = self._push(action)

        else:
            moved_player = self._move(action)

        self._calc_reward()

        done = self._check_if_done()

        # Convert the observation to RGB frame
        observation = self.render(mode=observation_mode)

        info = {
            "action.name": ACTION_LOOKUP[action],
            "action.moved_player": moved_player,
            "action.moved_box": moved_box,
        }
        if done:
            info["maxsteps_used"] = self._check_if_maxsteps()
            info["all_boxes_on_target"] = self._check_if_all_boxes_on_target()

        return observation, self.reward_last, done, info

    def _push(self, action):
        """
        Perform a push, if a box is adjacent in the right direction.
        If no box can be pushed, try to move instead.
        :param action: push action in [1, 4]
        :return: (moved_player, moved_box) booleans
        """
        change = CHANGE_COORDINATES[(action - 1) % 4]
        new_position = self.player_position + change
        current_position = self.player_position.copy()

        # No push, if the push would get the box out of the room's grid.
        # Negative indices are rejected explicitly as well: numpy would
        # otherwise wrap them around to the opposite edge (robustness fix).
        new_box_position = new_position + change
        if (
            new_box_position[0] >= self.room_state.shape[0]
            or new_box_position[1] >= self.room_state.shape[1]
            or new_box_position[0] < 0
            or new_box_position[1] < 0
        ):
            return False, False

        can_push_box = self.room_state[new_position[0], new_position[1]] in [3, 4]
        can_push_box &= self.room_state[new_box_position[0], new_box_position[1]] in [1, 2]
        if can_push_box:
            self.new_box_position = tuple(new_box_position)
            self.old_box_position = tuple(new_position)

            # Move Player
            self.player_position = new_position
            self.room_state[(new_position[0], new_position[1])] = 5
            # Restore whatever the floor originally was (empty or target).
            self.room_state[current_position[0], current_position[1]] = self.room_fixed[
                current_position[0], current_position[1]
            ]

            # Move Box: code 3 if it lands on a target, otherwise 4.
            box_type = 4
            if self.room_fixed[new_box_position[0], new_box_position[1]] == 2:
                box_type = 3
            self.room_state[new_box_position[0], new_box_position[1]] = box_type
            return True, True

        # Try to move if no box to push is available
        else:
            return self._move(action), False

    def _move(self, action):
        """
        Moves the player to the next field, if it is not occupied.
        :param action: action whose direction is (action - 1) % 4
        :return: Boolean, indicating a change of the room's state
        """
        change = CHANGE_COORDINATES[(action - 1) % 4]
        new_position = self.player_position + change
        current_position = self.player_position.copy()

        # Move player if the field in the moving direction is either
        # an empty field or an empty box target.
        if self.room_state[new_position[0], new_position[1]] in [1, 2]:
            self.player_position = new_position
            self.room_state[(new_position[0], new_position[1])] = 5
            self.room_state[current_position[0], current_position[1]] = self.room_fixed[
                current_position[0], current_position[1]
            ]

            return True

        return False

    def _calc_reward(self):
        """Recompute ``self.reward_last`` for the step that was just taken."""
        # Every step a small penalty is given. This ensures
        # that short solutions have a higher reward.
        self.reward_last = self.penalty_for_step

        # count boxes off or on the target
        empty_targets = self.room_state == 2
        player_on_target = (self.room_fixed == 2) & (self.room_state == 5)
        total_targets = empty_targets | player_on_target

        # boxes on target = all boxes minus targets not covered by a box
        current_boxes_on_target = self.num_boxes - np.where(total_targets)[0].shape[0]

        # Add the reward if a box is pushed on the target and give a
        # penalty if a box is pushed off the target.
        if current_boxes_on_target > self.boxes_on_target:
            self.reward_last += self.reward_box_on_target
        elif current_boxes_on_target < self.boxes_on_target:
            self.reward_last += self.penalty_box_off_target

        game_won = self._check_if_all_boxes_on_target()
        if game_won:
            self.reward_last += self.reward_finished

        self.boxes_on_target = current_boxes_on_target

    def _check_if_done(self):
        # Check if the game is over either through reaching the maximum number
        # of available steps or by pushing all boxes on the targets.
        return self._check_if_all_boxes_on_target() or self._check_if_maxsteps()

    def _check_if_all_boxes_on_target(self):
        """True when no target square remains uncovered by a box.

        A target the player is standing on counts as uncovered.
        """
        empty_targets = self.room_state == 2
        player_hiding_target = (self.room_fixed == 2) & (self.room_state == 5)
        are_all_boxes_on_targets = np.where(empty_targets | player_hiding_target)[0].shape[0] == 0
        return are_all_boxes_on_targets

    def _check_if_maxsteps(self):
        """True when the step budget has been exhausted."""
        return self.max_steps == self.num_env_steps

    def reset(
        self,
        second_player: bool = False,
        render_mode: str = "rgb_array",
        seed=None,
        *,
        max_attempts: int = 5,
    ):
        """Generate a fresh room and return its first observation.

        Room generation can fail (raises RuntimeError/RuntimeWarning); it is
        retried up to ``max_attempts`` times, falling back to a trivial
        player-only 3x3 room so that reset() never raises.
        """
        if seed is not None:
            self.seed(seed)

        for attempt in range(max_attempts):
            try:
                self.room_fixed, self.room_state, self.box_mapping = generate_room(
                    dim=self.dim_room,
                    num_steps=self.num_gen_steps,
                    num_boxes=self.num_boxes,
                    second_player=second_player,
                )
                break  # success
            except (RuntimeError, RuntimeWarning) as e:
                print(f"[SOKOBAN] {e} – retry {attempt + 1}/{max_attempts}")
                if attempt == max_attempts - 1:
                    # fallback: trivial 3×3 room with player only
                    self.room_fixed = np.pad(np.ones((1, 1), dtype=int), 1, constant_values=0)
                    self.room_state = self.room_fixed.copy()
                    self.room_state[1, 1] = 5
                    self.box_mapping = {}

        self.player_position = np.argwhere(self.room_state == 5)[0]
        self.num_env_steps = 0
        self.reward_last = 0
        self.boxes_on_target = 0

        return self.render(render_mode)

    def render(self, mode="human", close=None, scale=1):
        """Render the room in one of RENDERING_MODES.

        Substring matching is deliberate: "rgb_array" also matches
        "tiny_rgb_array" and "human" also matches "tiny_human" — the
        tiny/full distinction is handled inside get_image().
        """
        assert mode in RENDERING_MODES

        img = self.get_image(mode, scale)

        if "rgb_array" in mode:
            return img

        elif "human" in mode:
            # Imported lazily so headless use never needs a display.
            from gym.envs.classic_control import rendering

            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(img)
            return self.viewer.isopen

        elif "raw" in mode:
            # Four binary int8 masks instead of an RGB frame.
            arr_walls = (self.room_fixed == 0).view(np.int8)
            arr_goals = (self.room_fixed == 2).view(np.int8)
            arr_boxes = ((self.room_state == 4) + (self.room_state == 3)).view(np.int8)
            arr_player = (self.room_state == 5).view(np.int8)

            return arr_walls, arr_goals, arr_boxes, arr_player

        else:
            super(SokobanEnv, self).render(mode=mode)  # just raise an exception

    def get_image(self, mode, scale=1):
        """Return the RGB frame for *mode* (tiny or full tile set)."""
        if mode.startswith("tiny_"):
            img = room_to_tiny_world_rgb(self.room_state, self.room_fixed, scale=scale)
        else:
            img = room_to_rgb(self.room_state, self.room_fixed)

        return img

    def close(self):
        """Close the human-mode viewer if one was opened."""
        if self.viewer is not None:
            self.viewer.close()

    def set_maxsteps(self, num_steps):
        """Override the episode step limit."""
        self.max_steps = num_steps

    def get_action_lookup(self):
        """Return the action-id -> name mapping."""
        return ACTION_LOOKUP

    def get_action_meanings(self):
        """Alias of get_action_lookup(), kept for the Atari-style gym API."""
        return ACTION_LOOKUP
+
285
+
286
# Discrete action space of SokobanEnv: index 0 is a no-op, 1-4 are pushes,
# 5-8 are plain moves.  step() maps actions 1-8 onto the four directions
# with (action - 1) % 4, indexing CHANGE_COORDINATES below.
ACTION_LOOKUP = {
    0: "no operation",
    1: "push up",
    2: "push down",
    3: "push left",
    4: "push right",
    5: "move up",
    6: "move down",
    7: "move left",
    8: "move right",
}

# Moves are mapped to (row, col) coordinate changes as follows
# 0: Move up
# 1: Move down
# 2: Move left
# 3: Move right
CHANGE_COORDINATES = {0: (-1, 0), 1: (1, 0), 2: (0, -1), 3: (0, 1)}

# Modes accepted by SokobanEnv.render().
RENDERING_MODES = ["rgb_array", "human", "tiny_rgb_array", "tiny_human", "raw"]
@@ -0,0 +1,66 @@
1
+ from .sokoban_env import SokobanEnv
2
+ from .render_utils import room_to_rgb_FT, room_to_tiny_world_rgb_FT
3
+ from gym.spaces import Box
4
+
5
+
6
class FixedTargetsSokobanEnv(SokobanEnv):
    """Sokoban variant where every box must reach one *specific* target.

    ``box_mapping`` maps each target coordinate to the current coordinate
    of the box assigned to it; a box only counts as "on target" when it
    sits on its own target.  Rendering colour-codes the pairing.
    """

    def __init__(self, dim_room=(10, 10), max_steps=120, num_boxes=3, num_gen_steps=None):
        super(FixedTargetsSokobanEnv, self).__init__(dim_room, max_steps, num_boxes, num_gen_steps)
        screen_height, screen_width = (dim_room[0] * 16, dim_room[1] * 16)
        self.observation_space = Box(low=0, high=255, shape=(screen_height, screen_width, 3))
        # Per-box flag: True while box b sits on its own (fixed) target.
        self.boxes_are_on_target = [False] * num_boxes

        # NOTE(review): the base __init__ above already generated a room;
        # this second reset regenerates it with the flags now in place.
        _ = self.reset()

    def get_image(self, mode, scale=1):
        """Render with the fixed-target tile set (boxes/targets paired)."""
        if mode.startswith("tiny_"):
            img = room_to_tiny_world_rgb_FT(
                self.room_state, self.box_mapping, self.room_fixed, scale=scale
            )
        else:
            img = room_to_rgb_FT(self.room_state, self.box_mapping, self.room_fixed)

        return img

    def step(self, action, observation_mode="rgb_array"):
        """Delegate to the base step(); rewards come from our _calc_reward()."""
        observation, self.reward_last, done, info = super(FixedTargetsSokobanEnv, self).step(
            action, observation_mode
        )

        return observation, self.reward_last, done, info

    def _calc_reward(self):
        """Recompute ``self.reward_last`` using per-box target identity."""
        self._update_box_mapping()

        # Every step a small penalty is given. This ensures
        # that short solutions have a higher reward.
        self.reward_last = self.penalty_for_step

        for b in range(len(self.boxes_are_on_target)):
            previous_state = self.boxes_are_on_target[b]

            # A box is on target only when it sits on its *own* target,
            # i.e. its current position equals its target key.
            box_id = list(self.box_mapping.keys())[b]
            new_state = self.box_mapping[box_id] == box_id

            if previous_state and not new_state:
                # Box was pushed off its target
                self.reward_last += self.penalty_box_off_target
            elif not previous_state and new_state:
                # Box was pushed onto its target
                self.reward_last += self.reward_box_on_target

            self.boxes_are_on_target[b] = new_state

        # BUG FIX: grant the completion bonus like the base class (and the
        # upstream gym-sokoban implementation) — it was silently dropped here.
        if self._check_if_all_boxes_on_target():
            self.reward_last += self.reward_finished

    def _update_box_mapping(self):
        """Track which box moved last step by matching its old position."""
        if self.new_box_position is not None:
            box_index = list(self.box_mapping.values()).index(self.old_box_position)
            box_id = list(self.box_mapping.keys())[box_index]
            self.box_mapping[box_id] = self.new_box_position

    def _check_if_all_boxes_on_target(self):
        """True only when every box sits on its own assigned target."""
        for key in self.box_mapping.keys():
            if key != self.box_mapping[key]:
                return False

        return True