synth-ai 0.2.4.dev4__py3-none-any.whl → 0.2.4.dev5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. synth_ai/environments/examples/__init__.py +1 -0
  2. synth_ai/environments/examples/crafter_classic/__init__.py +8 -0
  3. synth_ai/environments/examples/crafter_classic/config_logging.py +111 -0
  4. synth_ai/environments/examples/crafter_classic/debug_translation.py +0 -0
  5. synth_ai/environments/examples/crafter_classic/engine.py +575 -0
  6. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +63 -0
  7. synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +5 -0
  8. synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +74 -0
  9. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +266 -0
  10. synth_ai/environments/examples/crafter_classic/environment.py +364 -0
  11. synth_ai/environments/examples/crafter_classic/taskset.py +233 -0
  12. synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +229 -0
  13. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +298 -0
  14. synth_ai/environments/examples/crafter_custom/__init__.py +4 -0
  15. synth_ai/environments/examples/crafter_custom/crafter/__init__.py +7 -0
  16. synth_ai/environments/examples/crafter_custom/crafter/config.py +182 -0
  17. synth_ai/environments/examples/crafter_custom/crafter/constants.py +8 -0
  18. synth_ai/environments/examples/crafter_custom/crafter/engine.py +269 -0
  19. synth_ai/environments/examples/crafter_custom/crafter/env.py +266 -0
  20. synth_ai/environments/examples/crafter_custom/crafter/objects.py +418 -0
  21. synth_ai/environments/examples/crafter_custom/crafter/recorder.py +187 -0
  22. synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +119 -0
  23. synth_ai/environments/examples/crafter_custom/dataset_builder.py +373 -0
  24. synth_ai/environments/examples/crafter_custom/environment.py +312 -0
  25. synth_ai/environments/examples/crafter_custom/run_dataset.py +305 -0
  26. synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +156 -0
  27. synth_ai/environments/examples/enron/art_helpers/local_email_db.py +280 -0
  28. synth_ai/environments/examples/enron/art_helpers/types_enron.py +24 -0
  29. synth_ai/environments/examples/enron/engine.py +291 -0
  30. synth_ai/environments/examples/enron/environment.py +165 -0
  31. synth_ai/environments/examples/enron/taskset.py +112 -0
  32. synth_ai/environments/examples/minigrid/__init__.py +48 -0
  33. synth_ai/environments/examples/minigrid/engine.py +589 -0
  34. synth_ai/environments/examples/minigrid/environment.py +274 -0
  35. synth_ai/environments/examples/minigrid/environment_mapping.py +242 -0
  36. synth_ai/environments/examples/minigrid/puzzle_loader.py +416 -0
  37. synth_ai/environments/examples/minigrid/taskset.py +583 -0
  38. synth_ai/environments/examples/nethack/__init__.py +7 -0
  39. synth_ai/environments/examples/nethack/achievements.py +337 -0
  40. synth_ai/environments/examples/nethack/engine.py +738 -0
  41. synth_ai/environments/examples/nethack/environment.py +255 -0
  42. synth_ai/environments/examples/nethack/helpers/__init__.py +42 -0
  43. synth_ai/environments/examples/nethack/helpers/action_mapping.py +301 -0
  44. synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +401 -0
  45. synth_ai/environments/examples/nethack/helpers/observation_utils.py +433 -0
  46. synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +201 -0
  47. synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +268 -0
  48. synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +308 -0
  49. synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +430 -0
  50. synth_ai/environments/examples/nethack/taskset.py +323 -0
  51. synth_ai/environments/examples/red/__init__.py +7 -0
  52. synth_ai/environments/examples/red/config_logging.py +110 -0
  53. synth_ai/environments/examples/red/engine.py +693 -0
  54. synth_ai/environments/examples/red/engine_helpers/__init__.py +1 -0
  55. synth_ai/environments/examples/red/engine_helpers/memory_map.py +28 -0
  56. synth_ai/environments/examples/red/engine_helpers/reward_components.py +275 -0
  57. synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +142 -0
  58. synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +56 -0
  59. synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +283 -0
  60. synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +149 -0
  61. synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +137 -0
  62. synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +56 -0
  63. synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +330 -0
  64. synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +120 -0
  65. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +558 -0
  66. synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +312 -0
  67. synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +147 -0
  68. synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +246 -0
  69. synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +367 -0
  70. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +139 -0
  71. synth_ai/environments/examples/red/environment.py +235 -0
  72. synth_ai/environments/examples/red/taskset.py +77 -0
  73. synth_ai/environments/examples/sokoban/__init__.py +1 -0
  74. synth_ai/environments/examples/sokoban/engine.py +675 -0
  75. synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +1 -0
  76. synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +656 -0
  77. synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +17 -0
  78. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +3 -0
  79. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +129 -0
  80. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +370 -0
  81. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +331 -0
  82. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +305 -0
  83. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +66 -0
  84. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +114 -0
  85. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +122 -0
  86. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +394 -0
  87. synth_ai/environments/examples/sokoban/environment.py +228 -0
  88. synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +438 -0
  89. synth_ai/environments/examples/sokoban/puzzle_loader.py +311 -0
  90. synth_ai/environments/examples/sokoban/taskset.py +425 -0
  91. synth_ai/environments/examples/tictactoe/__init__.py +1 -0
  92. synth_ai/environments/examples/tictactoe/engine.py +368 -0
  93. synth_ai/environments/examples/tictactoe/environment.py +239 -0
  94. synth_ai/environments/examples/tictactoe/taskset.py +214 -0
  95. synth_ai/environments/examples/verilog/__init__.py +10 -0
  96. synth_ai/environments/examples/verilog/engine.py +328 -0
  97. synth_ai/environments/examples/verilog/environment.py +349 -0
  98. synth_ai/environments/examples/verilog/taskset.py +418 -0
  99. {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev5.dist-info}/METADATA +1 -1
  100. {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev5.dist-info}/RECORD +104 -6
  101. {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev5.dist-info}/WHEEL +0 -0
  102. {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev5.dist-info}/entry_points.txt +0 -0
  103. {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev5.dist-info}/licenses/LICENSE +0 -0
  104. {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,656 @@
1
+ import numpy as np
2
+ import marshal
3
+ import copy
4
+ from collections import deque
5
+ from typing import Tuple
6
+
7
+ # --- ADDED CONSTANT ---
8
+ MAX_GENERATION_RETRIES = 100
9
+ # --- END ADDED CONSTANT ---
10
+
11
+ # import matplotlib.pyplot as plt
12
+ # import matplotlib.animation as animation
13
+
14
+
15
def get_shortest_action_path(room_fixed, room_state, MAX_DEPTH=100):
    """
    Find the shortest action sequence that pushes every box onto a target.

    The search is a breadth-first search over room states, so the first
    solved state that is dequeued has the minimum number of actions.
    NOTE: only a single player is supported and only one shortest solution
    is returned.

    Parameters:
        room_state (np.ndarray): the current state of the room
            - 0: wall
            - 1: empty space
            - 2: box target
            - 3: box on target
            - 4: box not on target
            - 5: player
        room_fixed (np.ndarray): the static part of the room
            - 0: wall
            - 1: empty space
            - 2: box target
        MAX_DEPTH (int): maximum number of actions a candidate path may have

    Returns:
        list: action numbers (1: up, 2: down, 3: left, 4: right). An empty
        list is returned both when the room is already solved and when no
        solution exists within MAX_DEPTH.
    """
    n_rows, n_cols = room_fixed.shape[0], room_fixed.shape[1]

    def in_bounds(pos):
        # Must be checked BEFORE indexing: a negative index would silently
        # wrap around and an index past the edge would raise IndexError.
        return 0 <= pos[0] < n_rows and 0 <= pos[1] < n_cols

    # BFS queue stores (room_state, path); copy so the caller's array is
    # never shared with the search.
    queue = deque([(room_state.copy(), [])])
    explored_states = set()

    # Possible moves (up, down, left, right) and their action numbers.
    moves = [(-1, 0), (1, 0), (0, -1), (0, 1)]
    actions = [1, 2, 3, 4]

    while queue:
        state, path = queue.popleft()
        if len(path) > MAX_DEPTH:
            return []  # No solution found within the depth budget

        # Skip states that were already expanded.
        state_key = state.tobytes()
        if state_key in explored_states:
            continue
        explored_states.add(state_key)

        # Gather the dynamic information of the room.
        player_pos = tuple(np.argwhere(state == 5)[0])
        boxes_on_target = set(map(tuple, np.argwhere(state == 3)))
        boxes_not_on_target = set(map(tuple, np.argwhere(state == 4)))
        boxes = boxes_on_target | boxes_not_on_target

        # Solved once no box is left off-target.
        if not boxes_not_on_target:
            return path

        for move, action in zip(moves, actions):
            new_player_pos = (player_pos[0] + move[0], player_pos[1] + move[1])

            # The player cannot leave the grid or walk into a wall.
            if not in_bounds(new_player_pos) or room_fixed[new_player_pos] == 0:
                continue

            new_box_pos = None
            if new_player_pos in boxes:
                new_box_pos = (new_player_pos[0] + move[0], new_player_pos[1] + move[1])
                # A box cannot be pushed off the grid, into a wall, or into
                # another box.
                if (
                    not in_bounds(new_box_pos)
                    or room_fixed[new_box_pos] == 0
                    or new_box_pos in boxes
                ):
                    continue

            # Only copy the state once the move is known to be legal.
            new_state = state.copy()
            if new_box_pos is not None:
                new_state[new_player_pos] = room_fixed[new_player_pos]
                new_state[new_box_pos] = 3 if room_fixed[new_box_pos] == 2 else 4

            # Restore the floor/target under the player and move the player.
            new_state[player_pos] = room_fixed[player_pos]
            new_state[new_player_pos] = 5
            queue.append((new_state, path + [action]))

    return []  # No solution found
113
+
114
+
115
+ # def plot_animation(imgs):
116
+ # fig, ax = plt.subplots()
117
+ # im = ax.imshow(imgs[0])
118
+ # def init():
119
+ # im.set_data(imgs[0])
120
+ # return [im]
121
+ # def update(i):
122
+ # im.set_data(imgs[i])
123
+ # return [im]
124
+ # ani = animation.FuncAnimation(fig, update, frames=len(imgs), init_func=init, blit=True)
125
+ # return ani
126
+
127
+
128
+ # def plot_animation(imgs):
129
+ # height, width = imgs[0].shape[:2]
130
+ # fig = plt.figure(figsize=(width / 100, height / 100), dpi=500)
131
+
132
+ # ax = fig.add_axes([0, 0, 1, 1])
133
+
134
+ # ax.set_xticks([])
135
+ # ax.set_yticks([])
136
+ # ax.set_frame_on(False)
137
+
138
+ # im = ax.imshow(imgs[0])
139
+
140
+ # def init():
141
+ # im.set_data(imgs[0])
142
+ # return [im]
143
+
144
+ # def update(i):
145
+ # im.set_data(imgs[i])
146
+ # return [im]
147
+
148
+ # ani = animation.FuncAnimation(
149
+ # fig, update, frames=len(imgs), init_func=init, blit=True
150
+ # )
151
+ # return ani
152
+
153
+
154
+ # def solve_sokoban(env, saved_animation_path):
155
+ # """
156
+ # Solve the given sokoban environment and save the animation
157
+ # """
158
+ # actions = get_shortest_action_path(env.room_fixed, env.room_state)
159
+ # print(f"Found {len(actions)} actions: {actions}")
160
+ # imgs = []
161
+ # img_before_action = env.render("rgb_array")
162
+ # imgs.append(img_before_action)
163
+ # for action in actions:
164
+ # env.step(action)
165
+ # img_after_action = env.render("rgb_array")
166
+ # imgs.append(img_after_action)
167
+ # ani = plot_animation(imgs)
168
+ # ani.save(saved_animation_path)
169
+
170
+
171
def add_random_player_movement(
    room_state: np.ndarray,
    room_structure: np.ndarray,
    rng: np.random.Generator,
    move_probability=0.5,
    continue_probability=0.5,
    max_steps=3,
) -> np.ndarray:
    """
    Optionally walk the player a few cells after reverse playing.

    With probability ``1 - move_probability`` nothing happens. Otherwise the
    player takes up to ``max_steps`` steps onto free cells (codes 1 or 2)
    that it has not visited during this call; after each step the walk stops
    with probability ``1 - continue_probability``. All randomness comes from
    ``rng``. ``room_state`` is modified in place and also returned.
    """
    # Decide up front whether the player moves at all.
    if rng.random() > move_probability:
        return room_state

    player_cells = np.where(room_state == 5)
    if len(player_cells[0]) == 0:
        # Should not happen if called correctly, but handle gracefully.
        print("Warning: Player not found in room_state during add_random_player_movement.")
        return room_state

    current = np.array([player_cells[0][0], player_cells[1][0]])
    visited = {tuple(current)}  # never step back onto an already used cell

    for step in range(1, max_steps + 1):
        # Collect every in-bounds, free, not-yet-visited neighbour.
        candidates = []
        for action in range(4):  # 0: up, 1: down, 2: left, 3: right
            neighbour = current + CHANGE_COORDINATES[action]
            inside = (
                0 <= neighbour[0] < room_state.shape[0]
                and 0 <= neighbour[1] < room_state.shape[1]
            )
            if not inside:
                continue
            is_free = room_state[neighbour[0], neighbour[1]] in [1, 2]
            if is_free and tuple(neighbour) not in visited:
                candidates.append((action, neighbour))

        if not candidates:
            break

        # Pick one candidate with the supplied RNG.
        _, destination = candidates[rng.integers(0, len(candidates))]

        # Restore what was under the player, then place the player.
        room_state[current[0], current[1]] = room_structure[current[0], current[1]]
        room_state[destination[0], destination[1]] = 5

        current = destination
        visited.add(tuple(current))

        # The continue roll is only drawn when another step is still allowed.
        if step >= max_steps or rng.random() > continue_probability:
            break

    return room_state
243
+
244
+
245
+ """
246
+ Following code is adapted from the nicely written gym_sokoban repo
247
+ """
248
+
249
+
250
def generate_room(
    dim: Tuple[int, int] = (13, 13),
    initial_seed: int = 42,
    p_change_directions=0.35,
    num_steps=25,
    num_boxes=3,
    second_player=False,
    search_depth=100,
):
    """
    Deterministically build a Sokoban room from ``initial_seed``.

    Up to MAX_GENERATION_RETRIES attempts are made, attempt ``k`` using a
    fresh ``numpy.random.default_rng(initial_seed + k)``. An attempt is
    accepted when reverse playing yields a box displacement score > 0.

    Returns:
        tuple: (room_structure, room_state, box_mapping, action_sequence)

    Raises:
        RuntimeWarning: if no attempt produced a room with score > 0.
    """
    for retry_offset in range(MAX_GENERATION_RETRIES):
        current_seed = initial_seed + retry_offset
        # One independent, deterministically seeded generator per attempt.
        rng = np.random.default_rng(current_seed)

        layout = room_topology_generation(dim, p_change_directions, num_steps, rng)

        try:
            layout = place_boxes_and_player(layout, num_boxes, second_player, rng)
        except RuntimeError as e:
            # Not enough free cells for this seed; move on to the next one.
            print(f"Warning: place_boxes_and_player failed for seed {current_seed}: {e}")
            continue

        # Static structure: the player's cell is plain floor.
        room_structure = np.copy(layout)
        room_structure[room_structure == 5] = 1

        # Dynamic state: every target starts with a box sitting on it.
        start_state = layout.copy()
        start_state[start_state == 2] = 4

        room_state_rev, box_mapping, action_sequence = reverse_playing(
            start_state, room_structure, search_depth
        )
        if room_state_rev is None:
            print(f"Warning: reverse_playing failed for seed {current_seed}.")
            continue

        score = box_displacement_score(box_mapping)
        if score <= 0:
            # No box was displaced; try the next seed offset.
            continue

        final_room_state = room_state_rev.copy()
        # Ensure boxes are marked correctly after reverse play.
        final_room_state[final_room_state == 3] = 4

        # Barely displaced rooms get a higher chance of extra player movement.
        move_prob = 0.8 if score == 1 else 0.5
        final_room_state = add_random_player_movement(
            final_room_state,
            room_structure,
            rng,
            move_probability=move_prob,
            continue_probability=0.5,
            max_steps=3,
        )
        return room_structure, final_room_state, box_mapping, action_sequence

    # Every attempt failed.
    raise RuntimeWarning(
        f"Generated Model with score == 0 after {MAX_GENERATION_RETRIES} attempts. "
        f"Parameters: initial_seed={initial_seed}, dim={dim}, "
        f"p_change_directions={p_change_directions}, num_steps={num_steps}, "
        f"num_boxes={num_boxes}, second_player={second_player}, "
        f"search_depth={search_depth}."
    )
336
+
337
+
338
def room_topology_generation(
    dim: Tuple[int, int],
    p_change_directions: float,
    num_steps: int,
    rng: np.random.Generator,
) -> np.ndarray:
    """
    Carve a room layout with a random walk driven by ``rng``.

    Returns an integer array of shape ``dim`` holding 1 for floor and 0 for
    wall; the outermost rows and columns are always wall.
    """
    n_rows, n_cols = dim

    # Each 3x3 stamp marks the cells turned into floor around the walker;
    # the stamp's centre sits on the walker's current position.
    stamps = np.array(
        [
            [[0, 0, 0], [1, 1, 1], [0, 0, 0]],
            [[0, 1, 0], [0, 1, 0], [0, 1, 0]],
            [[0, 0, 0], [1, 1, 0], [0, 1, 0]],
            [[0, 0, 0], [1, 1, 0], [1, 1, 0]],
            [[0, 0, 0], [0, 1, 1], [0, 1, 0]],
        ]
    )
    headings = [(1, 0), (0, 1), (-1, 0), (0, -1)]

    # Initial heading, then initial position (row first, column second).
    heading = headings[rng.choice(len(headings))]
    start_row = rng.integers(1, n_rows - 1)
    start_col = rng.integers(1, n_cols - 1)
    walker = np.array([start_row, start_col])

    grid = np.zeros(dim, dtype=int)

    for _ in range(num_steps):
        # Occasionally pick a new heading.
        if rng.random() < p_change_directions:
            heading = headings[rng.choice(len(headings))]

        # Step and keep the walker one cell away from the border.
        walker = walker + heading
        walker[0] = max(min(walker[0], n_rows - 2), 1)
        walker[1] = max(min(walker[1], n_cols - 2), 1)

        # Stamp a randomly chosen mask centred on the walker.
        stamp = stamps[rng.choice(len(stamps))]
        top, left = walker[0] - 1, walker[1] - 1
        grid[top : top + 3, left : left + 3] += stamp

    # Collapse stamp counts to a 0/1 floor map and wall off the border.
    grid = (grid > 0).astype(int)
    grid[:, 0] = 0
    grid[:, n_cols - 1] = 0
    grid[0, :] = 0
    grid[n_rows - 1, :] = 0

    return grid
390
+
391
+
392
def place_boxes_and_player(
    room: np.ndarray, num_boxes: int, second_player: bool, rng: np.random.Generator
) -> np.ndarray:
    """
    Drop the player(s) and the boxes onto random free floor cells.

    Cells are drawn without replacement from the cells equal to 1 using
    ``rng``. Players are written as 5 and boxes as 2 (box target). ``room``
    is modified in place and returned.

    Raises:
        RuntimeError: if the number of free cells does not exceed the number
            of objects to place.
    """
    free = np.where(room == 1)
    free_cells = list(zip(free[0], free[1]))
    free_count = len(free_cells)
    player_count = 2 if second_player else 1
    needed = num_boxes + player_count

    if free_count <= needed:
        raise RuntimeError(
            f"Not enough free spots ({free_count}) to place {player_count} player(s) and {num_boxes} boxes."
        )

    # One draw without replacement covers players first, then boxes.
    picks = rng.choice(free_count, size=needed, replace=False)
    for order, cell_index in enumerate(picks):
        # The first `player_count` picks are players, the rest are boxes
        # (stored as box targets initially).
        room[free_cells[cell_index]] = 5 if order < player_count else 2

    return room
425
+
426
+
427
# Global scratch state used for reverse playing. reverse_playing() and
# depth_first_search() both declare these names `global`; reverse_playing()
# resets them before every search, so they only ever describe the most
# recent search.
explored_states = set()  # serialized room states already expanded
num_boxes = 0  # number of box targets in the room being searched
best_room_score = -1  # highest room score seen so far
best_room = None  # room state that achieved best_room_score
best_box_mapping = None  # box mapping belonging to best_room
# Initialised here like its siblings so the name exists at import time.
best_action_sequence = []  # actions that led to best_room
433
+
434
+
435
def reverse_playing(room_state, room_structure, search_depth=100):
    """
    Play Sokoban in reverse: the player walks around and pulls boxes.

    Starting from a state in which every box sits on a target, a depth-first
    search looks for the reachable state with the best score, which yields a
    solvable level whose boxes are away from their targets.

    :param room_state: starting state, boxes placed on the targets
    :param room_structure: static layout (walls, floor, box targets)
    :param search_depth: time-to-live handed to the depth-first search
    :return: best room state (2d array, or None if nothing was explored),
        its box mapping, and the action sequence that produced it
    """
    global \
        explored_states, \
        num_boxes, \
        best_room_score, \
        best_room, \
        best_box_mapping, \
        best_action_sequence

    # The box mapping tracks, per box target, where its box currently is;
    # every box starts on its own target.
    target_cells = np.where(room_structure == 2)
    num_boxes = len(target_cells[0])
    box_mapping = {cell: cell for cell in zip(target_cells[0], target_cells[1])}

    # Reset the shared search state before starting a new search.
    explored_states = set()
    best_room_score = -1
    best_room = None
    best_box_mapping = box_mapping
    best_action_sequence = []

    depth_first_search(
        room_state,
        room_structure,
        box_mapping,
        box_swaps=0,
        last_pull=(-1, -1),
        ttl=search_depth,
        action_sequence=[],
    )

    return best_room, best_box_mapping, best_action_sequence
479
+
480
+
481
def depth_first_search(
    room_state,
    room_structure,
    box_mapping,
    box_swaps=0,
    last_pull=(-1, -1),
    ttl=300,
    action_sequence=None,
):
    """
    Recursively explore the states reachable by reverse (pull) moves.

    The recursion stops when ``ttl`` reaches 0 or once 300,000 distinct
    states have been explored. The best state found is stored in the
    module-level ``best_*`` globals rather than returned.

    :param room_state: current room state
    :param room_structure: static layout (walls, floor, box targets)
    :param box_mapping: maps each box target to the current cell of its box
    :param box_swaps: how many times the pulled box changed so far
    :param last_pull: box-mapping key of the most recently pulled box
    :param ttl: remaining recursion budget
    :param action_sequence: actions taken to reach ``room_state``; ``None``
        means an empty sequence
    :return: None
    """
    global \
        explored_states, \
        num_boxes, \
        best_room_score, \
        best_room, \
        best_box_mapping, \
        best_action_sequence

    # Avoid a shared mutable default argument.
    if action_sequence is None:
        action_sequence = []

    ttl -= 1
    if ttl <= 0 or len(explored_states) >= 300000:
        return

    state_tohash = marshal.dumps(room_state)

    # Only expand this state if it has not been explored yet.
    if state_tohash in explored_states:
        return

    # Score the state; it is worthless unless every box target is visible
    # (no box or player covering one).
    room_score = box_swaps * box_displacement_score(box_mapping)
    if np.where(room_state == 2)[0].shape[0] != num_boxes:
        room_score = 0

    if room_score > best_room_score:
        best_room = room_state.copy()
        best_room_score = room_score
        best_box_mapping = box_mapping.copy()
        best_action_sequence = action_sequence.copy()

    explored_states.add(state_tohash)

    for action in ACTION_LOOKUP.keys():
        # TODO: tentative attempt to produce fewer moves by only trying the
        # pull actions (0-3) and skipping the plain move actions.
        if action >= 4:
            continue

        # Work on copies so every action starts from the same state.
        room_state_next = room_state.copy()
        box_mapping_next = box_mapping.copy()

        room_state_next, box_mapping_next, last_pull_next = reverse_move(
            room_state_next, room_structure, box_mapping_next, last_pull, action
        )

        # Pulling a different box than last time counts as a box swap.
        box_swaps_next = box_swaps
        if last_pull_next != last_pull:
            box_swaps_next += 1

        action_sequence_next = action_sequence + [action]
        depth_first_search(
            room_state_next,
            room_structure,
            box_mapping_next,
            box_swaps_next,
            last_pull_next,
            ttl,
            action_sequence_next,
        )
562
+
563
+
564
def reverse_move(room_state, room_structure, box_mapping, last_pull, action):
    """
    Apply one reverse action to the room.

    Actions 0-3 move the player and additionally pull a box that sits
    directly behind the player; actions above 3 are simple moves. Nothing
    changes if the destination cell is not free. ``room_state`` and
    ``box_mapping`` are modified in place.

    :param room_state: current room state
    :param room_structure: static layout (walls, floor, box targets)
    :param box_mapping: maps each box target to the current cell of its box
    :param last_pull: box-mapping key of the most recently pulled box
    :param action: action number, reduced modulo 4 to pick the direction
    :return: room_state, box_mapping, last_pull (updated if a box was pulled)
    """
    player_cells = np.where(room_state == 5)
    row, col = player_cells[0][0], player_cells[1][0]

    d_row, d_col = CHANGE_COORDINATES[action % 4]
    new_row, new_col = row + d_row, col + d_col

    # The player may only step onto empty floor or an empty box target.
    if room_state[new_row, new_col] not in [1, 2]:
        return room_state, box_mapping, last_pull

    # Move the player, independent of pull or move action.
    room_state[row, col] = room_structure[row, col]
    room_state[new_row, new_col] = 5

    # Plain move actions never drag a box along.
    if action >= 4:
        return room_state, box_mapping, last_pull

    # A pullable box sits on the cell opposite to the movement direction.
    box_row, box_col = row - d_row, col - d_col
    if room_state[box_row, box_col] in [3, 4]:
        # The box follows the player onto the cell the player just left.
        room_state[row, col] = 3
        room_state[box_row, box_col] = room_structure[box_row, box_col]

        # Record the new location of whichever box was pulled.
        old_cell = (box_row, box_col)
        new_cell = (row, col)
        for origin, location in box_mapping.items():
            if location == old_cell:
                box_mapping[origin] = new_cell
                last_pull = origin

    return room_state, box_mapping, last_pull
611
+
612
+
613
def box_displacement_score(box_mapping):
    """
    Sum the Manhattan distances between each box and its origin target.

    :param box_mapping: maps each box target to the current cell of its box
    :return: total displacement; 0 for an empty mapping
    """
    return sum(
        np.sum(np.abs(np.array(location) - np.array(origin)))
        for origin, location in box_mapping.items()
    )
629
+
630
+
631
# Human-readable names for the integer cell codes stored in room arrays.
TYPE_LOOKUP = {
    0: "wall",
    1: "empty space",
    2: "box target",
    3: "box on target",
    4: "box not on target",
    5: "player",
}

# Human-readable names for the action numbers. reverse_move() treats
# actions below 4 as moves that may also pull a box and actions from 4
# upwards as plain moves; the direction of an action is `action % 4`.
ACTION_LOOKUP = {
    0: "push up",
    1: "push down",
    2: "push left",
    3: "push right",
    4: "move up",
    5: "move down",
    6: "move left",
    7: "move right",
}

# Moves are mapped to (row, column) coordinate changes as follows
# 0: Move up
# 1: Move down
# 2: Move left
# 3: Move right
CHANGE_COORDINATES = {0: (-1, 0), 1: (1, 0), 2: (0, -1), 3: (0, 1)}
@@ -0,0 +1,17 @@
1
+ import logging
2
+ import pkg_resources
3
+ import json
4
+ from gym.envs.registration import register
5
+
6
# Import-time registration of the vendored Sokoban environments with gym.
logger = logging.getLogger(__name__)

# Locate envs/available_envs.json relative to this package.
# NOTE(review): pkg_resources is deprecated in recent setuptools releases;
# importlib.resources may be the better choice — confirm supported versions.
resource_package = __name__
env_json = pkg_resources.resource_filename(
    resource_package, "/".join(("envs", "available_envs.json"))
)

# The JSON file is iterated as a sequence of entries, each of which must
# provide an "id" and an "entry_point" key.
with open(env_json) as f:
    envs = json.load(f)

# Register every listed environment under its id.
for env in envs:
    register(id=env["id"], entry_point=env["entry_point"])
@@ -0,0 +1,3 @@
1
+ from .sokoban_env import SokobanEnv, ACTION_LOOKUP, CHANGE_COORDINATES
2
+ from . import room_utils
3
+ from .sokoban_env_variations import *