synth-ai 0.2.4.dev4__py3-none-any.whl → 0.2.4.dev6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synth_ai/environments/examples/__init__.py +1 -0
- synth_ai/environments/examples/crafter_classic/__init__.py +8 -0
- synth_ai/environments/examples/crafter_classic/config_logging.py +111 -0
- synth_ai/environments/examples/crafter_classic/debug_translation.py +0 -0
- synth_ai/environments/examples/crafter_classic/engine.py +579 -0
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +63 -0
- synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +5 -0
- synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +74 -0
- synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +266 -0
- synth_ai/environments/examples/crafter_classic/environment.py +364 -0
- synth_ai/environments/examples/crafter_classic/taskset.py +233 -0
- synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +229 -0
- synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +298 -0
- synth_ai/environments/examples/crafter_custom/__init__.py +4 -0
- synth_ai/environments/examples/crafter_custom/crafter/__init__.py +7 -0
- synth_ai/environments/examples/crafter_custom/crafter/config.py +182 -0
- synth_ai/environments/examples/crafter_custom/crafter/constants.py +8 -0
- synth_ai/environments/examples/crafter_custom/crafter/engine.py +269 -0
- synth_ai/environments/examples/crafter_custom/crafter/env.py +266 -0
- synth_ai/environments/examples/crafter_custom/crafter/objects.py +418 -0
- synth_ai/environments/examples/crafter_custom/crafter/recorder.py +187 -0
- synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +119 -0
- synth_ai/environments/examples/crafter_custom/dataset_builder.py +373 -0
- synth_ai/environments/examples/crafter_custom/environment.py +312 -0
- synth_ai/environments/examples/crafter_custom/run_dataset.py +305 -0
- synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +156 -0
- synth_ai/environments/examples/enron/art_helpers/local_email_db.py +280 -0
- synth_ai/environments/examples/enron/art_helpers/types_enron.py +24 -0
- synth_ai/environments/examples/enron/engine.py +291 -0
- synth_ai/environments/examples/enron/environment.py +165 -0
- synth_ai/environments/examples/enron/taskset.py +112 -0
- synth_ai/environments/examples/minigrid/__init__.py +48 -0
- synth_ai/environments/examples/minigrid/engine.py +589 -0
- synth_ai/environments/examples/minigrid/environment.py +274 -0
- synth_ai/environments/examples/minigrid/environment_mapping.py +242 -0
- synth_ai/environments/examples/minigrid/puzzle_loader.py +416 -0
- synth_ai/environments/examples/minigrid/taskset.py +583 -0
- synth_ai/environments/examples/nethack/__init__.py +7 -0
- synth_ai/environments/examples/nethack/achievements.py +337 -0
- synth_ai/environments/examples/nethack/engine.py +738 -0
- synth_ai/environments/examples/nethack/environment.py +255 -0
- synth_ai/environments/examples/nethack/helpers/__init__.py +42 -0
- synth_ai/environments/examples/nethack/helpers/action_mapping.py +301 -0
- synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +401 -0
- synth_ai/environments/examples/nethack/helpers/observation_utils.py +433 -0
- synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +201 -0
- synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +268 -0
- synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +308 -0
- synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +430 -0
- synth_ai/environments/examples/nethack/taskset.py +323 -0
- synth_ai/environments/examples/red/__init__.py +7 -0
- synth_ai/environments/examples/red/config_logging.py +110 -0
- synth_ai/environments/examples/red/engine.py +693 -0
- synth_ai/environments/examples/red/engine_helpers/__init__.py +1 -0
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +28 -0
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +275 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +142 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +56 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +283 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +149 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +137 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +56 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +330 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +120 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +558 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +312 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +147 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +246 -0
- synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +367 -0
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +139 -0
- synth_ai/environments/examples/red/environment.py +235 -0
- synth_ai/environments/examples/red/taskset.py +77 -0
- synth_ai/environments/examples/sokoban/__init__.py +1 -0
- synth_ai/environments/examples/sokoban/engine.py +675 -0
- synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +1 -0
- synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +656 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +17 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +3 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +129 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +370 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +331 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +305 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +66 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +114 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +122 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +394 -0
- synth_ai/environments/examples/sokoban/environment.py +228 -0
- synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +438 -0
- synth_ai/environments/examples/sokoban/puzzle_loader.py +311 -0
- synth_ai/environments/examples/sokoban/taskset.py +425 -0
- synth_ai/environments/examples/tictactoe/__init__.py +1 -0
- synth_ai/environments/examples/tictactoe/engine.py +368 -0
- synth_ai/environments/examples/tictactoe/environment.py +239 -0
- synth_ai/environments/examples/tictactoe/taskset.py +214 -0
- synth_ai/environments/examples/verilog/__init__.py +10 -0
- synth_ai/environments/examples/verilog/engine.py +328 -0
- synth_ai/environments/examples/verilog/environment.py +349 -0
- synth_ai/environments/examples/verilog/taskset.py +418 -0
- synth_ai/environments/examples/wordle/__init__.py +29 -0
- synth_ai/environments/examples/wordle/engine.py +391 -0
- synth_ai/environments/examples/wordle/environment.py +154 -0
- synth_ai/environments/examples/wordle/helpers/generate_instances_wordfreq.py +75 -0
- synth_ai/environments/examples/wordle/taskset.py +222 -0
- synth_ai/environments/service/app.py +8 -0
- synth_ai/environments/service/core_routes.py +38 -0
- synth_ai/learning/prompts/banking77_injection_eval.py +163 -0
- synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +201 -0
- synth_ai/learning/prompts/mipro.py +273 -1
- synth_ai/learning/prompts/random_search.py +247 -0
- synth_ai/learning/prompts/run_mipro_banking77.py +160 -0
- synth_ai/learning/prompts/run_random_search_banking77.py +305 -0
- synth_ai/lm/injection.py +81 -0
- synth_ai/lm/overrides.py +204 -0
- synth_ai/lm/provider_support/anthropic.py +39 -12
- synth_ai/lm/provider_support/openai.py +31 -4
- synth_ai/lm/vendors/core/anthropic_api.py +16 -0
- synth_ai/lm/vendors/openai_standard.py +35 -5
- {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev6.dist-info}/METADATA +2 -1
- {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev6.dist-info}/RECORD +123 -13
- {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev6.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev6.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev6.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,656 @@
|
|
1
|
+
import numpy as np
|
2
|
+
import marshal
|
3
|
+
import copy
|
4
|
+
from collections import deque
|
5
|
+
from typing import Tuple
|
6
|
+
|
7
|
+
# Upper bound on the number of consecutive seeds (initial_seed, initial_seed + 1, ...)
# that generate_room tries before it gives up and raises.
MAX_GENERATION_RETRIES = 100
|
10
|
+
|
11
|
+
# import matplotlib.pyplot as plt
|
12
|
+
# import matplotlib.animation as animation
|
13
|
+
|
14
|
+
|
15
|
+
def get_shortest_action_path(room_fixed, room_state, MAX_DEPTH=100):
    """
    Find a shortest action sequence that pushes every box onto a target.

    The search is breadth-first over room states, so the first solved state
    that is dequeued yields a shortest solution.
    NOTE: only a single player is supported and only one shortest solution
    is returned.
    =========================================================
    Parameters:
        room_fixed (np.ndarray): the fixed part of the room
            - 0: wall
            - 1: empty space
            - 2: box target
        room_state (np.ndarray): the current state of the room
            - 0: wall
            - 1: empty space
            - 2: box target
            - 3: box on target
            - 4: box not on target
            - 5: player
        MAX_DEPTH (int): the maximum path length that is explored
    =========================================================
    Returns:
        action_sequence (list): actions (1: up, 2: down, 3: left, 4: right)
            that solve the room. An empty list is returned both when the room
            is already solved and when no solution is found within MAX_DEPTH.
    """
    n_rows = room_fixed.shape[0]
    n_cols = room_fixed.shape[1]

    def is_blocked(pos):
        # Bounds MUST be tested before indexing: an index past the edge raises
        # IndexError and a negative index silently wraps to the opposite side.
        return (
            pos[0] < 0
            or pos[0] >= n_rows
            or pos[1] < 0
            or pos[1] >= n_cols
            or room_fixed[pos] == 0
        )

    # BFS queue stores (room_state, path); copy so the caller's array is never touched.
    queue = deque([(room_state.copy(), [])])
    explored_states = set()

    # Possible moves: up, down, left, right
    moves = [(-1, 0), (1, 0), (0, -1), (0, 1)]
    actions = [1, 2, 3, 4]  # Corresponding action numbers

    while queue:
        room_state, path = queue.popleft()
        if len(path) > MAX_DEPTH:
            return []  # No solution found

        # Skip states that were already expanded. All states in one search
        # share shape and dtype, so the raw bytes identify a state uniquely.
        state_key = room_state.tobytes()
        if state_key in explored_states:
            continue
        explored_states.add(state_key)

        # Gather the dynamic information of the room
        player_pos = tuple(np.argwhere(room_state == 5)[0])
        boxes_on_target = set(map(tuple, np.argwhere(room_state == 3)))
        boxes_not_on_target = set(map(tuple, np.argwhere(room_state == 4)))
        boxes = boxes_on_target | boxes_not_on_target

        # Solved once no box is left off-target
        if not boxes_not_on_target:
            return path

        # Try each direction
        for move, action in zip(moves, actions):
            new_player_pos = (player_pos[0] + move[0], player_pos[1] + move[1])

            # The player cannot walk into a wall or off the grid
            if is_blocked(new_player_pos):
                continue

            new_room_state = room_state.copy()

            # If there's a box, check if we can push it
            if new_player_pos in boxes:
                box_pos = new_player_pos  # the original box position
                new_box_pos = (new_player_pos[0] + move[0], new_player_pos[1] + move[1])

                # Can't push into a wall, off the grid, or into another box
                if is_blocked(new_box_pos) or new_box_pos in boxes:
                    continue

                # Move the box; the vacated cell reverts to its fixed content
                new_room_state[box_pos] = room_fixed[box_pos]
                new_room_state[new_box_pos] = 3 if room_fixed[new_box_pos] == 2 else 4

            # Player moves; the vacated cell reverts to its fixed content
            new_room_state[player_pos] = room_fixed[player_pos]
            new_room_state[new_player_pos] = 5
            queue.append((new_room_state, path + [action]))

    return []  # No solution found
|
113
|
+
|
114
|
+
|
115
|
+
# def plot_animation(imgs):
|
116
|
+
# fig, ax = plt.subplots()
|
117
|
+
# im = ax.imshow(imgs[0])
|
118
|
+
# def init():
|
119
|
+
# im.set_data(imgs[0])
|
120
|
+
# return [im]
|
121
|
+
# def update(i):
|
122
|
+
# im.set_data(imgs[i])
|
123
|
+
# return [im]
|
124
|
+
# ani = animation.FuncAnimation(fig, update, frames=len(imgs), init_func=init, blit=True)
|
125
|
+
# return ani
|
126
|
+
|
127
|
+
|
128
|
+
# def plot_animation(imgs):
|
129
|
+
# height, width = imgs[0].shape[:2]
|
130
|
+
# fig = plt.figure(figsize=(width / 100, height / 100), dpi=500)
|
131
|
+
|
132
|
+
# ax = fig.add_axes([0, 0, 1, 1])
|
133
|
+
|
134
|
+
# ax.set_xticks([])
|
135
|
+
# ax.set_yticks([])
|
136
|
+
# ax.set_frame_on(False)
|
137
|
+
|
138
|
+
# im = ax.imshow(imgs[0])
|
139
|
+
|
140
|
+
# def init():
|
141
|
+
# im.set_data(imgs[0])
|
142
|
+
# return [im]
|
143
|
+
|
144
|
+
# def update(i):
|
145
|
+
# im.set_data(imgs[i])
|
146
|
+
# return [im]
|
147
|
+
|
148
|
+
# ani = animation.FuncAnimation(
|
149
|
+
# fig, update, frames=len(imgs), init_func=init, blit=True
|
150
|
+
# )
|
151
|
+
# return ani
|
152
|
+
|
153
|
+
|
154
|
+
# def solve_sokoban(env, saved_animation_path):
|
155
|
+
# """
|
156
|
+
# Solve the given sokoban environment and save the animation
|
157
|
+
# """
|
158
|
+
# actions = get_shortest_action_path(env.room_fixed, env.room_state)
|
159
|
+
# print(f"Found {len(actions)} actions: {actions}")
|
160
|
+
# imgs = []
|
161
|
+
# img_before_action = env.render("rgb_array")
|
162
|
+
# imgs.append(img_before_action)
|
163
|
+
# for action in actions:
|
164
|
+
# env.step(action)
|
165
|
+
# img_after_action = env.render("rgb_array")
|
166
|
+
# imgs.append(img_after_action)
|
167
|
+
# ani = plot_animation(imgs)
|
168
|
+
# ani.save(saved_animation_path)
|
169
|
+
|
170
|
+
|
171
|
+
def add_random_player_movement(
    room_state: np.ndarray,
    room_structure: np.ndarray,
    rng: np.random.Generator,
    move_probability=0.5,
    continue_probability=0.5,
    max_steps=3,
) -> np.ndarray:
    """
    Optionally walk the player a few random steps after reverse playing.

    All randomness comes from the supplied ``rng``. ``room_state`` is modified
    in place and the same array is returned. The player only steps onto empty
    floor (1) or free targets (2) and never revisits a cell of this walk.
    """
    # First draw decides whether the player moves at all.
    if rng.random() > move_probability:
        return room_state

    player_cells = np.where(room_state == 5)
    if len(player_cells[0]) == 0:
        # Should not happen if called correctly, but handle gracefully
        print("Warning: Player not found in room_state during add_random_player_movement.")
        return room_state

    current = np.array([player_cells[0][0], player_cells[1][0]])
    visited = {tuple(current)}
    moves_made = 0

    while moves_made < max_steps:
        # Collect reachable neighbours (0: up, 1: down, 2: left, 3: right).
        candidates = []
        for direction in range(4):
            candidate = current + CHANGE_COORDINATES[direction]
            row, col = candidate[0], candidate[1]

            if row < 0 or row >= room_state.shape[0] or col < 0 or col >= room_state.shape[1]:
                continue
            if room_state[row, col] not in [1, 2] or tuple(candidate) in visited:
                continue
            candidates.append(candidate)

        if not candidates:
            break

        destination = candidates[rng.integers(0, len(candidates))]

        # The vacated cell reverts to the underlying structure.
        room_state[current[0], current[1]] = room_structure[current[0], current[1]]
        room_state[destination[0], destination[1]] = 5

        current = destination
        visited.add(tuple(current))
        moves_made += 1

        # No further draw is made once the step budget is used up.
        if moves_made >= max_steps or rng.random() > continue_probability:
            break

    return room_state
|
243
|
+
|
244
|
+
|
245
|
+
"""
|
246
|
+
Following code is adapted from the nicely written gym_sokoban repo
|
247
|
+
"""
|
248
|
+
|
249
|
+
|
250
|
+
def generate_room(
    dim: Tuple[int, int] = (13, 13),
    initial_seed: int = 42,
    p_change_directions=0.35,
    num_steps=25,
    num_boxes=3,
    second_player=False,
    search_depth=100,
):
    """
    Build a Sokoban room deterministically from ``initial_seed``.

    Seeds ``initial_seed``, ``initial_seed + 1``, ... are tried in turn (at most
    MAX_GENERATION_RETRIES of them), each with its own
    ``numpy.random.default_rng``. An attempt is rejected when placement raises,
    when reverse playing returns no room, or when the box displacement score
    is 0.

    Returns ``(room_structure, room_state, box_mapping, action_sequence)`` for
    the first accepted attempt; raises RuntimeWarning when every attempt fails.
    """
    for retry_offset in range(MAX_GENERATION_RETRIES):
        current_seed = initial_seed + retry_offset
        # Fresh generator per attempt keeps every attempt reproducible.
        rng = np.random.default_rng(current_seed)

        room = room_topology_generation(dim, p_change_directions, num_steps, rng)

        try:
            room = place_boxes_and_player(room, num_boxes, second_player, rng)
        except RuntimeError as err:
            # Not enough free space for this seed; move on to the next one.
            print(f"Warning: place_boxes_and_player failed for seed {current_seed}: {err}")
            continue

        # Structure: the player cell becomes plain floor, targets stay.
        room_structure = room.copy()
        room_structure[room == 5] = 1
        # State: every target starts out carrying a box (code 4).
        room_state = room.copy()
        room_state[room == 2] = 4

        room_state_rev, box_mapping, action_sequence = reverse_playing(
            room_state, room_structure, search_depth
        )
        if room_state_rev is None:
            print(f"Warning: reverse_playing failed for seed {current_seed}.")
            continue

        score = box_displacement_score(box_mapping)
        if not score > 0:
            # No box was displaced; try the next seed.
            continue

        final_room_state = room_state_rev.copy()
        # Reverse play writes pulled boxes as code 3; normalise them to code 4.
        final_room_state[final_room_state == 3] = 4

        # A minimal score gets a higher chance of extra player movement.
        if score == 1:
            move_prob = 0.8
        else:
            move_prob = 0.5
        final_room_state = add_random_player_movement(
            final_room_state,
            room_structure,
            rng,
            move_probability=move_prob,
            continue_probability=0.5,
            max_steps=3,
        )
        return room_structure, final_room_state, box_mapping, action_sequence

    # Every attempt was rejected.
    raise RuntimeWarning(
        f"Generated Model with score == 0 after {MAX_GENERATION_RETRIES} attempts. "
        f"Parameters: initial_seed={initial_seed}, dim={dim}, "
        f"p_change_directions={p_change_directions}, num_steps={num_steps}, "
        f"num_boxes={num_boxes}, second_player={second_player}, "
        f"search_depth={search_depth}."
    )
|
336
|
+
|
337
|
+
|
338
|
+
def room_topology_generation(
    dim: Tuple[int, int],
    p_change_directions: float,
    num_steps: int,
    rng: np.random.Generator,
) -> np.ndarray:
    """
    Carve floor cells into an all-wall grid with a random walk.

    Each step moves the walk one cell (clamped so it stays off the outer ring),
    then stamps a randomly chosen 3x3 mask centred on the current position.
    Returns an integer array of shape ``dim`` with 1 for floor and 0 for wall;
    the outer ring is always wall. All randomness comes from ``rng``.
    """
    n_rows, n_cols = dim

    # Ones mark the cells turned into floor; the centre sits on the walk position.
    stamps = [
        [[0, 0, 0], [1, 1, 1], [0, 0, 0]],
        [[0, 1, 0], [0, 1, 0], [0, 1, 0]],
        [[0, 0, 0], [1, 1, 0], [0, 1, 0]],
        [[0, 0, 0], [1, 1, 0], [1, 1, 0]],
        [[0, 0, 0], [0, 1, 1], [0, 1, 0]],
    ]
    headings = [(1, 0), (0, 1), (-1, 0), (0, -1)]

    # Draw order matters for reproducibility: heading, start row, start column.
    heading = headings[rng.choice(len(headings))]
    start_row = rng.integers(1, n_rows - 1)
    start_col = rng.integers(1, n_cols - 1)
    cursor = np.array([start_row, start_col])

    grid = np.zeros(dim, dtype=int)

    for _ in range(num_steps):
        # Occasionally pick a new heading.
        if rng.random() < p_change_directions:
            heading = headings[rng.choice(len(headings))]

        # Advance, keeping the walk inside the interior.
        cursor = cursor + heading
        cursor[0] = max(min(cursor[0], n_rows - 2), 1)
        cursor[1] = max(min(cursor[1], n_cols - 2), 1)

        # Stamp a random mask around the cursor.
        stamp = stamps[rng.choice(len(stamps))]
        top = cursor[0] - 1
        left = cursor[1] - 1
        grid[top : top + 3, left : left + 3] += stamp

    # Collapse overlap counts to 0/1 and force a wall ring around the room.
    grid[grid > 0] = 1
    grid[:, 0] = 0
    grid[:, n_cols - 1] = 0
    grid[0, :] = 0
    grid[n_rows - 1, :] = 0

    return grid
|
390
|
+
|
391
|
+
|
392
|
+
def place_boxes_and_player(
    room: np.ndarray, num_boxes: int, second_player: bool, rng: np.random.Generator
) -> np.ndarray:
    """
    Put the player(s) and the box targets on randomly chosen floor cells.

    ``room`` is modified in place and returned. Players are written as 5 and
    boxes as 2 (box target). Cells are drawn from ``rng`` without replacement.
    Raises RuntimeError unless there are strictly more floor cells than
    pieces to place.
    """
    floor = np.where(room == 1)
    free_cells = list(zip(floor[0], floor[1]))
    num_players = 2 if second_player else 1
    pieces = num_boxes + num_players

    if len(free_cells) <= pieces:
        raise RuntimeError(
            f"Not enough free spots ({len(free_cells)}) to place {num_players} player(s) and {num_boxes} boxes."
        )

    # One draw picks all cells: the first num_players are players, the rest boxes.
    picks = rng.choice(len(free_cells), size=pieces, replace=False)
    for order, cell_index in enumerate(picks):
        if order < num_players:
            room[free_cells[cell_index]] = 5
        else:
            room[free_cells[cell_index]] = 2  # Place as box target initially

    return room
|
425
|
+
|
426
|
+
|
427
|
+
# Global variables used for reverse playing. reverse_playing resets them on
# every call and depth_first_search updates them while it searches.
explored_states = set()
num_boxes = 0
best_room_score = -1
best_room = None
best_box_mapping = None
# Declared `global` by both search functions, so it is initialised here like
# the other shared names.
best_action_sequence = []
|
433
|
+
|
434
|
+
|
435
|
+
def reverse_playing(room_state, room_structure, search_depth=100):
    """
    Play Sokoban in reverse: the player walks around and pulls boxes.

    Every target cell of ``room_structure`` (value 2) is the origin of one box.
    The module-level search state is reset, a depth-first search is run from
    ``room_state``, and the best result it recorded is returned.

    :param room_state: room state the search starts from
    :param room_structure: fixed layout of the room
    :param search_depth: passed to the search as its ttl
    :return: best room found (None if the search recorded none), its box
        mapping, and the action sequence that led to it
    """
    global \
        explored_states, \
        num_boxes, \
        best_room_score, \
        best_room, \
        best_box_mapping, \
        best_action_sequence

    # The mapping origin -> current position is what the displacement score is
    # computed from; initially every box sits on its own target.
    targets = np.where(room_structure == 2)
    num_boxes = len(targets[0])
    box_mapping = {cell: cell for cell in zip(targets[0], targets[1])}

    # Reset the shared search state before starting a new search.
    explored_states = set()
    best_room_score = -1
    best_room = None
    best_box_mapping = box_mapping
    best_action_sequence = []

    depth_first_search(
        room_state,
        room_structure,
        box_mapping,
        box_swaps=0,
        last_pull=(-1, -1),
        ttl=search_depth,
        action_sequence=[],
    )

    return best_room, best_box_mapping, best_action_sequence
|
479
|
+
|
480
|
+
|
481
|
+
def depth_first_search(
    room_state,
    room_structure,
    box_mapping,
    box_swaps=0,
    last_pull=(-1, -1),
    ttl=300,
    action_sequence=None,
):
    """
    Recursively explore the room states reachable by reverse (pull) moves.

    The recursion stops when ``ttl`` is used up or once 300000 distinct states
    have been explored. Results are not returned; the best state seen so far
    is kept in the module-level ``best_*`` variables.

    :param room_state: current room state
    :param room_structure: fixed layout of the room
    :param box_mapping: dict mapping each box's origin target to its position
    :param box_swaps: number of times the pulled box changed along this path
    :param last_pull: key of the box that was pulled last
    :param ttl: remaining recursion budget
    :param action_sequence: actions taken to reach ``room_state``; None means
        an empty sequence
    :return: None
    """
    global \
        explored_states, \
        num_boxes, \
        best_room_score, \
        best_room, \
        best_box_mapping, \
        best_action_sequence

    # Avoid a shared mutable default argument.
    if action_sequence is None:
        action_sequence = []

    ttl -= 1
    if ttl <= 0 or len(explored_states) >= 300000:
        return

    state_tohash = marshal.dumps(room_state)

    # Only search this state if it has not been explored yet
    if state_tohash in explored_states:
        return

    # Score the state: box swaps times the total box displacement ...
    room_score = box_swaps * box_displacement_score(box_mapping)
    # ... but only when exactly num_boxes cells still show a free target (2).
    if np.where(room_state == 2)[0].shape[0] != num_boxes:
        room_score = 0

    if room_score > best_room_score:
        best_room = room_state.copy()
        best_room_score = room_score
        best_box_mapping = box_mapping.copy()
        best_action_sequence = action_sequence.copy()

    explored_states.add(state_tohash)

    for action in ACTION_LOOKUP.keys():
        # TODO: A tentative try here to make fewer moves: actions >= 4
        # (plain moves) are skipped, only actions 0-3 are expanded.
        if action >= 4:
            continue

        # The state and box mapping are copied so that every action starts
        # from the same state.
        room_state_next = room_state.copy()
        box_mapping_next = box_mapping.copy()

        room_state_next, box_mapping_next, last_pull_next = reverse_move(
            room_state_next, room_structure, box_mapping_next, last_pull, action
        )

        # Count a swap whenever a different box was pulled than last time.
        box_swaps_next = box_swaps
        if last_pull_next != last_pull:
            box_swaps_next += 1

        action_sequence_next = action_sequence + [action]
        depth_first_search(
            room_state_next,
            room_structure,
            box_mapping_next,
            box_swaps_next,
            last_pull_next,
            ttl,
            action_sequence_next,
        )
|
562
|
+
|
563
|
+
|
564
|
+
def reverse_move(room_state, room_structure, box_mapping, last_pull, action):
    """
    Apply one reverse action to the room.

    The player steps in direction ``action % 4`` if the destination is empty
    floor or a free target. For actions below 4 a box standing directly behind
    the player (opposite to the step direction) is pulled onto the cell the
    player just left; actions of 4 and above are simple move actions.
    ``room_state`` and ``box_mapping`` are modified in place.

    :param room_state: current room state
    :param room_structure: fixed layout of the room
    :param box_mapping: dict mapping each box's origin target to its position
    :param last_pull: key of the box that was pulled last
    :param action: action number
    :return: room state, box mapping, and the key of the box pulled last
    """
    player_cells = np.where(room_state == 5)
    player = np.array([player_cells[0][0], player_cells[1][0]])

    step = CHANGE_COORDINATES[action % 4]
    destination = player + step

    # The player only moves onto empty floor or an empty box target.
    if room_state[destination[0], destination[1]] not in [1, 2]:
        return room_state, box_mapping, last_pull

    # Move the player, independent of pull or move action.
    room_state[player[0], player[1]] = room_structure[player[0], player[1]]
    room_state[destination[0], destination[1]] = 5

    # Plain move actions never drag a box along.
    if action >= 4:
        return room_state, box_mapping, last_pull

    # The cell opposite to the step direction, seen from the old position.
    behind = player - step

    if room_state[behind[0], behind[1]] in [3, 4]:
        # Pull the box onto the cell the player just left (always written as 3).
        room_state[player[0], player[1]] = 3
        room_state[behind[0], behind[1]] = room_structure[behind[0], behind[1]]

        # Update the box mapping and remember which box was pulled.
        old_cell = (behind[0], behind[1])
        new_cell = (player[0], player[1])
        for origin in box_mapping.keys():
            if box_mapping[origin] == old_cell:
                box_mapping[origin] = new_cell
                last_pull = origin

    return room_state, box_mapping, last_pull
|
611
|
+
|
612
|
+
|
613
|
+
def box_displacement_score(box_mapping):
    """
    Sum the Manhattan distances between every box and its origin target.

    :param box_mapping: dict mapping each origin target to the box's position
    :return: total displacement (0 for an empty mapping)
    """
    total = 0
    for origin, current in box_mapping.items():
        offset = np.array(current) - np.array(origin)
        total += np.abs(offset).sum()
    return total
|
629
|
+
|
630
|
+
|
631
|
+
# Human-readable names of the cell codes, keyed by code 0-5.
TYPE_LOOKUP = dict(
    enumerate(
        (
            "wall",
            "empty space",
            "box target",
            "box on target",
            "box not on target",
            "player",
        )
    )
)

# Human-readable names of the actions, keyed by action number 0-7.
ACTION_LOOKUP = dict(
    enumerate(
        (
            "push up",
            "push down",
            "push left",
            "push right",
            "move up",
            "move down",
            "move left",
            "move right",
        )
    )
)

# (row, column) change per direction: 0 up, 1 down, 2 left, 3 right.
CHANGE_COORDINATES = dict(enumerate(((-1, 0), (1, 0), (0, -1), (0, 1))))
|
@@ -0,0 +1,17 @@
|
|
1
|
+
import logging
|
2
|
+
import pkg_resources
|
3
|
+
import json
|
4
|
+
from gym.envs.registration import register
|
5
|
+
|
6
|
+
logger = logging.getLogger(__name__)

# Locate the JSON manifest of environments that ships inside this package.
resource_package = __name__
env_json = pkg_resources.resource_filename(
    resource_package, "/".join(("envs", "available_envs.json"))
)

# Read the manifest as UTF-8 explicitly so the result does not depend on the
# platform's default text encoding.
with open(env_json, encoding="utf-8") as f:
    envs = json.load(f)

# Register every listed environment under its id and entry point.
for env in envs:
    register(id=env["id"], entry_point=env["entry_point"])
|