synth-ai 0.2.4.dev4__py3-none-any.whl → 0.2.4.dev5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synth_ai/environments/examples/__init__.py +1 -0
- synth_ai/environments/examples/crafter_classic/__init__.py +8 -0
- synth_ai/environments/examples/crafter_classic/config_logging.py +111 -0
- synth_ai/environments/examples/crafter_classic/debug_translation.py +0 -0
- synth_ai/environments/examples/crafter_classic/engine.py +575 -0
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +63 -0
- synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +5 -0
- synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +74 -0
- synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +266 -0
- synth_ai/environments/examples/crafter_classic/environment.py +364 -0
- synth_ai/environments/examples/crafter_classic/taskset.py +233 -0
- synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +229 -0
- synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +298 -0
- synth_ai/environments/examples/crafter_custom/__init__.py +4 -0
- synth_ai/environments/examples/crafter_custom/crafter/__init__.py +7 -0
- synth_ai/environments/examples/crafter_custom/crafter/config.py +182 -0
- synth_ai/environments/examples/crafter_custom/crafter/constants.py +8 -0
- synth_ai/environments/examples/crafter_custom/crafter/engine.py +269 -0
- synth_ai/environments/examples/crafter_custom/crafter/env.py +266 -0
- synth_ai/environments/examples/crafter_custom/crafter/objects.py +418 -0
- synth_ai/environments/examples/crafter_custom/crafter/recorder.py +187 -0
- synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +119 -0
- synth_ai/environments/examples/crafter_custom/dataset_builder.py +373 -0
- synth_ai/environments/examples/crafter_custom/environment.py +312 -0
- synth_ai/environments/examples/crafter_custom/run_dataset.py +305 -0
- synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +156 -0
- synth_ai/environments/examples/enron/art_helpers/local_email_db.py +280 -0
- synth_ai/environments/examples/enron/art_helpers/types_enron.py +24 -0
- synth_ai/environments/examples/enron/engine.py +291 -0
- synth_ai/environments/examples/enron/environment.py +165 -0
- synth_ai/environments/examples/enron/taskset.py +112 -0
- synth_ai/environments/examples/minigrid/__init__.py +48 -0
- synth_ai/environments/examples/minigrid/engine.py +589 -0
- synth_ai/environments/examples/minigrid/environment.py +274 -0
- synth_ai/environments/examples/minigrid/environment_mapping.py +242 -0
- synth_ai/environments/examples/minigrid/puzzle_loader.py +416 -0
- synth_ai/environments/examples/minigrid/taskset.py +583 -0
- synth_ai/environments/examples/nethack/__init__.py +7 -0
- synth_ai/environments/examples/nethack/achievements.py +337 -0
- synth_ai/environments/examples/nethack/engine.py +738 -0
- synth_ai/environments/examples/nethack/environment.py +255 -0
- synth_ai/environments/examples/nethack/helpers/__init__.py +42 -0
- synth_ai/environments/examples/nethack/helpers/action_mapping.py +301 -0
- synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +401 -0
- synth_ai/environments/examples/nethack/helpers/observation_utils.py +433 -0
- synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +201 -0
- synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +268 -0
- synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +308 -0
- synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +430 -0
- synth_ai/environments/examples/nethack/taskset.py +323 -0
- synth_ai/environments/examples/red/__init__.py +7 -0
- synth_ai/environments/examples/red/config_logging.py +110 -0
- synth_ai/environments/examples/red/engine.py +693 -0
- synth_ai/environments/examples/red/engine_helpers/__init__.py +1 -0
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +28 -0
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +275 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +142 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +56 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +283 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +149 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +137 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +56 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +330 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +120 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +558 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +312 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +147 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +246 -0
- synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +367 -0
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +139 -0
- synth_ai/environments/examples/red/environment.py +235 -0
- synth_ai/environments/examples/red/taskset.py +77 -0
- synth_ai/environments/examples/sokoban/__init__.py +1 -0
- synth_ai/environments/examples/sokoban/engine.py +675 -0
- synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +1 -0
- synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +656 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +17 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +3 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +129 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +370 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +331 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +305 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +66 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +114 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +122 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +394 -0
- synth_ai/environments/examples/sokoban/environment.py +228 -0
- synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +438 -0
- synth_ai/environments/examples/sokoban/puzzle_loader.py +311 -0
- synth_ai/environments/examples/sokoban/taskset.py +425 -0
- synth_ai/environments/examples/tictactoe/__init__.py +1 -0
- synth_ai/environments/examples/tictactoe/engine.py +368 -0
- synth_ai/environments/examples/tictactoe/environment.py +239 -0
- synth_ai/environments/examples/tictactoe/taskset.py +214 -0
- synth_ai/environments/examples/verilog/__init__.py +10 -0
- synth_ai/environments/examples/verilog/engine.py +328 -0
- synth_ai/environments/examples/verilog/environment.py +349 -0
- synth_ai/environments/examples/verilog/taskset.py +418 -0
- {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev5.dist-info}/METADATA +1 -1
- {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev5.dist-info}/RECORD +104 -6
- {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev5.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev5.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev5.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,331 @@
|
|
1
|
+
import random
|
2
|
+
import numpy as np
|
3
|
+
import marshal
|
4
|
+
|
5
|
+
|
6
|
+
def generate_room(
    dim=(13, 13), p_change_directions=0.35, num_steps=25, num_boxes=3, tries=4, second_player=False
):
    """
    Generate a Sokoban room as an integer matrix. Cell encoding:

        wall = 0, empty space = 1, box target = 2,
        box not on target = 3, box on target = 4, player = 5

    Generation may yield an unsolvable/trivial room (score == 0); in that
    case another attempt is made, up to ``tries`` times.

    :param dim: (rows, cols) of the room.
    :param p_change_directions: probability the random walk turns each step.
    :param num_steps: length of the topology random walk.
    :param num_boxes: number of boxes to place.
    :param tries: how many full generation attempts before giving up.
    :param second_player: also place a second player marker.
    :return: tuple (room_structure, room_state, box_mapping)
    :raises RuntimeWarning: if every attempt produced a score of 0.
    """
    room_state = np.zeros(shape=dim)
    room_structure = np.zeros(shape=dim)

    # A room can legitimately come out with score == 0; retry a few times.
    for _ in range(tries):
        # Carve a floor layout with a random walk, then drop player and boxes.
        candidate = room_topology_generation(dim, p_change_directions, num_steps)
        candidate = place_boxes_and_player(candidate, num_boxes=num_boxes, second_player=second_player)

        # Immovable layout: the player tile counts as plain floor here.
        room_structure = np.copy(candidate)
        room_structure[room_structure == 5] = 1

        # Mutable layout: boxes start out sitting on their targets (value 4).
        room_state = candidate.copy()
        room_state[room_state == 2] = 4

        room_state, score, box_mapping = reverse_playing(room_state, room_structure)
        room_state[room_state == 3] = 4

        if score > 0:
            break

    if score == 0:
        raise RuntimeWarning("Generated Model with score == 0")

    return room_structure, room_state, box_mapping
|
50
|
+
|
51
|
+
|
52
|
+
def room_topology_generation(dim=(10, 10), p_change_directions=0.35, num_steps=15):
    """
    Generate a room topology (walls = 0, floors = 1) by stamping 3x3 floor
    patterns along a bounded random walk, then forcing a wall border.

    :param dim: (rows, cols) of the room.
    :param p_change_directions: probability the walker turns each step.
    :param num_steps: number of walk steps (stamps applied).
    :return: 2d int numpy array of 0s and 1s.
    """
    rows, cols = dim

    # Each stamp is centred on the walker; its 1-cells become floor.
    floor_stamps = [
        [[0, 0, 0], [1, 1, 1], [0, 0, 0]],
        [[0, 1, 0], [0, 1, 0], [0, 1, 0]],
        [[0, 0, 0], [1, 1, 0], [0, 1, 0]],
        [[0, 0, 0], [1, 1, 0], [1, 1, 0]],
        [[0, 0, 0], [0, 1, 1], [0, 1, 0]],
    ]

    # Cardinal directions the walker may take.
    headings = [(1, 0), (0, 1), (-1, 0), (0, -1)]
    heading = random.sample(headings, 1)[0]

    # Start the walk somewhere inside the room.
    walker = np.array([random.randint(1, rows - 1), random.randint(1, cols - 1)])

    grid = np.zeros(dim, dtype=int)

    for _ in range(num_steps):
        # Occasionally turn.
        if random.random() < p_change_directions:
            heading = random.sample(headings, 1)[0]

        # Advance, clamped one cell inside the border.
        walker = walker + heading
        walker[0] = max(min(walker[0], rows - 2), 1)
        walker[1] = max(min(walker[1], cols - 2), 1)

        # Stamp a random 3x3 floor pattern around the walker.
        stamp = random.sample(floor_stamps, 1)[0]
        corner = walker - 1
        grid[corner[0] : corner[0] + 3, corner[1] : corner[1] + 3] += stamp

    # Collapse overlapping stamps to 0/1 and wall off the border.
    grid[grid > 0] = 1
    grid[:, [0, cols - 1]] = 0
    grid[[0, rows - 1], :] = 0

    return grid
|
103
|
+
|
104
|
+
|
105
|
+
def place_boxes_and_player(room, num_boxes, second_player):
    """
    Place the player (value 5) and ``num_boxes`` box targets (value 2) onto
    random floor cells (value 1) of ``room``, mutating it in place.

    :param room: 2d array topology from room_topology_generation.
    :param num_boxes: number of boxes to place.
    :param second_player: place a second player marker as well.
    :return: the same ``room`` array, mutated.
    :raises RuntimeError: if there are not enough free floor cells.
    """
    # Collect the candidate floor cells.
    free_cells = np.where(room == 1)
    free_count = free_cells[0].shape[0]
    num_players = 2 if second_player else 1

    if free_count <= num_boxes + num_players:
        raise RuntimeError(
            "Not enough free spots (#{}) to place {} player and {} boxes.".format(
                free_count, num_players, num_boxes
            )
        )

    # Drop the (first) player on a random free cell.
    pick = np.random.randint(free_count)
    spot = free_cells[0][pick], free_cells[1][pick]
    room[spot] = 5

    if second_player:
        # NOTE(review): the candidate list is not refreshed here, so the
        # second player may land on (and overwrite) the first — kept to match
        # the vendored upstream behaviour.
        pick = np.random.randint(free_count)
        spot = free_cells[0][pick], free_cells[1][pick]
        room[spot] = 5

    # Boxes: re-scan for floor each time so boxes never collide with
    # anything already placed.
    for _ in range(num_boxes):
        free_cells = np.where(room == 1)
        free_count = free_cells[0].shape[0]

        pick = np.random.randint(free_count)
        spot = free_cells[0][pick], free_cells[1][pick]
        room[spot] = 2

    return room
|
145
|
+
|
146
|
+
|
147
|
+
# Global variables used for reverse playing.
# explored_states: fingerprints of room states already visited by the DFS.
explored_states = set()
# num_boxes: box count of the room currently being reverse-played.
num_boxes = 0
# best_room_score: highest room score found so far (-1 = nothing found yet).
best_room_score = -1
# best_room: room state that achieved best_room_score.
best_room = None
# best_box_mapping: origin-target -> box-location mapping for best_room.
best_box_mapping = None
|
153
|
+
|
154
|
+
|
155
|
+
def reverse_playing(room_state, room_structure, search_depth=100):
    """
    This function plays Sokoban in reverse: starting from a solved room
    (every box on a target), the player can move and pull boxes, so every
    state reached is solvable by replaying the pulls forward.
    It ensures a solvable level with all boxes not being placed on a box target.

    Results travel through the module-level globals, which depth_first_search
    updates as it explores.

    :param room_state: 2d array of the solved room (boxes encoded as 4).
    :param room_structure: 2d array of the immovable layout (targets as 2).
    :param search_depth: unused; the search budget is the hard-coded ttl below.
    :return: tuple (best_room, best_room_score, best_box_mapping)
    """
    global explored_states, num_boxes, best_room_score, best_room, best_box_mapping

    # Box_Mapping is used to calculate the box displacement for every box
    box_mapping = {}
    box_locations = np.where(room_structure == 2)
    num_boxes = len(box_locations[0])
    for l in range(num_boxes):
        box = (box_locations[0][l], box_locations[1][l])
        box_mapping[box] = box  # every box starts on its own target

    # explored_states globally stores the best room state and score found during search
    explored_states = set()
    best_room_score = -1
    best_box_mapping = box_mapping
    depth_first_search(
        room_state, room_structure, box_mapping, box_swaps=0, last_pull=(-1, -1), ttl=300
    )

    return best_room, best_room_score, best_box_mapping
|
184
|
+
|
185
|
+
|
186
|
+
def depth_first_search(
    room_state, room_structure, box_mapping, box_swaps=0, last_pull=(-1, -1), ttl=300
):
    """
    Searches through all possible states of the room.
    This is a recursive function, which stops if the ttl is reduced to 0 or
    300,000 states have been explored.

    The best-scoring state found is recorded in the module-level globals
    (best_room, best_room_score, best_box_mapping).

    :param room_state: current 2d room state being expanded.
    :param room_structure: immovable layout (walls/floors/targets).
    :param box_mapping: origin target -> current location for every box.
    :param box_swaps: how often the pulled box changed along this path.
    :param last_pull: origin target of the most recently pulled box.
    :param ttl: remaining recursion budget along this path.
    :return: None — results are reported via the globals.
    """
    global explored_states, num_boxes, best_room_score, best_room, best_box_mapping

    ttl -= 1
    if ttl <= 0 or len(explored_states) >= 300000:
        return

    # Fingerprint the current state so repeats can be skipped.
    state_tohash = marshal.dumps(room_state)

    # Only search this state, if it not yet has been explored
    if not (state_tohash in explored_states):
        # Add current state and its score to explored states
        room_score = box_swaps * box_displacement_score(box_mapping)
        if np.where(room_state == 2)[0].shape[0] != num_boxes:
            # Some target is still covered by a box — such rooms score 0.
            room_score = 0

        if room_score > best_room_score:
            best_room = room_state
            best_room_score = room_score
            best_box_mapping = box_mapping

        explored_states.add(state_tohash)

        for action in ACTION_LOOKUP.keys():
            # The state and box mapping need to be copied to ensure
            # every action start from a similar state.
            room_state_next = room_state.copy()
            box_mapping_next = box_mapping.copy()

            room_state_next, box_mapping_next, last_pull_next = reverse_move(
                room_state_next, room_structure, box_mapping_next, last_pull, action
            )

            box_swaps_next = box_swaps
            if last_pull_next != last_pull:
                box_swaps_next += 1

            # NOTE(review): the recursion passes last_pull, not last_pull_next.
            # This matches the vendored upstream code; changing it would alter
            # the generated rooms — confirm before "fixing".
            depth_first_search(
                room_state_next, room_structure, box_mapping_next, box_swaps_next, last_pull, ttl
            )
|
240
|
+
|
241
|
+
|
242
|
+
def reverse_move(room_state, room_structure, box_mapping, last_pull, action):
    """
    Perform a reverse action. Actions in the range [0, 3] are pull actions
    (labelled "push" in ACTION_LOOKUP) and the ones greater than 3 are simple
    move actions; ``action % 4`` selects the direction in CHANGE_COORDINATES.

    ``room_state`` and ``box_mapping`` are mutated in place.

    :param room_state: current 2d room state (player encoded as 5).
    :param room_structure: immovable layout used to restore vacated cells.
    :param box_mapping: origin target -> current location for every box.
    :param last_pull: previous pull marker; replaced by the pulled box's
        origin target if this call performs a pull, else returned unchanged.
    :param action: integer action index.
    :return: tuple (room_state, box_mapping, last_pull)
    """
    player_position = np.where(room_state == 5)
    player_position = np.array([player_position[0][0], player_position[1][0]])

    change = CHANGE_COORDINATES[action % 4]
    next_position = player_position + change

    # Check if next position is an empty floor or an empty box target
    if room_state[next_position[0], next_position[1]] in [1, 2]:
        # Move player, independent of pull or move action.
        room_state[player_position[0], player_position[1]] = room_structure[
            player_position[0], player_position[1]
        ]
        room_state[next_position[0], next_position[1]] = 5

        # In addition try to pull a box if the action is a pull action
        if action < 4:
            # The box candidate sits on the opposite side of the player,
            # i.e. one step against the movement direction.
            possible_box_location = change[0] * -1, change[1] * -1
            possible_box_location += player_position

            if room_state[possible_box_location[0], possible_box_location[1]] in [3, 4]:
                # Perform pull of the adjacent box
                room_state[player_position[0], player_position[1]] = 3
                room_state[possible_box_location[0], possible_box_location[1]] = room_structure[
                    possible_box_location[0], possible_box_location[1]
                ]

                # Update the box mapping
                for k in box_mapping.keys():
                    if box_mapping[k] == (possible_box_location[0], possible_box_location[1]):
                        box_mapping[k] = (player_position[0], player_position[1])
                        last_pull = k

    return room_state, box_mapping, last_pull
|
286
|
+
|
287
|
+
|
288
|
+
def box_displacement_score(box_mapping):
    """
    Sum of Manhattan distances between every box and its origin box target.

    :param box_mapping: origin target -> current location for every box.
    :return: total displacement (0 for an empty mapping).
    """
    total = 0

    for target, location in box_mapping.items():
        # Manhattan distance between where the box is and where it started.
        offset = np.abs(np.array(location) - np.array(target))
        total += np.sum(offset)

    return total
|
304
|
+
|
305
|
+
|
306
|
+
# Human-readable name for each cell value.
# NOTE(review): 3/4 here are the reverse of the encoding described in the
# generate_room docstring (box not on target = 3, box on target = 4) — kept
# as-is to match the vendored upstream table; confirm before relying on it.
TYPE_LOOKUP = {
    0: "wall",
    1: "empty space",
    2: "box target",
    3: "box on target",
    4: "box not on target",
    5: "player",
}

# Reverse-play action space: 0-3 pull ("push" in reverse), 4-7 plain moves.
ACTION_LOOKUP = {
    0: "push up",
    1: "push down",
    2: "push left",
    3: "push right",
    4: "move up",
    5: "move down",
    6: "move left",
    7: "move right",
}

# Moves are mapped to coordinate changes as follows
# 0: Move up
# 1: Move down
# 2: Move left
# 3: Move right
CHANGE_COORDINATES = {0: (-1, 0), 1: (1, 0), 2: (0, -1), 3: (0, 1)}
|
@@ -0,0 +1,305 @@
|
|
1
|
+
import gym
|
2
|
+
from gym.utils import seeding
|
3
|
+
from gym.spaces.discrete import Discrete
|
4
|
+
from gym.spaces import Box
|
5
|
+
from .room_utils import generate_room
|
6
|
+
from .render_utils import room_to_rgb, room_to_tiny_world_rgb
|
7
|
+
import numpy as np
|
8
|
+
|
9
|
+
|
10
|
+
class SokobanEnv(gym.Env):
    """
    Sokoban puzzle environment following the legacy gym API
    (``step`` returns a 4-tuple; ``seed`` is a separate method).

    room_state cell encoding (as used by _push/_move/_calc_reward):
    0 wall, 1 empty floor, 2 empty box target, 3 box on target,
    4 box off target, 5 player.
    """

    # Both the legacy ("render.modes") and newer ("render_modes") metadata
    # keys are populated for compatibility across gym versions.
    metadata = {
        "render.modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array", "raw"],
        "render_modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array", "raw"],
    }

    def __init__(
        self, dim_room=(10, 10), max_steps=120, num_boxes=4, num_gen_steps=None, reset=True
    ):
        """
        :param dim_room: (rows, cols) of the generated room.
        :param max_steps: episode step limit before done is forced.
        :param num_boxes: number of boxes to place.
        :param num_gen_steps: random-walk length for room generation;
            defaults to 1.7 * (rows + cols) when None.
        :param reset: generate an initial room immediately when True.
        """
        # General Configuration
        self.dim_room = dim_room
        if num_gen_steps == None:
            self.num_gen_steps = int(1.7 * (dim_room[0] + dim_room[1]))
        else:
            self.num_gen_steps = num_gen_steps

        self.num_boxes = num_boxes
        self.boxes_on_target = 0

        # Penalties and Rewards
        self.penalty_for_step = -0.1
        self.penalty_box_off_target = -1
        self.reward_box_on_target = 1
        self.reward_finished = 10
        self.reward_last = 0  # reward produced by the most recent step

        # Other Settings
        self.viewer = None
        self.max_steps = max_steps
        self.action_space = Discrete(len(ACTION_LOOKUP))
        # 16px per tile in the full-size rendering.
        screen_height, screen_width = (dim_room[0] * 16, dim_room[1] * 16)
        self.observation_space = Box(
            low=0, high=255, shape=(screen_height, screen_width, 3), dtype=np.uint8
        )

        if reset:
            # Initialize Room
            _ = self.reset()

    def seed(self, seed=None):
        """Seed gym's RNG.

        NOTE(review): room generation uses the global random/np.random
        modules, not self.np_random, so seeding here does not by itself make
        generate_room deterministic — confirm before relying on it.
        """
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action, observation_mode="rgb_array"):
        """
        Apply one action (see ACTION_LOOKUP: 0 no-op, 1-4 push, 5-8 move).

        :param action: integer key of ACTION_LOOKUP.
        :param observation_mode: render mode for the returned observation.
        :return: legacy gym 4-tuple (observation, reward, done, info).
        """
        assert action in ACTION_LOOKUP
        assert observation_mode in ["rgb_array", "tiny_rgb_array", "raw"]

        self.num_env_steps += 1

        # Cleared each step; _push records the box move here for subclasses.
        self.new_box_position = None
        self.old_box_position = None

        moved_box = False

        if action == 0:
            # No-op: nothing moves, but the step penalty still applies.
            moved_player = False

        # Push actions are 1-4 (see ACTION_LOOKUP).
        elif action < 5:
            moved_player, moved_box = self._push(action)

        else:
            moved_player = self._move(action)

        self._calc_reward()

        done = self._check_if_done()

        # Convert the observation to RGB frame
        observation = self.render(mode=observation_mode)

        info = {
            "action.name": ACTION_LOOKUP[action],
            "action.moved_player": moved_player,
            "action.moved_box": moved_box,
        }
        if done:
            info["maxsteps_used"] = self._check_if_maxsteps()
            info["all_boxes_on_target"] = self._check_if_all_boxes_on_target()

        return observation, self.reward_last, done, info

    def _push(self, action):
        """
        Perform a push, if a box is adjacent in the right direction.
        If no box can be pushed, fall back to a plain move.

        :param action: push action in [1, 4].
        :return: tuple (moved_player, moved_box) of booleans.
        """
        change = CHANGE_COORDINATES[(action - 1) % 4]
        new_position = self.player_position + change
        current_position = self.player_position.copy()

        # No push, if the push would get the box out of the room's grid
        new_box_position = new_position + change
        if (
            new_box_position[0] >= self.room_state.shape[0]
            or new_box_position[1] >= self.room_state.shape[1]
        ):
            return False, False

        # A push needs a box ahead of the player and a free cell behind it.
        can_push_box = self.room_state[new_position[0], new_position[1]] in [3, 4]
        can_push_box &= self.room_state[new_box_position[0], new_box_position[1]] in [1, 2]
        if can_push_box:
            # Record the move so subclasses can update their box bookkeeping.
            self.new_box_position = tuple(new_box_position)
            self.old_box_position = tuple(new_position)

            # Move Player
            self.player_position = new_position
            self.room_state[(new_position[0], new_position[1])] = 5
            # Restore whatever the player was standing on (floor or target).
            self.room_state[current_position[0], current_position[1]] = self.room_fixed[
                current_position[0], current_position[1]
            ]

            # Move Box
            box_type = 4
            if self.room_fixed[new_box_position[0], new_box_position[1]] == 2:
                box_type = 3  # box lands on a target
            self.room_state[new_box_position[0], new_box_position[1]] = box_type
            return True, True

        # Try to move if no box to push, available
        else:
            return self._move(action), False

    def _move(self, action):
        """
        Moves the player to the next field, if it is not occupied.

        :param action: action index; (action - 1) % 4 selects the direction.
        :return: Boolean, indicating a change of the room's state.
        """
        change = CHANGE_COORDINATES[(action - 1) % 4]
        new_position = self.player_position + change
        current_position = self.player_position.copy()

        # Move player if the field in the moving direction is either
        # an empty field or an empty box target.
        if self.room_state[new_position[0], new_position[1]] in [1, 2]:
            self.player_position = new_position
            self.room_state[(new_position[0], new_position[1])] = 5
            # Restore whatever the player was standing on (floor or target).
            self.room_state[current_position[0], current_position[1]] = self.room_fixed[
                current_position[0], current_position[1]
            ]

            return True

        return False

    def _calc_reward(self):
        """
        Recompute self.reward_last for the step just taken: a constant step
        penalty, plus/minus the on/off-target box delta, plus the finish
        bonus when all boxes are placed.
        """
        # Every step a small penalty is given, This ensures
        # that short solutions have a higher reward.
        self.reward_last = self.penalty_for_step

        # count boxes off or on the target
        empty_targets = self.room_state == 2
        # A player standing on a target hides the 2, so count it separately.
        player_on_target = (self.room_fixed == 2) & (self.room_state == 5)
        total_targets = empty_targets | player_on_target

        current_boxes_on_target = self.num_boxes - np.where(total_targets)[0].shape[0]

        # Add the reward if a box is pushed on the target and give a
        # penalty if a box is pushed off the target.
        if current_boxes_on_target > self.boxes_on_target:
            self.reward_last += self.reward_box_on_target
        elif current_boxes_on_target < self.boxes_on_target:
            self.reward_last += self.penalty_box_off_target

        game_won = self._check_if_all_boxes_on_target()
        if game_won:
            self.reward_last += self.reward_finished

        self.boxes_on_target = current_boxes_on_target

    def _check_if_done(self):
        # Check if the game is over either through reaching the maximum number
        # of available steps or by pushing all boxes on the targets.
        return self._check_if_all_boxes_on_target() or self._check_if_maxsteps()

    def _check_if_all_boxes_on_target(self):
        """True when no target is left uncovered by a box (a player standing
        on a target does not count as covering it)."""
        empty_targets = self.room_state == 2
        player_hiding_target = (self.room_fixed == 2) & (self.room_state == 5)
        are_all_boxes_on_targets = np.where(empty_targets | player_hiding_target)[0].shape[0] == 0
        return are_all_boxes_on_targets

    def _check_if_maxsteps(self):
        """True when the step budget is exactly used up.
        NOTE(review): equality, not >= — relies on num_env_steps advancing
        by exactly 1 per step (which step() does)."""
        return self.max_steps == self.num_env_steps

    def reset(
        self,
        second_player: bool = False,
        render_mode: str = "rgb_array",
        seed=None,
        *,
        max_attempts: int = 5,
    ):
        """
        Generate a fresh room and reset the episode counters.

        Retries generation up to ``max_attempts`` times when generate_room
        raises; on the final failure it falls back to a trivial 3x3 room
        containing only the player.

        :return: the initial observation rendered with ``render_mode``.
        """
        if seed is not None:
            self.seed(seed)

        for attempt in range(max_attempts):
            try:
                self.room_fixed, self.room_state, self.box_mapping = generate_room(
                    dim=self.dim_room,
                    num_steps=self.num_gen_steps,
                    num_boxes=self.num_boxes,
                    second_player=second_player,
                )
                break  # success
            except (RuntimeError, RuntimeWarning) as e:
                print(f"[SOKOBAN] {e} – retry {attempt + 1}/{max_attempts}")
                if attempt == max_attempts - 1:
                    # fallback: trivial 3×3 room with player only
                    self.room_fixed = np.pad(np.ones((1, 1), dtype=int), 1, constant_values=0)
                    self.room_state = self.room_fixed.copy()
                    self.room_state[1, 1] = 5
                    self.box_mapping = {}

        self.player_position = np.argwhere(self.room_state == 5)[0]
        self.num_env_steps = 0
        self.reward_last = 0
        self.boxes_on_target = 0

        return self.render(render_mode)

    def render(self, mode="human", close=None, scale=1):
        """
        Render the current room.

        :param mode: one of RENDERING_MODES; *rgb_array modes return an
            image, *human modes open a viewer, "raw" returns binary masks.
        :param close: unused, kept for legacy gym compatibility.
        :param scale: pixel scale used by the tiny_* modes.
        """
        assert mode in RENDERING_MODES

        img = self.get_image(mode, scale)

        if "rgb_array" in mode:
            return img

        elif "human" in mode:
            # Imported lazily: the classic_control viewer needs a display.
            from gym.envs.classic_control import rendering

            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(img)
            return self.viewer.isopen

        elif "raw" in mode:
            # int8 masks: walls, goals, boxes (on or off target), player.
            arr_walls = (self.room_fixed == 0).view(np.int8)
            arr_goals = (self.room_fixed == 2).view(np.int8)
            arr_boxes = ((self.room_state == 4) + (self.room_state == 3)).view(np.int8)
            arr_player = (self.room_state == 5).view(np.int8)

            return arr_walls, arr_goals, arr_boxes, arr_player

        else:
            super(SokobanEnv, self).render(mode=mode)  # just raise an exception

    def get_image(self, mode, scale=1):
        """Build the RGB frame for the current state (tiny_* = scaled tiles)."""
        if mode.startswith("tiny_"):
            img = room_to_tiny_world_rgb(self.room_state, self.room_fixed, scale=scale)
        else:
            img = room_to_rgb(self.room_state, self.room_fixed)

        return img

    def close(self):
        """Close the human-mode viewer if one was opened."""
        if self.viewer is not None:
            self.viewer.close()

    def set_maxsteps(self, num_steps):
        """Override the episode step limit."""
        self.max_steps = num_steps

    def get_action_lookup(self):
        """Return the action-id -> name mapping."""
        return ACTION_LOOKUP

    def get_action_meanings(self):
        """Alias of get_action_lookup, matching the Atari-style gym API."""
        return ACTION_LOOKUP
|
284
|
+
|
285
|
+
|
286
|
+
# Forward-game action space: 0 is a no-op, 1-4 push, 5-8 plain moves.
ACTION_LOOKUP = {
    0: "no operation",
    1: "push up",
    2: "push down",
    3: "push left",
    4: "push right",
    5: "move up",
    6: "move down",
    7: "move left",
    8: "move right",
}

# Moves are mapped to coordinate changes as follows
# 0: Move up
# 1: Move down
# 2: Move left
# 3: Move right
CHANGE_COORDINATES = {0: (-1, 0), 1: (1, 0), 2: (0, -1), 3: (0, 1)}

# Modes accepted by SokobanEnv.render().
RENDERING_MODES = ["rgb_array", "human", "tiny_rgb_array", "tiny_human", "raw"]
|
synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
from .sokoban_env import SokobanEnv
|
2
|
+
from .render_utils import room_to_rgb_FT, room_to_tiny_world_rgb_FT
|
3
|
+
from gym.spaces import Box
|
4
|
+
|
5
|
+
|
6
|
+
class FixedTargetsSokobanEnv(SokobanEnv):
    """
    Sokoban variant where each box must reach its own specific target
    (tracked via box_mapping) rather than any free target.
    """

    def __init__(self, dim_room=(10, 10), max_steps=120, num_boxes=3, num_gen_steps=None):
        # The base __init__ already resets once (reset defaults to True);
        # the explicit reset below rebuilds the room afterwards.
        super(FixedTargetsSokobanEnv, self).__init__(dim_room, max_steps, num_boxes, num_gen_steps)
        screen_height, screen_width = (dim_room[0] * 16, dim_room[1] * 16)
        # NOTE(review): unlike the base class, no dtype is passed to Box here
        # — confirm whether that is intended.
        self.observation_space = Box(low=0, high=255, shape=(screen_height, screen_width, 3))
        # Per-box flag: does box b currently sit on its own origin target?
        self.boxes_are_on_target = [False] * num_boxes

        _ = self.reset()

    def get_image(self, mode, scale=1):
        """Render with the fixed-target tile set (box_mapping-aware)."""
        if mode.startswith("tiny_"):
            img = room_to_tiny_world_rgb_FT(
                self.room_state, self.box_mapping, self.room_fixed, scale=scale
            )
        else:
            img = room_to_rgb_FT(self.room_state, self.box_mapping, self.room_fixed)

        return img

    def step(self, action, observation_mode="rgb_array"):
        """Delegate to the base step (which calls the overridden
        _calc_reward) and pass its 4-tuple through unchanged."""
        observation, self.reward_last, done, info = super(FixedTargetsSokobanEnv, self).step(
            action, observation_mode
        )

        return observation, self.reward_last, done, info

    def _calc_reward(self):
        """
        Per-box reward: compare each box's current location against its own
        origin target instead of counting covered targets globally.

        NOTE(review): unlike the base class, no reward_finished bonus is
        added here — confirm whether that is intended.
        """
        self._update_box_mapping()

        # Every step a small penalty is given, This ensures
        # that short solutions have a higher reward.
        self.reward_last = self.penalty_for_step

        for b in range(len(self.boxes_are_on_target)):
            previous_state = self.boxes_are_on_target[b]

            # Calculate new state
            box_id = list(self.box_mapping.keys())[b]
            # A box is "home" when its current location equals its origin key.
            new_state = self.box_mapping[box_id] == box_id

            if previous_state and not new_state:
                # Box was pushed of its target
                self.reward_last += self.penalty_box_off_target
            elif not previous_state and new_state:
                # box was pushed on its target
                self.reward_last += self.reward_box_on_target

            self.boxes_are_on_target[b] = new_state

    def _update_box_mapping(self):
        """After a push (recorded by SokobanEnv._push in new/old_box_position),
        move the pushed box's entry in box_mapping to its new location."""
        if self.new_box_position is not None:
            box_index = list(self.box_mapping.values()).index(self.old_box_position)
            box_id = list(self.box_mapping.keys())[box_index]
            self.box_mapping[box_id] = self.new_box_position

    def _check_if_all_boxes_on_target(self):
        """True when every box sits exactly on its own origin target."""
        for key in self.box_mapping.keys():
            if not key == self.box_mapping[key]:
                return False

        return True
|