synth-ai 0.2.4.dev4__py3-none-any.whl → 0.2.4.dev6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synth_ai/environments/examples/__init__.py +1 -0
- synth_ai/environments/examples/crafter_classic/__init__.py +8 -0
- synth_ai/environments/examples/crafter_classic/config_logging.py +111 -0
- synth_ai/environments/examples/crafter_classic/debug_translation.py +0 -0
- synth_ai/environments/examples/crafter_classic/engine.py +579 -0
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +63 -0
- synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +5 -0
- synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +74 -0
- synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +266 -0
- synth_ai/environments/examples/crafter_classic/environment.py +364 -0
- synth_ai/environments/examples/crafter_classic/taskset.py +233 -0
- synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +229 -0
- synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +298 -0
- synth_ai/environments/examples/crafter_custom/__init__.py +4 -0
- synth_ai/environments/examples/crafter_custom/crafter/__init__.py +7 -0
- synth_ai/environments/examples/crafter_custom/crafter/config.py +182 -0
- synth_ai/environments/examples/crafter_custom/crafter/constants.py +8 -0
- synth_ai/environments/examples/crafter_custom/crafter/engine.py +269 -0
- synth_ai/environments/examples/crafter_custom/crafter/env.py +266 -0
- synth_ai/environments/examples/crafter_custom/crafter/objects.py +418 -0
- synth_ai/environments/examples/crafter_custom/crafter/recorder.py +187 -0
- synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +119 -0
- synth_ai/environments/examples/crafter_custom/dataset_builder.py +373 -0
- synth_ai/environments/examples/crafter_custom/environment.py +312 -0
- synth_ai/environments/examples/crafter_custom/run_dataset.py +305 -0
- synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +156 -0
- synth_ai/environments/examples/enron/art_helpers/local_email_db.py +280 -0
- synth_ai/environments/examples/enron/art_helpers/types_enron.py +24 -0
- synth_ai/environments/examples/enron/engine.py +291 -0
- synth_ai/environments/examples/enron/environment.py +165 -0
- synth_ai/environments/examples/enron/taskset.py +112 -0
- synth_ai/environments/examples/minigrid/__init__.py +48 -0
- synth_ai/environments/examples/minigrid/engine.py +589 -0
- synth_ai/environments/examples/minigrid/environment.py +274 -0
- synth_ai/environments/examples/minigrid/environment_mapping.py +242 -0
- synth_ai/environments/examples/minigrid/puzzle_loader.py +416 -0
- synth_ai/environments/examples/minigrid/taskset.py +583 -0
- synth_ai/environments/examples/nethack/__init__.py +7 -0
- synth_ai/environments/examples/nethack/achievements.py +337 -0
- synth_ai/environments/examples/nethack/engine.py +738 -0
- synth_ai/environments/examples/nethack/environment.py +255 -0
- synth_ai/environments/examples/nethack/helpers/__init__.py +42 -0
- synth_ai/environments/examples/nethack/helpers/action_mapping.py +301 -0
- synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +401 -0
- synth_ai/environments/examples/nethack/helpers/observation_utils.py +433 -0
- synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +201 -0
- synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +268 -0
- synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +308 -0
- synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +430 -0
- synth_ai/environments/examples/nethack/taskset.py +323 -0
- synth_ai/environments/examples/red/__init__.py +7 -0
- synth_ai/environments/examples/red/config_logging.py +110 -0
- synth_ai/environments/examples/red/engine.py +693 -0
- synth_ai/environments/examples/red/engine_helpers/__init__.py +1 -0
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +28 -0
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +275 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +142 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +56 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +283 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +149 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +137 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +56 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +330 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +120 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +558 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +312 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +147 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +246 -0
- synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +367 -0
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +139 -0
- synth_ai/environments/examples/red/environment.py +235 -0
- synth_ai/environments/examples/red/taskset.py +77 -0
- synth_ai/environments/examples/sokoban/__init__.py +1 -0
- synth_ai/environments/examples/sokoban/engine.py +675 -0
- synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +1 -0
- synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +656 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +17 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +3 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +129 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +370 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +331 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +305 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +66 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +114 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +122 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +394 -0
- synth_ai/environments/examples/sokoban/environment.py +228 -0
- synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +438 -0
- synth_ai/environments/examples/sokoban/puzzle_loader.py +311 -0
- synth_ai/environments/examples/sokoban/taskset.py +425 -0
- synth_ai/environments/examples/tictactoe/__init__.py +1 -0
- synth_ai/environments/examples/tictactoe/engine.py +368 -0
- synth_ai/environments/examples/tictactoe/environment.py +239 -0
- synth_ai/environments/examples/tictactoe/taskset.py +214 -0
- synth_ai/environments/examples/verilog/__init__.py +10 -0
- synth_ai/environments/examples/verilog/engine.py +328 -0
- synth_ai/environments/examples/verilog/environment.py +349 -0
- synth_ai/environments/examples/verilog/taskset.py +418 -0
- synth_ai/environments/examples/wordle/__init__.py +29 -0
- synth_ai/environments/examples/wordle/engine.py +391 -0
- synth_ai/environments/examples/wordle/environment.py +154 -0
- synth_ai/environments/examples/wordle/helpers/generate_instances_wordfreq.py +75 -0
- synth_ai/environments/examples/wordle/taskset.py +222 -0
- synth_ai/environments/service/app.py +8 -0
- synth_ai/environments/service/core_routes.py +38 -0
- synth_ai/learning/prompts/banking77_injection_eval.py +163 -0
- synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +201 -0
- synth_ai/learning/prompts/mipro.py +273 -1
- synth_ai/learning/prompts/random_search.py +247 -0
- synth_ai/learning/prompts/run_mipro_banking77.py +160 -0
- synth_ai/learning/prompts/run_random_search_banking77.py +305 -0
- synth_ai/lm/injection.py +81 -0
- synth_ai/lm/overrides.py +204 -0
- synth_ai/lm/provider_support/anthropic.py +39 -12
- synth_ai/lm/provider_support/openai.py +31 -4
- synth_ai/lm/vendors/core/anthropic_api.py +16 -0
- synth_ai/lm/vendors/openai_standard.py +35 -5
- {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev6.dist-info}/METADATA +2 -1
- {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev6.dist-info}/RECORD +123 -13
- {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev6.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev6.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev6.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,114 @@
|
|
1
|
+
from .sokoban_env import SokobanEnv, CHANGE_COORDINATES
|
2
|
+
from gym.spaces import Box
|
3
|
+
from gym.spaces.discrete import Discrete
|
4
|
+
|
5
|
+
|
6
|
+
class PushAndPullSokobanEnv(SokobanEnv):
    """Sokoban variant in which the player can pull boxes as well as push them.

    Actions are the integer keys of the module-level ``ACTION_LOOKUP``:
    0 is the no-op, 1-4 push, 5-8 move without pushing and 9-12 pull.
    """

    def __init__(self, dim_room=(10, 10), max_steps=120, num_boxes=3, num_gen_steps=None):
        super(PushAndPullSokobanEnv, self).__init__(dim_room, max_steps, num_boxes, num_gen_steps)
        # The observation frame is 16x the room dimensions, with 3 channels.
        screen_height, screen_width = (dim_room[0] * 16, dim_room[1] * 16)
        self.observation_space = Box(low=0, high=255, shape=(screen_height, screen_width, 3))
        self.boxes_are_on_target = [False] * num_boxes
        self.action_space = Discrete(len(ACTION_LOOKUP))

        _ = self.reset()

    def step(self, action, observation_mode="rgb_array"):
        """
        Applies one action to the room.
        :param action: integer key of ACTION_LOOKUP
        :param observation_mode: render mode passed on to ``self.render``
        :return: Tuple (observation, reward, done, info)
        """
        assert action in ACTION_LOOKUP

        self.num_env_steps += 1

        self.new_box_position = None
        self.old_box_position = None

        moved_player = False
        moved_box = False

        if action == 0:
            # No operation: nothing moves. This branch has to be part of the
            # chain below; as a separate ``if`` the no-op fell through to
            # ``_push(0)`` and could move the player and a box.
            moved_player = False

        # All push actions are in the range of [1, 4]
        elif action < 5:
            moved_player, moved_box = self._push(action)

        # Plain moves are in the range of [5, 8]
        elif action < 9:
            moved_player = self._move(action)

        # Everything else, [9, 12], is a pull
        else:
            moved_player, moved_box = self._pull(action)

        self._calc_reward()

        done = self._check_if_done()

        # Convert the observation to RGB frame
        observation = self.render(mode=observation_mode)

        info = {
            "action.name": ACTION_LOOKUP[action],
            "action.moved_player": moved_player,
            "action.moved_box": moved_box,
        }
        if done:
            info["maxsteps_used"] = self._check_if_maxsteps()
            info["all_boxes_on_target"] = self._check_if_all_boxes_on_target()

        return observation, self.reward_last, done, info

    def _pull(self, action):
        """
        Moves the player to the next field, if it is not occupied, and drags
        along a box that sits directly behind the player (opposite to the
        moving direction) onto the field the player just left.
        :param action: pull action, a key of ACTION_LOOKUP
        :return: Tuple (moved_player, moved_box); (False, False) when the
            target field is occupied and nothing changed
        """
        change = CHANGE_COORDINATES[(action - 1) % 4]
        new_position = self.player_position + change
        current_position = self.player_position.copy()
        pull_content_position = self.player_position - change

        # Move player if the field in the moving direction is either
        # an empty field or an empty box target.
        if self.room_state[new_position[0], new_position[1]] in [1, 2]:
            self.player_position = new_position
            self.room_state[(new_position[0], new_position[1])] = 5
            # Restore whatever the fixed layout has under the old player position.
            self.room_state[current_position[0], current_position[1]] = self.room_fixed[
                current_position[0], current_position[1]
            ]

            box_next_to_player = self.room_state[
                pull_content_position[0], pull_content_position[1]
            ] in [3, 4]
            if box_next_to_player:
                # Move Box
                # State 3 is written when the fixed layout marks the vacated
                # field with 2 (a box target), state 4 otherwise.
                box_type = 4
                if self.room_fixed[current_position[0], current_position[1]] == 2:
                    box_type = 3
                self.room_state[current_position[0], current_position[1]] = box_type
                self.room_state[pull_content_position[0], pull_content_position[1]] = (
                    self.room_fixed[pull_content_position[0], pull_content_position[1]]
                )

            return True, box_next_to_player

        return False, False

    def get_action_lookup(self):
        """Returns the mapping from action id to action name."""
        return ACTION_LOOKUP

    def get_action_meanings(self):
        """Returns the same mapping as ``get_action_lookup``."""
        return ACTION_LOOKUP
|
98
|
+
|
99
|
+
|
100
|
+
# Action id -> human readable name. The position in the tuple is the action
# id: 0 is the no-op, 1-4 push, 5-8 move and 9-12 pull.
ACTION_LOOKUP = dict(
    enumerate(
        (
            "no operation",
            "push up",
            "push down",
            "push left",
            "push right",
            "move up",
            "move down",
            "move left",
            "move right",
            "pull up",
            "pull down",
            "pull left",
            "pull right",
        )
    )
)
|
@@ -0,0 +1,122 @@
|
|
1
|
+
from .sokoban_env import SokobanEnv, CHANGE_COORDINATES
|
2
|
+
from gym.spaces import Box
|
3
|
+
from gym.spaces.discrete import Discrete
|
4
|
+
from .render_utils import (
|
5
|
+
room_to_rgb,
|
6
|
+
room_to_tiny_world_rgb,
|
7
|
+
color_player_two,
|
8
|
+
color_tiny_player_two,
|
9
|
+
)
|
10
|
+
import numpy as np
|
11
|
+
|
12
|
+
|
13
|
+
class TwoPlayerSokobanEnv(SokobanEnv):
    """Sokoban variant with two players that share one room.

    Actions are the integer keys of the module-level ``ACTION_LOOKUP``:
    0 is the no-op, 1-8 act for the first player (index 0) and 9-16 for the
    second player (index 1); each group is four pushes followed by four moves.
    """

    def __init__(self, dim_room=(10, 10), max_steps=120, num_boxes=3, num_gen_steps=None):
        # The base reset is skipped here; the two-player reset below runs instead.
        super(TwoPlayerSokobanEnv, self).__init__(
            dim_room, max_steps, num_boxes, num_gen_steps, reset=False
        )
        # The observation frame is 16x the room dimensions, with 3 channels.
        screen_height, screen_width = (dim_room[0] * 16, dim_room[1] * 16)
        self.observation_space = Box(low=0, high=255, shape=(screen_height, screen_width, 3))
        self.boxes_are_on_target = [False] * num_boxes
        self.action_space = Discrete(len(ACTION_LOOKUP))
        # Placeholders; reset() replaces them with the positions found in the room.
        self.player_position = []
        self.player_positions = {0: [0, 0], 1: [1, 1]}

        _ = self.reset(second_player=True)

    def reset(self, render_mode="rgb_array", second_player=True):
        """
        Resets the room and records where both players are.
        :param render_mode: render mode used for the returned observation
        :param second_player: forwarded to the base class ``reset``
        :return: the rendered observation
        """
        super(TwoPlayerSokobanEnv, self).reset(second_player=second_player)

        # Cells holding state 5 are players; the first two matches in
        # np.argwhere order become player 0 and player 1.
        players = np.argwhere(self.room_state == 5)
        self.player_positions = {
            0: players[0],
            1: players[1],
        }

        return self.render(mode=render_mode)

    def step(self, action, observation_mode="rgb_array"):
        """
        Applies one action for the player the action belongs to.
        :param action: integer key of ACTION_LOOKUP
        :param observation_mode: render mode passed on to ``self.render``
        :return: Tuple (observation, reward, done, info)
        """
        assert action in ACTION_LOOKUP

        self.num_env_steps += 1

        self.new_box_position = None
        self.old_box_position = None

        active_player = 0
        if action > 8:
            active_player = 1

        # The inherited _push/_move helpers act on self.player_position.
        self.player_position = self.player_positions[active_player]

        # Fold both players' action ids onto 0-7: 0-3 push, 4-7 move.
        player_action = (action - 1) % 8

        moved_player = False
        moved_box = False

        if action == 0:
            # No operation: reported in ``info`` as player -1.
            active_player = -1

        # All push actions are in the range of [0, 3]
        elif player_action < 4:
            moved_player, moved_box = self._push(player_action + 1)

        elif player_action < 8:
            moved_player = self._move(player_action + 1)

        # Write the position back for a real player only; the no-op used to
        # add a spurious entry under key -1.
        if active_player >= 0:
            self.player_positions[active_player] = self.player_position

        self._calc_reward()

        done = self._check_if_done()

        # Convert the observation to RGB frame
        observation = self.render(mode=observation_mode)

        info = {
            "action.name": ACTION_LOOKUP[action],
            "action.moved_player": moved_player,
            "action.moved_box": moved_box,
            "action.active_player": active_player,
            # Legacy spelling (comma instead of dot), kept so existing readers
            # of this key keep working.
            "action,active_player": active_player,
        }
        if done:
            info["maxsteps_used"] = self._check_if_maxsteps()
            info["all_boxes_on_target"] = self._check_if_all_boxes_on_target()

        return observation, self.reward_last, done, info

    def get_image(self, mode, scale=1):
        """
        Renders the room and recolors the second player.
        :param mode: render mode; modes starting with "tiny_" use the tiny-world renderer
        :param scale: scale factor, only used by the tiny-world renderer
        :return: the rendered image
        """
        if mode.startswith("tiny_"):
            img = room_to_tiny_world_rgb(self.room_state, self.room_fixed, scale=scale)
            img = color_tiny_player_two(img, self.player_positions[1], self.room_fixed, scale=scale)
        else:
            img = room_to_rgb(self.room_state, self.room_fixed)
            img = color_player_two(img, self.player_positions[1], self.room_fixed)

        return img

    def get_action_lookup(self):
        """Returns the mapping from action id to action name."""
        return ACTION_LOOKUP

    def get_action_meanings(self):
        """Returns the same mapping as ``get_action_lookup``."""
        return ACTION_LOOKUP
|
102
|
+
|
103
|
+
|
104
|
+
# Action id -> human readable name. The position in the tuple is the action
# id: 0 is the no-op, 1-8 belong to player one and 9-16 to player two.
ACTION_LOOKUP = dict(
    enumerate(
        (
            "no operation",
            "P1: push up",
            "P1: push down",
            "P1: push left",
            "P1: push right",
            "P1: move up",
            "P1: move down",
            "P1: move left",
            "P1: move right",
            "P2: push up",
            "P2: push down",
            "P2: push left",
            "P2: push right",
            "P2: move up",
            "P2: move down",
            "P2: move left",
            "P2: move right",
        )
    )
)
|
@@ -0,0 +1,394 @@
|
|
1
|
+
from .sokoban_env import SokobanEnv
|
2
|
+
from .sokoban_env_fixed_targets import FixedTargetsSokobanEnv
|
3
|
+
from .sokoban_env_pull import PushAndPullSokobanEnv
|
4
|
+
from .sokoban_env_two_player import TwoPlayerSokobanEnv
|
5
|
+
from .boxoban_env import BoxobanEnv
|
6
|
+
|
7
|
+
|
8
|
+
class SokobanEnv1(SokobanEnv):
    """``SokobanEnv`` preset: 3 boxes and 200 steps unless the caller says otherwise."""

    metadata = {
        "render.modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
        "render_modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
    }

    def __init__(self, **kwargs):
        # Explicitly passed keyword arguments win over the preset values.
        kwargs.setdefault("num_boxes", 3)
        kwargs.setdefault("max_steps", 200)
        super().__init__(**kwargs)
|
18
|
+
|
19
|
+
|
20
|
+
class SokobanEnv2(SokobanEnv):
    """``SokobanEnv`` preset: 5 boxes, 200 steps and 40 generation steps by default."""

    metadata = {
        "render.modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
        "render_modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
    }

    def __init__(self, **kwargs):
        # Explicitly passed keyword arguments win over the preset values.
        kwargs.setdefault("num_boxes", 5)
        kwargs.setdefault("max_steps", 200)
        kwargs.setdefault("num_gen_steps", 40)
        super().__init__(**kwargs)
|
31
|
+
|
32
|
+
|
33
|
+
class SokobanEnv_Small0(SokobanEnv):
    """``SokobanEnv`` preset: 7x7 room, 2 boxes and 200 steps by default."""

    metadata = {
        "render.modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
        "render_modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
    }

    def __init__(self, **kwargs):
        # Explicitly passed keyword arguments win over the preset values.
        kwargs.setdefault("dim_room", (7, 7))
        kwargs.setdefault("max_steps", 200)
        kwargs.setdefault("num_boxes", 2)
        super().__init__(**kwargs)
|
44
|
+
|
45
|
+
|
46
|
+
class SokobanEnv_Small1(SokobanEnv):
    """``SokobanEnv`` preset: 7x7 room, 3 boxes and 200 steps by default."""

    metadata = {
        "render.modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
        "render_modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
    }

    def __init__(self, **kwargs):
        # Explicitly passed keyword arguments win over the preset values.
        kwargs.setdefault("dim_room", (7, 7))
        kwargs.setdefault("max_steps", 200)
        kwargs.setdefault("num_boxes", 3)
        super().__init__(**kwargs)
|
57
|
+
|
58
|
+
|
59
|
+
class SokobanEnv_Large0(SokobanEnv):
    """``SokobanEnv`` preset: 13x11 room, 3 boxes, 300 steps, 43 generation steps."""

    metadata = {
        "render.modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
        "render_modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
    }

    def __init__(self, **kwargs):
        # Explicitly passed keyword arguments win over the preset values.
        kwargs.setdefault("dim_room", (13, 11))
        kwargs.setdefault("max_steps", 300)
        kwargs.setdefault("num_boxes", 3)
        kwargs.setdefault("num_gen_steps", 43)
        super().__init__(**kwargs)
|
71
|
+
|
72
|
+
|
73
|
+
class SokobanEnv_Large1(SokobanEnv):
    """``SokobanEnv`` preset: 13x11 room, 4 boxes, 300 steps, 43 generation steps."""

    metadata = {
        "render.modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
        "render_modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
    }

    def __init__(self, **kwargs):
        # Explicitly passed keyword arguments win over the preset values.
        kwargs.setdefault("dim_room", (13, 11))
        kwargs.setdefault("max_steps", 300)
        kwargs.setdefault("num_boxes", 4)
        kwargs.setdefault("num_gen_steps", 43)
        super().__init__(**kwargs)
|
85
|
+
|
86
|
+
|
87
|
+
class SokobanEnv_Large2(SokobanEnv):
    """``SokobanEnv`` preset: 13x11 room, 5 boxes, 300 steps, 43 generation steps.

    This class was previously declared under the name ``SokobanEnv_Large1``,
    which another class in this module already uses; the duplicate name hid
    that class. It is named ``SokobanEnv_Large2`` to continue the
    ``SokobanEnv_Large0`` / ``SokobanEnv_Large1`` sequence.
    """

    metadata = {
        "render.modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
        "render_modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
    }

    def __init__(self, **kwargs):
        # Explicitly passed keyword arguments win over the preset values.
        kwargs["dim_room"] = kwargs.get("dim_room", (13, 11))
        kwargs["max_steps"] = kwargs.get("max_steps", 300)
        kwargs["num_boxes"] = kwargs.get("num_boxes", 5)
        kwargs["num_gen_steps"] = kwargs.get("num_gen_steps", 43)
        super(SokobanEnv_Large2, self).__init__(**kwargs)
|
99
|
+
|
100
|
+
|
101
|
+
class SokobanEnv_Huge0(SokobanEnv):
    """``SokobanEnv`` preset: 13x13 room, 5 boxes, 300 steps, 50 generation steps."""

    metadata = {
        "render.modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
        "render_modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
    }

    def __init__(self, **kwargs):
        # Explicitly passed keyword arguments win over the preset values.
        kwargs.setdefault("dim_room", (13, 13))
        kwargs.setdefault("max_steps", 300)
        kwargs.setdefault("num_boxes", 5)
        kwargs.setdefault("num_gen_steps", 50)
        super().__init__(**kwargs)
|
113
|
+
|
114
|
+
|
115
|
+
class FixedTargets_Env_v0(FixedTargetsSokobanEnv):
    """``FixedTargetsSokobanEnv`` preset: 10x10 room, 3 boxes, 150 steps, 50 generation steps."""

    metadata = {
        "render.modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
        "render_modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
    }

    def __init__(self, **kwargs):
        # Explicitly passed keyword arguments win over the preset values.
        kwargs.setdefault("dim_room", (10, 10))
        kwargs.setdefault("max_steps", 150)
        kwargs.setdefault("num_boxes", 3)
        kwargs.setdefault("num_gen_steps", 50)
        super().__init__(**kwargs)
|
127
|
+
|
128
|
+
|
129
|
+
class FixedTargets_Env_v1(FixedTargetsSokobanEnv):
    """``FixedTargetsSokobanEnv`` preset: 10x10 room, 4 boxes, 150 steps, 50 generation steps."""

    metadata = {
        "render.modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
        "render_modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
    }

    def __init__(self, **kwargs):
        # Explicitly passed keyword arguments win over the preset values.
        kwargs.setdefault("dim_room", (10, 10))
        kwargs.setdefault("max_steps", 150)
        kwargs.setdefault("num_boxes", 4)
        kwargs.setdefault("num_gen_steps", 50)
        super().__init__(**kwargs)
|
141
|
+
|
142
|
+
|
143
|
+
class FixedTargets_Env_v2(FixedTargetsSokobanEnv):
    """``FixedTargetsSokobanEnv`` preset: 7x7 room, 2 boxes, 150 steps, 50 generation steps."""

    metadata = {
        "render.modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
        "render_modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
    }

    def __init__(self, **kwargs):
        # Explicitly passed keyword arguments win over the preset values.
        kwargs.setdefault("dim_room", (7, 7))
        kwargs.setdefault("max_steps", 150)
        kwargs.setdefault("num_boxes", 2)
        kwargs.setdefault("num_gen_steps", 50)
        super().__init__(**kwargs)
|
155
|
+
|
156
|
+
|
157
|
+
class FixedTargets_Env_v3(FixedTargetsSokobanEnv):
    """``FixedTargetsSokobanEnv`` preset: 7x7 room, 3 boxes, 150 steps, 50 generation steps."""

    metadata = {
        "render.modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
        "render_modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
    }

    def __init__(self, **kwargs):
        # Explicitly passed keyword arguments win over the preset values.
        kwargs.setdefault("dim_room", (7, 7))
        kwargs.setdefault("max_steps", 150)
        kwargs.setdefault("num_boxes", 3)
        kwargs.setdefault("num_gen_steps", 50)
        super().__init__(**kwargs)
|
169
|
+
|
170
|
+
|
171
|
+
class PushAndPull_Env_v0(PushAndPullSokobanEnv):
    """``PushAndPullSokobanEnv`` preset: 10x10 room, 3 boxes, 150 steps, 50 generation steps."""

    metadata = {
        "render.modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
        "render_modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
    }

    def __init__(self, **kwargs):
        # Explicitly passed keyword arguments win over the preset values.
        kwargs.setdefault("dim_room", (10, 10))
        kwargs.setdefault("max_steps", 150)
        kwargs.setdefault("num_boxes", 3)
        kwargs.setdefault("num_gen_steps", 50)
        super().__init__(**kwargs)
|
183
|
+
|
184
|
+
|
185
|
+
class PushAndPull_Env_v1(PushAndPullSokobanEnv):
    """``PushAndPullSokobanEnv`` preset: 10x10 room, 4 boxes, 150 steps, 50 generation steps."""

    metadata = {
        "render.modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
        "render_modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
    }

    def __init__(self, **kwargs):
        # Explicitly passed keyword arguments win over the preset values.
        kwargs.setdefault("dim_room", (10, 10))
        kwargs.setdefault("max_steps", 150)
        kwargs.setdefault("num_boxes", 4)
        kwargs.setdefault("num_gen_steps", 50)
        super().__init__(**kwargs)
|
197
|
+
|
198
|
+
|
199
|
+
class PushAndPull_Env_v2(PushAndPullSokobanEnv):
    """``PushAndPullSokobanEnv`` preset: 7x7 room, 2 boxes, 150 steps, 50 generation steps."""

    metadata = {
        "render.modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
        "render_modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
    }

    def __init__(self, **kwargs):
        # Explicitly passed keyword arguments win over the preset values.
        kwargs.setdefault("dim_room", (7, 7))
        kwargs.setdefault("max_steps", 150)
        kwargs.setdefault("num_boxes", 2)
        kwargs.setdefault("num_gen_steps", 50)
        super().__init__(**kwargs)
|
211
|
+
|
212
|
+
|
213
|
+
class PushAndPull_Env_v3(PushAndPullSokobanEnv):
    """``PushAndPullSokobanEnv`` preset: 7x7 room, 3 boxes, 150 steps, 50 generation steps."""

    metadata = {
        "render.modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
        "render_modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
    }

    def __init__(self, **kwargs):
        # Explicitly passed keyword arguments win over the preset values.
        kwargs.setdefault("dim_room", (7, 7))
        kwargs.setdefault("max_steps", 150)
        kwargs.setdefault("num_boxes", 3)
        kwargs.setdefault("num_gen_steps", 50)
        super().__init__(**kwargs)
|
225
|
+
|
226
|
+
|
227
|
+
class PushAndPull_Env_v4(PushAndPullSokobanEnv):
    """``PushAndPullSokobanEnv`` preset: 13x11 room, 4 boxes, 300 steps, 50 generation steps."""

    metadata = {
        "render.modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
        "render_modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
    }

    def __init__(self, **kwargs):
        # Explicitly passed keyword arguments win over the preset values.
        kwargs.setdefault("dim_room", (13, 11))
        kwargs.setdefault("max_steps", 300)
        kwargs.setdefault("num_boxes", 4)
        kwargs.setdefault("num_gen_steps", 50)
        super().__init__(**kwargs)
|
239
|
+
|
240
|
+
|
241
|
+
class PushAndPull_Env_v5(PushAndPullSokobanEnv):
    """``PushAndPullSokobanEnv`` preset: 13x11 room, 5 boxes, 300 steps, 50 generation steps."""

    metadata = {
        "render.modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
        "render_modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
    }

    def __init__(self, **kwargs):
        # Explicitly passed keyword arguments win over the preset values.
        kwargs.setdefault("dim_room", (13, 11))
        kwargs.setdefault("max_steps", 300)
        kwargs.setdefault("num_boxes", 5)
        kwargs.setdefault("num_gen_steps", 50)
        super().__init__(**kwargs)
|
253
|
+
|
254
|
+
|
255
|
+
class TwoPlayer_Env0(TwoPlayerSokobanEnv):
    """``TwoPlayerSokobanEnv`` preset: 7x7 room, 2 boxes and 200 steps by default."""

    metadata = {
        "render.modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
        "render_modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
    }

    def __init__(self, **kwargs):
        # Explicitly passed keyword arguments win over the preset values.
        kwargs.setdefault("dim_room", (7, 7))
        kwargs.setdefault("max_steps", 200)
        kwargs.setdefault("num_boxes", 2)
        super().__init__(**kwargs)
|
266
|
+
|
267
|
+
|
268
|
+
class TwoPlayer_Env1(TwoPlayerSokobanEnv):
    """``TwoPlayerSokobanEnv`` preset: 7x7 room, 3 boxes and 200 steps by default."""

    metadata = {
        "render.modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
        "render_modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
    }

    def __init__(self, **kwargs):
        # Explicitly passed keyword arguments win over the preset values.
        kwargs.setdefault("dim_room", (7, 7))
        kwargs.setdefault("max_steps", 200)
        kwargs.setdefault("num_boxes", 3)
        super().__init__(**kwargs)
|
279
|
+
|
280
|
+
|
281
|
+
class TwoPlayer_Env2(TwoPlayerSokobanEnv):
    """``TwoPlayerSokobanEnv`` preset: 10x10 room, 3 boxes and 200 steps by default."""

    metadata = {
        "render.modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
        "render_modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
    }

    def __init__(self, **kwargs):
        # Explicitly passed keyword arguments win over the preset values.
        kwargs.setdefault("dim_room", (10, 10))
        kwargs.setdefault("max_steps", 200)
        kwargs.setdefault("num_boxes", 3)
        super().__init__(**kwargs)
|
292
|
+
|
293
|
+
|
294
|
+
class TwoPlayer_Env3(TwoPlayerSokobanEnv):
    """``TwoPlayerSokobanEnv`` preset: 10x10 room, 4 boxes and 200 steps by default."""

    metadata = {
        "render.modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
        "render_modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
    }

    def __init__(self, **kwargs):
        # Explicitly passed keyword arguments win over the preset values.
        kwargs.setdefault("dim_room", (10, 10))
        kwargs.setdefault("max_steps", 200)
        kwargs.setdefault("num_boxes", 4)
        super().__init__(**kwargs)
|
305
|
+
|
306
|
+
|
307
|
+
class TwoPlayer_Env4(TwoPlayerSokobanEnv):
    """``TwoPlayerSokobanEnv`` preset: 13x11 room, 3 boxes and 200 steps by default."""

    metadata = {
        "render.modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
        "render_modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
    }

    def __init__(self, **kwargs):
        # Explicitly passed keyword arguments win over the preset values.
        kwargs.setdefault("dim_room", (13, 11))
        kwargs.setdefault("max_steps", 200)
        kwargs.setdefault("num_boxes", 3)
        super().__init__(**kwargs)
|
318
|
+
|
319
|
+
|
320
|
+
class TwoPlayer_Env5(TwoPlayerSokobanEnv):
    """``TwoPlayerSokobanEnv`` preset: 13x11 room, 4 boxes and 200 steps by default."""

    metadata = {
        "render.modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
        "render_modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
    }

    def __init__(self, **kwargs):
        # Explicitly passed keyword arguments win over the preset values.
        kwargs.setdefault("dim_room", (13, 11))
        kwargs.setdefault("max_steps", 200)
        kwargs.setdefault("num_boxes", 4)
        super().__init__(**kwargs)
|
331
|
+
|
332
|
+
|
333
|
+
class Boxban_Env0(BoxobanEnv):
    """``BoxobanEnv`` preset: "unfiltered" difficulty, "train" split, 200 steps by default."""

    metadata = {
        "render.modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
        "render_modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
    }

    def __init__(self, **kwargs):
        # Explicitly passed keyword arguments win over the preset values.
        kwargs.setdefault("max_steps", 200)
        kwargs.setdefault("difficulty", "unfiltered")
        kwargs.setdefault("split", "train")
        super().__init__(**kwargs)
|
344
|
+
|
345
|
+
|
346
|
+
class Boxban_Env0_val(BoxobanEnv):
    """``BoxobanEnv`` preset: "unfiltered" difficulty, "valid" split, 200 steps by default."""

    metadata = {
        "render.modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
        "render_modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
    }

    def __init__(self, **kwargs):
        # Explicitly passed keyword arguments win over the preset values.
        kwargs.setdefault("max_steps", 200)
        kwargs.setdefault("difficulty", "unfiltered")
        kwargs.setdefault("split", "valid")
        super().__init__(**kwargs)
|
357
|
+
|
358
|
+
|
359
|
+
class Boxban_Env0_test(BoxobanEnv):
    """``BoxobanEnv`` preset: "unfiltered" difficulty, "test" split, 200 steps by default."""

    metadata = {
        "render.modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
        "render_modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
    }

    def __init__(self, **kwargs):
        # Explicitly passed keyword arguments win over the preset values.
        kwargs.setdefault("max_steps", 200)
        kwargs.setdefault("difficulty", "unfiltered")
        kwargs.setdefault("split", "test")
        super().__init__(**kwargs)
|
370
|
+
|
371
|
+
|
372
|
+
class Boxban_Env1(BoxobanEnv):
    """``BoxobanEnv`` preset: "medium" difficulty and 200 steps by default.

    No ``split`` default is set here; the base class decides it.
    """

    metadata = {
        "render.modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
        "render_modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
    }

    def __init__(self, **kwargs):
        # Explicitly passed keyword arguments win over the preset values.
        kwargs.setdefault("max_steps", 200)
        kwargs.setdefault("difficulty", "medium")
        super().__init__(**kwargs)
|
382
|
+
|
383
|
+
|
384
|
+
class Boxban_Env1_val(BoxobanEnv):
    """``BoxobanEnv`` preset: "medium" difficulty, "valid" split, 200 steps by default."""

    metadata = {
        "render.modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
        "render_modes": ["human", "rgb_array", "tiny_human", "tiny_rgb_array"],
    }

    def __init__(self, **kwargs):
        # Explicitly passed keyword arguments win over the preset values.
        kwargs.setdefault("max_steps", 200)
        kwargs.setdefault("difficulty", "medium")
        kwargs.setdefault("split", "valid")
        super().__init__(**kwargs)
|