synth-ai 0.2.4.dev4__py3-none-any.whl → 0.2.4.dev5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synth_ai/environments/examples/__init__.py +1 -0
- synth_ai/environments/examples/crafter_classic/__init__.py +8 -0
- synth_ai/environments/examples/crafter_classic/config_logging.py +111 -0
- synth_ai/environments/examples/crafter_classic/debug_translation.py +0 -0
- synth_ai/environments/examples/crafter_classic/engine.py +575 -0
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +63 -0
- synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +5 -0
- synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +74 -0
- synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +266 -0
- synth_ai/environments/examples/crafter_classic/environment.py +364 -0
- synth_ai/environments/examples/crafter_classic/taskset.py +233 -0
- synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +229 -0
- synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +298 -0
- synth_ai/environments/examples/crafter_custom/__init__.py +4 -0
- synth_ai/environments/examples/crafter_custom/crafter/__init__.py +7 -0
- synth_ai/environments/examples/crafter_custom/crafter/config.py +182 -0
- synth_ai/environments/examples/crafter_custom/crafter/constants.py +8 -0
- synth_ai/environments/examples/crafter_custom/crafter/engine.py +269 -0
- synth_ai/environments/examples/crafter_custom/crafter/env.py +266 -0
- synth_ai/environments/examples/crafter_custom/crafter/objects.py +418 -0
- synth_ai/environments/examples/crafter_custom/crafter/recorder.py +187 -0
- synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +119 -0
- synth_ai/environments/examples/crafter_custom/dataset_builder.py +373 -0
- synth_ai/environments/examples/crafter_custom/environment.py +312 -0
- synth_ai/environments/examples/crafter_custom/run_dataset.py +305 -0
- synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +156 -0
- synth_ai/environments/examples/enron/art_helpers/local_email_db.py +280 -0
- synth_ai/environments/examples/enron/art_helpers/types_enron.py +24 -0
- synth_ai/environments/examples/enron/engine.py +291 -0
- synth_ai/environments/examples/enron/environment.py +165 -0
- synth_ai/environments/examples/enron/taskset.py +112 -0
- synth_ai/environments/examples/minigrid/__init__.py +48 -0
- synth_ai/environments/examples/minigrid/engine.py +589 -0
- synth_ai/environments/examples/minigrid/environment.py +274 -0
- synth_ai/environments/examples/minigrid/environment_mapping.py +242 -0
- synth_ai/environments/examples/minigrid/puzzle_loader.py +416 -0
- synth_ai/environments/examples/minigrid/taskset.py +583 -0
- synth_ai/environments/examples/nethack/__init__.py +7 -0
- synth_ai/environments/examples/nethack/achievements.py +337 -0
- synth_ai/environments/examples/nethack/engine.py +738 -0
- synth_ai/environments/examples/nethack/environment.py +255 -0
- synth_ai/environments/examples/nethack/helpers/__init__.py +42 -0
- synth_ai/environments/examples/nethack/helpers/action_mapping.py +301 -0
- synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +401 -0
- synth_ai/environments/examples/nethack/helpers/observation_utils.py +433 -0
- synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +201 -0
- synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +268 -0
- synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +308 -0
- synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +430 -0
- synth_ai/environments/examples/nethack/taskset.py +323 -0
- synth_ai/environments/examples/red/__init__.py +7 -0
- synth_ai/environments/examples/red/config_logging.py +110 -0
- synth_ai/environments/examples/red/engine.py +693 -0
- synth_ai/environments/examples/red/engine_helpers/__init__.py +1 -0
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +28 -0
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +275 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +142 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +56 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +283 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +149 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +137 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +56 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +330 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +120 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +558 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +312 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +147 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +246 -0
- synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +367 -0
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +139 -0
- synth_ai/environments/examples/red/environment.py +235 -0
- synth_ai/environments/examples/red/taskset.py +77 -0
- synth_ai/environments/examples/sokoban/__init__.py +1 -0
- synth_ai/environments/examples/sokoban/engine.py +675 -0
- synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +1 -0
- synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +656 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +17 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +3 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +129 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +370 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +331 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +305 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +66 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +114 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +122 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +394 -0
- synth_ai/environments/examples/sokoban/environment.py +228 -0
- synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +438 -0
- synth_ai/environments/examples/sokoban/puzzle_loader.py +311 -0
- synth_ai/environments/examples/sokoban/taskset.py +425 -0
- synth_ai/environments/examples/tictactoe/__init__.py +1 -0
- synth_ai/environments/examples/tictactoe/engine.py +368 -0
- synth_ai/environments/examples/tictactoe/environment.py +239 -0
- synth_ai/environments/examples/tictactoe/taskset.py +214 -0
- synth_ai/environments/examples/verilog/__init__.py +10 -0
- synth_ai/environments/examples/verilog/engine.py +328 -0
- synth_ai/environments/examples/verilog/environment.py +349 -0
- synth_ai/environments/examples/verilog/taskset.py +418 -0
- {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev5.dist-info}/METADATA +1 -1
- {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev5.dist-info}/RECORD +104 -6
- {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev5.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev5.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev5.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,401 @@
|
|
1
|
+
"""NLE integration wrapper for NetHack environment."""
|
2
|
+
|
3
|
+
from typing import Dict, Any, Optional, Tuple, List
|
4
|
+
import numpy as np
|
5
|
+
|
6
|
+
try:
|
7
|
+
import nle
|
8
|
+
from nle import nethack
|
9
|
+
except ImportError as e:
|
10
|
+
raise ImportError(
|
11
|
+
"NLE (NetHack Learning Environment) is required but not installed. "
|
12
|
+
"Please install it with: pip install nle"
|
13
|
+
) from e
|
14
|
+
|
15
|
+
|
16
|
+
class NLEWrapper:
    """Wrapper around NLE (NetHack Learning Environment) for synth-env integration.

    The wrapper exposes a string-based action interface (``"north"``,
    ``"pickup"``, ``"a"`` ...) on top of the integer action indices used by the
    underlying environment, and converts raw observation arrays into plain
    Python values (decoded message, ASCII map, player stats, inventory, ...).
    """

    # Observation keys requested from NLE when the caller does not supply any.
    _DEFAULT_OBSERVATION_KEYS = (
        "glyphs",
        "chars",
        "colors",
        "specials",
        "blstats",
        "message",
        "inv_glyphs",
        "inv_letters",
        "inv_oclasses",
        "inv_strs",
        "tty_chars",
        "tty_colors",
        "tty_cursor",
    )

    # Full role name -> three-letter abbreviation handed to NLE.
    _ROLE_ABBREVIATIONS = {
        "tourist": "tou",
        "knight": "kni",
        "wizard": "wiz",
        "barbarian": "bar",
        "ranger": "ran",
        "priest": "pri",
        "priestess": "pri",
        "monk": "mon",
        "rogue": "rog",
        "valkyrie": "val",
        "samurai": "sam",
        "archeologist": "arc",
        "healer": "hea",
        "caveman": "cav",
        "cavewoman": "cav",
    }

    # (enum class in ``nle.nethack``, member name, action name exposed to callers).
    # Entries are checked in order; the first member equal to an action wins.
    _NAMED_ACTIONS = (
        # Compass directions
        ("CompassDirection", "N", "north"),
        ("CompassDirection", "S", "south"),
        ("CompassDirection", "E", "east"),
        ("CompassDirection", "W", "west"),
        ("CompassDirection", "NE", "northeast"),
        ("CompassDirection", "NW", "northwest"),
        ("CompassDirection", "SE", "southeast"),
        ("CompassDirection", "SW", "southwest"),
        # Misc directions
        ("MiscDirection", "UP", "up"),
        ("MiscDirection", "DOWN", "down"),
        ("MiscDirection", "WAIT", "wait"),
        # Commands
        ("Command", "SEARCH", "search"),
        ("Command", "INVENTORY", "inventory"),
        ("Command", "LOOK", "look"),
        ("Command", "OPEN", "open"),
        ("Command", "CLOSE", "close"),
        ("Command", "KICK", "kick"),
        ("Command", "PICKUP", "pickup"),
        ("Command", "DROP", "drop"),
        ("Command", "EAT", "eat"),
        ("Command", "WIELD", "wield"),
        ("Command", "WEAR", "wear"),
        ("Command", "TAKEOFF", "takeoff"),
        ("Command", "PUTON", "puton"),
        ("Command", "REMOVE", "remove"),
        ("Command", "READ", "read"),
        ("Command", "QUAFF", "quaff"),
        ("Command", "ZAP", "zap"),
        ("Command", "THROW", "throw"),
        ("Command", "FIRE", "fire"),
        ("Command", "APPLY", "apply"),
        ("Command", "PRAY", "pray"),
        ("Command", "SAVE", "save"),
        ("Command", "QUIT", "quit"),
        ("Command", "ESC", "escape"),
        ("Command", "PAY", "pay"),
        ("Command", "LOOT", "loot"),
        ("Command", "ENHANCE", "enhance"),
        ("Command", "FORCE", "force"),
        ("Command", "INVOKE", "invoke"),
        ("Command", "OFFER", "offer"),
        ("Command", "RUB", "rub"),
        ("Command", "SIT", "sit"),
        ("Command", "TURN", "turn"),
        ("Command", "UNTRAP", "untrap"),
        ("Command", "WIPE", "wipe"),
        ("Command", "ENGRAVE", "engrave"),
        ("Command", "JUMP", "jump"),
        ("Command", "CHAT", "chat"),
        ("Command", "DIP", "dip"),
        ("Command", "RIDE", "ride"),
        ("Command", "TIP", "tip"),
        # Special/Misc
        ("MiscAction", "MORE", "more"),
        # Named text characters (letters and digits are handled by value)
        ("TextCharacters", "SPACE", "space"),
        ("TextCharacters", "APOS", "'"),
        ("TextCharacters", "QUOTE", '"'),
    )

    # Names given to ``blstats[0]`` .. ``blstats[24]``, in index order.
    _BLSTAT_FIELDS = (
        "x",
        "y",
        "strength",
        "strength_pct",
        "dexterity",
        "constitution",
        "intelligence",
        "wisdom",
        "charisma",
        "score",
        "hp",
        "max_hp",
        "depth",
        "gold",
        "energy",
        "max_energy",
        "ac",
        "monster_level",
        "experience_level",
        "experience_points",
        "time",
        "hunger_state",
        "carrying_capacity",
        "dungeon_number",
        "level_number",
    )

    # Substrings whose presence in the top screen rows is treated as "in a menu".
    _MENU_KEYWORDS = ("pick up", "drop", "wear", "take off", "what do you want")

    def __init__(self, character_role: str = "mon", observation_keys: Optional[List[str]] = None):
        """Initialize NLE wrapper.

        Args:
            character_role: Character class, either a full role name
                ("valkyrie") or its abbreviation ("val"). Unknown values
                fall back to monk.
            observation_keys: Which observations to include; ``None`` selects
                the wrapper's default set.
        """
        self.character_role = self._convert_role_name(character_role)

        if observation_keys is None:
            observation_keys = list(self._DEFAULT_OBSERVATION_KEYS)

        # Create NLE environment
        self.env = nle.env.NLE(character=self.character_role, observation_keys=observation_keys)

        # Build action mapping
        self._build_action_mapping()

        # Track the most recent transition
        self.last_obs = None
        self.last_reward = 0.0
        self.last_done = False
        self.last_info = {}

    def _convert_role_name(self, role: str) -> str:
        """Convert a role name to its NLE abbreviation.

        Full names are looked up case-insensitively. A value that already is
        one of the known abbreviations is returned as-is (lower-cased) instead
        of being replaced by the default. Anything else maps to ``"mon"``.
        """
        key = role.lower()
        if key in self._ROLE_ABBREVIATIONS.values():
            return key
        return self._ROLE_ABBREVIATIONS.get(key, "mon")  # Default to monk

    @staticmethod
    def _text_character_name(action: Any) -> Optional[str]:
        """Return the character for an action whose value is an ASCII letter or digit."""
        value = getattr(action, "value", None)
        if not isinstance(value, int):
            return None
        if 97 <= value <= 122 or 65 <= value <= 90 or 48 <= value <= 57:  # a-z, A-Z, 0-9
            return chr(value)
        return None

    def _build_action_mapping(self):
        """Build mapping from action names to NLE action indices.

        Populates ``self.action_map`` (name -> index into ``self.env.actions``)
        and ``self.index_to_action`` (index -> name). Actions without a known
        name are skipped.
        """
        self.action_map = {}
        self.index_to_action = {}

        # Resolve the table against the installed ``nethack`` module. Members
        # that do not exist in this NLE build are skipped rather than raising.
        named = []
        for group, attribute, label in self._NAMED_ACTIONS:
            member = getattr(getattr(nethack, group, None), attribute, None)
            if member is not None:
                named.append((member, label))

        for index, action in enumerate(self.env.actions):
            name = next((label for member, label in named if action == member), None)
            if name is None:
                name = self._text_character_name(action)
            if name is None:
                # Skip unmapped actions
                continue

            self.action_map[name] = index
            self.index_to_action[index] = name

    def reset(self, seed: Optional[int] = None) -> Dict[str, Any]:
        """Reset the NLE environment.

        Args:
            seed: Optional seed forwarded to ``env.seed`` before resetting.

        Returns:
            The processed initial observation.
        """
        if seed is not None:
            # NOTE(review): only one seed value is forwarded; confirm whether the
            # environment needs further seeds for fully reproducible episodes.
            self.env.seed(seed)

        result = self.env.reset()
        # Gymnasium-style environments return ``(observation, info)``.
        if isinstance(result, tuple) and len(result) == 2:
            self.last_obs, info = result
            self.last_info = dict(info) if info else {}
        else:
            self.last_obs = result
            self.last_info = {}
        self.last_reward = 0.0
        self.last_done = False

        return self._process_observation(self.last_obs)

    def _env_step(self, action_idx: int) -> Tuple[Any, Any, bool, Dict[str, Any]]:
        """Step the environment and normalise the result to ``(obs, reward, done, info)``."""
        result = self.env.step(action_idx)
        if len(result) == 5:
            # Gymnasium-style result: fold terminated/truncated into one flag.
            obs, reward, terminated, truncated, info = result
            return obs, reward, bool(terminated or truncated), info
        obs, reward, done, info = result
        return obs, reward, done, info

    def _resolve_action_index(self, action: str) -> int:
        """Translate an action name into an index into ``self.env.actions``.

        Raises:
            ValueError: If the name is unknown.
        """
        if action in self.action_map:
            return self.action_map[action]

        # Special handling for menu letters that might not be in action_map:
        # look for any environment action whose value is the letter's code.
        if len(action) == 1 and action.isalpha():
            ascii_val = ord(action)
            for index, candidate in enumerate(self.env.actions):
                if hasattr(candidate, "value") and candidate.value == ascii_val:
                    print(f"Found menu letter '{action}' at action index {index}")
                    return index

        raise ValueError(
            f"Unknown action: {action}. Valid actions: {list(self.action_map.keys())}"
        )

    def _record_step(
        self, obs: Any, reward: Any, done: bool, info: Dict[str, Any]
    ) -> Tuple[Dict[str, Any], float, bool, Dict[str, Any]]:
        """Store a transition on the instance and return it with the observation processed."""
        self.last_obs, self.last_reward, self.last_done, self.last_info = obs, reward, done, info
        return self._process_observation(obs), reward, done, info

    def step(self, action: str) -> Tuple[Dict[str, Any], float, bool, Dict[str, Any]]:
        """Take a step in the environment.

        A compound action such as ``"open west"`` is executed as two
        consecutive environment steps. The returned reward is the sum of both
        steps, and if the episode ends on the first step the second one is
        not executed.

        Args:
            action: Action name (e.g., "north", "pickup", "a" for menu)

        Returns:
            Tuple of (observation, reward, done, info)

        Raises:
            ValueError: If the action (or either half of a compound action)
                is not recognised.
        """
        first_reward = None

        # Handle compound actions like "open west"
        if " " in action:
            first, _, second = action.partition(" ")
            if first not in self.action_map or second not in self.action_map:
                raise ValueError(f"Invalid compound action: {action}. Use separate actions.")

            print(f"Splitting compound action '{action}' into '{first}' then '{second}'")
            obs, first_reward, done, info = self._env_step(self.action_map[first])
            if done:
                # The episode is over; stepping again would act on a finished game.
                return self._record_step(obs, first_reward, done, info)
            # Second action (the direction)
            action = second

        action_idx = self._resolve_action_index(action)

        obs, reward, done, info = self._env_step(action_idx)
        if first_reward is not None:
            # Do not drop the reward earned by the first half of a compound action.
            reward = first_reward + reward

        return self._record_step(obs, reward, done, info)

    @staticmethod
    def _decode_c_string(buffer: Any) -> str:
        """Decode a NUL-padded byte array up to its first NUL byte.

        Bytes outside ASCII are replaced rather than raising.
        """
        raw = buffer.tobytes()
        end = raw.find(b"\x00")
        if end != -1:
            raw = raw[:end]
        return raw.decode("ascii", errors="replace")

    def _process_observation(self, obs: Dict[str, np.ndarray]) -> Dict[str, Any]:
        """Process NLE observation into a more usable format.

        Only keys present in ``obs`` are processed. The result may contain
        ``message``, ``ascii_chars``, ``ascii_map``, ``player_stats``,
        ``inventory``, ``in_menu``, ``menu_text`` and ``cursor``, and always
        contains ``_raw`` (the untouched input).
        """
        processed: Dict[str, Any] = {}

        # Decode message (without the NUL padding)
        if "message" in obs:
            processed["message"] = self._decode_c_string(obs["message"]).strip()

        # Get ASCII map, both as the raw array and as newline-joined text
        if "chars" in obs:
            processed["ascii_chars"] = obs["chars"]
            processed["ascii_map"] = "\n".join(
                "".join(chr(c) for c in row) for row in obs["chars"]
            )

        # Get player stats from blstats
        if "blstats" in obs:
            blstats = obs["blstats"]
            processed["player_stats"] = {
                field: int(blstats[position])
                for position, field in enumerate(self._BLSTAT_FIELDS)
            }

        # Get inventory
        if "inv_strs" in obs:
            inv_items = []
            for slot, inv_str in enumerate(obs["inv_strs"]):
                if inv_str[0] == 0:  # Empty slot
                    continue
                item_str = self._decode_c_string(inv_str)
                if item_str:
                    letter = chr(obs["inv_letters"][slot]) if "inv_letters" in obs else "?"
                    inv_items.append({"letter": letter, "description": item_str})
            processed["inventory"] = inv_items

        # Check if in menu
        if "tty_chars" in obs:
            tty_text = []
            for row in obs["tty_chars"][:5]:  # Check first 5 rows
                line = "".join(chr(c) if 32 <= c <= 126 else " " for c in row).strip()
                if line:
                    tty_text.append(line)

            # Simple menu detection: keyword search over the joined top rows
            screen_text = " ".join(tty_text).lower()
            processed["in_menu"] = any(keyword in screen_text for keyword in self._MENU_KEYWORDS)
            processed["menu_text"] = tty_text

        # TTY cursor position, with the two stored components swapped
        if "tty_cursor" in obs:
            cursor = obs["tty_cursor"]
            processed["cursor"] = (int(cursor[1]), int(cursor[0]))

        # Raw observations for advanced use
        processed["_raw"] = obs

        return processed

    def get_valid_actions(self) -> List[str]:
        """Get list of valid action names."""
        return list(self.action_map.keys())

    def get_state(self) -> bytes:
        """Get the current NLE state for serialization.

        Uses ``clone_state`` or ``clone_full_state`` on the environment,
        whichever exists.

        Raises:
            RuntimeError: If the environment offers neither method.
        """
        if hasattr(self.env, "clone_state"):
            return self.env.clone_state()
        if hasattr(self.env, "clone_full_state"):
            return self.env.clone_full_state()
        raise RuntimeError("NLE environment does not support state cloning")

    def set_state(self, state: bytes):
        """Restore NLE state from serialized data.

        Uses ``restore_state`` or ``restore_full_state`` on the environment,
        whichever exists.

        Raises:
            RuntimeError: If the environment offers neither method.
        """
        if hasattr(self.env, "restore_state"):
            self.env.restore_state(state)
        elif hasattr(self.env, "restore_full_state"):
            self.env.restore_full_state(state)
        else:
            raise RuntimeError("NLE environment does not support state restoration")

    def close(self):
        """Close the NLE environment."""
        self.env.close()
|