synth-ai 0.2.4.dev4__py3-none-any.whl → 0.2.4.dev5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synth_ai/environments/examples/__init__.py +1 -0
- synth_ai/environments/examples/crafter_classic/__init__.py +8 -0
- synth_ai/environments/examples/crafter_classic/config_logging.py +111 -0
- synth_ai/environments/examples/crafter_classic/debug_translation.py +0 -0
- synth_ai/environments/examples/crafter_classic/engine.py +575 -0
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +63 -0
- synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +5 -0
- synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +74 -0
- synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +266 -0
- synth_ai/environments/examples/crafter_classic/environment.py +364 -0
- synth_ai/environments/examples/crafter_classic/taskset.py +233 -0
- synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +229 -0
- synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +298 -0
- synth_ai/environments/examples/crafter_custom/__init__.py +4 -0
- synth_ai/environments/examples/crafter_custom/crafter/__init__.py +7 -0
- synth_ai/environments/examples/crafter_custom/crafter/config.py +182 -0
- synth_ai/environments/examples/crafter_custom/crafter/constants.py +8 -0
- synth_ai/environments/examples/crafter_custom/crafter/engine.py +269 -0
- synth_ai/environments/examples/crafter_custom/crafter/env.py +266 -0
- synth_ai/environments/examples/crafter_custom/crafter/objects.py +418 -0
- synth_ai/environments/examples/crafter_custom/crafter/recorder.py +187 -0
- synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +119 -0
- synth_ai/environments/examples/crafter_custom/dataset_builder.py +373 -0
- synth_ai/environments/examples/crafter_custom/environment.py +312 -0
- synth_ai/environments/examples/crafter_custom/run_dataset.py +305 -0
- synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +156 -0
- synth_ai/environments/examples/enron/art_helpers/local_email_db.py +280 -0
- synth_ai/environments/examples/enron/art_helpers/types_enron.py +24 -0
- synth_ai/environments/examples/enron/engine.py +291 -0
- synth_ai/environments/examples/enron/environment.py +165 -0
- synth_ai/environments/examples/enron/taskset.py +112 -0
- synth_ai/environments/examples/minigrid/__init__.py +48 -0
- synth_ai/environments/examples/minigrid/engine.py +589 -0
- synth_ai/environments/examples/minigrid/environment.py +274 -0
- synth_ai/environments/examples/minigrid/environment_mapping.py +242 -0
- synth_ai/environments/examples/minigrid/puzzle_loader.py +416 -0
- synth_ai/environments/examples/minigrid/taskset.py +583 -0
- synth_ai/environments/examples/nethack/__init__.py +7 -0
- synth_ai/environments/examples/nethack/achievements.py +337 -0
- synth_ai/environments/examples/nethack/engine.py +738 -0
- synth_ai/environments/examples/nethack/environment.py +255 -0
- synth_ai/environments/examples/nethack/helpers/__init__.py +42 -0
- synth_ai/environments/examples/nethack/helpers/action_mapping.py +301 -0
- synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +401 -0
- synth_ai/environments/examples/nethack/helpers/observation_utils.py +433 -0
- synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +201 -0
- synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +268 -0
- synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +308 -0
- synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +430 -0
- synth_ai/environments/examples/nethack/taskset.py +323 -0
- synth_ai/environments/examples/red/__init__.py +7 -0
- synth_ai/environments/examples/red/config_logging.py +110 -0
- synth_ai/environments/examples/red/engine.py +693 -0
- synth_ai/environments/examples/red/engine_helpers/__init__.py +1 -0
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +28 -0
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +275 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +142 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +56 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +283 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +149 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +137 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +56 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +330 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +120 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +558 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +312 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +147 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +246 -0
- synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +367 -0
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +139 -0
- synth_ai/environments/examples/red/environment.py +235 -0
- synth_ai/environments/examples/red/taskset.py +77 -0
- synth_ai/environments/examples/sokoban/__init__.py +1 -0
- synth_ai/environments/examples/sokoban/engine.py +675 -0
- synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +1 -0
- synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +656 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +17 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +3 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +129 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +370 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +331 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +305 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +66 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +114 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +122 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +394 -0
- synth_ai/environments/examples/sokoban/environment.py +228 -0
- synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +438 -0
- synth_ai/environments/examples/sokoban/puzzle_loader.py +311 -0
- synth_ai/environments/examples/sokoban/taskset.py +425 -0
- synth_ai/environments/examples/tictactoe/__init__.py +1 -0
- synth_ai/environments/examples/tictactoe/engine.py +368 -0
- synth_ai/environments/examples/tictactoe/environment.py +239 -0
- synth_ai/environments/examples/tictactoe/taskset.py +214 -0
- synth_ai/environments/examples/verilog/__init__.py +10 -0
- synth_ai/environments/examples/verilog/engine.py +328 -0
- synth_ai/environments/examples/verilog/environment.py +349 -0
- synth_ai/environments/examples/verilog/taskset.py +418 -0
- {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev5.dist-info}/METADATA +1 -1
- {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev5.dist-info}/RECORD +104 -6
- {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev5.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev5.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev5.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,255 @@
|
|
1
|
+
"""NetHack environment wrapper for synth-env framework."""
|
2
|
+
|
3
|
+
from __future__ import annotations
|
4
|
+
|
5
|
+
import json
|
6
|
+
from typing import Dict, Any, Optional, List, Union
|
7
|
+
from pydantic import BaseModel
|
8
|
+
|
9
|
+
from synth_ai.environments.stateful.core import StatefulEnvironment
|
10
|
+
from synth_ai.environments.reproducibility.core import ReproducibleEnvironment
|
11
|
+
from synth_ai.environments.environment.tools import AbstractTool, EnvToolCall, ToolResult
|
12
|
+
from synth_ai.environments.environment.shared_engine import (
|
13
|
+
GetObservationCallable,
|
14
|
+
InternalObservation,
|
15
|
+
)
|
16
|
+
from synth_ai.environments.tasks.core import TaskInstance
|
17
|
+
|
18
|
+
from .engine import (
|
19
|
+
NetHackEngine,
|
20
|
+
NetHackObservationCallable,
|
21
|
+
NetHackCheckpointObservationCallable,
|
22
|
+
NetHackPublicState,
|
23
|
+
NetHackPrivateState,
|
24
|
+
)
|
25
|
+
from .helpers import (
|
26
|
+
validate_action,
|
27
|
+
get_action_description,
|
28
|
+
NETHACK_ACTIONS,
|
29
|
+
MENU_ACTIONS,
|
30
|
+
)
|
31
|
+
|
32
|
+
|
33
|
+
class NetHackActionInput(BaseModel):
    """Argument schema for the ``interact`` tool: a single action name."""

    # Name of the action to perform; expected to be one of the keys of
    # NETHACK_ACTIONS or MENU_ACTIONS.
    action: str
|
37
|
+
|
38
|
+
|
39
|
+
class NetHackInteractTool(AbstractTool):
    """Tool that validates a named action and forwards it to the NetHack engine.

    Every call returns a ``ToolResult`` whose payload carries the
    ``public_state`` and ``private_state`` objects, both on success and on
    failure, so callers can always build an observation.
    """

    name = "interact"
    description = (
        "Perform an action in the NetHack dungeon. Available actions include "
        "movement (north, south, east, west), combat (attack by moving into a "
        "monster, or use fire, throw, zap, kick), inventory management "
        "(inventory, pickup, drop), and many others. In menus, use letter keys (a-z) "
        "or numbers (0-9) to select options, or 'escape' to cancel."
    )
    call_schema = NetHackActionInput
    result_schema = ToolResult

    def __init__(self, engine: NetHackEngine):
        """Bind the tool to the engine it will drive."""
        self.engine = engine

    async def __call__(self, call: EnvToolCall) -> ToolResult:
        """Validate ``call.args["action"]`` and step the engine with it.

        Args:
            call: Tool call whose ``args`` mapping must contain ``"action"``.

        Returns:
            ``ToolResult(ok=True)`` with the states returned by the engine
            step, or ``ToolResult(ok=False)`` with an error message and the
            current states when validation fails or any exception is raised.
        """
        try:
            action = call.args["action"]  # Will KeyError if missing

            # Get current game state for validation
            priv, pub = self.engine.get_current_states_for_observation()
            game_state = {
                "in_menu": pub.in_menu,
                "terminated": pub.terminated,
                # Whether the player stands on stairs is not derived from the
                # map here. Reporting False would make validate_action reject
                # every go_up/go_down request, so the check is left to the
                # game itself by reporting the stairs as available.
                "stairs_here": True,
            }

            # Validate action
            is_valid, error_msg = validate_action(action, game_state)
            if not is_valid:
                return ToolResult(
                    ok=False,
                    error=error_msg or f"Invalid action: {action}",
                    payload={"public_state": pub, "private_state": priv},
                )

            # Execute action
            private_state, public_state = await self.engine._step_engine(action)

            return ToolResult(
                ok=True,
                payload={"public_state": public_state, "private_state": private_state},
            )

        except Exception as e:
            # Return current state even on error
            priv, pub = self.engine.get_current_states_for_observation()
            return ToolResult(
                ok=False,
                error=str(e),
                payload={"public_state": pub, "private_state": priv},
            )
|
93
|
+
|
94
|
+
|
95
|
+
class NetHackEnvironment(StatefulEnvironment, ReproducibleEnvironment[NetHackEngine]):
    """Environment that drives a ``NetHackEngine`` through the ``interact`` tool."""

    def __init__(
        self,
        task_instance: TaskInstance,
        custom_step_obs: Optional[GetObservationCallable] = None,
        custom_ckpt_obs: Optional[GetObservationCallable] = None,
    ):
        """Create the engine and the interact tool for ``task_instance``.

        Args:
            task_instance: Task handed to the ``NetHackEngine`` constructor.
            custom_step_obs: Observation callable used for initialize/step;
                a ``NetHackObservationCallable`` is created when falsy.
            custom_ckpt_obs: Observation callable used for checkpoint and
                terminate; a ``NetHackCheckpointObservationCallable`` is
                created when falsy.
        """
        self.name = "NetHack"
        self.task_instance = task_instance
        self.custom_step_observation_callable = (
            custom_step_obs if custom_step_obs else NetHackObservationCallable()
        )
        self.custom_checkpoint_observation_callable = (
            custom_ckpt_obs if custom_ckpt_obs else NetHackCheckpointObservationCallable()
        )
        self.engine = NetHackEngine(task_instance)
        self._interact_tool = NetHackInteractTool(self.engine)

    async def initialize(self) -> InternalObservation:
        """Reset the engine and return the first step-style observation."""
        private_state, public_state = await self.engine._reset_engine()
        return await self._to_observation(
            private_state, public_state, self.custom_step_observation_callable
        )

    async def step(
        self, tool_calls: Union[List[EnvToolCall], EnvToolCall, Dict, List[Dict], str]
    ) -> InternalObservation:
        """Normalize ``tool_calls``, run the interact tool, and build an observation.

        When normalization raises ``ValueError`` or the tool reports failure,
        the observation is built from the engine's current states and carries
        an extra ``"error"`` entry.
        """
        try:
            call = self.validate_tool_calls(tool_calls)
        except ValueError as exc:
            # Malformed request: report the problem alongside the current state.
            priv, pub = self.engine.get_current_states_for_observation()
            return await self._to_observation(
                priv,
                pub,
                self.custom_step_observation_callable,
                extra_obs={"error": str(exc)},
            )

        outcome = await self._interact_tool(call)

        if not outcome.ok:
            # Tool failed: report its error alongside the current state.
            priv, pub = self.engine.get_current_states_for_observation()
            return await self._to_observation(
                priv,
                pub,
                self.custom_step_observation_callable,
                extra_obs={"error": outcome.error},
            )

        priv = outcome.payload["private_state"]
        pub = outcome.payload["public_state"]
        return await self._to_observation(priv, pub, self.custom_step_observation_callable)

    async def checkpoint(self) -> InternalObservation:
        """Return a checkpoint-style observation of the engine's current states."""
        private_state, public_state = self.engine.get_current_states_for_observation()
        return await self._to_observation(
            private_state, public_state, self.custom_checkpoint_observation_callable
        )

    async def terminate(self) -> InternalObservation:
        """Flag the current states as terminated and return a checkpoint-style observation."""
        private_state, public_state = self.engine.get_current_states_for_observation()

        # Set the flag on both state objects before observing them.
        public_state.terminated = True
        private_state.terminated = True

        return await self._to_observation(
            private_state, public_state, self.custom_checkpoint_observation_callable
        )

    @staticmethod
    def _resolve_tool_alias(tool_name: Any) -> Any:
        """Map the ``nethack_interact`` alias onto ``interact``; pass other names through."""
        return "interact" if tool_name == "nethack_interact" else tool_name

    def validate_tool_calls(
        self, tool_calls: Union[List[EnvToolCall], EnvToolCall, Dict, List[Dict], str]
    ) -> EnvToolCall:
        """Coerce the accepted request shapes into a single ``EnvToolCall``.

        Accepted shapes: a bare action string; a dict with ``tool``/``args``,
        ``tool_name``/``args``, ``action``, ``tool_calls``, or a nested
        ``args``/``parameters``/``input`` dict holding ``action``; a list
        (only its first element is used); or an ``EnvToolCall`` instance.

        Raises:
            ValueError: If the list is empty or no shape matches.
        """
        # Bare action string.
        if isinstance(tool_calls, str):
            return EnvToolCall(tool="interact", args={"action": tool_calls})

        if isinstance(tool_calls, dict):
            if "tool" in tool_calls and "args" in tool_calls:
                # Already in tool/args form; only the alias needs resolving.
                resolved = self._resolve_tool_alias(tool_calls["tool"])
                return EnvToolCall(tool=resolved, args=tool_calls["args"])  # type: ignore[misc]

            if "tool_name" in tool_calls and "args" in tool_calls:
                # Legacy spelling of the tool key.
                resolved = self._resolve_tool_alias(tool_calls["tool_name"])
                return EnvToolCall(tool=resolved, args=tool_calls["args"])

            if "action" in tool_calls:
                return EnvToolCall(tool="interact", args={"action": tool_calls["action"]})

            if "tool_calls" in tool_calls:
                # Unwrap the container; a non-empty list recurses on its first
                # entry, anything else falls through to the checks below.
                tool_calls = tool_calls["tool_calls"]
                if isinstance(tool_calls, list) and tool_calls:
                    return self.validate_tool_calls(tool_calls[0])
            else:
                # Look for an action tucked inside a nested mapping.
                for nested_key in ("args", "parameters", "input"):
                    nested = tool_calls.get(nested_key)
                    if isinstance(nested, dict) and "action" in nested:
                        return EnvToolCall(tool="interact", args={"action": nested["action"]})

        if isinstance(tool_calls, list):
            if not tool_calls:
                raise ValueError("Empty tool calls list")
            # Only the first tool call is honoured.
            return self.validate_tool_calls(tool_calls[0])

        if isinstance(tool_calls, EnvToolCall):
            return tool_calls

        raise ValueError(
            f"Invalid tool call format. Expected action string, dict with 'action' key, "
            f"or EnvToolCall object. Got: {type(tool_calls)}"
        )

    async def _to_observation(
        self,
        private_state: NetHackPrivateState,
        public_state: NetHackPublicState,
        observation_callable: GetObservationCallable,
        extra_obs: Optional[Dict[str, Any]] = None,
    ) -> InternalObservation:
        """Build an observation with ``observation_callable`` and merge in ``extra_obs``."""
        observation = await observation_callable.get_observation(public_state, private_state)  # type: ignore[call-arg]
        if not extra_obs:
            return observation

        observation.update(extra_obs)
        return observation

    async def _serialize_engine(self) -> Any:
        """Delegate serialization to the engine."""
        snapshot = await self.engine._serialize_engine()
        return snapshot

    @classmethod
    async def _deserialize_engine(cls, task_instance: TaskInstance, snapshot: Any) -> NetHackEngine:
        """Rebuild an engine from ``snapshot``; ``task_instance`` is not used here."""
        engine = await NetHackEngine._deserialize_engine(snapshot)
        return engine

    def get_available_actions(self) -> List[str]:
        """Return every action name: NETHACK_ACTIONS keys followed by MENU_ACTIONS keys."""
        return [*NETHACK_ACTIONS.keys(), *MENU_ACTIONS.keys()]

    def get_action_descriptions(self) -> Dict[str, str]:
        """Return a new dict mapping each action name to its description."""
        descriptions = dict(NETHACK_ACTIONS)
        descriptions.update(MENU_ACTIONS)
        return descriptions
|
@@ -0,0 +1,42 @@
|
|
1
|
+
"""Helper utilities for NetHack environment."""
|
2
|
+
|
3
|
+
from .action_mapping import (
|
4
|
+
NETHACK_ACTIONS,
|
5
|
+
MENU_ACTIONS,
|
6
|
+
ALL_ACTIONS,
|
7
|
+
ACTION_CATEGORIES,
|
8
|
+
validate_action,
|
9
|
+
get_action_description,
|
10
|
+
get_actions_for_context,
|
11
|
+
convert_action_to_nle,
|
12
|
+
parse_compound_action,
|
13
|
+
)
|
14
|
+
|
15
|
+
from .observation_utils import (
|
16
|
+
format_observation_for_llm,
|
17
|
+
parse_ascii_map,
|
18
|
+
extract_game_context,
|
19
|
+
simplify_observation,
|
20
|
+
extract_inventory_from_message,
|
21
|
+
identify_item_type,
|
22
|
+
)
|
23
|
+
|
24
|
+
# Public names re-exported by this package from its helper submodules.
__all__ = [
    # Action mapping (from .action_mapping)
    "NETHACK_ACTIONS",
    "MENU_ACTIONS",
    "ALL_ACTIONS",
    "ACTION_CATEGORIES",
    "validate_action",
    "get_action_description",
    "get_actions_for_context",
    "convert_action_to_nle",
    "parse_compound_action",
    # Observation utils (from .observation_utils)
    "format_observation_for_llm",
    "parse_ascii_map",
    "extract_game_context",
    "simplify_observation",
    "extract_inventory_from_message",
    "identify_item_type",
]
|
@@ -0,0 +1,301 @@
|
|
1
|
+
"""Action mapping and validation for NetHack."""
|
2
|
+
|
3
|
+
from typing import Dict, List, Optional, Tuple
|
4
|
+
from dataclasses import dataclass
|
5
|
+
|
6
|
+
|
7
|
+
@dataclass
class ActionCategory:
    """A named group of related action names with a short description."""

    # Display name of the category.
    name: str
    # One-line summary of what the category covers.
    description: str
    # Action names that belong to the category.
    actions: List[str]
|
14
|
+
|
15
|
+
|
16
|
+
def _build_nethack_actions() -> Dict[str, str]:
    """Assemble the action-name -> description table, group by group.

    The groups are merged in the order listed, so the insertion order of the
    resulting dict follows the group order below.
    """
    # Movement actions (8 directions + wait)
    movement = {
        "north": "move north",
        "south": "move south",
        "east": "move east",
        "west": "move west",
        "northeast": "move northeast",
        "northwest": "move northwest",
        "southeast": "move southeast",
        "southwest": "move southwest",
        "wait": "wait/rest for one turn",
    }
    # Running and stairs
    movement_modifiers = {
        "run_north": "run north until something interesting",
        "run_south": "run south until something interesting",
        "run_east": "run east until something interesting",
        "run_west": "run west until something interesting",
        "go_up": "go up stairs/ladder",
        "go_down": "go down stairs/ladder",
    }
    # Basic interactions with the surroundings
    interactions = {
        "search": "search for secret doors/traps",
        "open": "open a door",
        "close": "close a door",
        "kick": "kick something",
        "force": "force a lock",
        "untrap": "untrap something",
    }
    # Inventory and equipment
    inventory = {
        "inventory": "check inventory",
        "pickup": "pick up items",
        "drop": "drop items",
        "dropall": "drop all items",
        "wear": "wear armor/accessories",
        "take_off": "take off armor/accessories",
        "wield": "wield a weapon",
        "unwield": "unwield current weapon",
        "quiver": "ready ammunition",
        "put_on": "put on accessories",
        "remove": "remove accessories",
    }
    # Using items
    item_use = {
        "eat": "eat food",
        "drink": "drink a potion",
        "read": "read a scroll/spellbook",
        "zap": "zap a wand",
        "apply": "apply/use a tool",
        "invoke": "invoke an artifact",
        "rub": "rub a lamp/stone",
        "throw": "throw an item",
        "fire": "fire from quiver",
    }
    # Magic
    magic = {
        "cast": "cast a spell",
        "pray": "pray to your deity",
        "offer": "offer sacrifice",
        "turn_undead": "turn undead (priest ability)",
    }
    # Information commands; their descriptions mark them as free actions
    information = {
        "look": "look around (FREE ACTION - doesn't advance time)",
        "farlook": "look at specific location (FREE ACTION)",
        "whatis": "identify map symbol (FREE ACTION)",
        "identify": "identify inventory item (FREE ACTION)",
        "discoveries": "list discoveries (FREE ACTION)",
        "conduct": "check conduct (FREE ACTION)",
        "attributes": "check attributes (FREE ACTION)",
    }
    # Character actions
    character = {
        "enhance": "enhance skills",
        "sit": "sit down",
        "pay": "pay shopkeeper",
        "chat": "talk to someone",
        "loot": "loot a container",
        "engrave": "write on the ground",
        "monster_ability": "use monster ability",
    }
    # Game commands
    game = {
        "save": "save the game",
        "quit": "quit the game",
        "help": "show help",
        "version": "show version",
        "history": "show message history",
        "name": "name an item/monster",
        "call": "call item type",
        "adjust": "adjust inventory letters",
    }
    # Special responses for prompts/menus
    responses = {
        "yes": "answer yes",
        "no": "answer no",
        "all": "select all",
        "none": "select none",
        "menu_next": "next menu page",
        "menu_previous": "previous menu page",
        "escape": "cancel/escape",
    }

    table: Dict[str, str] = {}
    for group in (
        movement,
        movement_modifiers,
        interactions,
        inventory,
        item_use,
        magic,
        information,
        character,
        game,
        responses,
    ):
        table.update(group)
    return table


# Named (non-menu) NetHack actions mapped to human-readable descriptions.
NETHACK_ACTIONS: Dict[str, str] = _build_nethack_actions()
|
103
|
+
|
104
|
+
# Single-character menu/prompt responses: the lowercase letters first, then
# the uppercase letters, then the digits 0-9.
MENU_ACTIONS: Dict[str, str] = {
    chr(code): f"select option {chr(code)}"
    for first, last in (("a", "z"), ("A", "Z"), ("0", "9"))
    for code in range(ord(first), ord(last) + 1)
}
|
110
|
+
|
111
|
+
# Every recognised action: the named actions first, with the single-character
# menu selections merged in afterwards.
ALL_ACTIONS = dict(NETHACK_ACTIONS)
ALL_ACTIONS.update(MENU_ACTIONS)
|
113
|
+
|
114
|
+
# Action categories for organization, built from (name, description, actions)
# rows; each category receives its own list of action names.
ACTION_CATEGORIES = [
    ActionCategory(name=title, description=summary, actions=list(members))
    for title, summary, members in (
        (
            "Movement",
            "Basic movement and navigation",
            (
                "north",
                "south",
                "east",
                "west",
                "northeast",
                "northwest",
                "southeast",
                "southwest",
                "wait",
                "go_up",
                "go_down",
            ),
        ),
        (
            "Inventory",
            "Managing items and equipment",
            (
                "inventory",
                "pickup",
                "drop",
                "wear",
                "wield",
                "eat",
                "drink",
                "read",
                "apply",
                "throw",
            ),
        ),
        (
            "Combat",
            "Fighting and defense (attack by moving into monsters!)",
            ("fire", "zap", "throw", "kick"),
        ),
        (
            "Exploration",
            "Discovering the dungeon",
            ("search", "open", "close", "look", "farlook"),
        ),
        (
            "Magic",
            "Spells and divine intervention",
            ("cast", "pray", "offer", "invoke"),
        ),
        (
            "Game",
            "Meta game commands",
            ("save", "quit", "help", "inventory"),
        ),
    )
]
|
170
|
+
|
171
|
+
|
172
|
+
def validate_action(action: str, game_state: Optional[Dict] = None) -> Tuple[bool, Optional[str]]:
    """
    Decide whether ``action`` may be issued, optionally considering game state.

    Args:
        action: Action name to check against ALL_ACTIONS.
        game_state: Optional mapping; the keys read are ``in_menu``,
            ``terminated`` and ``stairs_here`` (each treated as False when
            absent). A missing or empty mapping skips the contextual checks.

    Returns:
        ``(True, None)`` when the action is accepted, otherwise
        ``(False, message)`` explaining the rejection.
    """
    # Unknown names are rejected regardless of context.
    if action not in ALL_ACTIONS:
        return False, f"Unknown action: {action}. Use 'help' to see available actions."

    # No context supplied: the name check is all there is.
    if not game_state:
        return True, None

    # Inside a menu only selection keys and menu navigation are accepted.
    if game_state.get("in_menu", False):
        navigation = ("escape", "menu_next", "menu_previous")
        if not (action in MENU_ACTIONS or action in navigation):
            return False, "Currently in a menu. Use letter/number keys or 'escape'."

    # After the game has ended only quit/save remain.
    if game_state.get("terminated", False) and action not in ("quit", "save"):
        return False, "Game is over. You can only 'save' or 'quit'."

    # Stair actions require the state to report stairs at the current spot.
    if action in ("go_up", "go_down") and not game_state.get("stairs_here", False):
        return False, f"No stairs here to {action.replace('go_', '')}."

    return True, None
|
209
|
+
|
210
|
+
|
211
|
+
def get_action_description(action: str) -> str:
    """Return the description registered for ``action``, or ``"Unknown action"``."""
    try:
        return ALL_ACTIONS[action]
    except KeyError:
        return "Unknown action"
|
214
|
+
|
215
|
+
|
216
|
+
def get_actions_for_context(game_state: Dict) -> List[str]:
    """Suggest action names that fit the situation described by ``game_state``.

    Args:
        game_state: Mapping of context flags. The keys read are ``in_menu``,
            ``menu_items``, ``terminated``, ``stairs_here``, ``stairs_down``,
            ``stairs_up``, ``items_here``, ``door_here`` and ``door_open``;
            each is treated as False (or empty) when absent.

    Returns:
        In a menu: ``"escape"`` followed by one letter (``a``-``z``) per menu
        item, capped at 26 letters. After termination: ``["quit", "save"]``.
        Otherwise: a list of common actions plus context-specific ones, with
        no name listed twice.
    """
    if game_state.get("in_menu", False):
        # A missing or None item list simply yields no letter options.
        menu_items = game_state.get("menu_items") or []
        # zip stops at whichever is shorter, so at most 26 letters are offered.
        letters = [chr(ord("a") + index) for index, _ in zip(range(26), menu_items)]
        return ["escape", *letters]

    if game_state.get("terminated", False):
        return ["quit", "save"]

    # Normal gameplay: start from the always-relevant actions.
    actions = [
        "north",
        "south",
        "east",
        "west",
        "search",
        "inventory",
        "pickup",
        "look",
        "wait",
        "open",
        "close",
    ]

    # Collect context-specific candidates in a fixed order.
    extras: List[str] = []
    if game_state.get("stairs_here", False):
        if game_state.get("stairs_down", False):
            extras.append("go_down")
        if game_state.get("stairs_up", False):
            extras.append("go_up")
    if game_state.get("items_here", False):
        extras.append("pickup")
    if game_state.get("door_here", False):
        extras.append("close" if game_state.get("door_open", False) else "open")

    # Several candidates are already in the base list; skip those so the
    # result never contains the same action twice.
    for name in extras:
        if name not in actions:
            actions.append(name)

    return actions
|
265
|
+
|
266
|
+
|
267
|
+
def convert_action_to_nle(action: str, action_map: Dict[str, int]) -> Optional[int]:
    """
    Convert a string action to its NLE integer action.

    The result is a plain lookup in ``action_map``. Names that are not in the
    map, including the special ``"terminate"`` request (handled at a higher
    level) and unmapped single-character menu keys, yield ``None``.

    Args:
        action: String action name
        action_map: Dictionary mapping action names to NLE indices

    Returns:
        NLE action index or None if not found
    """
    # The previous special-case branches could only ever return None or repeat
    # this same lookup, so a single membership test covers every case.
    if action in action_map:
        return action_map[action]
    return None
|
293
|
+
|
294
|
+
|
295
|
+
def parse_compound_action(action: str) -> List[str]:
    """
    Split a compound action into its individual steps.

    No parsing is performed yet: the input is returned unchanged as the only
    element of the list. The intended future shape is, e.g.,
    "go to stairs and descend" -> ["navigate_to_stairs", "go_down"].
    """
    # Placeholder for richer parsing; a single-step plan for now.
    steps = [action]
    return steps
|