synth-ai 0.2.0__py3-none-any.whl → 0.2.1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (266) hide show
  1. synth_ai/__init__.py +28 -2
  2. synth_ai/core/system.py +4 -0
  3. synth_ai/environments/__init__.py +35 -0
  4. synth_ai/environments/environment/__init__.py +1 -0
  5. synth_ai/environments/environment/artifacts/__init__.py +1 -0
  6. synth_ai/environments/environment/artifacts/base.py +50 -0
  7. synth_ai/environments/environment/core.py +22 -0
  8. synth_ai/environments/environment/db/__init__.py +1 -0
  9. synth_ai/environments/environment/db/sqlite.py +45 -0
  10. synth_ai/environments/environment/registry.py +24 -0
  11. synth_ai/environments/environment/resources/sqlite.py +46 -0
  12. synth_ai/environments/environment/results.py +1 -0
  13. synth_ai/environments/environment/rewards/__init__.py +1 -0
  14. synth_ai/environments/environment/rewards/core.py +28 -0
  15. synth_ai/environments/environment/shared_engine.py +26 -0
  16. synth_ai/environments/environment/tools/__init__.py +34 -0
  17. synth_ai/environments/examples/__init__.py +1 -0
  18. synth_ai/environments/examples/crafter_classic/__init__.py +8 -0
  19. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +58 -0
  20. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +152 -0
  21. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +1194 -0
  22. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +51 -0
  23. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +872 -0
  24. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1412 -0
  25. synth_ai/environments/examples/crafter_classic/agent_demos/test_crafter_react_agent.py +1110 -0
  26. synth_ai/environments/examples/crafter_classic/config_logging.py +111 -0
  27. synth_ai/environments/examples/crafter_classic/engine.py +502 -0
  28. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +63 -0
  29. synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +5 -0
  30. synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +74 -0
  31. synth_ai/environments/examples/crafter_classic/environment.py +255 -0
  32. synth_ai/environments/examples/crafter_classic/taskset.py +228 -0
  33. synth_ai/environments/examples/enron/agent_demos/test_synth_react.py +535 -0
  34. synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +156 -0
  35. synth_ai/environments/examples/enron/art_helpers/local_email_db.py +280 -0
  36. synth_ai/environments/examples/enron/art_helpers/types_enron.py +24 -0
  37. synth_ai/environments/examples/enron/engine.py +291 -0
  38. synth_ai/environments/examples/enron/environment.py +165 -0
  39. synth_ai/environments/examples/enron/taskset.py +112 -0
  40. synth_ai/environments/examples/enron/units/keyword_stats.py +111 -0
  41. synth_ai/environments/examples/enron/units/test_email_index.py +8 -0
  42. synth_ai/environments/examples/minigrid/__init__.py +48 -0
  43. synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +1188 -0
  44. synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +47 -0
  45. synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +562 -0
  46. synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +220 -0
  47. synth_ai/environments/examples/minigrid/agent_demos/test_minigrid_react_agent.py +393 -0
  48. synth_ai/environments/examples/minigrid/engine.py +589 -0
  49. synth_ai/environments/examples/minigrid/environment.py +274 -0
  50. synth_ai/environments/examples/minigrid/environment_mapping.py +242 -0
  51. synth_ai/environments/examples/minigrid/puzzle_loader.py +416 -0
  52. synth_ai/environments/examples/minigrid/taskset.py +583 -0
  53. synth_ai/environments/examples/minigrid/units/test_action_behavior.py +226 -0
  54. synth_ai/environments/examples/minigrid/units/test_debug_messages.py +83 -0
  55. synth_ai/environments/examples/minigrid/units/test_exploration.py +120 -0
  56. synth_ai/environments/examples/minigrid/units/test_minigrid_engine.py +214 -0
  57. synth_ai/environments/examples/minigrid/units/test_minigrid_environment.py +238 -0
  58. synth_ai/environments/examples/minigrid/units/test_minigrid_environment_mapping.py +301 -0
  59. synth_ai/environments/examples/minigrid/units/test_minigrid_taskset.py +210 -0
  60. synth_ai/environments/examples/nethack/__init__.py +7 -0
  61. synth_ai/environments/examples/nethack/achievements.py +337 -0
  62. synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +981 -0
  63. synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +74 -0
  64. synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +832 -0
  65. synth_ai/environments/examples/nethack/agent_demos/test_nethack_react_agent.py +1112 -0
  66. synth_ai/environments/examples/nethack/engine.py +738 -0
  67. synth_ai/environments/examples/nethack/environment.py +255 -0
  68. synth_ai/environments/examples/nethack/helpers/__init__.py +42 -0
  69. synth_ai/environments/examples/nethack/helpers/action_mapping.py +301 -0
  70. synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +401 -0
  71. synth_ai/environments/examples/nethack/helpers/observation_utils.py +433 -0
  72. synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +201 -0
  73. synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +268 -0
  74. synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +308 -0
  75. synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +430 -0
  76. synth_ai/environments/examples/nethack/taskset.py +323 -0
  77. synth_ai/environments/examples/nethack/units/test_nethack_engine.py +277 -0
  78. synth_ai/environments/examples/nethack/units/test_nethack_environment.py +281 -0
  79. synth_ai/environments/examples/nethack/units/test_nethack_taskset.py +213 -0
  80. synth_ai/environments/examples/nethack/units/test_recording.py +307 -0
  81. synth_ai/environments/examples/red/__init__.py +7 -0
  82. synth_ai/environments/examples/red/agent_demos/__init__.py +1 -0
  83. synth_ai/environments/examples/red/agent_demos/test_synth_react.py +1471 -0
  84. synth_ai/environments/examples/red/config_logging.py +110 -0
  85. synth_ai/environments/examples/red/engine.py +693 -0
  86. synth_ai/environments/examples/red/engine_helpers/__init__.py +1 -0
  87. synth_ai/environments/examples/red/engine_helpers/memory_map.py +28 -0
  88. synth_ai/environments/examples/red/engine_helpers/reward_components.py +275 -0
  89. synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +142 -0
  90. synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +56 -0
  91. synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +283 -0
  92. synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +149 -0
  93. synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +137 -0
  94. synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +56 -0
  95. synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +330 -0
  96. synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +120 -0
  97. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +558 -0
  98. synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +312 -0
  99. synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +147 -0
  100. synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +246 -0
  101. synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +367 -0
  102. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +139 -0
  103. synth_ai/environments/examples/red/environment.py +235 -0
  104. synth_ai/environments/examples/red/taskset.py +77 -0
  105. synth_ai/environments/examples/red/test_fixes.py +125 -0
  106. synth_ai/environments/examples/red/test_fixes_mock.py +148 -0
  107. synth_ai/environments/examples/red/units/__init__.py +1 -0
  108. synth_ai/environments/examples/red/units/test_basic_functionality.py +97 -0
  109. synth_ai/environments/examples/red/units/test_button_press_requirements.py +217 -0
  110. synth_ai/environments/examples/red/units/test_engine.py +192 -0
  111. synth_ai/environments/examples/red/units/test_environment.py +455 -0
  112. synth_ai/environments/examples/red/units/test_exploration_strategy.py +227 -0
  113. synth_ai/environments/examples/red/units/test_integration.py +217 -0
  114. synth_ai/environments/examples/red/units/test_memory_extraction.py +111 -0
  115. synth_ai/environments/examples/red/units/test_menu_bug_reproduction.py +1100 -0
  116. synth_ai/environments/examples/red/units/test_movement_debug.py +255 -0
  117. synth_ai/environments/examples/red/units/test_pokemon_mcts_debug.py +163 -0
  118. synth_ai/environments/examples/red/units/test_pokemon_mcts_verbose.py +117 -0
  119. synth_ai/environments/examples/red/units/test_red_basic.py +145 -0
  120. synth_ai/environments/examples/red/units/test_red_comprehensive.py +323 -0
  121. synth_ai/environments/examples/red/units/test_retry_movement.py +195 -0
  122. synth_ai/environments/examples/red/units/test_reward_components.py +186 -0
  123. synth_ai/environments/examples/red/units/test_rom_integration.py +260 -0
  124. synth_ai/environments/examples/red/units/test_taskset.py +116 -0
  125. synth_ai/environments/examples/red/units/test_tree.py +448 -0
  126. synth_ai/environments/examples/sokoban/__init__.py +1 -0
  127. synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +900 -0
  128. synth_ai/environments/examples/sokoban/agent_demos/test_dspy_react.py +1 -0
  129. synth_ai/environments/examples/sokoban/agent_demos/test_sokoban_react_agent.py +498 -0
  130. synth_ai/environments/examples/sokoban/agent_demos/test_synth_lats.py +1 -0
  131. synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_locally.py +748 -0
  132. synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_service.py +296 -0
  133. synth_ai/environments/examples/sokoban/engine.py +675 -0
  134. synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +1 -0
  135. synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +656 -0
  136. synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +17 -0
  137. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +3 -0
  138. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +129 -0
  139. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +370 -0
  140. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +331 -0
  141. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +305 -0
  142. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +66 -0
  143. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +114 -0
  144. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +122 -0
  145. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +394 -0
  146. synth_ai/environments/examples/sokoban/environment.py +228 -0
  147. synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +438 -0
  148. synth_ai/environments/examples/sokoban/puzzle_loader.py +311 -0
  149. synth_ai/environments/examples/sokoban/taskset.py +425 -0
  150. synth_ai/environments/examples/sokoban/units/astar_common.py +94 -0
  151. synth_ai/environments/examples/sokoban/units/test_building_task_set.py +49 -0
  152. synth_ai/environments/examples/sokoban/units/test_false_positive.py +120 -0
  153. synth_ai/environments/examples/sokoban/units/test_simple_run_through_environment.py +119 -0
  154. synth_ai/environments/examples/sokoban/units/test_sokoban_environment.py +98 -0
  155. synth_ai/environments/examples/sokoban/units/test_tree.py +364 -0
  156. synth_ai/environments/examples/tictactoe/__init__.py +1 -0
  157. synth_ai/environments/examples/tictactoe/agent_demos/test_synth_react.py +266 -0
  158. synth_ai/environments/examples/tictactoe/agent_demos/test_tictactoe_react_agent.py +470 -0
  159. synth_ai/environments/examples/tictactoe/engine.py +368 -0
  160. synth_ai/environments/examples/tictactoe/environment.py +239 -0
  161. synth_ai/environments/examples/tictactoe/taskset.py +214 -0
  162. synth_ai/environments/examples/tictactoe/units/test_tictactoe_engine.py +393 -0
  163. synth_ai/environments/examples/tictactoe/units/test_tictactoe_environment.py +493 -0
  164. synth_ai/environments/examples/tictactoe/units/test_tictactoe_taskset.py +191 -0
  165. synth_ai/environments/examples/verilog/__init__.py +10 -0
  166. synth_ai/environments/examples/verilog/agent_demos/test_synth_react.py +520 -0
  167. synth_ai/environments/examples/verilog/engine.py +328 -0
  168. synth_ai/environments/examples/verilog/environment.py +349 -0
  169. synth_ai/environments/examples/verilog/taskset.py +418 -0
  170. synth_ai/environments/examples/verilog/units/test_verilog_engine.py +466 -0
  171. synth_ai/environments/examples/verilog/units/test_verilog_environment.py +585 -0
  172. synth_ai/environments/examples/verilog/units/test_verilog_integration.py +383 -0
  173. synth_ai/environments/examples/verilog/units/test_verilog_taskset.py +457 -0
  174. synth_ai/environments/reproducibility/core.py +42 -0
  175. synth_ai/environments/reproducibility/tree.py +364 -0
  176. synth_ai/environments/service/app.py +78 -0
  177. synth_ai/environments/service/core_routes.py +775 -0
  178. synth_ai/environments/service/external_registry.py +57 -0
  179. synth_ai/environments/service/registry.py +9 -0
  180. synth_ai/environments/stateful/__init__.py +1 -0
  181. synth_ai/environments/stateful/core.py +28 -0
  182. synth_ai/environments/stateful/engine.py +21 -0
  183. synth_ai/environments/stateful/state.py +7 -0
  184. synth_ai/environments/tasks/api.py +19 -0
  185. synth_ai/environments/tasks/core.py +78 -0
  186. synth_ai/environments/tasks/filters.py +39 -0
  187. synth_ai/environments/tasks/utils.py +89 -0
  188. synth_ai/environments/v0_observability/history.py +3 -0
  189. synth_ai/environments/v0_observability/log.py +2 -0
  190. synth_ai/lm/caching/constants.py +1 -0
  191. synth_ai/{zyk/lms → lm}/caching/ephemeral.py +4 -8
  192. synth_ai/{zyk/lms → lm}/caching/handler.py +15 -15
  193. synth_ai/{zyk/lms → lm}/caching/initialize.py +2 -4
  194. synth_ai/{zyk/lms → lm}/caching/persistent.py +4 -10
  195. synth_ai/{zyk/lms → lm}/config.py +2 -1
  196. synth_ai/{zyk/lms → lm}/constants.py +2 -2
  197. synth_ai/{zyk/lms → lm}/core/all.py +10 -10
  198. synth_ai/{zyk/lms → lm}/core/main.py +57 -33
  199. synth_ai/{zyk/lms → lm}/core/vendor_clients.py +12 -10
  200. synth_ai/lm/cost/monitor.py +1 -0
  201. synth_ai/lm/cost/statefulness.py +1 -0
  202. synth_ai/lm/provider_support/__init__.py +8 -0
  203. synth_ai/lm/provider_support/anthropic.py +945 -0
  204. synth_ai/lm/provider_support/openai.py +1115 -0
  205. synth_ai/lm/provider_support/suppress_logging.py +31 -0
  206. synth_ai/{zyk/lms → lm}/structured_outputs/handler.py +58 -80
  207. synth_ai/{zyk/lms → lm}/structured_outputs/inject.py +6 -20
  208. synth_ai/{zyk/lms → lm}/structured_outputs/rehabilitate.py +6 -12
  209. synth_ai/{zyk/lms → lm}/vendors/core/anthropic_api.py +21 -30
  210. synth_ai/{zyk/lms → lm}/vendors/core/gemini_api.py +35 -32
  211. synth_ai/{zyk/lms → lm}/vendors/core/mistral_api.py +19 -28
  212. synth_ai/{zyk/lms → lm}/vendors/core/openai_api.py +26 -36
  213. synth_ai/{zyk/lms → lm}/vendors/openai_standard.py +29 -33
  214. synth_ai/{zyk/lms → lm}/vendors/retries.py +1 -1
  215. synth_ai/lm/vendors/supported/__init__.py +0 -0
  216. synth_ai/{zyk/lms → lm}/vendors/supported/custom_endpoint.py +131 -118
  217. synth_ai/{zyk/lms → lm}/vendors/supported/deepseek.py +4 -8
  218. synth_ai/{zyk/lms → lm}/vendors/supported/grok.py +6 -8
  219. synth_ai/{zyk/lms → lm}/vendors/supported/groq.py +1 -1
  220. synth_ai/{zyk/lms → lm}/vendors/supported/ollama.py +2 -2
  221. synth_ai/{zyk/lms → lm}/vendors/supported/openrouter.py +18 -16
  222. synth_ai/{zyk/lms → lm}/vendors/supported/together.py +1 -1
  223. synth_ai/tracing/__init__.py +0 -0
  224. synth_ai/tracing/abstractions.py +224 -0
  225. synth_ai/tracing/base_client.py +91 -0
  226. synth_ai/tracing/client_manager.py +131 -0
  227. synth_ai/tracing/config.py +140 -0
  228. synth_ai/tracing/context.py +146 -0
  229. synth_ai/tracing/decorators.py +679 -0
  230. synth_ai/tracing/events/__init__.py +0 -0
  231. synth_ai/tracing/events/manage.py +147 -0
  232. synth_ai/tracing/events/scope.py +86 -0
  233. synth_ai/tracing/events/store.py +227 -0
  234. synth_ai/tracing/immediate_client.py +152 -0
  235. synth_ai/tracing/local.py +18 -0
  236. synth_ai/tracing/log_client_base.py +74 -0
  237. synth_ai/tracing/retry_queue.py +187 -0
  238. synth_ai/tracing/trackers.py +515 -0
  239. synth_ai/tracing/upload.py +504 -0
  240. synth_ai/tracing/utils.py +9 -0
  241. synth_ai/zyk/__init__.py +28 -2
  242. synth_ai-0.2.1.dev0.dist-info/METADATA +349 -0
  243. synth_ai-0.2.1.dev0.dist-info/RECORD +261 -0
  244. {synth_ai-0.2.0.dist-info → synth_ai-0.2.1.dev0.dist-info}/WHEEL +1 -1
  245. synth_ai/zyk/lms/caching/constants.py +0 -1
  246. synth_ai/zyk/lms/cost/monitor.py +0 -1
  247. synth_ai/zyk/lms/cost/statefulness.py +0 -1
  248. synth_ai-0.2.0.dist-info/METADATA +0 -36
  249. synth_ai-0.2.0.dist-info/RECORD +0 -50
  250. /synth_ai/{zyk/lms/__init__.py → environments/reproducibility/helpers.py} +0 -0
  251. /synth_ai/{zyk/lms/caching → lm}/__init__.py +0 -0
  252. /synth_ai/{zyk/lms/core → lm/caching}/__init__.py +0 -0
  253. /synth_ai/{zyk/lms → lm}/caching/dbs.py +0 -0
  254. /synth_ai/{zyk/lms/cost → lm/core}/__init__.py +0 -0
  255. /synth_ai/{zyk/lms → lm}/core/exceptions.py +0 -0
  256. /synth_ai/{zyk/lms/structured_outputs → lm/cost}/__init__.py +0 -0
  257. /synth_ai/{zyk/lms/vendors → lm/structured_outputs}/__init__.py +0 -0
  258. /synth_ai/{zyk/lms → lm}/tools/__init__.py +0 -0
  259. /synth_ai/{zyk/lms → lm}/tools/base.py +0 -0
  260. /synth_ai/{zyk/lms/vendors/core → lm/vendors}/__init__.py +0 -0
  261. /synth_ai/{zyk/lms → lm}/vendors/base.py +0 -0
  262. /synth_ai/{zyk/lms/vendors/local → lm/vendors/core}/__init__.py +0 -0
  263. /synth_ai/{zyk/lms/vendors/supported → lm/vendors/local}/__init__.py +0 -0
  264. /synth_ai/{zyk/lms → lm}/vendors/local/ollama.py +0 -0
  265. {synth_ai-0.2.0.dist-info → synth_ai-0.2.1.dev0.dist-info/licenses}/LICENSE +0 -0
  266. {synth_ai-0.2.0.dist-info → synth_ai-0.2.1.dev0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,255 @@
1
+ """NetHack environment wrapper for synth-env framework."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from typing import Dict, Any, Optional, List, Union
7
+ from pydantic import BaseModel
8
+
9
+ from synth_ai.environments.stateful.core import StatefulEnvironment
10
+ from synth_ai.environments.reproducibility.core import ReproducibleEnvironment
11
+ from synth_ai.environments.environment.tools import AbstractTool, EnvToolCall, ToolResult
12
+ from synth_ai.environments.environment.shared_engine import (
13
+ GetObservationCallable,
14
+ InternalObservation,
15
+ )
16
+ from synth_ai.environments.tasks.core import TaskInstance
17
+
18
+ from .engine import (
19
+ NetHackEngine,
20
+ NetHackObservationCallable,
21
+ NetHackCheckpointObservationCallable,
22
+ NetHackPublicState,
23
+ NetHackPrivateState,
24
+ )
25
+ from .helpers import (
26
+ validate_action,
27
+ get_action_description,
28
+ NETHACK_ACTIONS,
29
+ MENU_ACTIONS,
30
+ )
31
+
32
+
33
class NetHackActionInput(BaseModel):
    """Argument schema for the NetHack ``interact`` tool.

    A call carries exactly one field, ``action``, holding the action string
    to perform.
    """

    # Expected to be a key of NETHACK_ACTIONS or MENU_ACTIONS.
    action: str
37
+
38
+
39
class NetHackInteractTool(AbstractTool):
    """Tool that applies one action string to a ``NetHackEngine``.

    The requested action is checked with ``validate_action`` against the
    engine's current public state before it is forwarded to the engine.
    Every outcome, including failures, is reported as a ``ToolResult`` whose
    payload carries both the public and the private state.
    """

    name = "interact"
    description = (
        "Perform an action in the NetHack dungeon. Available actions include "
        "movement (north, south, east, west), combat (fight), inventory management "
        "(inventory, pickup, drop), and many others. In menus, use letter keys (a-z) "
        "or numbers (0-9) to select options, or 'escape' to cancel."
    )
    call_schema = NetHackActionInput
    result_schema = ToolResult

    def __init__(self, engine: NetHackEngine):
        """Bind the tool to the engine it will drive."""
        self.engine = engine

    async def __call__(self, call: EnvToolCall) -> ToolResult:
        """Validate and execute the action named in ``call.args["action"]``.

        Returns:
            ``ToolResult(ok=True)`` with the post-step states when the action
            is accepted and the engine step succeeds; otherwise
            ``ToolResult(ok=False)`` with an error message and the states as
            currently reported by the engine.
        """
        try:
            # A missing "action" key raises KeyError, which is reported by
            # the generic handler below.
            requested = call.args["action"]

            current_private, current_public = self.engine.get_current_states_for_observation()
            validation_context = {
                "in_menu": current_public.in_menu,
                "terminated": current_public.terminated,
                # Placeholder: stair detection would need map parsing.
                "stairs_here": False,
            }

            accepted, reason = validate_action(requested, validation_context)
            if accepted:
                stepped_private, stepped_public = await self.engine._step_engine(requested)
                return ToolResult(
                    ok=True,
                    payload={
                        "public_state": stepped_public,
                        "private_state": stepped_private,
                    },
                )

            # Rejected before reaching the engine: report why, with the
            # unchanged state attached.
            return ToolResult(
                ok=False,
                error=reason or f"Invalid action: {requested}",
                payload={
                    "public_state": current_public,
                    "private_state": current_private,
                },
            )
        except Exception as exc:
            # Best effort: still hand the caller the engine's current state
            # alongside the error text.
            fallback_private, fallback_public = self.engine.get_current_states_for_observation()
            return ToolResult(
                ok=False,
                error=str(exc),
                payload={
                    "public_state": fallback_public,
                    "private_state": fallback_private,
                },
            )
93
+
94
+
95
class NetHackEnvironment(StatefulEnvironment, ReproducibleEnvironment[NetHackEngine]):
    """Environment that exposes a ``NetHackEngine`` through one ``interact`` tool.

    Incoming tool calls are normalized by ``validate_tool_calls`` and executed
    by a ``NetHackInteractTool``. Engine states are turned into observations
    by the configured step / checkpoint observation callables.
    """

    def __init__(
        self,
        task_instance: TaskInstance,
        custom_step_obs: Optional[GetObservationCallable] = None,
        custom_ckpt_obs: Optional[GetObservationCallable] = None,
    ):
        """Create the engine, the interact tool and the observation callables.

        Args:
            task_instance: Task passed to ``NetHackEngine``.
            custom_step_obs: Observation callable used by ``initialize`` and
                ``step``; a ``NetHackObservationCallable`` when falsy.
            custom_ckpt_obs: Observation callable used by ``checkpoint`` and
                ``terminate``; a ``NetHackCheckpointObservationCallable``
                when falsy.
        """
        self.name = "NetHack"
        self.task_instance = task_instance

        if custom_step_obs:
            self.custom_step_observation_callable = custom_step_obs
        else:
            self.custom_step_observation_callable = NetHackObservationCallable()

        if custom_ckpt_obs:
            self.custom_checkpoint_observation_callable = custom_ckpt_obs
        else:
            self.custom_checkpoint_observation_callable = NetHackCheckpointObservationCallable()

        self.engine = NetHackEngine(task_instance)
        self._interact_tool = NetHackInteractTool(self.engine)

    async def initialize(self) -> InternalObservation:
        """Reset the engine and return the first step observation."""
        private_state, public_state = await self.engine._reset_engine()
        return await self._to_observation(
            private_state, public_state, self.custom_step_observation_callable
        )

    async def step(
        self, tool_calls: Union[List[EnvToolCall], EnvToolCall, Dict, List[Dict], str]
    ) -> InternalObservation:
        """Run one tool call and return the resulting step observation.

        When the call cannot be normalized, or the tool reports a failure,
        the observation is built from the engine's current state and gains
        an ``"error"`` entry describing the problem.
        """
        try:
            normalized = self.validate_tool_calls(tool_calls)
        except ValueError as exc:
            private_state, public_state = self.engine.get_current_states_for_observation()
            return await self._to_observation(
                private_state,
                public_state,
                self.custom_step_observation_callable,
                extra_obs={"error": str(exc)},
            )

        outcome = await self._interact_tool(normalized)

        if not outcome.ok:
            # The tool failed: report its error on top of the current state.
            private_state, public_state = self.engine.get_current_states_for_observation()
            return await self._to_observation(
                private_state,
                public_state,
                self.custom_step_observation_callable,
                extra_obs={"error": outcome.error},
            )

        private_state = outcome.payload["private_state"]
        public_state = outcome.payload["public_state"]
        return await self._to_observation(
            private_state, public_state, self.custom_step_observation_callable
        )

    async def checkpoint(self) -> InternalObservation:
        """Return a checkpoint observation of the engine's current state."""
        private_state, public_state = self.engine.get_current_states_for_observation()
        return await self._to_observation(
            private_state, public_state, self.custom_checkpoint_observation_callable
        )

    async def terminate(self) -> InternalObservation:
        """Flag the current states as terminated and return a checkpoint observation.

        NOTE(review): the flags are set on the objects returned by
        ``get_current_states_for_observation``; whether that also changes the
        engine's own state depends on that accessor - confirm.
        """
        private_state, public_state = self.engine.get_current_states_for_observation()

        public_state.terminated = True
        private_state.terminated = True

        return await self._to_observation(
            private_state, public_state, self.custom_checkpoint_observation_callable
        )

    def validate_tool_calls(
        self, tool_calls: Union[List[EnvToolCall], EnvToolCall, Dict, List[Dict], str]
    ) -> EnvToolCall:
        """Normalize the accepted call shapes into a single ``EnvToolCall``.

        Accepted shapes:
            * a bare action string;
            * a dict with ``tool``/``args`` or legacy ``tool_name``/``args``
              (the name ``nethack_interact`` is mapped to ``interact``);
            * a dict with an ``action`` key;
            * a dict with a ``tool_calls`` entry (first element is used);
            * a dict holding ``action`` inside ``args``, ``parameters`` or
              ``input``;
            * a list (first element is used);
            * an ``EnvToolCall`` instance, returned unchanged.

        Raises:
            ValueError: For an empty list or any unrecognized shape.
        """
        candidate = tool_calls

        # Bare string: treat it as the action itself.
        if isinstance(candidate, str):
            return EnvToolCall(tool="interact", args={"action": candidate})

        if isinstance(candidate, dict):
            # Explicit tool + args, current ("tool") or legacy ("tool_name") key.
            for name_key in ("tool", "tool_name"):
                if name_key in candidate and "args" in candidate:
                    tool_name = candidate[name_key]
                    if tool_name == "nethack_interact":
                        tool_name = "interact"
                    return EnvToolCall(tool=tool_name, args=candidate["args"])  # type: ignore[misc]

            if "action" in candidate:
                return EnvToolCall(tool="interact", args={"action": candidate["action"]})

            if "tool_calls" in candidate:
                # Unwrap; anything but a non-empty list falls through to the
                # generic checks below using the unwrapped value.
                candidate = candidate["tool_calls"]
                if isinstance(candidate, list) and candidate:
                    return self.validate_tool_calls(candidate[0])
            else:
                # Last resort: look for an action one level down.
                for container_key in ("args", "parameters", "input"):
                    nested = candidate.get(container_key)
                    if isinstance(nested, dict) and "action" in nested:
                        return EnvToolCall(
                            tool="interact",
                            args={"action": nested["action"]},
                        )

        if isinstance(candidate, list):
            if not candidate:
                raise ValueError("Empty tool calls list")
            # Only the first call of a batch is honoured.
            return self.validate_tool_calls(candidate[0])

        if isinstance(candidate, EnvToolCall):
            return candidate

        raise ValueError(
            f"Invalid tool call format. Expected action string, dict with 'action' key, "
            f"or EnvToolCall object. Got: {type(candidate)}"
        )

    async def _to_observation(
        self,
        private_state: NetHackPrivateState,
        public_state: NetHackPublicState,
        observation_callable: GetObservationCallable,
        extra_obs: Optional[Dict[str, Any]] = None,
    ) -> InternalObservation:
        """Build an observation via ``observation_callable`` and merge ``extra_obs`` into it."""
        observation = await observation_callable.get_observation(public_state, private_state)  # type: ignore[call-arg]

        if extra_obs:
            observation.update(extra_obs)

        return observation

    async def _serialize_engine(self) -> Any:
        """Return the engine's own serialized snapshot."""
        snapshot = await self.engine._serialize_engine()
        return snapshot

    @classmethod
    async def _deserialize_engine(cls, task_instance: TaskInstance, snapshot: Any) -> NetHackEngine:
        """Rebuild an engine from ``snapshot``; ``task_instance`` is not used here."""
        engine = await NetHackEngine._deserialize_engine(snapshot)
        return engine

    def get_available_actions(self) -> List[str]:
        """Return the keys of ``NETHACK_ACTIONS`` followed by those of ``MENU_ACTIONS``."""
        return [*NETHACK_ACTIONS, *MENU_ACTIONS]

    def get_action_descriptions(self) -> Dict[str, str]:
        """Return one mapping of action name to description for both action tables.

        On a name clash the ``MENU_ACTIONS`` description wins.
        """
        descriptions = dict(NETHACK_ACTIONS)
        descriptions.update(MENU_ACTIONS)
        return descriptions
@@ -0,0 +1,42 @@
1
+ """Helper utilities for NetHack environment."""
2
+
3
+ from .action_mapping import (
4
+ NETHACK_ACTIONS,
5
+ MENU_ACTIONS,
6
+ ALL_ACTIONS,
7
+ ACTION_CATEGORIES,
8
+ validate_action,
9
+ get_action_description,
10
+ get_actions_for_context,
11
+ convert_action_to_nle,
12
+ parse_compound_action,
13
+ )
14
+
15
+ from .observation_utils import (
16
+ format_observation_for_llm,
17
+ parse_ascii_map,
18
+ extract_game_context,
19
+ simplify_observation,
20
+ extract_inventory_from_message,
21
+ identify_item_type,
22
+ )
23
+
24
+ __all__ = [
25
+ # Action mapping
26
+ "NETHACK_ACTIONS",
27
+ "MENU_ACTIONS",
28
+ "ALL_ACTIONS",
29
+ "ACTION_CATEGORIES",
30
+ "validate_action",
31
+ "get_action_description",
32
+ "get_actions_for_context",
33
+ "convert_action_to_nle",
34
+ "parse_compound_action",
35
+ # Observation utils
36
+ "format_observation_for_llm",
37
+ "parse_ascii_map",
38
+ "extract_game_context",
39
+ "simplify_observation",
40
+ "extract_inventory_from_message",
41
+ "identify_item_type",
42
+ ]
@@ -0,0 +1,301 @@
1
+ """Action mapping and validation for NetHack."""
2
+
3
+ from typing import Dict, List, Optional, Tuple
4
+ from dataclasses import dataclass
5
+
6
+
7
@dataclass
class ActionCategory:
    """A named, described group of action names."""

    # Short label for the group, e.g. "Movement".
    name: str
    # One-line human-readable summary of the group.
    description: str
    # Action names that belong to the group.
    actions: List[str]
14
+
15
+
16
# Named NetHack actions mapped to a short human-readable description.
# Insertion order is significant: callers list the keys in this order.
NETHACK_ACTIONS: Dict[str, str] = {
    # --- Movement: eight compass directions plus waiting ---
    "north": "move north",
    "south": "move south",
    "east": "move east",
    "west": "move west",
    "northeast": "move northeast",
    "northwest": "move northwest",
    "southeast": "move southeast",
    "southwest": "move southwest",
    "wait": "wait/rest for one turn",
    # --- Movement modifiers: running and stairs ---
    "run_north": "run north until something interesting",
    "run_south": "run south until something interesting",
    "run_east": "run east until something interesting",
    "run_west": "run west until something interesting",
    "go_up": "go up stairs/ladder",
    "go_down": "go down stairs/ladder",
    # --- Basic interactions with the surroundings ---
    "search": "search for secret doors/traps",
    "open": "open a door",
    "close": "close a door",
    "kick": "kick something",
    "force": "force a lock",
    "untrap": "untrap something",
    # --- Inventory and equipment management ---
    "inventory": "check inventory",
    "pickup": "pick up items",
    "drop": "drop items",
    "dropall": "drop all items",
    "wear": "wear armor/accessories",
    "take_off": "take off armor/accessories",
    "wield": "wield a weapon",
    "unwield": "unwield current weapon",
    "quiver": "ready ammunition",
    "put_on": "put on accessories",
    "remove": "remove accessories",
    # --- Using items ---
    "eat": "eat food",
    "drink": "drink a potion",
    "read": "read a scroll/spellbook",
    "zap": "zap a wand",
    "apply": "apply/use a tool",
    "invoke": "invoke an artifact",
    "rub": "rub a lamp/stone",
    "throw": "throw an item",
    "fire": "fire from quiver",
    # --- Magic and religion ---
    "cast": "cast a spell",
    "pray": "pray to your deity",
    "offer": "offer sacrifice",
    "turn_undead": "turn undead (priest ability)",
    # --- Information commands (described as free actions: no turn is spent) ---
    "look": "look around (FREE ACTION - doesn't advance time)",
    "farlook": "look at specific location (FREE ACTION)",
    "whatis": "identify map symbol (FREE ACTION)",
    "identify": "identify inventory item (FREE ACTION)",
    "discoveries": "list discoveries (FREE ACTION)",
    "conduct": "check conduct (FREE ACTION)",
    "attributes": "check attributes (FREE ACTION)",
    # --- Character actions ---
    "enhance": "enhance skills",
    "sit": "sit down",
    "pay": "pay shopkeeper",
    "chat": "talk to someone",
    "loot": "loot a container",
    "engrave": "write on the ground",
    "monster_ability": "use monster ability",
    # --- Game-level commands ---
    "save": "save the game",
    "quit": "quit the game",
    "help": "show help",
    "version": "show version",
    "history": "show message history",
    "name": "name an item/monster",
    "call": "call item type",
    "adjust": "adjust inventory letters",
    # --- Answers to prompts and menu navigation ---
    "yes": "answer yes",
    "no": "answer no",
    "all": "select all",
    "none": "select none",
    "menu_next": "next menu page",
    "menu_previous": "previous menu page",
    "escape": "cancel/escape",
}

# One-character menu selections, in the order a-z, then A-Z, then 0-9.
MENU_ACTIONS: Dict[str, str] = {
    symbol: f"select option {symbol}"
    for symbol in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
}

# Every known action: the named actions followed by the menu selections.
ALL_ACTIONS = dict(NETHACK_ACTIONS)
ALL_ACTIONS.update(MENU_ACTIONS)
113
+
114
# Thematic groupings of the named actions. An action may appear in more than
# one category (e.g. "throw", "inventory").
ACTION_CATEGORIES = [
    ActionCategory(
        name="Movement",
        description="Basic movement and navigation",
        actions=[
            "north", "south", "east", "west",
            "northeast", "northwest", "southeast", "southwest",
            "wait", "go_up", "go_down",
        ],
    ),
    ActionCategory(
        name="Inventory",
        description="Managing items and equipment",
        actions=[
            "inventory", "pickup", "drop", "wear", "wield",
            "eat", "drink", "read", "apply", "throw",
        ],
    ),
    ActionCategory(
        name="Combat",
        description="Fighting and defense (attack by moving into monsters!)",
        actions=["fire", "zap", "throw", "kick"],
    ),
    ActionCategory(
        name="Exploration",
        description="Discovering the dungeon",
        actions=["search", "open", "close", "look", "farlook"],
    ),
    ActionCategory(
        name="Magic",
        description="Spells and divine intervention",
        actions=["cast", "pray", "offer", "invoke"],
    ),
    ActionCategory(
        name="Game",
        description="Meta game commands",
        actions=["save", "quit", "help", "inventory"],
    ),
]
170
+
171
+
172
def validate_action(action: str, game_state: Optional[Dict] = None) -> Tuple[bool, Optional[str]]:
    """
    Decide whether ``action`` may be issued, optionally given the game state.

    Args:
        action: Name of the action to check.
        game_state: Optional mapping; the keys read here are ``in_menu``,
            ``terminated`` and ``stairs_here``. A missing or empty mapping
            skips all context checks.

    Returns:
        ``(True, None)`` when the action is acceptable, otherwise
        ``(False, message)`` with a human-readable reason.
    """
    # Unknown names are rejected regardless of context.
    if action not in ALL_ACTIONS:
        return False, f"Unknown action: {action}. Use 'help' to see available actions."

    # Without (non-empty) state there is nothing further to check.
    if not game_state:
        return True, None

    # Inside a menu only selection keys and menu navigation are accepted.
    if game_state.get("in_menu", False):
        menu_navigation = ("escape", "menu_next", "menu_previous")
        allowed_in_menu = action in MENU_ACTIONS or action in menu_navigation
        if not allowed_in_menu:
            return False, "Currently in a menu. Use letter/number keys or 'escape'."

    # After the game has ended only saving or quitting is accepted.
    if game_state.get("terminated", False) and action not in ("quit", "save"):
        return False, "Game is over. You can only 'save' or 'quit'."

    # Stair actions need the state to report stairs at the current position.
    wants_stairs = action in ("go_up", "go_down")
    if wants_stairs and not game_state.get("stairs_here", False):
        return False, f"No stairs here to {action.replace('go_', '')}."

    return True, None
209
+
210
+
211
def get_action_description(action: str) -> str:
    """Return the description registered for ``action``, or "Unknown action"."""
    try:
        return ALL_ACTIONS[action]
    except KeyError:
        return "Unknown action"
214
+
215
+
216
def get_actions_for_context(game_state: Dict) -> List[str]:
    """Return the action names that are relevant in the given game context.

    Args:
        game_state: Mapping describing the current situation. Keys read here:
            ``in_menu``, ``menu_items``, ``terminated``, ``stairs_here``,
            ``stairs_down``, ``stairs_up``, ``items_here``, ``door_here`` and
            ``door_open``. Missing keys are treated as false/empty.

    Returns:
        * In a menu: ``"escape"`` followed by one lowercase letter per menu
          item, capped at the 26 letters ``a``-``z``.
        * After termination: ``["quit", "save"]``.
        * Otherwise: a base list of common actions plus any context-specific
          ones, with every action listed at most once.
    """
    if game_state.get("in_menu", False):
        # ``or []`` tolerates an explicit None stored under "menu_items".
        menu_items = game_state.get("menu_items") or []
        # zip stops at the shorter input, so at most 26 letters are produced.
        letters = "abcdefghijklmnopqrstuvwxyz"
        return ["escape"] + [letter for letter, _ in zip(letters, menu_items)]

    if game_state.get("terminated", False):
        return ["quit", "save"]

    # Normal gameplay: always-offered actions.
    actions = [
        "north",
        "south",
        "east",
        "west",
        "search",
        "inventory",
        "pickup",
        "look",
        "wait",
        "open",
        "close",
    ]

    # Stairs are offered only when standing on them, in the reported direction(s).
    if game_state.get("stairs_here", False):
        if game_state.get("stairs_down", False):
            actions.append("go_down")
        if game_state.get("stairs_up", False):
            actions.append("go_up")

    if game_state.get("items_here", False):
        actions.append("pickup")

    if game_state.get("door_here", False):
        actions.append("close" if game_state.get("door_open", False) else "open")

    # Several context-specific actions are already in the base list; drop the
    # repeats while keeping first-seen order so no action is offered twice.
    return list(dict.fromkeys(actions))
265
+
266
+
267
def convert_action_to_nle(action: str, action_map: Dict[str, int]) -> Optional[int]:
    """
    Convert a string action to its NLE integer action.

    Args:
        action: String action name (a named action or a single menu key).
        action_map: Dictionary mapping action names to NLE indices.

    Returns:
        The NLE action index stored in ``action_map``, or ``None`` when the
        action has no entry. Names without an entry, such as "terminate",
        are left for the caller to handle.
    """
    # A plain lookup covers every case: the earlier special-casing of
    # "terminate" and of single-character keys could only ever return None,
    # because it ran after the same membership test had already failed.
    return action_map.get(action)
293
+
294
+
295
def parse_compound_action(action: str) -> List[str]:
    """
    Split a compound action into its individual steps.

    Placeholder implementation: no parsing is performed yet, so the result is
    always a one-element list holding ``action`` unchanged. The intended
    future behaviour is a breakdown such as
    "go to stairs and descend" -> ["navigate_to_stairs", "go_down"].
    """
    steps: List[str] = [action]
    return steps