synth-ai 0.1.9__py3-none-any.whl → 0.2.1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (266)
  1. synth_ai/__init__.py +28 -2
  2. synth_ai/core/system.py +4 -0
  3. synth_ai/environments/__init__.py +35 -0
  4. synth_ai/environments/environment/__init__.py +1 -0
  5. synth_ai/environments/environment/artifacts/__init__.py +1 -0
  6. synth_ai/environments/environment/artifacts/base.py +50 -0
  7. synth_ai/environments/environment/core.py +22 -0
  8. synth_ai/environments/environment/db/__init__.py +1 -0
  9. synth_ai/environments/environment/db/sqlite.py +45 -0
  10. synth_ai/environments/environment/registry.py +24 -0
  11. synth_ai/environments/environment/resources/sqlite.py +46 -0
  12. synth_ai/environments/environment/results.py +1 -0
  13. synth_ai/environments/environment/rewards/__init__.py +1 -0
  14. synth_ai/environments/environment/rewards/core.py +28 -0
  15. synth_ai/environments/environment/shared_engine.py +26 -0
  16. synth_ai/environments/environment/tools/__init__.py +34 -0
  17. synth_ai/environments/examples/__init__.py +1 -0
  18. synth_ai/environments/examples/crafter_classic/__init__.py +8 -0
  19. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +58 -0
  20. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +152 -0
  21. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +1194 -0
  22. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +51 -0
  23. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +872 -0
  24. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1412 -0
  25. synth_ai/environments/examples/crafter_classic/agent_demos/test_crafter_react_agent.py +1110 -0
  26. synth_ai/environments/examples/crafter_classic/config_logging.py +111 -0
  27. synth_ai/environments/examples/crafter_classic/engine.py +502 -0
  28. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +63 -0
  29. synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +5 -0
  30. synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +74 -0
  31. synth_ai/environments/examples/crafter_classic/environment.py +255 -0
  32. synth_ai/environments/examples/crafter_classic/taskset.py +228 -0
  33. synth_ai/environments/examples/enron/agent_demos/test_synth_react.py +535 -0
  34. synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +156 -0
  35. synth_ai/environments/examples/enron/art_helpers/local_email_db.py +280 -0
  36. synth_ai/environments/examples/enron/art_helpers/types_enron.py +24 -0
  37. synth_ai/environments/examples/enron/engine.py +291 -0
  38. synth_ai/environments/examples/enron/environment.py +165 -0
  39. synth_ai/environments/examples/enron/taskset.py +112 -0
  40. synth_ai/environments/examples/enron/units/keyword_stats.py +111 -0
  41. synth_ai/environments/examples/enron/units/test_email_index.py +8 -0
  42. synth_ai/environments/examples/minigrid/__init__.py +48 -0
  43. synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +1188 -0
  44. synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +47 -0
  45. synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +562 -0
  46. synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +220 -0
  47. synth_ai/environments/examples/minigrid/agent_demos/test_minigrid_react_agent.py +393 -0
  48. synth_ai/environments/examples/minigrid/engine.py +589 -0
  49. synth_ai/environments/examples/minigrid/environment.py +274 -0
  50. synth_ai/environments/examples/minigrid/environment_mapping.py +242 -0
  51. synth_ai/environments/examples/minigrid/puzzle_loader.py +416 -0
  52. synth_ai/environments/examples/minigrid/taskset.py +583 -0
  53. synth_ai/environments/examples/minigrid/units/test_action_behavior.py +226 -0
  54. synth_ai/environments/examples/minigrid/units/test_debug_messages.py +83 -0
  55. synth_ai/environments/examples/minigrid/units/test_exploration.py +120 -0
  56. synth_ai/environments/examples/minigrid/units/test_minigrid_engine.py +214 -0
  57. synth_ai/environments/examples/minigrid/units/test_minigrid_environment.py +238 -0
  58. synth_ai/environments/examples/minigrid/units/test_minigrid_environment_mapping.py +301 -0
  59. synth_ai/environments/examples/minigrid/units/test_minigrid_taskset.py +210 -0
  60. synth_ai/environments/examples/nethack/__init__.py +7 -0
  61. synth_ai/environments/examples/nethack/achievements.py +337 -0
  62. synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +981 -0
  63. synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +74 -0
  64. synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +832 -0
  65. synth_ai/environments/examples/nethack/agent_demos/test_nethack_react_agent.py +1112 -0
  66. synth_ai/environments/examples/nethack/engine.py +738 -0
  67. synth_ai/environments/examples/nethack/environment.py +255 -0
  68. synth_ai/environments/examples/nethack/helpers/__init__.py +42 -0
  69. synth_ai/environments/examples/nethack/helpers/action_mapping.py +301 -0
  70. synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +401 -0
  71. synth_ai/environments/examples/nethack/helpers/observation_utils.py +433 -0
  72. synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +201 -0
  73. synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +268 -0
  74. synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +308 -0
  75. synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +430 -0
  76. synth_ai/environments/examples/nethack/taskset.py +323 -0
  77. synth_ai/environments/examples/nethack/units/test_nethack_engine.py +277 -0
  78. synth_ai/environments/examples/nethack/units/test_nethack_environment.py +281 -0
  79. synth_ai/environments/examples/nethack/units/test_nethack_taskset.py +213 -0
  80. synth_ai/environments/examples/nethack/units/test_recording.py +307 -0
  81. synth_ai/environments/examples/red/__init__.py +7 -0
  82. synth_ai/environments/examples/red/agent_demos/__init__.py +1 -0
  83. synth_ai/environments/examples/red/agent_demos/test_synth_react.py +1471 -0
  84. synth_ai/environments/examples/red/config_logging.py +110 -0
  85. synth_ai/environments/examples/red/engine.py +693 -0
  86. synth_ai/environments/examples/red/engine_helpers/__init__.py +1 -0
  87. synth_ai/environments/examples/red/engine_helpers/memory_map.py +28 -0
  88. synth_ai/environments/examples/red/engine_helpers/reward_components.py +275 -0
  89. synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +142 -0
  90. synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +56 -0
  91. synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +283 -0
  92. synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +149 -0
  93. synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +137 -0
  94. synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +56 -0
  95. synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +330 -0
  96. synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +120 -0
  97. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +558 -0
  98. synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +312 -0
  99. synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +147 -0
  100. synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +246 -0
  101. synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +367 -0
  102. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +139 -0
  103. synth_ai/environments/examples/red/environment.py +235 -0
  104. synth_ai/environments/examples/red/taskset.py +77 -0
  105. synth_ai/environments/examples/red/test_fixes.py +125 -0
  106. synth_ai/environments/examples/red/test_fixes_mock.py +148 -0
  107. synth_ai/environments/examples/red/units/__init__.py +1 -0
  108. synth_ai/environments/examples/red/units/test_basic_functionality.py +97 -0
  109. synth_ai/environments/examples/red/units/test_button_press_requirements.py +217 -0
  110. synth_ai/environments/examples/red/units/test_engine.py +192 -0
  111. synth_ai/environments/examples/red/units/test_environment.py +455 -0
  112. synth_ai/environments/examples/red/units/test_exploration_strategy.py +227 -0
  113. synth_ai/environments/examples/red/units/test_integration.py +217 -0
  114. synth_ai/environments/examples/red/units/test_memory_extraction.py +111 -0
  115. synth_ai/environments/examples/red/units/test_menu_bug_reproduction.py +1100 -0
  116. synth_ai/environments/examples/red/units/test_movement_debug.py +255 -0
  117. synth_ai/environments/examples/red/units/test_pokemon_mcts_debug.py +163 -0
  118. synth_ai/environments/examples/red/units/test_pokemon_mcts_verbose.py +117 -0
  119. synth_ai/environments/examples/red/units/test_red_basic.py +145 -0
  120. synth_ai/environments/examples/red/units/test_red_comprehensive.py +323 -0
  121. synth_ai/environments/examples/red/units/test_retry_movement.py +195 -0
  122. synth_ai/environments/examples/red/units/test_reward_components.py +186 -0
  123. synth_ai/environments/examples/red/units/test_rom_integration.py +260 -0
  124. synth_ai/environments/examples/red/units/test_taskset.py +116 -0
  125. synth_ai/environments/examples/red/units/test_tree.py +448 -0
  126. synth_ai/environments/examples/sokoban/__init__.py +1 -0
  127. synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +900 -0
  128. synth_ai/environments/examples/sokoban/agent_demos/test_dspy_react.py +1 -0
  129. synth_ai/environments/examples/sokoban/agent_demos/test_sokoban_react_agent.py +498 -0
  130. synth_ai/environments/examples/sokoban/agent_demos/test_synth_lats.py +1 -0
  131. synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_locally.py +748 -0
  132. synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_service.py +296 -0
  133. synth_ai/environments/examples/sokoban/engine.py +675 -0
  134. synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +1 -0
  135. synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +656 -0
  136. synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +17 -0
  137. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +3 -0
  138. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +129 -0
  139. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +370 -0
  140. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +331 -0
  141. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +305 -0
  142. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +66 -0
  143. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +114 -0
  144. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +122 -0
  145. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +394 -0
  146. synth_ai/environments/examples/sokoban/environment.py +228 -0
  147. synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +438 -0
  148. synth_ai/environments/examples/sokoban/puzzle_loader.py +311 -0
  149. synth_ai/environments/examples/sokoban/taskset.py +425 -0
  150. synth_ai/environments/examples/sokoban/units/astar_common.py +94 -0
  151. synth_ai/environments/examples/sokoban/units/test_building_task_set.py +49 -0
  152. synth_ai/environments/examples/sokoban/units/test_false_positive.py +120 -0
  153. synth_ai/environments/examples/sokoban/units/test_simple_run_through_environment.py +119 -0
  154. synth_ai/environments/examples/sokoban/units/test_sokoban_environment.py +98 -0
  155. synth_ai/environments/examples/sokoban/units/test_tree.py +364 -0
  156. synth_ai/environments/examples/tictactoe/__init__.py +1 -0
  157. synth_ai/environments/examples/tictactoe/agent_demos/test_synth_react.py +266 -0
  158. synth_ai/environments/examples/tictactoe/agent_demos/test_tictactoe_react_agent.py +470 -0
  159. synth_ai/environments/examples/tictactoe/engine.py +368 -0
  160. synth_ai/environments/examples/tictactoe/environment.py +239 -0
  161. synth_ai/environments/examples/tictactoe/taskset.py +214 -0
  162. synth_ai/environments/examples/tictactoe/units/test_tictactoe_engine.py +393 -0
  163. synth_ai/environments/examples/tictactoe/units/test_tictactoe_environment.py +493 -0
  164. synth_ai/environments/examples/tictactoe/units/test_tictactoe_taskset.py +191 -0
  165. synth_ai/environments/examples/verilog/__init__.py +10 -0
  166. synth_ai/environments/examples/verilog/agent_demos/test_synth_react.py +520 -0
  167. synth_ai/environments/examples/verilog/engine.py +328 -0
  168. synth_ai/environments/examples/verilog/environment.py +349 -0
  169. synth_ai/environments/examples/verilog/taskset.py +418 -0
  170. synth_ai/environments/examples/verilog/units/test_verilog_engine.py +466 -0
  171. synth_ai/environments/examples/verilog/units/test_verilog_environment.py +585 -0
  172. synth_ai/environments/examples/verilog/units/test_verilog_integration.py +383 -0
  173. synth_ai/environments/examples/verilog/units/test_verilog_taskset.py +457 -0
  174. synth_ai/environments/reproducibility/core.py +42 -0
  175. synth_ai/environments/reproducibility/tree.py +364 -0
  176. synth_ai/environments/service/app.py +78 -0
  177. synth_ai/environments/service/core_routes.py +775 -0
  178. synth_ai/environments/service/external_registry.py +57 -0
  179. synth_ai/environments/service/registry.py +9 -0
  180. synth_ai/environments/stateful/__init__.py +1 -0
  181. synth_ai/environments/stateful/core.py +28 -0
  182. synth_ai/environments/stateful/engine.py +21 -0
  183. synth_ai/environments/stateful/state.py +7 -0
  184. synth_ai/environments/tasks/api.py +19 -0
  185. synth_ai/environments/tasks/core.py +78 -0
  186. synth_ai/environments/tasks/filters.py +39 -0
  187. synth_ai/environments/tasks/utils.py +89 -0
  188. synth_ai/environments/v0_observability/history.py +3 -0
  189. synth_ai/environments/v0_observability/log.py +2 -0
  190. synth_ai/lm/caching/constants.py +1 -0
  191. synth_ai/{zyk/lms → lm}/caching/ephemeral.py +4 -8
  192. synth_ai/{zyk/lms → lm}/caching/handler.py +15 -15
  193. synth_ai/{zyk/lms → lm}/caching/initialize.py +2 -4
  194. synth_ai/{zyk/lms → lm}/caching/persistent.py +4 -10
  195. synth_ai/{zyk/lms → lm}/config.py +2 -1
  196. synth_ai/{zyk/lms → lm}/constants.py +2 -2
  197. synth_ai/{zyk/lms → lm}/core/all.py +10 -10
  198. synth_ai/{zyk/lms → lm}/core/main.py +57 -33
  199. synth_ai/{zyk/lms → lm}/core/vendor_clients.py +12 -10
  200. synth_ai/lm/cost/monitor.py +1 -0
  201. synth_ai/lm/cost/statefulness.py +1 -0
  202. synth_ai/lm/provider_support/__init__.py +8 -0
  203. synth_ai/lm/provider_support/anthropic.py +945 -0
  204. synth_ai/lm/provider_support/openai.py +1115 -0
  205. synth_ai/lm/provider_support/suppress_logging.py +31 -0
  206. synth_ai/{zyk/lms → lm}/structured_outputs/handler.py +58 -80
  207. synth_ai/{zyk/lms → lm}/structured_outputs/inject.py +6 -20
  208. synth_ai/{zyk/lms → lm}/structured_outputs/rehabilitate.py +6 -12
  209. synth_ai/{zyk/lms → lm}/vendors/core/anthropic_api.py +21 -30
  210. synth_ai/{zyk/lms → lm}/vendors/core/gemini_api.py +37 -32
  211. synth_ai/{zyk/lms → lm}/vendors/core/mistral_api.py +19 -28
  212. synth_ai/{zyk/lms → lm}/vendors/core/openai_api.py +26 -36
  213. synth_ai/{zyk/lms → lm}/vendors/openai_standard.py +29 -33
  214. synth_ai/{zyk/lms → lm}/vendors/retries.py +1 -1
  215. synth_ai/lm/vendors/supported/__init__.py +0 -0
  216. synth_ai/{zyk/lms → lm}/vendors/supported/custom_endpoint.py +131 -118
  217. synth_ai/{zyk/lms → lm}/vendors/supported/deepseek.py +4 -8
  218. synth_ai/{zyk/lms → lm}/vendors/supported/grok.py +6 -8
  219. synth_ai/{zyk/lms → lm}/vendors/supported/groq.py +1 -1
  220. synth_ai/{zyk/lms → lm}/vendors/supported/ollama.py +2 -2
  221. synth_ai/{zyk/lms → lm}/vendors/supported/openrouter.py +18 -16
  222. synth_ai/{zyk/lms → lm}/vendors/supported/together.py +1 -1
  223. synth_ai/tracing/__init__.py +0 -0
  224. synth_ai/tracing/abstractions.py +224 -0
  225. synth_ai/tracing/base_client.py +91 -0
  226. synth_ai/tracing/client_manager.py +131 -0
  227. synth_ai/tracing/config.py +140 -0
  228. synth_ai/tracing/context.py +146 -0
  229. synth_ai/tracing/decorators.py +679 -0
  230. synth_ai/tracing/events/__init__.py +0 -0
  231. synth_ai/tracing/events/manage.py +147 -0
  232. synth_ai/tracing/events/scope.py +86 -0
  233. synth_ai/tracing/events/store.py +227 -0
  234. synth_ai/tracing/immediate_client.py +152 -0
  235. synth_ai/tracing/local.py +18 -0
  236. synth_ai/tracing/log_client_base.py +74 -0
  237. synth_ai/tracing/retry_queue.py +187 -0
  238. synth_ai/tracing/trackers.py +515 -0
  239. synth_ai/tracing/upload.py +504 -0
  240. synth_ai/tracing/utils.py +9 -0
  241. synth_ai/zyk/__init__.py +28 -2
  242. synth_ai-0.2.1.dev0.dist-info/METADATA +349 -0
  243. synth_ai-0.2.1.dev0.dist-info/RECORD +261 -0
  244. synth_ai/zyk/lms/caching/constants.py +0 -1
  245. synth_ai/zyk/lms/cost/monitor.py +0 -1
  246. synth_ai/zyk/lms/cost/statefulness.py +0 -1
  247. synth_ai-0.1.9.dist-info/METADATA +0 -37
  248. synth_ai-0.1.9.dist-info/RECORD +0 -50
  249. /synth_ai/{zyk/lms/__init__.py → environments/reproducibility/helpers.py} +0 -0
  250. /synth_ai/{zyk/lms/caching → lm}/__init__.py +0 -0
  251. /synth_ai/{zyk/lms/core → lm/caching}/__init__.py +0 -0
  252. /synth_ai/{zyk/lms → lm}/caching/dbs.py +0 -0
  253. /synth_ai/{zyk/lms/cost → lm/core}/__init__.py +0 -0
  254. /synth_ai/{zyk/lms → lm}/core/exceptions.py +0 -0
  255. /synth_ai/{zyk/lms/structured_outputs → lm/cost}/__init__.py +0 -0
  256. /synth_ai/{zyk/lms/vendors → lm/structured_outputs}/__init__.py +0 -0
  257. /synth_ai/{zyk/lms → lm}/tools/__init__.py +0 -0
  258. /synth_ai/{zyk/lms → lm}/tools/base.py +0 -0
  259. /synth_ai/{zyk/lms/vendors/core → lm/vendors}/__init__.py +0 -0
  260. /synth_ai/{zyk/lms → lm}/vendors/base.py +0 -0
  261. /synth_ai/{zyk/lms/vendors/local → lm/vendors/core}/__init__.py +0 -0
  262. /synth_ai/{zyk/lms/vendors/supported → lm/vendors/local}/__init__.py +0 -0
  263. /synth_ai/{zyk/lms → lm}/vendors/local/ollama.py +0 -0
  264. {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/WHEEL +0 -0
  265. {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/licenses/LICENSE +0 -0
  266. {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,281 @@
1
+ """Unit tests for NetHack environment."""
2
+
3
+ import pytest
4
+ import asyncio
5
+ from uuid import uuid4
6
+
7
+ from synth_ai.environments.environment.tools import EnvToolCall
8
+ from synth_ai.environments.tasks.core import Impetus, Intent
9
+
10
+ from synth_ai.environments.examples.nethack.environment import (
11
+ NetHackEnvironment,
12
+ NetHackInteractTool,
13
+ )
14
+ from synth_ai.environments.examples.nethack.taskset import (
15
+ NetHackTaskInstanceMetadata,
16
+ NetHackTaskInstance,
17
+ )
18
+
19
+
20
class TestNetHackEnvironment:
    """Exercise initialization, stepping, checkpointing and validation of NetHackEnvironment."""

    @pytest.fixture
    def mock_task_instance(self):
        """Return a knight / beginner task instance with a fixed seed of 123."""
        task_metadata = NetHackTaskInstanceMetadata(
            character_role="knight",
            starting_level=1,
            target_depth=3,
            time_limit=500,
            difficulty="beginner",
            special_objectives=["Defeat 10 monsters"],
            seed=123,
        )
        instance_id = uuid4()
        task_impetus = Impetus(instructions="Test knight adventure")
        task_intent = Intent(
            rubric={"goal": "Reach depth 3"},
            gold_trajectories=None,
            gold_state_diff={},
        )

        return NetHackTaskInstance(
            id=instance_id,
            impetus=task_impetus,
            intent=task_intent,
            metadata=task_metadata,
            is_reproducible=True,
            initial_engine_snapshot=None,
        )

    @pytest.mark.asyncio
    async def test_environment_initialization(self, mock_task_instance):
        """Constructor attributes are set and the first observation has the core keys."""
        environment = NetHackEnvironment(mock_task_instance)

        assert environment.name == "NetHack"
        assert environment.task_instance == mock_task_instance
        assert environment.engine is not None

        # The observation returned by initialize() must be a dict with these keys.
        first_obs = await environment.initialize()

        assert isinstance(first_obs, dict)
        for required_key in ("ascii_map", "message", "character_stats", "terminated"):
            assert required_key in first_obs
        assert first_obs["terminated"] is False

    @pytest.mark.asyncio
    async def test_step_with_valid_action(self, mock_task_instance):
        """Plain string actions are echoed back and advance the turn counter."""
        environment = NetHackEnvironment(mock_task_instance)
        await environment.initialize()

        # First move: a bare string action.
        after_north = await environment.step("north")
        assert "last_action" in after_north
        assert after_north["last_action"] == "north"
        assert after_north["turn_count"] == 1

        # Second move: the counter keeps increasing.
        after_east = await environment.step("east")
        assert after_east["last_action"] == "east"
        assert after_east["turn_count"] == 2

    @pytest.mark.asyncio
    async def test_step_with_invalid_action(self, mock_task_instance):
        """An unrecognised action name is reported through the ``error`` key."""
        environment = NetHackEnvironment(mock_task_instance)
        await environment.initialize()

        observation = await environment.step("invalid_action_xyz")
        assert "error" in observation
        assert "Unknown action" in observation["error"]

    @pytest.mark.asyncio
    async def test_tool_call_formats(self, mock_task_instance):
        """step() accepts dict, EnvToolCall, list and nested ``tool_calls`` payloads."""
        environment = NetHackEnvironment(mock_task_instance)
        await environment.initialize()

        # (payload, action expected to be echoed back), in the order they are stepped.
        payload_cases = [
            # Dict with action key
            ({"action": "wait"}, "wait"),
            # EnvToolCall format
            (EnvToolCall(tool="interact", args={"action": "search"}), "search"),
            # List format
            ([{"action": "inventory"}], "inventory"),
            # Nested tool_calls format
            ({"tool_calls": [{"args": {"action": "look"}}]}, "look"),
        ]
        for payload, expected_action in payload_cases:
            observation = await environment.step(payload)
            assert observation["last_action"] == expected_action

    @pytest.mark.asyncio
    async def test_checkpoint(self, mock_task_instance):
        """A checkpoint taken after two moves carries the summary keys."""
        environment = NetHackEnvironment(mock_task_instance)
        await environment.initialize()

        # Advance the game a little before checkpointing.
        for move in ("north", "east"):
            await environment.step(move)

        snapshot_obs = await environment.checkpoint()

        for summary_key in (
            "final_score",
            "max_depth",
            "turn_count_final",
            "total_reward",
        ):
            assert summary_key in snapshot_obs

    @pytest.mark.asyncio
    async def test_terminate(self, mock_task_instance):
        """terminate() flags the observation as terminated and includes score keys."""
        environment = NetHackEnvironment(mock_task_instance)
        await environment.initialize()

        # One action so the episode is not empty.
        await environment.step("wait")

        closing_obs = await environment.terminate()

        assert closing_obs["terminated"] is True
        assert "final_score" in closing_obs
        assert "total_reward" in closing_obs

    @pytest.mark.asyncio
    async def test_validate_tool_calls_edge_cases(self, mock_task_instance):
        """validate_tool_calls rejects empty/invalid input and unwraps nested args."""
        environment = NetHackEnvironment(mock_task_instance)

        # An empty list is rejected.
        with pytest.raises(ValueError, match="Empty tool calls list"):
            environment.validate_tool_calls([])

        # A bare integer is not a recognised tool call shape.
        with pytest.raises(ValueError, match="Invalid tool call format"):
            environment.validate_tool_calls(123)  # type: ignore[arg-type] # Not a valid format

        # Arguments nested under "args" are unwrapped.
        from_args = environment.validate_tool_calls({"args": {"action": "north"}})
        assert from_args.args["action"] == "north"

        # Arguments nested under "parameters" are unwrapped as well.
        from_parameters = environment.validate_tool_calls(
            {"parameters": {"action": "south"}}
        )
        assert from_parameters.args["action"] == "south"

    @pytest.mark.asyncio
    async def test_available_actions(self, mock_task_instance):
        """Available actions come back as a list, descriptions as a dict."""
        environment = NetHackEnvironment(mock_task_instance)

        action_names = environment.get_available_actions()
        assert isinstance(action_names, list)
        # "a" is the menu action.
        for expected_name in ("north", "inventory", "a"):
            assert expected_name in action_names

        action_help = environment.get_action_descriptions()
        assert isinstance(action_help, dict)
        assert action_help["north"] == "move north"
        assert action_help["inventory"] == "check inventory"
189
+
190
+
191
class TestNetHackInteractTool:
    """Exercise NetHackInteractTool directly against a NetHackEngine."""

    @pytest.fixture
    def mock_task_instance(self):
        """Return a knight / beginner task instance with a fixed seed of 123."""
        task_metadata = NetHackTaskInstanceMetadata(
            character_role="knight",
            starting_level=1,
            target_depth=3,
            time_limit=500,
            difficulty="beginner",
            special_objectives=["Defeat 10 monsters"],
            seed=123,
        )
        instance_id = uuid4()
        task_impetus = Impetus(instructions="Test knight adventure")
        task_intent = Intent(
            rubric={"goal": "Test objectives"},
            gold_trajectories=None,
            gold_state_diff={},
        )

        return NetHackTaskInstance(
            id=instance_id,
            impetus=task_impetus,
            intent=task_intent,
            metadata=task_metadata,
            is_reproducible=True,
            initial_engine_snapshot=None,
        )

    @pytest.fixture
    def mock_engine(self, mock_task_instance):
        """Build a NetHackEngine for the fixture task instance."""
        # Imported lazily, as in the original fixture.
        from synth_ai.environments.examples.nethack.engine import NetHackEngine

        engine = NetHackEngine(mock_task_instance)
        return engine

    @pytest.mark.asyncio
    async def test_interact_tool_valid_action(self, mock_engine):
        """A known action succeeds and the payload carries both state objects."""
        await mock_engine._reset_engine()
        interact = NetHackInteractTool(mock_engine)

        request = EnvToolCall(tool="interact", args={"action": "wait"})
        outcome = await interact(request)

        assert outcome.ok is True
        for state_key in ("public_state", "private_state"):
            assert state_key in outcome.payload
        assert outcome.payload["public_state"].last_action == "wait"

    @pytest.mark.asyncio
    async def test_interact_tool_no_action(self, mock_engine):
        """A call without an ``action`` argument fails and names the missing key."""
        await mock_engine._reset_engine()
        interact = NetHackInteractTool(mock_engine)

        request = EnvToolCall(tool="interact", args={})
        outcome = await interact(request)

        assert outcome.ok is False
        # KeyError is caught and returned as string
        assert "'action'" in outcome.error

    @pytest.mark.asyncio
    async def test_interact_tool_invalid_action(self, mock_engine):
        """An unrecognised action name fails with an "Unknown action" error."""
        await mock_engine._reset_engine()
        interact = NetHackInteractTool(mock_engine)

        request = EnvToolCall(tool="interact", args={"action": "fly"})
        outcome = await interact(request)

        assert outcome.ok is False
        assert "Unknown action" in outcome.error

    @pytest.mark.asyncio
    async def test_interact_tool_game_over_validation(self, mock_engine):
        """Once both states are flagged terminated, a movement action is refused."""
        await mock_engine._reset_engine()
        interact = NetHackInteractTool(mock_engine)

        # Force the game-over condition by hand on both state objects.
        mock_engine.public_state.terminated = True
        mock_engine.private_state.terminated = True

        # Any action other than quitting should now be rejected.
        request = EnvToolCall(tool="interact", args={"action": "north"})
        outcome = await interact(request)

        assert outcome.ok is False
        assert "Game is over" in outcome.error
@@ -0,0 +1,213 @@
1
+ """Unit tests for NetHack taskset."""
2
+
3
+ import pytest
4
+ import asyncio
5
+
6
+ from synth_ai.environments.examples.nethack.taskset import (
7
+ create_nethack_taskset,
8
+ NetHackTaskInstance,
9
+ NetHackTaskInstanceMetadata,
10
+ CHARACTER_ROLES,
11
+ SPECIAL_OBJECTIVES,
12
+ )
13
+
14
+
15
class TestNetHackTaskSet:
    """Checks on the task set produced by ``create_nethack_taskset``."""

    @staticmethod
    def _by_difficulty(taskset, level):
        """Return a lazy iterator over the instances whose difficulty equals ``level``."""
        return (
            candidate
            for candidate in taskset.instances
            if candidate.metadata.difficulty == level
        )

    @pytest.mark.asyncio
    async def test_taskset_creation(self):
        """A freshly built task set has the expected name, size and a defined split."""
        generated = await create_nethack_taskset()

        assert generated.name == "NetHack TaskSet"
        # 20 + 30 + 25 + 15 + 10 instances across the five difficulty tiers.
        assert len(generated.instances) == 100
        assert generated.split_info._is_split_defined is True

    @pytest.mark.asyncio
    async def test_task_instance_properties(self):
        """Spot-check the types and required fields of the first ten instances."""
        generated = await create_nethack_taskset()
        known_difficulties = (
            "tutorial",
            "beginner",
            "intermediate",
            "advanced",
            "expert",
        )

        for task in generated.instances[:10]:
            assert isinstance(task, NetHackTaskInstance)
            assert isinstance(task.metadata, NetHackTaskInstanceMetadata)

            # Fields every task instance must carry.
            assert task.id is not None
            assert task.impetus.instructions != ""
            assert task.intent.rubric is not None
            assert task.is_reproducible is True

            # NetHack-specific metadata.
            info = task.metadata
            assert info.character_role in CHARACTER_ROLES
            assert info.starting_level == 1
            assert info.target_depth > 0
            assert info.time_limit > 0
            assert info.difficulty in known_difficulties
            assert isinstance(info.special_objectives, list)
            assert info.seed is not None

    @pytest.mark.asyncio
    async def test_difficulty_distribution(self):
        """Each difficulty tier contributes its fixed number of instances."""
        generated = await create_nethack_taskset()

        expected_counts = {
            "tutorial": 20,
            "beginner": 30,
            "intermediate": 25,
            "advanced": 15,
            "expert": 10,
        }
        # Start every known tier at zero; an unknown tier raises KeyError below.
        observed = dict.fromkeys(expected_counts, 0)

        for task in generated.instances:
            observed[task.metadata.difficulty] += 1

        for tier, wanted in expected_counts.items():
            assert observed[tier] == wanted

    @pytest.mark.asyncio
    async def test_character_role_assignment(self):
        """Role choice depends on difficulty: tutorial is fixed, expert is varied."""
        generated = await create_nethack_taskset()

        # Every tutorial task is played as a tourist.
        tutorial_tasks = list(self._by_difficulty(generated, "tutorial"))
        for task in tutorial_tasks:
            assert task.metadata.character_role == "tourist"

        # Expert tasks should draw on more than one distinct role.
        expert_tasks = list(self._by_difficulty(generated, "expert"))
        distinct_roles = {task.metadata.character_role for task in expert_tasks}
        assert len(distinct_roles) > 1

    @pytest.mark.asyncio
    async def test_objective_assignment(self):
        """Special objectives are valid and their count tracks difficulty."""
        generated = await create_nethack_taskset()

        # Flatten every objective category into a single list of allowed values.
        allowed = [
            objective
            for group in SPECIAL_OBJECTIVES.values()
            for objective in group
        ]

        for task in generated.instances:
            for objective in task.metadata.special_objectives:
                assert objective in allowed

        # One objective at the tutorial tier, four at the expert tier
        # (checked on the first instance found for each).
        first_tutorial = next(self._by_difficulty(generated, "tutorial"))
        assert len(first_tutorial.metadata.special_objectives) == 1

        first_expert = next(self._by_difficulty(generated, "expert"))
        assert len(first_expert.metadata.special_objectives) == 4

    @pytest.mark.asyncio
    async def test_instruction_content(self):
        """Instructions for the first five tasks mention every key detail."""
        generated = await create_nethack_taskset()
        section_headings = (
            "Additional objectives:",
            "Character strengths:",
            "Character weaknesses:",
            "Tips:",
        )

        for task in generated.instances[:5]:
            text = task.impetus.instructions

            # Task-specific values must be spelled out in the text.
            assert task.metadata.character_role in text
            assert str(task.metadata.target_depth) in text
            assert str(task.metadata.time_limit) in text
            # Fixed section headings.
            for heading in section_headings:
                assert heading in text

    @pytest.mark.asyncio
    async def test_rubric_structure(self):
        """The intent rubric of the first five tasks has the expected layout."""
        generated = await create_nethack_taskset()

        for task in generated.instances[:5]:
            rubric = task.intent.rubric

            for top_level_key in ("goal", "success_criteria", "evaluation_metrics"):
                assert top_level_key in rubric

            # Success criteria are split into primary and secondary parts.
            for criteria_key in ("primary", "secondary"):
                assert criteria_key in rubric["success_criteria"]

            # Evaluation metrics mirror the task metadata.
            scoring = rubric["evaluation_metrics"]
            assert scoring["depth_reached"] == task.metadata.target_depth
            assert scoring["time_limit"] == task.metadata.time_limit
            assert scoring["objectives_completed"] == len(task.metadata.special_objectives)

    @pytest.mark.asyncio
    async def test_split_info(self):
        """Validation and test splits are 10% each, disjoint, and made of real IDs."""
        generated = await create_nethack_taskset()

        instance_total = len(generated.instances)
        val_count = len(generated.split_info.val_instance_ids)
        test_count = len(generated.split_info.test_instance_ids)

        # Each held-out split takes a tenth of the instances (integer division).
        tenth = instance_total // 10
        assert val_count == tenth
        assert test_count == tenth

        # The two held-out splits must not share any instance.
        shared = generated.split_info.val_instance_ids & generated.split_info.test_instance_ids
        assert len(shared) == 0

        # Every split ID must belong to an instance in the task set.
        known_ids = {task.id for task in generated.instances}
        assert generated.split_info.val_instance_ids.issubset(known_ids)
        assert generated.split_info.test_instance_ids.issubset(known_ids)

    @pytest.mark.asyncio
    async def test_task_serialization(self):
        """A task instance survives a serialize/deserialize round trip."""
        generated = await create_nethack_taskset()
        original = generated.instances[0]

        # Serialize to a plain dict.
        payload = await original.serialize()

        assert isinstance(payload, dict)
        for section in ("id", "impetus", "intent", "metadata"):
            assert section in payload

        # Serialized metadata matches the live object.
        stored_meta = payload["metadata"]
        for field in ("character_role", "target_depth", "time_limit"):
            assert stored_meta[field] == getattr(original.metadata, field)

        # Rebuild an instance from the dict and compare its metadata.
        rebuilt = await NetHackTaskInstance.deserialize(payload)

        for field in ("character_role", "target_depth", "time_limit", "special_objectives"):
            assert getattr(rebuilt.metadata, field) == getattr(original.metadata, field)

    @pytest.mark.asyncio
    async def test_reproducibility(self):
        """Every task is flagged reproducible and has a seed in ``[0, 2**31)``."""
        generated = await create_nethack_taskset()

        for task in generated.instances:
            assert task.is_reproducible is True
            seed = task.metadata.seed
            assert seed is not None
            assert 0 <= seed < 2**31