synth-ai 0.2.0__py3-none-any.whl → 0.2.1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (266) hide show
  1. synth_ai/__init__.py +28 -2
  2. synth_ai/core/system.py +4 -0
  3. synth_ai/environments/__init__.py +35 -0
  4. synth_ai/environments/environment/__init__.py +1 -0
  5. synth_ai/environments/environment/artifacts/__init__.py +1 -0
  6. synth_ai/environments/environment/artifacts/base.py +50 -0
  7. synth_ai/environments/environment/core.py +22 -0
  8. synth_ai/environments/environment/db/__init__.py +1 -0
  9. synth_ai/environments/environment/db/sqlite.py +45 -0
  10. synth_ai/environments/environment/registry.py +24 -0
  11. synth_ai/environments/environment/resources/sqlite.py +46 -0
  12. synth_ai/environments/environment/results.py +1 -0
  13. synth_ai/environments/environment/rewards/__init__.py +1 -0
  14. synth_ai/environments/environment/rewards/core.py +28 -0
  15. synth_ai/environments/environment/shared_engine.py +26 -0
  16. synth_ai/environments/environment/tools/__init__.py +34 -0
  17. synth_ai/environments/examples/__init__.py +1 -0
  18. synth_ai/environments/examples/crafter_classic/__init__.py +8 -0
  19. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +58 -0
  20. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +152 -0
  21. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +1194 -0
  22. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +51 -0
  23. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +872 -0
  24. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1412 -0
  25. synth_ai/environments/examples/crafter_classic/agent_demos/test_crafter_react_agent.py +1110 -0
  26. synth_ai/environments/examples/crafter_classic/config_logging.py +111 -0
  27. synth_ai/environments/examples/crafter_classic/engine.py +502 -0
  28. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +63 -0
  29. synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +5 -0
  30. synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +74 -0
  31. synth_ai/environments/examples/crafter_classic/environment.py +255 -0
  32. synth_ai/environments/examples/crafter_classic/taskset.py +228 -0
  33. synth_ai/environments/examples/enron/agent_demos/test_synth_react.py +535 -0
  34. synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +156 -0
  35. synth_ai/environments/examples/enron/art_helpers/local_email_db.py +280 -0
  36. synth_ai/environments/examples/enron/art_helpers/types_enron.py +24 -0
  37. synth_ai/environments/examples/enron/engine.py +291 -0
  38. synth_ai/environments/examples/enron/environment.py +165 -0
  39. synth_ai/environments/examples/enron/taskset.py +112 -0
  40. synth_ai/environments/examples/enron/units/keyword_stats.py +111 -0
  41. synth_ai/environments/examples/enron/units/test_email_index.py +8 -0
  42. synth_ai/environments/examples/minigrid/__init__.py +48 -0
  43. synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +1188 -0
  44. synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +47 -0
  45. synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +562 -0
  46. synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +220 -0
  47. synth_ai/environments/examples/minigrid/agent_demos/test_minigrid_react_agent.py +393 -0
  48. synth_ai/environments/examples/minigrid/engine.py +589 -0
  49. synth_ai/environments/examples/minigrid/environment.py +274 -0
  50. synth_ai/environments/examples/minigrid/environment_mapping.py +242 -0
  51. synth_ai/environments/examples/minigrid/puzzle_loader.py +416 -0
  52. synth_ai/environments/examples/minigrid/taskset.py +583 -0
  53. synth_ai/environments/examples/minigrid/units/test_action_behavior.py +226 -0
  54. synth_ai/environments/examples/minigrid/units/test_debug_messages.py +83 -0
  55. synth_ai/environments/examples/minigrid/units/test_exploration.py +120 -0
  56. synth_ai/environments/examples/minigrid/units/test_minigrid_engine.py +214 -0
  57. synth_ai/environments/examples/minigrid/units/test_minigrid_environment.py +238 -0
  58. synth_ai/environments/examples/minigrid/units/test_minigrid_environment_mapping.py +301 -0
  59. synth_ai/environments/examples/minigrid/units/test_minigrid_taskset.py +210 -0
  60. synth_ai/environments/examples/nethack/__init__.py +7 -0
  61. synth_ai/environments/examples/nethack/achievements.py +337 -0
  62. synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +981 -0
  63. synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +74 -0
  64. synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +832 -0
  65. synth_ai/environments/examples/nethack/agent_demos/test_nethack_react_agent.py +1112 -0
  66. synth_ai/environments/examples/nethack/engine.py +738 -0
  67. synth_ai/environments/examples/nethack/environment.py +255 -0
  68. synth_ai/environments/examples/nethack/helpers/__init__.py +42 -0
  69. synth_ai/environments/examples/nethack/helpers/action_mapping.py +301 -0
  70. synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +401 -0
  71. synth_ai/environments/examples/nethack/helpers/observation_utils.py +433 -0
  72. synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +201 -0
  73. synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +268 -0
  74. synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +308 -0
  75. synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +430 -0
  76. synth_ai/environments/examples/nethack/taskset.py +323 -0
  77. synth_ai/environments/examples/nethack/units/test_nethack_engine.py +277 -0
  78. synth_ai/environments/examples/nethack/units/test_nethack_environment.py +281 -0
  79. synth_ai/environments/examples/nethack/units/test_nethack_taskset.py +213 -0
  80. synth_ai/environments/examples/nethack/units/test_recording.py +307 -0
  81. synth_ai/environments/examples/red/__init__.py +7 -0
  82. synth_ai/environments/examples/red/agent_demos/__init__.py +1 -0
  83. synth_ai/environments/examples/red/agent_demos/test_synth_react.py +1471 -0
  84. synth_ai/environments/examples/red/config_logging.py +110 -0
  85. synth_ai/environments/examples/red/engine.py +693 -0
  86. synth_ai/environments/examples/red/engine_helpers/__init__.py +1 -0
  87. synth_ai/environments/examples/red/engine_helpers/memory_map.py +28 -0
  88. synth_ai/environments/examples/red/engine_helpers/reward_components.py +275 -0
  89. synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +142 -0
  90. synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +56 -0
  91. synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +283 -0
  92. synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +149 -0
  93. synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +137 -0
  94. synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +56 -0
  95. synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +330 -0
  96. synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +120 -0
  97. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +558 -0
  98. synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +312 -0
  99. synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +147 -0
  100. synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +246 -0
  101. synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +367 -0
  102. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +139 -0
  103. synth_ai/environments/examples/red/environment.py +235 -0
  104. synth_ai/environments/examples/red/taskset.py +77 -0
  105. synth_ai/environments/examples/red/test_fixes.py +125 -0
  106. synth_ai/environments/examples/red/test_fixes_mock.py +148 -0
  107. synth_ai/environments/examples/red/units/__init__.py +1 -0
  108. synth_ai/environments/examples/red/units/test_basic_functionality.py +97 -0
  109. synth_ai/environments/examples/red/units/test_button_press_requirements.py +217 -0
  110. synth_ai/environments/examples/red/units/test_engine.py +192 -0
  111. synth_ai/environments/examples/red/units/test_environment.py +455 -0
  112. synth_ai/environments/examples/red/units/test_exploration_strategy.py +227 -0
  113. synth_ai/environments/examples/red/units/test_integration.py +217 -0
  114. synth_ai/environments/examples/red/units/test_memory_extraction.py +111 -0
  115. synth_ai/environments/examples/red/units/test_menu_bug_reproduction.py +1100 -0
  116. synth_ai/environments/examples/red/units/test_movement_debug.py +255 -0
  117. synth_ai/environments/examples/red/units/test_pokemon_mcts_debug.py +163 -0
  118. synth_ai/environments/examples/red/units/test_pokemon_mcts_verbose.py +117 -0
  119. synth_ai/environments/examples/red/units/test_red_basic.py +145 -0
  120. synth_ai/environments/examples/red/units/test_red_comprehensive.py +323 -0
  121. synth_ai/environments/examples/red/units/test_retry_movement.py +195 -0
  122. synth_ai/environments/examples/red/units/test_reward_components.py +186 -0
  123. synth_ai/environments/examples/red/units/test_rom_integration.py +260 -0
  124. synth_ai/environments/examples/red/units/test_taskset.py +116 -0
  125. synth_ai/environments/examples/red/units/test_tree.py +448 -0
  126. synth_ai/environments/examples/sokoban/__init__.py +1 -0
  127. synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +900 -0
  128. synth_ai/environments/examples/sokoban/agent_demos/test_dspy_react.py +1 -0
  129. synth_ai/environments/examples/sokoban/agent_demos/test_sokoban_react_agent.py +498 -0
  130. synth_ai/environments/examples/sokoban/agent_demos/test_synth_lats.py +1 -0
  131. synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_locally.py +748 -0
  132. synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_service.py +296 -0
  133. synth_ai/environments/examples/sokoban/engine.py +675 -0
  134. synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +1 -0
  135. synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +656 -0
  136. synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +17 -0
  137. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +3 -0
  138. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +129 -0
  139. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +370 -0
  140. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +331 -0
  141. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +305 -0
  142. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +66 -0
  143. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +114 -0
  144. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +122 -0
  145. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +394 -0
  146. synth_ai/environments/examples/sokoban/environment.py +228 -0
  147. synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +438 -0
  148. synth_ai/environments/examples/sokoban/puzzle_loader.py +311 -0
  149. synth_ai/environments/examples/sokoban/taskset.py +425 -0
  150. synth_ai/environments/examples/sokoban/units/astar_common.py +94 -0
  151. synth_ai/environments/examples/sokoban/units/test_building_task_set.py +49 -0
  152. synth_ai/environments/examples/sokoban/units/test_false_positive.py +120 -0
  153. synth_ai/environments/examples/sokoban/units/test_simple_run_through_environment.py +119 -0
  154. synth_ai/environments/examples/sokoban/units/test_sokoban_environment.py +98 -0
  155. synth_ai/environments/examples/sokoban/units/test_tree.py +364 -0
  156. synth_ai/environments/examples/tictactoe/__init__.py +1 -0
  157. synth_ai/environments/examples/tictactoe/agent_demos/test_synth_react.py +266 -0
  158. synth_ai/environments/examples/tictactoe/agent_demos/test_tictactoe_react_agent.py +470 -0
  159. synth_ai/environments/examples/tictactoe/engine.py +368 -0
  160. synth_ai/environments/examples/tictactoe/environment.py +239 -0
  161. synth_ai/environments/examples/tictactoe/taskset.py +214 -0
  162. synth_ai/environments/examples/tictactoe/units/test_tictactoe_engine.py +393 -0
  163. synth_ai/environments/examples/tictactoe/units/test_tictactoe_environment.py +493 -0
  164. synth_ai/environments/examples/tictactoe/units/test_tictactoe_taskset.py +191 -0
  165. synth_ai/environments/examples/verilog/__init__.py +10 -0
  166. synth_ai/environments/examples/verilog/agent_demos/test_synth_react.py +520 -0
  167. synth_ai/environments/examples/verilog/engine.py +328 -0
  168. synth_ai/environments/examples/verilog/environment.py +349 -0
  169. synth_ai/environments/examples/verilog/taskset.py +418 -0
  170. synth_ai/environments/examples/verilog/units/test_verilog_engine.py +466 -0
  171. synth_ai/environments/examples/verilog/units/test_verilog_environment.py +585 -0
  172. synth_ai/environments/examples/verilog/units/test_verilog_integration.py +383 -0
  173. synth_ai/environments/examples/verilog/units/test_verilog_taskset.py +457 -0
  174. synth_ai/environments/reproducibility/core.py +42 -0
  175. synth_ai/environments/reproducibility/tree.py +364 -0
  176. synth_ai/environments/service/app.py +78 -0
  177. synth_ai/environments/service/core_routes.py +775 -0
  178. synth_ai/environments/service/external_registry.py +57 -0
  179. synth_ai/environments/service/registry.py +9 -0
  180. synth_ai/environments/stateful/__init__.py +1 -0
  181. synth_ai/environments/stateful/core.py +28 -0
  182. synth_ai/environments/stateful/engine.py +21 -0
  183. synth_ai/environments/stateful/state.py +7 -0
  184. synth_ai/environments/tasks/api.py +19 -0
  185. synth_ai/environments/tasks/core.py +78 -0
  186. synth_ai/environments/tasks/filters.py +39 -0
  187. synth_ai/environments/tasks/utils.py +89 -0
  188. synth_ai/environments/v0_observability/history.py +3 -0
  189. synth_ai/environments/v0_observability/log.py +2 -0
  190. synth_ai/lm/caching/constants.py +1 -0
  191. synth_ai/{zyk/lms → lm}/caching/ephemeral.py +4 -8
  192. synth_ai/{zyk/lms → lm}/caching/handler.py +15 -15
  193. synth_ai/{zyk/lms → lm}/caching/initialize.py +2 -4
  194. synth_ai/{zyk/lms → lm}/caching/persistent.py +4 -10
  195. synth_ai/{zyk/lms → lm}/config.py +2 -1
  196. synth_ai/{zyk/lms → lm}/constants.py +2 -2
  197. synth_ai/{zyk/lms → lm}/core/all.py +10 -10
  198. synth_ai/{zyk/lms → lm}/core/main.py +57 -33
  199. synth_ai/{zyk/lms → lm}/core/vendor_clients.py +12 -10
  200. synth_ai/lm/cost/monitor.py +1 -0
  201. synth_ai/lm/cost/statefulness.py +1 -0
  202. synth_ai/lm/provider_support/__init__.py +8 -0
  203. synth_ai/lm/provider_support/anthropic.py +945 -0
  204. synth_ai/lm/provider_support/openai.py +1115 -0
  205. synth_ai/lm/provider_support/suppress_logging.py +31 -0
  206. synth_ai/{zyk/lms → lm}/structured_outputs/handler.py +58 -80
  207. synth_ai/{zyk/lms → lm}/structured_outputs/inject.py +6 -20
  208. synth_ai/{zyk/lms → lm}/structured_outputs/rehabilitate.py +6 -12
  209. synth_ai/{zyk/lms → lm}/vendors/core/anthropic_api.py +21 -30
  210. synth_ai/{zyk/lms → lm}/vendors/core/gemini_api.py +35 -32
  211. synth_ai/{zyk/lms → lm}/vendors/core/mistral_api.py +19 -28
  212. synth_ai/{zyk/lms → lm}/vendors/core/openai_api.py +26 -36
  213. synth_ai/{zyk/lms → lm}/vendors/openai_standard.py +29 -33
  214. synth_ai/{zyk/lms → lm}/vendors/retries.py +1 -1
  215. synth_ai/lm/vendors/supported/__init__.py +0 -0
  216. synth_ai/{zyk/lms → lm}/vendors/supported/custom_endpoint.py +131 -118
  217. synth_ai/{zyk/lms → lm}/vendors/supported/deepseek.py +4 -8
  218. synth_ai/{zyk/lms → lm}/vendors/supported/grok.py +6 -8
  219. synth_ai/{zyk/lms → lm}/vendors/supported/groq.py +1 -1
  220. synth_ai/{zyk/lms → lm}/vendors/supported/ollama.py +2 -2
  221. synth_ai/{zyk/lms → lm}/vendors/supported/openrouter.py +18 -16
  222. synth_ai/{zyk/lms → lm}/vendors/supported/together.py +1 -1
  223. synth_ai/tracing/__init__.py +0 -0
  224. synth_ai/tracing/abstractions.py +224 -0
  225. synth_ai/tracing/base_client.py +91 -0
  226. synth_ai/tracing/client_manager.py +131 -0
  227. synth_ai/tracing/config.py +140 -0
  228. synth_ai/tracing/context.py +146 -0
  229. synth_ai/tracing/decorators.py +679 -0
  230. synth_ai/tracing/events/__init__.py +0 -0
  231. synth_ai/tracing/events/manage.py +147 -0
  232. synth_ai/tracing/events/scope.py +86 -0
  233. synth_ai/tracing/events/store.py +227 -0
  234. synth_ai/tracing/immediate_client.py +152 -0
  235. synth_ai/tracing/local.py +18 -0
  236. synth_ai/tracing/log_client_base.py +74 -0
  237. synth_ai/tracing/retry_queue.py +187 -0
  238. synth_ai/tracing/trackers.py +515 -0
  239. synth_ai/tracing/upload.py +504 -0
  240. synth_ai/tracing/utils.py +9 -0
  241. synth_ai/zyk/__init__.py +28 -2
  242. synth_ai-0.2.1.dev0.dist-info/METADATA +349 -0
  243. synth_ai-0.2.1.dev0.dist-info/RECORD +261 -0
  244. {synth_ai-0.2.0.dist-info → synth_ai-0.2.1.dev0.dist-info}/WHEEL +1 -1
  245. synth_ai/zyk/lms/caching/constants.py +0 -1
  246. synth_ai/zyk/lms/cost/monitor.py +0 -1
  247. synth_ai/zyk/lms/cost/statefulness.py +0 -1
  248. synth_ai-0.2.0.dist-info/METADATA +0 -36
  249. synth_ai-0.2.0.dist-info/RECORD +0 -50
  250. /synth_ai/{zyk/lms/__init__.py → environments/reproducibility/helpers.py} +0 -0
  251. /synth_ai/{zyk/lms/caching → lm}/__init__.py +0 -0
  252. /synth_ai/{zyk/lms/core → lm/caching}/__init__.py +0 -0
  253. /synth_ai/{zyk/lms → lm}/caching/dbs.py +0 -0
  254. /synth_ai/{zyk/lms/cost → lm/core}/__init__.py +0 -0
  255. /synth_ai/{zyk/lms → lm}/core/exceptions.py +0 -0
  256. /synth_ai/{zyk/lms/structured_outputs → lm/cost}/__init__.py +0 -0
  257. /synth_ai/{zyk/lms/vendors → lm/structured_outputs}/__init__.py +0 -0
  258. /synth_ai/{zyk/lms → lm}/tools/__init__.py +0 -0
  259. /synth_ai/{zyk/lms → lm}/tools/base.py +0 -0
  260. /synth_ai/{zyk/lms/vendors/core → lm/vendors}/__init__.py +0 -0
  261. /synth_ai/{zyk/lms → lm}/vendors/base.py +0 -0
  262. /synth_ai/{zyk/lms/vendors/local → lm/vendors/core}/__init__.py +0 -0
  263. /synth_ai/{zyk/lms/vendors/supported → lm/vendors/local}/__init__.py +0 -0
  264. /synth_ai/{zyk/lms → lm}/vendors/local/ollama.py +0 -0
  265. {synth_ai-0.2.0.dist-info → synth_ai-0.2.1.dev0.dist-info/licenses}/LICENSE +0 -0
  266. {synth_ai-0.2.0.dist-info → synth_ai-0.2.1.dev0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,210 @@
1
+ """Unit tests for MiniGrid taskset."""
2
+
3
+ import asyncio
4
+ import pytest
5
+ from uuid import UUID
6
+
7
+ from synth_ai.environments.examples.minigrid.taskset import (
8
+ MiniGridTaskInstance,
9
+ MiniGridTaskInstanceMetadata,
10
+ create_minigrid_taskset,
11
+ DEFAULT_MINIGRID_TASK,
12
+ ENVIRONMENTS,
13
+ )
14
+
15
+
16
@pytest.mark.asyncio
async def test_default_task():
    """Verify the attributes of the predefined default MiniGrid task."""
    default_task = DEFAULT_MINIGRID_TASK

    # Identity, instructions and rubric of the task itself.
    assert isinstance(default_task.id, UUID)
    expected_instructions = "Navigate the 5x5 grid to reach the goal marked with 'G'."
    assert default_task.impetus.instructions == expected_instructions
    expected_goal = "Successfully reach the goal tile in the MiniGrid-Empty-5x5-v0 environment."
    assert default_task.intent.rubric["goal"] == expected_goal

    # Environment description stored in the metadata.
    meta = default_task.metadata
    assert meta.env_name == "MiniGrid-Empty-5x5-v0"
    assert meta.grid_size == (5, 5)
    assert meta.difficulty == "easy"
    assert meta.seed == 42

    assert default_task.is_reproducible is True
33
+
34
+
35
@pytest.mark.asyncio
async def test_task_serialization():
    """Round-trip the default task through serialize() and deserialize()."""
    original = DEFAULT_MINIGRID_TASK

    payload = await original.serialize()

    # Every top-level section must be present in the serialized form.
    for section in ("id", "impetus", "intent", "metadata"):
        assert section in payload
    payload_meta = payload["metadata"]
    assert payload_meta["env_name"] == "MiniGrid-Empty-5x5-v0"
    # The grid size is expected as a list in the serialized payload.
    assert payload_meta["grid_size"] == [5, 5]

    restored = await MiniGridTaskInstance.deserialize(payload)

    # The restored task must match the original on the compared fields.
    assert restored.impetus.instructions == original.impetus.instructions
    assert restored.metadata.env_name == original.metadata.env_name
    assert restored.metadata.grid_size == original.metadata.grid_size
58
+
59
+
60
@pytest.mark.asyncio
async def test_create_taskset():
    """Check size, split definition and instance validity of a generated taskset."""
    taskset = await create_minigrid_taskset(
        num_tasks_per_difficulty={"easy": 5, "medium": 3, "hard": 2}, seed=42
    )

    # Check taskset properties
    assert taskset.name == "MiniGrid TaskSet"
    assert len(taskset.instances) == 10  # 5 + 3 + 2

    # Check splits: both validation and test splits must be non-empty
    splits = taskset.split_info
    assert splits._is_split_defined
    assert len(splits.val_instance_ids) >= 1
    assert len(splits.test_instance_ids) >= 1

    # Check no overlap between splits
    assert splits.val_instance_ids.isdisjoint(splits.test_instance_ids)

    # The set of known environment names does not depend on the instance,
    # so build it once instead of once per loop iteration.
    known_env_names = {env[0] for envs in ENVIRONMENTS.values() for env in envs}

    # Check all instances are valid
    for instance in taskset.instances:
        assert isinstance(instance, MiniGridTaskInstance)
        assert instance.metadata.env_name in known_env_names
85
+
86
+
87
@pytest.mark.asyncio
async def test_task_metadata():
    """Check per-difficulty counts and feature flags in task metadata."""
    taskset = await create_minigrid_taskset(
        num_tasks_per_difficulty={"easy": 2, "medium": 2, "hard": 2}, seed=123
    )

    def tasks_at(level):
        # Select the instances generated for one difficulty level.
        return [inst for inst in taskset.instances if inst.metadata.difficulty == level]

    easy_tasks = tasks_at("easy")
    medium_tasks = tasks_at("medium")
    hard_tasks = tasks_at("hard")

    # Two tasks were requested for each difficulty.
    for bucket in (easy_tasks, medium_tasks, hard_tasks):
        assert len(bucket) == 2

    # DoorKey environments must advertise both a key and a door.
    for inst in medium_tasks:
        if "DoorKey" not in inst.metadata.env_name:
            continue
        assert inst.metadata.has_key is True
        assert inst.metadata.has_door is True

    # Lava environments must advertise lava.
    for inst in hard_tasks:
        if "Lava" not in inst.metadata.env_name:
            continue
        assert inst.metadata.has_lava is True
112
+
113
+
114
@pytest.mark.asyncio
async def test_task_instructions():
    """Check that generated tasks carry matching instructions and a rubric."""
    taskset = await create_minigrid_taskset(num_tasks_per_difficulty={"medium": 5}, seed=456)

    for inst in taskset.instances:
        # Instructions must be present and non-empty.
        assert inst.impetus.instructions
        assert len(inst.impetus.instructions) > 0

        # Instructions must mention the hazards/items flagged in the metadata.
        meta = inst.metadata
        if meta.has_lava:
            assert "avoiding lava" in inst.impetus.instructions
        if meta.has_key:
            assert "key" in inst.impetus.instructions

        # The rubric needs a goal and a list of success criteria.
        for required_key in ("goal", "success_criteria"):
            assert required_key in inst.intent.rubric
        assert isinstance(inst.intent.rubric["success_criteria"], list)
134
+
135
+
136
@pytest.mark.asyncio
async def test_environment_configurations():
    """Validate the shape of every entry in the ENVIRONMENTS table."""
    allowed_levels = ["easy", "medium", "hard"]

    # Walk every (environment name, grid size) pair of every difficulty.
    for level, configs in ENVIRONMENTS.items():
        assert level in allowed_levels
        for config in configs:
            env_name, grid_size = config
            assert isinstance(env_name, str)
            assert "MiniGrid" in env_name
            # The grid size must be a pair of integers.
            assert isinstance(grid_size, tuple)
            assert len(grid_size) == 2
            assert all(isinstance(dim, int) for dim in grid_size)
148
+
149
+
150
@pytest.mark.asyncio
async def test_reproducibility():
    """Check that two tasksets built from the same seed contain the same tasks."""
    seed = 789

    async def build_taskset():
        # A fresh request dict is passed on every call.
        return await create_minigrid_taskset(
            num_tasks_per_difficulty={"easy": 3, "medium": 3}, seed=seed
        )

    # Generate two tasksets with the same seed, one after the other.
    taskset1 = await build_taskset()
    taskset2 = await build_taskset()

    # Both runs must yield the same number of tasks ...
    assert len(taskset1.instances) == len(taskset2.instances)

    # ... and pairwise identical environment, seed and difficulty.
    for first, second in zip(taskset1.instances, taskset2.instances):
        for field_name in ("env_name", "seed", "difficulty"):
            assert getattr(first.metadata, field_name) == getattr(second.metadata, field_name)
171
+
172
+
173
@pytest.mark.asyncio
async def test_empty_taskset():
    """Check that requesting no tasks yields no instances and empty splits."""
    taskset = await create_minigrid_taskset(num_tasks_per_difficulty={}, seed=42)

    assert len(taskset.instances) == 0

    # With no instances, both split id sets must equal the empty set.
    no_ids = set()
    assert taskset.split_info.val_instance_ids == no_ids
    assert taskset.split_info.test_instance_ids == no_ids
181
+
182
+
183
@pytest.mark.asyncio
async def test_task_instance_fields():
    """Check that the default task and its metadata expose every expected attribute."""
    task = DEFAULT_MINIGRID_TASK

    # Attributes expected on the task instance itself.
    task_fields = (
        "id",
        "impetus",
        "intent",
        "metadata",
        "is_reproducible",
        "initial_engine_snapshot",
    )
    for field_name in task_fields:
        assert hasattr(task, field_name)

    # Attributes expected on the task's metadata object.
    metadata = task.metadata
    metadata_fields = (
        "env_name",
        "grid_size",
        "difficulty",
        "has_key",
        "has_door",
        "has_lava",
        "num_objects",
        "optimal_path_length",
        "seed",
    )
    for field_name in metadata_fields:
        assert hasattr(metadata, field_name)
207
+
208
+
209
if __name__ == "__main__":
    # pytest.main() runs the session synchronously and returns an exit code,
    # not a coroutine; wrapping it in asyncio.run() raised ValueError once the
    # tests had finished. Propagate pytest's exit code to the shell instead.
    raise SystemExit(pytest.main([__file__, "-v"]))
@@ -0,0 +1,7 @@
1
+ """NetHack environment implementation for synth-env framework."""
2
+
3
+ __all__ = ["NetHackEngine", "NetHackEnvironment", "create_nethack_taskset"]
4
+
5
+ from .engine import NetHackEngine
6
+ from .environment import NetHackEnvironment
7
+ from .taskset import create_nethack_taskset
@@ -0,0 +1,337 @@
1
+ """NetHack achievements and milestones tracking."""
2
+
3
+ import json
4
+ import os
5
+ from typing import Dict, Any, Optional, Tuple
6
+ from dataclasses import dataclass, field
7
+
8
+
9
# Copy of Balrog's Progress class (documentation and an explicit file
# encoding were added; the scoring logic is unchanged).
class Progress:
    """Track dungeon and experience progression against an achievements table.

    The table is read from a JSON file whose top-level ``"3.4.3"`` entry holds
    a ``"dungeons"`` mapping and an ``"experience_levels"`` mapping, each from
    a name to a point value.
    """

    def __init__(self, achievements_path=None):
        """Load the achievements table and reset all progression to zero.

        Args:
            achievements_path: Path of the achievements JSON file. When
                ``None``, ``helpers/achievements.json`` next to this module
                is used.

        Raises:
            OSError: If the file cannot be opened.
            KeyError: If the JSON document has no ``"3.4.3"`` entry.
        """
        if achievements_path is None:
            achievements_path = os.path.join(
                os.path.dirname(__file__), "helpers", "achievements.json"
            )

        # Read as UTF-8 explicitly so decoding does not depend on the
        # platform's locale encoding.
        with open(achievements_path, "r", encoding="utf-8") as f:
            self.achievements = json.load(f)["3.4.3"]

        self.dungeon_progression = 0
        self.experience_progression = 0
        self.ascension = False

    def update(self, dungeon_name, experience_level):
        """Update progression based on current dungeon and experience level.

        Args:
            dungeon_name: Key looked up in the ``"dungeons"`` table, or the
                special value ``"ascension"``.
            experience_level: Value formatted into the ``"lvl<N>"`` key that
                is looked up in the ``"experience_levels"`` table.

        Returns:
            A list of human-readable strings, one per achievement unlocked by
            this call. The list is empty when nothing improved.
        """
        achievements_unlocked = []

        # Ascension short-circuits: it is reported once and the dungeon and
        # experience tables are not consulted for this call.
        if dungeon_name == "ascension":
            if not self.ascension:
                achievements_unlocked.append("ascension (100 points)")
                self.ascension = True
            return achievements_unlocked

        # Update dungeon progression (only ever moves upwards).
        if dungeon_name in self.achievements["dungeons"]:
            new_progression = self.achievements["dungeons"][dungeon_name]
            if new_progression > self.dungeon_progression:
                old_score = self.dungeon_progression
                self.dungeon_progression = new_progression
                achievements_unlocked.append(
                    f"dungeon {dungeon_name} ({old_score} -> {new_progression} points)"
                )

        # Update experience progression (only ever moves upwards).
        exp_key = f"lvl{experience_level}"
        if exp_key in self.achievements["experience_levels"]:
            new_progression = self.achievements["experience_levels"][exp_key]
            if new_progression > self.experience_progression:
                old_score = self.experience_progression
                self.experience_progression = new_progression
                achievements_unlocked.append(
                    f"experience {exp_key} ({old_score} -> {new_progression} points)"
                )

        return achievements_unlocked

    @property
    def percent(self):
        """Return the BALROG evaluation score (0-100).

        This is ``100.0`` once ascension has been recorded; otherwise it is
        the larger of the dungeon and experience progression values.
        """
        if self.ascension:
            return 100.0
        return max(self.dungeon_progression, self.experience_progression)
63
+
64
+
65
+ @dataclass
66
+ class NetHackAchievements:
67
+ """Track player achievements and milestones in NetHack."""
68
+
69
+ # Exploration achievements
70
+ depth_reached: int = 1
71
+ rooms_explored: int = 0
72
+ secret_doors_found: int = 0
73
+ stairs_down_found: int = 0
74
+ stairs_up_found: int = 0
75
+
76
+ # Combat achievements
77
+ monsters_killed: int = 0
78
+ peaceful_monsters_killed: int = 0
79
+ unique_monsters_killed: int = 0
80
+ kills_by_magic: int = 0
81
+ kills_by_melee: int = 0
82
+ kills_by_ranged: int = 0
83
+
84
+ # Item achievements
85
+ items_picked_up: int = 0
86
+ gold_collected: int = 0
87
+ scrolls_read: int = 0
88
+ potions_drunk: int = 0
89
+ spells_cast: int = 0
90
+ artifacts_found: int = 0
91
+
92
+ # Status achievements
93
+ max_level_reached: int = 1
94
+ max_hp_reached: int = 0
95
+ times_prayed: int = 0
96
+ successful_prayers: int = 0
97
+ times_polymorphed: int = 0
98
+
99
+ # Special achievements (boolean flags)
100
+ first_kill: bool = False
101
+ first_spell_cast: bool = False
102
+ first_prayer: bool = False
103
+ reached_minetown: bool = False
104
+ reached_mines_end: bool = False
105
+ reached_castle: bool = False
106
+ got_quest: bool = False
107
+ completed_quest: bool = False
108
+
109
+ # Survival achievements
110
+ turns_survived: int = 0
111
+ turns_without_damage: int = 0
112
+ traps_triggered: int = 0
113
+ traps_avoided: int = 0
114
+
115
+ # Negative achievements (for tracking mistakes)
116
+ times_died: int = 0
117
+ pets_killed: int = 0
118
+ shopkeepers_angered: int = 0
119
+
120
+ # Balrog progress tracker
121
+ balrog_progress: Progress = field(default_factory=Progress)
122
+
123
def to_dict(self) -> Dict[str, Any]:
    """Return a flat, report-friendly snapshot of selected achievement stats.

    Only a subset of the tracked fields is exported. The two stair counters
    are merged into one ``stairs_found`` total, some fields are exported
    under shorter names (``items_collected``, ``max_level``, ``max_hp``),
    and ``balrog_score`` is read from ``self.balrog_progress.percent``.
    """
    summary: Dict[str, Any] = {}

    # Exploration
    summary.update(
        {
            "depth_reached": self.depth_reached,
            "rooms_explored": self.rooms_explored,
            "secret_doors_found": self.secret_doors_found,
            "stairs_found": self.stairs_down_found + self.stairs_up_found,
        }
    )

    # Combat
    summary.update(
        {
            "monsters_killed": self.monsters_killed,
            "unique_monsters_killed": self.unique_monsters_killed,
            "kills_by_magic": self.kills_by_magic,
        }
    )

    # Items
    summary.update(
        {
            "items_collected": self.items_picked_up,
            "gold_collected": self.gold_collected,
            "artifacts_found": self.artifacts_found,
        }
    )

    # Status
    summary.update(
        {
            "max_level": self.max_level_reached,
            "max_hp": self.max_hp_reached,
            "successful_prayers": self.successful_prayers,
        }
    )

    # Special flags (kept as booleans)
    summary.update(
        {
            "first_kill": self.first_kill,
            "first_spell_cast": self.first_spell_cast,
            "reached_minetown": self.reached_minetown,
            "got_quest": self.got_quest,
        }
    )

    # Survival
    summary.update(
        {
            "turns_survived": self.turns_survived,
            "traps_avoided": self.traps_avoided,
        }
    )

    # Balrog score
    summary["balrog_score"] = self.balrog_progress.percent

    return summary
154
+
155
def get_unlocked_achievements(self) -> Dict[str, bool]:
    """Report, for every known milestone, whether it is currently satisfied.

    Every entry is recomputed from the tracker's current counters and flags
    on each call; nothing is cached between calls.
    """
    # Read each counter once; the thresholds below compare against these.
    depth = self.depth_reached
    kills = self.monsters_killed
    gold = self.gold_collected
    level = self.max_level_reached
    turns = self.turns_survived
    rooms = self.rooms_explored

    milestones: Dict[str, bool] = {
        # Depth milestones
        "reached_dlvl_2": depth >= 2,
        "reached_dlvl_5": depth >= 5,
        "reached_dlvl_10": depth >= 10,
        "reached_dlvl_20": depth >= 20,
        # Kill milestones
        "first_kill": self.first_kill,
        "killed_10_monsters": kills >= 10,
        "killed_50_monsters": kills >= 50,
        "killed_100_monsters": kills >= 100,
        "killed_by_magic": self.kills_by_magic > 0,
        # Item milestones
        "collected_100_gold": gold >= 100,
        "collected_1000_gold": gold >= 1000,
        "collected_10000_gold": gold >= 10000,
        "found_artifact": self.artifacts_found > 0,
        # Level milestones
        "reached_level_5": level >= 5,
        "reached_level_10": level >= 10,
        "reached_level_20": level >= 20,
        # Special locations
        "reached_minetown": self.reached_minetown,
        "reached_mines_end": self.reached_mines_end,
        "reached_castle": self.reached_castle,
        # Quest milestones
        "got_quest": self.got_quest,
        "completed_quest": self.completed_quest,
        # Survival milestones
        "survived_100_turns": turns >= 100,
        "survived_1000_turns": turns >= 1000,
        "survived_10000_turns": turns >= 10000,
        # Prayer milestones
        "first_prayer": self.first_prayer,
        "successful_prayer": self.successful_prayers > 0,
        # Exploration milestones
        "found_secret_door": self.secret_doors_found > 0,
        "explored_10_rooms": rooms >= 10,
        "explored_50_rooms": rooms >= 50,
    }
    return milestones
197
+
198
def update_from_observation(
    self, obs: Dict[str, Any], prev_obs: Optional[Dict[str, Any]] = None
) -> Dict[str, bool]:
    """Update achievements from an observation and return newly unlocked ones.

    Args:
        obs: Current observation. Must contain ``"player_stats"`` with the
            keys ``"depth"``, ``"experience_level"``, ``"max_hp"`` and
            ``"gold"``; ``"turn"`` is used when present. An optional
            ``"message"`` (``str`` or ``bytes``) is scanned for kill text.
        prev_obs: Previous observation. Kill detection is skipped when this
            is missing or empty.

    Returns:
        Mapping of achievement name to ``True`` for every milestone that
        became unlocked during this call, plus one ``balrog_<name>`` entry
        for each item returned by ``self.balrog_progress.update``.

    Raises:
        KeyError: If ``"player_stats"`` or one of its required keys is
            missing.
    """
    newly_unlocked: Dict[str, bool] = {}
    # Snapshot taken before any mutation so the diff at the end is accurate.
    old_unlocked = self.get_unlocked_achievements()

    # Update basic stats from player_stats - require it to exist
    stats = obs["player_stats"]

    # Update depth
    current_depth = stats["depth"]
    if current_depth > self.depth_reached:
        self.depth_reached = current_depth

    # Update level
    current_level = stats["experience_level"]
    if current_level > self.max_level_reached:
        self.max_level_reached = current_level

    # Update HP
    current_hp = stats["max_hp"]
    if current_hp > self.max_hp_reached:
        self.max_hp_reached = current_hp

    # Update gold. Keep the high-water mark, like depth/level/HP above:
    # overwriting with the current amount would let a gold milestone lapse
    # after spending and then be reported as "newly unlocked" a second time.
    current_gold = stats["gold"]
    if current_gold > self.gold_collected:
        self.gold_collected = current_gold

    # Update turn count (if available)
    if "turn" in stats:
        self.turns_survived = stats["turn"]

    # Update Balrog progress
    # Map depth to dungeon name (simplified version)
    dungeon_name = self._get_dungeon_name(current_depth)
    balrog_achievements = self.balrog_progress.update(dungeon_name, current_level)

    # Track balrog achievements as newly unlocked
    for balrog_achievement in balrog_achievements:
        newly_unlocked[f"balrog_{balrog_achievement}"] = True

    # Check for kills by scanning the game message for kill text
    if prev_obs and "message" in obs:
        message = obs["message"]
        if isinstance(message, bytes):
            # Drop non-ASCII bytes and strip NUL padding from both ends.
            message = message.decode("ascii", errors="ignore").strip("\x00")
        if "You kill" in message or "dies!" in message:
            self.monsters_killed += 1
            self.first_kill = True

            # Check kill type
            if "magic missile" in message or "spell" in message:
                self.kills_by_magic += 1

    # Check for new achievements: anything true now that was not true before
    for achievement, unlocked in self.get_unlocked_achievements().items():
        if unlocked and not old_unlocked.get(achievement, False):
            newly_unlocked[achievement] = True

    return newly_unlocked
260
+
261
def _get_dungeon_name(self, depth: int) -> str:
    """Translate a numeric depth into the dungeon label used for Balrog progress.

    Simplified lookup based on depth alone; a full implementation would
    need more game state. Depths from 10 up to 29 all map to ``"dlvl10"``.
    """
    # Deepest tier first: the first threshold the depth meets wins.
    tiers = (
        (50, "dlvl50"),
        (40, "dlvl40"),
        (30, "dlvl30"),
        (10, "dlvl10"),
        (5, "dlvl5"),
    )
    for threshold, label in tiers:
        if depth >= threshold:
            return label
    return "dlvl1"
276
+
277
+
278
def calculate_balrog_reward(
    obs: Dict[str, Any], prev_obs: Optional[Dict[str, Any]] = None
) -> float:
    """
    Compute a shaped per-step reward from the change between two observations.

    This uses simple stat deltas, kept for compatibility with existing code.
    Only increases are rewarded:

    - score: delta / 100
    - gold: delta / 1000
    - experience points: delta / 100
    - depth: delta * 10
    - experience level: delta * 5

    When ``obs["done"]`` is truthy and either the message contains "died"
    (case-insensitive) or HP is at or below zero, 100 is subtracted. When
    ``player_stats`` has a ``"hunger"`` value above 500, 0.1 is subtracted.

    Returns 0.0 when ``prev_obs`` is missing or empty.
    """
    if not prev_obs:
        return 0.0

    # Get player stats - require them to exist
    current = obs["player_stats"]
    previous = prev_obs["player_stats"]

    # (stat key, conversion applied to a strictly positive delta), in the
    # order the contributions are accumulated.
    shaping = (
        ("score", lambda delta: delta / 100.0),  # Scale down large score changes
        ("gold", lambda delta: delta / 1000.0),  # Small reward for gold
        ("experience_points", lambda delta: delta / 100.0),
        ("depth", lambda delta: delta * 10.0),  # Big reward for going deeper
        ("experience_level", lambda delta: delta * 5.0),  # Level up bonus
    )

    total = 0.0
    for key, to_reward in shaping:
        change = current[key] - previous[key]
        if change > 0:
            total += to_reward(change)

    # Death penalty
    if obs.get("done"):
        text = obs.get("message", b"")
        if isinstance(text, bytes):
            text = text.decode("ascii", errors="ignore")
        if "died" in text.lower() or current["hp"] <= 0:
            total -= 100.0  # Large death penalty

    # Hunger penalty (if very hungry)
    if "hunger" in current and current["hunger"] > 500:  # Weak or worse
        total -= 0.1

    return total