synth-ai 0.2.0__py3-none-any.whl → 0.2.1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (266)
  1. synth_ai/__init__.py +28 -2
  2. synth_ai/core/system.py +4 -0
  3. synth_ai/environments/__init__.py +35 -0
  4. synth_ai/environments/environment/__init__.py +1 -0
  5. synth_ai/environments/environment/artifacts/__init__.py +1 -0
  6. synth_ai/environments/environment/artifacts/base.py +50 -0
  7. synth_ai/environments/environment/core.py +22 -0
  8. synth_ai/environments/environment/db/__init__.py +1 -0
  9. synth_ai/environments/environment/db/sqlite.py +45 -0
  10. synth_ai/environments/environment/registry.py +24 -0
  11. synth_ai/environments/environment/resources/sqlite.py +46 -0
  12. synth_ai/environments/environment/results.py +1 -0
  13. synth_ai/environments/environment/rewards/__init__.py +1 -0
  14. synth_ai/environments/environment/rewards/core.py +28 -0
  15. synth_ai/environments/environment/shared_engine.py +26 -0
  16. synth_ai/environments/environment/tools/__init__.py +34 -0
  17. synth_ai/environments/examples/__init__.py +1 -0
  18. synth_ai/environments/examples/crafter_classic/__init__.py +8 -0
  19. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +58 -0
  20. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +152 -0
  21. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +1194 -0
  22. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +51 -0
  23. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +872 -0
  24. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1412 -0
  25. synth_ai/environments/examples/crafter_classic/agent_demos/test_crafter_react_agent.py +1110 -0
  26. synth_ai/environments/examples/crafter_classic/config_logging.py +111 -0
  27. synth_ai/environments/examples/crafter_classic/engine.py +502 -0
  28. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +63 -0
  29. synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +5 -0
  30. synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +74 -0
  31. synth_ai/environments/examples/crafter_classic/environment.py +255 -0
  32. synth_ai/environments/examples/crafter_classic/taskset.py +228 -0
  33. synth_ai/environments/examples/enron/agent_demos/test_synth_react.py +535 -0
  34. synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +156 -0
  35. synth_ai/environments/examples/enron/art_helpers/local_email_db.py +280 -0
  36. synth_ai/environments/examples/enron/art_helpers/types_enron.py +24 -0
  37. synth_ai/environments/examples/enron/engine.py +291 -0
  38. synth_ai/environments/examples/enron/environment.py +165 -0
  39. synth_ai/environments/examples/enron/taskset.py +112 -0
  40. synth_ai/environments/examples/enron/units/keyword_stats.py +111 -0
  41. synth_ai/environments/examples/enron/units/test_email_index.py +8 -0
  42. synth_ai/environments/examples/minigrid/__init__.py +48 -0
  43. synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +1188 -0
  44. synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +47 -0
  45. synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +562 -0
  46. synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +220 -0
  47. synth_ai/environments/examples/minigrid/agent_demos/test_minigrid_react_agent.py +393 -0
  48. synth_ai/environments/examples/minigrid/engine.py +589 -0
  49. synth_ai/environments/examples/minigrid/environment.py +274 -0
  50. synth_ai/environments/examples/minigrid/environment_mapping.py +242 -0
  51. synth_ai/environments/examples/minigrid/puzzle_loader.py +416 -0
  52. synth_ai/environments/examples/minigrid/taskset.py +583 -0
  53. synth_ai/environments/examples/minigrid/units/test_action_behavior.py +226 -0
  54. synth_ai/environments/examples/minigrid/units/test_debug_messages.py +83 -0
  55. synth_ai/environments/examples/minigrid/units/test_exploration.py +120 -0
  56. synth_ai/environments/examples/minigrid/units/test_minigrid_engine.py +214 -0
  57. synth_ai/environments/examples/minigrid/units/test_minigrid_environment.py +238 -0
  58. synth_ai/environments/examples/minigrid/units/test_minigrid_environment_mapping.py +301 -0
  59. synth_ai/environments/examples/minigrid/units/test_minigrid_taskset.py +210 -0
  60. synth_ai/environments/examples/nethack/__init__.py +7 -0
  61. synth_ai/environments/examples/nethack/achievements.py +337 -0
  62. synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +981 -0
  63. synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +74 -0
  64. synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +832 -0
  65. synth_ai/environments/examples/nethack/agent_demos/test_nethack_react_agent.py +1112 -0
  66. synth_ai/environments/examples/nethack/engine.py +738 -0
  67. synth_ai/environments/examples/nethack/environment.py +255 -0
  68. synth_ai/environments/examples/nethack/helpers/__init__.py +42 -0
  69. synth_ai/environments/examples/nethack/helpers/action_mapping.py +301 -0
  70. synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +401 -0
  71. synth_ai/environments/examples/nethack/helpers/observation_utils.py +433 -0
  72. synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +201 -0
  73. synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +268 -0
  74. synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +308 -0
  75. synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +430 -0
  76. synth_ai/environments/examples/nethack/taskset.py +323 -0
  77. synth_ai/environments/examples/nethack/units/test_nethack_engine.py +277 -0
  78. synth_ai/environments/examples/nethack/units/test_nethack_environment.py +281 -0
  79. synth_ai/environments/examples/nethack/units/test_nethack_taskset.py +213 -0
  80. synth_ai/environments/examples/nethack/units/test_recording.py +307 -0
  81. synth_ai/environments/examples/red/__init__.py +7 -0
  82. synth_ai/environments/examples/red/agent_demos/__init__.py +1 -0
  83. synth_ai/environments/examples/red/agent_demos/test_synth_react.py +1471 -0
  84. synth_ai/environments/examples/red/config_logging.py +110 -0
  85. synth_ai/environments/examples/red/engine.py +693 -0
  86. synth_ai/environments/examples/red/engine_helpers/__init__.py +1 -0
  87. synth_ai/environments/examples/red/engine_helpers/memory_map.py +28 -0
  88. synth_ai/environments/examples/red/engine_helpers/reward_components.py +275 -0
  89. synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +142 -0
  90. synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +56 -0
  91. synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +283 -0
  92. synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +149 -0
  93. synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +137 -0
  94. synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +56 -0
  95. synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +330 -0
  96. synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +120 -0
  97. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +558 -0
  98. synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +312 -0
  99. synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +147 -0
  100. synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +246 -0
  101. synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +367 -0
  102. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +139 -0
  103. synth_ai/environments/examples/red/environment.py +235 -0
  104. synth_ai/environments/examples/red/taskset.py +77 -0
  105. synth_ai/environments/examples/red/test_fixes.py +125 -0
  106. synth_ai/environments/examples/red/test_fixes_mock.py +148 -0
  107. synth_ai/environments/examples/red/units/__init__.py +1 -0
  108. synth_ai/environments/examples/red/units/test_basic_functionality.py +97 -0
  109. synth_ai/environments/examples/red/units/test_button_press_requirements.py +217 -0
  110. synth_ai/environments/examples/red/units/test_engine.py +192 -0
  111. synth_ai/environments/examples/red/units/test_environment.py +455 -0
  112. synth_ai/environments/examples/red/units/test_exploration_strategy.py +227 -0
  113. synth_ai/environments/examples/red/units/test_integration.py +217 -0
  114. synth_ai/environments/examples/red/units/test_memory_extraction.py +111 -0
  115. synth_ai/environments/examples/red/units/test_menu_bug_reproduction.py +1100 -0
  116. synth_ai/environments/examples/red/units/test_movement_debug.py +255 -0
  117. synth_ai/environments/examples/red/units/test_pokemon_mcts_debug.py +163 -0
  118. synth_ai/environments/examples/red/units/test_pokemon_mcts_verbose.py +117 -0
  119. synth_ai/environments/examples/red/units/test_red_basic.py +145 -0
  120. synth_ai/environments/examples/red/units/test_red_comprehensive.py +323 -0
  121. synth_ai/environments/examples/red/units/test_retry_movement.py +195 -0
  122. synth_ai/environments/examples/red/units/test_reward_components.py +186 -0
  123. synth_ai/environments/examples/red/units/test_rom_integration.py +260 -0
  124. synth_ai/environments/examples/red/units/test_taskset.py +116 -0
  125. synth_ai/environments/examples/red/units/test_tree.py +448 -0
  126. synth_ai/environments/examples/sokoban/__init__.py +1 -0
  127. synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +900 -0
  128. synth_ai/environments/examples/sokoban/agent_demos/test_dspy_react.py +1 -0
  129. synth_ai/environments/examples/sokoban/agent_demos/test_sokoban_react_agent.py +498 -0
  130. synth_ai/environments/examples/sokoban/agent_demos/test_synth_lats.py +1 -0
  131. synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_locally.py +748 -0
  132. synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_service.py +296 -0
  133. synth_ai/environments/examples/sokoban/engine.py +675 -0
  134. synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +1 -0
  135. synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +656 -0
  136. synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +17 -0
  137. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +3 -0
  138. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +129 -0
  139. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +370 -0
  140. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +331 -0
  141. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +305 -0
  142. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +66 -0
  143. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +114 -0
  144. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +122 -0
  145. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +394 -0
  146. synth_ai/environments/examples/sokoban/environment.py +228 -0
  147. synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +438 -0
  148. synth_ai/environments/examples/sokoban/puzzle_loader.py +311 -0
  149. synth_ai/environments/examples/sokoban/taskset.py +425 -0
  150. synth_ai/environments/examples/sokoban/units/astar_common.py +94 -0
  151. synth_ai/environments/examples/sokoban/units/test_building_task_set.py +49 -0
  152. synth_ai/environments/examples/sokoban/units/test_false_positive.py +120 -0
  153. synth_ai/environments/examples/sokoban/units/test_simple_run_through_environment.py +119 -0
  154. synth_ai/environments/examples/sokoban/units/test_sokoban_environment.py +98 -0
  155. synth_ai/environments/examples/sokoban/units/test_tree.py +364 -0
  156. synth_ai/environments/examples/tictactoe/__init__.py +1 -0
  157. synth_ai/environments/examples/tictactoe/agent_demos/test_synth_react.py +266 -0
  158. synth_ai/environments/examples/tictactoe/agent_demos/test_tictactoe_react_agent.py +470 -0
  159. synth_ai/environments/examples/tictactoe/engine.py +368 -0
  160. synth_ai/environments/examples/tictactoe/environment.py +239 -0
  161. synth_ai/environments/examples/tictactoe/taskset.py +214 -0
  162. synth_ai/environments/examples/tictactoe/units/test_tictactoe_engine.py +393 -0
  163. synth_ai/environments/examples/tictactoe/units/test_tictactoe_environment.py +493 -0
  164. synth_ai/environments/examples/tictactoe/units/test_tictactoe_taskset.py +191 -0
  165. synth_ai/environments/examples/verilog/__init__.py +10 -0
  166. synth_ai/environments/examples/verilog/agent_demos/test_synth_react.py +520 -0
  167. synth_ai/environments/examples/verilog/engine.py +328 -0
  168. synth_ai/environments/examples/verilog/environment.py +349 -0
  169. synth_ai/environments/examples/verilog/taskset.py +418 -0
  170. synth_ai/environments/examples/verilog/units/test_verilog_engine.py +466 -0
  171. synth_ai/environments/examples/verilog/units/test_verilog_environment.py +585 -0
  172. synth_ai/environments/examples/verilog/units/test_verilog_integration.py +383 -0
  173. synth_ai/environments/examples/verilog/units/test_verilog_taskset.py +457 -0
  174. synth_ai/environments/reproducibility/core.py +42 -0
  175. synth_ai/environments/reproducibility/tree.py +364 -0
  176. synth_ai/environments/service/app.py +78 -0
  177. synth_ai/environments/service/core_routes.py +775 -0
  178. synth_ai/environments/service/external_registry.py +57 -0
  179. synth_ai/environments/service/registry.py +9 -0
  180. synth_ai/environments/stateful/__init__.py +1 -0
  181. synth_ai/environments/stateful/core.py +28 -0
  182. synth_ai/environments/stateful/engine.py +21 -0
  183. synth_ai/environments/stateful/state.py +7 -0
  184. synth_ai/environments/tasks/api.py +19 -0
  185. synth_ai/environments/tasks/core.py +78 -0
  186. synth_ai/environments/tasks/filters.py +39 -0
  187. synth_ai/environments/tasks/utils.py +89 -0
  188. synth_ai/environments/v0_observability/history.py +3 -0
  189. synth_ai/environments/v0_observability/log.py +2 -0
  190. synth_ai/lm/caching/constants.py +1 -0
  191. synth_ai/{zyk/lms → lm}/caching/ephemeral.py +4 -8
  192. synth_ai/{zyk/lms → lm}/caching/handler.py +15 -15
  193. synth_ai/{zyk/lms → lm}/caching/initialize.py +2 -4
  194. synth_ai/{zyk/lms → lm}/caching/persistent.py +4 -10
  195. synth_ai/{zyk/lms → lm}/config.py +2 -1
  196. synth_ai/{zyk/lms → lm}/constants.py +2 -2
  197. synth_ai/{zyk/lms → lm}/core/all.py +10 -10
  198. synth_ai/{zyk/lms → lm}/core/main.py +57 -33
  199. synth_ai/{zyk/lms → lm}/core/vendor_clients.py +12 -10
  200. synth_ai/lm/cost/monitor.py +1 -0
  201. synth_ai/lm/cost/statefulness.py +1 -0
  202. synth_ai/lm/provider_support/__init__.py +8 -0
  203. synth_ai/lm/provider_support/anthropic.py +945 -0
  204. synth_ai/lm/provider_support/openai.py +1115 -0
  205. synth_ai/lm/provider_support/suppress_logging.py +31 -0
  206. synth_ai/{zyk/lms → lm}/structured_outputs/handler.py +58 -80
  207. synth_ai/{zyk/lms → lm}/structured_outputs/inject.py +6 -20
  208. synth_ai/{zyk/lms → lm}/structured_outputs/rehabilitate.py +6 -12
  209. synth_ai/{zyk/lms → lm}/vendors/core/anthropic_api.py +21 -30
  210. synth_ai/{zyk/lms → lm}/vendors/core/gemini_api.py +35 -32
  211. synth_ai/{zyk/lms → lm}/vendors/core/mistral_api.py +19 -28
  212. synth_ai/{zyk/lms → lm}/vendors/core/openai_api.py +26 -36
  213. synth_ai/{zyk/lms → lm}/vendors/openai_standard.py +29 -33
  214. synth_ai/{zyk/lms → lm}/vendors/retries.py +1 -1
  215. synth_ai/lm/vendors/supported/__init__.py +0 -0
  216. synth_ai/{zyk/lms → lm}/vendors/supported/custom_endpoint.py +131 -118
  217. synth_ai/{zyk/lms → lm}/vendors/supported/deepseek.py +4 -8
  218. synth_ai/{zyk/lms → lm}/vendors/supported/grok.py +6 -8
  219. synth_ai/{zyk/lms → lm}/vendors/supported/groq.py +1 -1
  220. synth_ai/{zyk/lms → lm}/vendors/supported/ollama.py +2 -2
  221. synth_ai/{zyk/lms → lm}/vendors/supported/openrouter.py +18 -16
  222. synth_ai/{zyk/lms → lm}/vendors/supported/together.py +1 -1
  223. synth_ai/tracing/__init__.py +0 -0
  224. synth_ai/tracing/abstractions.py +224 -0
  225. synth_ai/tracing/base_client.py +91 -0
  226. synth_ai/tracing/client_manager.py +131 -0
  227. synth_ai/tracing/config.py +140 -0
  228. synth_ai/tracing/context.py +146 -0
  229. synth_ai/tracing/decorators.py +679 -0
  230. synth_ai/tracing/events/__init__.py +0 -0
  231. synth_ai/tracing/events/manage.py +147 -0
  232. synth_ai/tracing/events/scope.py +86 -0
  233. synth_ai/tracing/events/store.py +227 -0
  234. synth_ai/tracing/immediate_client.py +152 -0
  235. synth_ai/tracing/local.py +18 -0
  236. synth_ai/tracing/log_client_base.py +74 -0
  237. synth_ai/tracing/retry_queue.py +187 -0
  238. synth_ai/tracing/trackers.py +515 -0
  239. synth_ai/tracing/upload.py +504 -0
  240. synth_ai/tracing/utils.py +9 -0
  241. synth_ai/zyk/__init__.py +28 -2
  242. synth_ai-0.2.1.dev0.dist-info/METADATA +349 -0
  243. synth_ai-0.2.1.dev0.dist-info/RECORD +261 -0
  244. {synth_ai-0.2.0.dist-info → synth_ai-0.2.1.dev0.dist-info}/WHEEL +1 -1
  245. synth_ai/zyk/lms/caching/constants.py +0 -1
  246. synth_ai/zyk/lms/cost/monitor.py +0 -1
  247. synth_ai/zyk/lms/cost/statefulness.py +0 -1
  248. synth_ai-0.2.0.dist-info/METADATA +0 -36
  249. synth_ai-0.2.0.dist-info/RECORD +0 -50
  250. /synth_ai/{zyk/lms/__init__.py → environments/reproducibility/helpers.py} +0 -0
  251. /synth_ai/{zyk/lms/caching → lm}/__init__.py +0 -0
  252. /synth_ai/{zyk/lms/core → lm/caching}/__init__.py +0 -0
  253. /synth_ai/{zyk/lms → lm}/caching/dbs.py +0 -0
  254. /synth_ai/{zyk/lms/cost → lm/core}/__init__.py +0 -0
  255. /synth_ai/{zyk/lms → lm}/core/exceptions.py +0 -0
  256. /synth_ai/{zyk/lms/structured_outputs → lm/cost}/__init__.py +0 -0
  257. /synth_ai/{zyk/lms/vendors → lm/structured_outputs}/__init__.py +0 -0
  258. /synth_ai/{zyk/lms → lm}/tools/__init__.py +0 -0
  259. /synth_ai/{zyk/lms → lm}/tools/base.py +0 -0
  260. /synth_ai/{zyk/lms/vendors/core → lm/vendors}/__init__.py +0 -0
  261. /synth_ai/{zyk/lms → lm}/vendors/base.py +0 -0
  262. /synth_ai/{zyk/lms/vendors/local → lm/vendors/core}/__init__.py +0 -0
  263. /synth_ai/{zyk/lms/vendors/supported → lm/vendors/local}/__init__.py +0 -0
  264. /synth_ai/{zyk/lms → lm}/vendors/local/ollama.py +0 -0
  265. {synth_ai-0.2.0.dist-info → synth_ai-0.2.1.dev0.dist-info/licenses}/LICENSE +0 -0
  266. {synth_ai-0.2.0.dist-info → synth_ai-0.2.1.dev0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,585 @@
1
+ import pytest
2
+ import tempfile
3
+ import shutil
4
+ from pathlib import Path
5
+ from unittest.mock import patch, MagicMock
6
+ from uuid import uuid4
7
+
8
+ # Add timeout to all async tests
9
+ pytestmark = pytest.mark.timeout(15)
10
+
11
+ from synth_ai.environments.examples.verilog.environment import (
12
+ VerilogEnvironment,
13
+ VerilogWriteFileTool,
14
+ VerilogCompileTool,
15
+ VerilogSimulateTool,
16
+ VerilogSubmitTool,
17
+ VerilogObservationCallable,
18
+ WriteFileInput,
19
+ CompileInput,
20
+ SimulateInput,
21
+ SubmitInput,
22
+ )
23
+ from synth_ai.environments.examples.verilog.engine import (
24
+ VerilogEngine,
25
+ VerilogPublicState,
26
+ VerilogPrivateState,
27
+ )
28
+ from synth_ai.environments.examples.verilog.taskset import (
29
+ VerilogTaskInstance,
30
+ VerilogTaskInstanceMetadata,
31
+ )
32
+ from synth_ai.environments.environment.tools import EnvToolCall, ToolResult
33
+ from synth_ai.environments.tasks.core import Impetus, Intent
34
+
35
+
36
@pytest.fixture
def mock_task_instance():
    """Yield a ``VerilogTaskInstance`` backed by a throwaway directory tree.

    A temporary directory is created with ``pristine`` and ``snapshot``
    sub-directories.  ``pristine`` is seeded with three small Verilog files
    (``TopModule.v``, ``RefModule.v`` and ``test_tb.v``) and the task instance
    is pointed at both directories.

    The temporary tree is removed on teardown.  The removal lives in a
    ``finally`` clause so the directory is also deleted when building the
    task instance raises before the ``yield`` is reached.
    """
    temp_dir = tempfile.mkdtemp(prefix="test_verilog_env_")
    try:
        pristine_dir = Path(temp_dir) / "pristine"
        snapshot_dir = Path(temp_dir) / "snapshot"

        pristine_dir.mkdir(parents=True)
        snapshot_dir.mkdir(parents=True)

        # Create test files
        (pristine_dir / "TopModule.v").write_text("""module TopModule();
// TODO: Implement module
endmodule""")

        (pristine_dir / "RefModule.v").write_text("""module RefModule(
output zero
);
assign zero = 1'b0;
endmodule""")

        (pristine_dir / "test_tb.v").write_text("""`timescale 1ns/1ps
module test_tb;
wire zero;
TopModule dut(.zero(zero));
RefModule ref(.zero(zero_ref));

initial begin
#10;
if (zero !== zero_ref) $fatal(1, "Test failed");
$display("Mismatches: 0 in 10 samples");
$finish;
end
endmodule""")

        metadata = VerilogTaskInstanceMetadata(
            problem_name="test_problem",
            difficulty="easy",
            description="Test problem",
            files_provided=["TopModule.v", "RefModule.v", "test_tb.v"],
        )

        task = VerilogTaskInstance(
            id=uuid4(),
            impetus=Impetus(instructions="Test task instructions"),
            intent=Intent(rubric="Test goal", gold_trajectories=None, gold_state_diff={}),
            metadata=metadata,
            is_reproducible=True,
            initial_engine_snapshot=None,
            pristine_dir=str(pristine_dir),
            snapshot_dir=str(snapshot_dir),
        )

        yield task
    finally:
        # Best-effort cleanup: ignore_errors keeps teardown from masking a
        # test failure if part of the tree is already gone.
        shutil.rmtree(temp_dir, ignore_errors=True)
93
+
94
+
95
@pytest.fixture
def verilog_env(mock_task_instance):
    """Build the ``VerilogEnvironment`` under test from the mock task instance."""
    environment = VerilogEnvironment(mock_task_instance)
    return environment
99
+
100
+
101
class TestVerilogEnvironment:
    """Test suite for VerilogEnvironment class.

    Covers construction, ``initialize``/``terminate``/``checkpoint``,
    ``validate_tool_calls`` for each accepted input shape, and ``step`` for
    every tool.  Tests that compile or simulate replace ``subprocess.run`` as
    reached through the engine module, so no external process is started.
    """

    # ``patch`` has to import its target, so this must be the module path the
    # engine is actually imported from in this file
    # (``synth_ai.environments.examples.verilog.engine``).
    SUBPROCESS_RUN_TARGET = "synth_ai.environments.examples.verilog.engine.subprocess.run"

    @pytest.mark.asyncio
    async def test_environment_initialization(self, verilog_env):
        """Test environment initialization."""
        assert verilog_env.name == "VerilogEval"
        assert verilog_env.task_instance is not None
        assert isinstance(verilog_env.engine, VerilogEngine)
        assert len(verilog_env._tools_instances) == 4
        for tool_name in ("write_file", "compile", "simulate", "submit"):
            assert tool_name in verilog_env._tools_instances

    @pytest.mark.asyncio
    async def test_environment_initialize(self, verilog_env):
        """Test environment initialization method."""
        obs = await verilog_env.initialize()

        assert isinstance(obs, dict)
        expected_keys = (
            "files",
            "build_dir",
            "files_summary",
            "task_completed",
            "reward_last",
            "total_reward",
            "terminated",
            "compile_status",
            "simulate_status",
        )
        for key in expected_keys:
            assert key in obs

        assert len(obs["files"]) >= 3  # TopModule.v, RefModule.v, test_tb.v
        assert obs["task_completed"] is False
        assert obs["terminated"] is False
        assert obs["reward_last"] == 0.0
        assert obs["total_reward"] == 0.0

    @pytest.mark.asyncio
    async def test_environment_terminate(self, verilog_env):
        """Test environment termination."""
        await verilog_env.initialize()
        obs = await verilog_env.terminate()

        assert obs["terminated"] is True
        assert "message" in obs
        assert obs["message"] == "Environment terminated."

    def test_validate_tool_calls_dict(self, verilog_env):
        """Test tool call validation with dictionary input."""
        tool_call_dict = {
            "tool": "write_file",
            "args": {"path": "test.v", "content": "module test(); endmodule"},
        }

        validated = verilog_env.validate_tool_calls(tool_call_dict)

        assert isinstance(validated, EnvToolCall)
        assert validated.tool == "write_file"
        assert validated.args["path"] == "test.v"

    def test_validate_tool_calls_list(self, verilog_env):
        """Test tool call validation with list input."""
        tool_call_list = [{"tool": "compile", "args": {"sources": ["test.v"]}}]

        validated = verilog_env.validate_tool_calls(tool_call_list)

        assert isinstance(validated, EnvToolCall)
        assert validated.tool == "compile"
        assert validated.args["sources"] == ["test.v"]

    def test_validate_tool_calls_env_tool_call(self, verilog_env):
        """Test tool call validation with EnvToolCall input."""
        original_call = EnvToolCall(tool="simulate", args={})

        validated = verilog_env.validate_tool_calls(original_call)

        # An already-built EnvToolCall is expected back as the same object.
        assert validated is original_call

    def test_validate_tool_calls_invalid_tool(self, verilog_env):
        """Test tool call validation with invalid tool name."""
        tool_call_dict = {"tool": "invalid_tool", "args": {}}

        with pytest.raises(ValueError, match="Unknown tool: invalid_tool"):
            verilog_env.validate_tool_calls(tool_call_dict)

    def test_validate_tool_calls_empty_list(self, verilog_env):
        """Test tool call validation with empty list."""
        with pytest.raises(ValueError, match="Received empty list"):
            verilog_env.validate_tool_calls([])

    @pytest.mark.asyncio
    async def test_step_write_file(self, verilog_env):
        """Test environment step with write_file tool."""
        await verilog_env.initialize()

        tool_call = EnvToolCall(
            tool="write_file",
            args={"path": "test.v", "content": "module test(); endmodule"},
        )

        obs = await verilog_env.step(tool_call)

        assert "test.v" in obs["files"]
        assert "module test();" in obs["files"]["test.v"]
        assert obs["reward_last"] < 0  # Step penalty

    @pytest.mark.asyncio
    @patch(SUBPROCESS_RUN_TARGET)
    async def test_step_compile_success(self, mock_run, verilog_env):
        """Test environment step with successful compilation."""
        await verilog_env.initialize()

        # Mock successful compilation
        mock_run.return_value = MagicMock(returncode=0, stdout="", stderr="")

        tool_call = EnvToolCall(tool="compile", args={})
        obs = await verilog_env.step(tool_call)

        assert "Last compile: Success" in obs["compile_status"]
        assert obs["reward_last"] > 0  # Compile success reward minus step penalty

    @pytest.mark.asyncio
    @patch(SUBPROCESS_RUN_TARGET)
    async def test_step_compile_failure(self, mock_run, verilog_env):
        """Test environment step with compilation failure."""
        await verilog_env.initialize()

        # Mock failed compilation
        mock_run.return_value = MagicMock(
            returncode=1, stdout="", stderr="Error: syntax error"
        )

        tool_call = EnvToolCall(tool="compile", args={})
        obs = await verilog_env.step(tool_call)

        assert "Last compile: Failed" in obs["compile_status"]
        assert obs["reward_last"] < 0  # Only step penalty

    @pytest.mark.asyncio
    @patch(SUBPROCESS_RUN_TARGET)
    async def test_step_simulate_success(self, mock_run, verilog_env):
        """Test environment step with successful simulation."""
        await verilog_env.initialize()

        # Mock successful simulation
        mock_run.return_value = MagicMock(
            returncode=0, stdout="Mismatches: 0 in 10 samples", stderr=""
        )

        tool_call = EnvToolCall(tool="simulate", args={})
        obs = await verilog_env.step(tool_call)

        assert "Last simulation: Passed" in obs["simulate_status"]
        assert obs["task_completed"] is True
        assert obs["terminated"] is True
        assert obs["reward_last"] > 0.5  # Large simulation success reward

    @pytest.mark.asyncio
    async def test_step_submit(self, verilog_env):
        """Test environment step with submit tool."""
        await verilog_env.initialize()

        tool_call = EnvToolCall(tool="submit", args={})
        obs = await verilog_env.step(tool_call)

        assert obs["terminated"] is True

    @pytest.mark.asyncio
    async def test_checkpoint(self, verilog_env):
        """Test environment checkpoint functionality."""
        await verilog_env.initialize()

        obs = await verilog_env.checkpoint()

        assert "engine_snapshot_data" in obs
        assert isinstance(obs["engine_snapshot_data"], dict)
285
+
286
+
287
+ class TestVerilogTools:
288
+ """Test suite for Verilog tool implementations."""
289
+
290
+ @pytest.fixture
291
+ def mock_engine(self):
292
+ """Create a mock engine for tool testing."""
293
+ engine = MagicMock()
294
+ return engine
295
+
296
+ @pytest.mark.asyncio
297
+ async def test_write_file_tool(self, mock_engine):
298
+ """Test VerilogWriteFileTool."""
299
+ tool = VerilogWriteFileTool(mock_engine)
300
+
301
+ # Mock async method properly
302
+ async def mock_write_file(*args, **kwargs):
303
+ return {"ok": True, "type": "write_file"}
304
+
305
+ mock_engine.write_file = mock_write_file
306
+
307
+ call = EnvToolCall(
308
+ tool="write_file",
309
+ args={"path": "test.v", "content": "module test(); endmodule"},
310
+ )
311
+
312
+ result = await tool(call)
313
+
314
+ assert isinstance(result, ToolResult)
315
+ assert result.ok is True
316
+
317
+ @pytest.mark.asyncio
318
+ async def test_write_file_tool_error(self, mock_engine):
319
+ """Test VerilogWriteFileTool with error."""
320
+ tool = VerilogWriteFileTool(mock_engine)
321
+
322
+ async def mock_write_file_error(*args, **kwargs):
323
+ raise Exception("Write error")
324
+
325
+ mock_engine.write_file = mock_write_file_error
326
+
327
+ call = EnvToolCall(tool="write_file", args={"path": "test.v", "content": "test"})
328
+
329
+ result = await tool(call)
330
+
331
+ assert result.ok is False
332
+ assert "Write error" in result.error
333
+
334
+ @pytest.mark.asyncio
335
+ async def test_compile_tool(self, mock_engine):
336
+ """Test VerilogCompileTool."""
337
+ tool = VerilogCompileTool(mock_engine)
338
+
339
+ async def mock_compile(*args, **kwargs):
340
+ return {"ok": True, "type": "compile", "returncode": 0}
341
+
342
+ mock_engine.compile = mock_compile
343
+
344
+ call = EnvToolCall(tool="compile", args={"sources": ["test.v"], "testbench": "test_tb.v"})
345
+
346
+ result = await tool(call)
347
+
348
+ assert result.ok is True
349
+
350
+ @pytest.mark.asyncio
351
+ async def test_compile_tool_no_args(self, mock_engine):
352
+ """Test VerilogCompileTool with no arguments."""
353
+ tool = VerilogCompileTool(mock_engine)
354
+
355
+ async def mock_compile(*args, **kwargs):
356
+ return {"ok": True, "type": "compile"}
357
+
358
+ mock_engine.compile = mock_compile
359
+
360
+ call = EnvToolCall(tool="compile", args={})
361
+
362
+ result = await tool(call)
363
+
364
+ assert result.ok is True
365
+
366
@pytest.mark.asyncio
async def test_simulate_tool(self, mock_engine):
    """Invoke VerilogSimulateTool against a patched engine.simulate."""
    simulate_tool = VerilogSimulateTool(mock_engine)

    async def fake_simulate(*args, **kwargs):
        # Canned payload describing a passing simulation run.
        return {"ok": True, "type": "simulate", "passed": True}

    mock_engine.simulate = fake_simulate

    request = EnvToolCall(tool="simulate", args={"binary": "test.out"})
    outcome = await simulate_tool(request)

    assert outcome.ok is True
381
+
382
@pytest.mark.asyncio
async def test_submit_tool(self, mock_engine):
    """Invoke VerilogSubmitTool against a patched engine.submit."""
    submit_tool = VerilogSubmitTool(mock_engine)

    async def fake_submit(*args, **kwargs):
        # Canned payload describing an accepted submission.
        return {"ok": True, "type": "submit", "submitted": True}

    mock_engine.submit = fake_submit

    request = EnvToolCall(tool="submit", args={})
    outcome = await submit_tool(request)

    assert outcome.ok is True
397
+
398
+
399
class TestVerilogObservationCallable:
    """Tests for the observation dict produced by VerilogObservationCallable."""

    @pytest.mark.asyncio
    async def test_get_observation_basic(self):
        """One file and no compile/simulate output: check every reported field."""
        observer = VerilogObservationCallable()

        public_state = VerilogPublicState(
            files={"test.v": "module test(); endmodule"},
            build_dir="/tmp/build",
            task_completed=False,
        )
        private_state = VerilogPrivateState(
            reward_last=0.1,
            total_reward=0.5,
            terminated=False,
            truncated=False,
        )

        observation = await observer.get_observation(public_state, private_state)

        # Values copied straight from the public state.
        assert observation["files"] == public_state.files
        assert observation["build_dir"] == public_state.build_dir
        assert observation["files_summary"] == "1 Verilog files available: test.v"
        assert observation["task_completed"] is False
        # Values copied from the private state.
        assert observation["reward_last"] == 0.1
        assert observation["total_reward"] == 0.5
        assert observation["terminated"] is False
        # Nothing was compiled or simulated, so both statuses are empty.
        assert observation["compile_status"] == ""
        assert observation["simulate_status"] == ""

    @pytest.mark.asyncio
    async def test_get_observation_with_compile_status(self):
        """Compile output of "Compilation successful" is reported as a success."""
        observer = VerilogObservationCallable()

        public_state = VerilogPublicState(
            files={},
            build_dir="/tmp/build",
            task_completed=False,
            last_compile_output="Compilation successful",
        )
        private_state = VerilogPrivateState(
            reward_last=0.0,
            total_reward=0.0,
            terminated=False,
            truncated=False,
        )

        observation = await observer.get_observation(public_state, private_state)

        assert observation["compile_status"] == "Last compile: Success"

    @pytest.mark.asyncio
    async def test_get_observation_with_compile_error(self):
        """Compile output of "Error: syntax error" is reported as a failure."""
        observer = VerilogObservationCallable()

        public_state = VerilogPublicState(
            files={},
            build_dir="/tmp/build",
            task_completed=False,
            last_compile_output="Error: syntax error",
        )
        private_state = VerilogPrivateState(
            reward_last=0.0,
            total_reward=0.0,
            terminated=False,
            truncated=False,
        )

        observation = await observer.get_observation(public_state, private_state)

        assert observation["compile_status"] == "Last compile: Failed"

    @pytest.mark.asyncio
    async def test_get_observation_with_simulate_status_passed(self):
        """Simulation output with zero mismatches is reported as passed."""
        observer = VerilogObservationCallable()

        public_state = VerilogPublicState(
            files={},
            build_dir="/tmp/build",
            task_completed=True,
            last_simulate_output="Mismatches: 0 in 10 samples",
        )
        private_state = VerilogPrivateState(
            reward_last=1.0,
            total_reward=1.0,
            terminated=True,
            truncated=False,
        )

        observation = await observer.get_observation(public_state, private_state)

        assert observation["simulate_status"] == "Last simulation: Passed"
        assert observation["task_completed"] is True
        assert observation["terminated"] is True

    @pytest.mark.asyncio
    async def test_get_observation_with_simulate_status_failed(self):
        """Simulation output with five mismatches is reported as failed."""
        observer = VerilogObservationCallable()

        public_state = VerilogPublicState(
            files={},
            build_dir="/tmp/build",
            task_completed=False,
            last_simulate_output="Mismatches: 5 in 10 samples",
        )
        private_state = VerilogPrivateState(
            reward_last=0.0,
            total_reward=0.0,
            terminated=False,
            truncated=False,
        )

        observation = await observer.get_observation(public_state, private_state)

        assert observation["simulate_status"] == "Last simulation: Failed"

    @pytest.mark.asyncio
    async def test_get_observation_multiple_files(self):
        """The files summary lists all three files in insertion order."""
        observer = VerilogObservationCallable()

        verilog_files = {
            "TopModule.v": "module TopModule(); endmodule",
            "RefModule.v": "module RefModule(); endmodule",
            "test_tb.v": "module test_tb(); endmodule",
        }
        public_state = VerilogPublicState(
            files=verilog_files,
            build_dir="/tmp/build",
            task_completed=False,
        )
        private_state = VerilogPrivateState(
            reward_last=0.0,
            total_reward=0.0,
            terminated=False,
            truncated=False,
        )

        observation = await observer.get_observation(public_state, private_state)

        wanted = "3 Verilog files available: TopModule.v, RefModule.v, test_tb.v"
        assert observation["files_summary"] == wanted
534
+
535
+
536
+ class TestInputSchemas:
537
+ """Test suite for tool input schemas."""
538
+
539
def test_write_file_input_valid(self):
    """WriteFileInput exposes the path and content it was built with."""
    schema = WriteFileInput(path="test.v", content="module test(); endmodule")

    assert schema.path == "test.v"
    assert schema.content == "module test(); endmodule"
546
+
547
def test_write_file_input_missing_required(self):
    """Building WriteFileInput without ``content`` must raise ValueError."""
    incomplete_fields = {"path": "test.v"}  # deliberately lacks "content"

    with pytest.raises(ValueError):
        WriteFileInput(**incomplete_fields)
551
+
552
def test_compile_input_valid(self):
    """CompileInput exposes the sources and testbench it was built with."""
    schema = CompileInput(sources=["test.v"], testbench="test_tb.v")

    assert schema.sources == ["test.v"]
    assert schema.testbench == "test_tb.v"
559
+
560
def test_compile_input_optional_fields(self):
    """CompileInput built with no arguments leaves both fields as None."""
    schema = CompileInput()

    for field_name in ("sources", "testbench"):
        assert getattr(schema, field_name) is None
566
+
567
def test_simulate_input_valid(self):
    """SimulateInput exposes the binary name it was built with."""
    schema = SimulateInput(binary="test.out")

    assert schema.binary == "test.out"
573
+
574
def test_simulate_input_optional(self):
    """SimulateInput built with no arguments leaves ``binary`` as None."""
    schema = SimulateInput()

    assert schema.binary is None
579
+
580
def test_submit_input(self):
    """SubmitInput can be constructed without supplying any fields."""
    schema = SubmitInput()

    # Construction without arguments is the behavior under test.
    assert schema is not None