synth-ai 0.2.0__py3-none-any.whl → 0.2.1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (266) hide show
  1. synth_ai/__init__.py +28 -2
  2. synth_ai/core/system.py +4 -0
  3. synth_ai/environments/__init__.py +35 -0
  4. synth_ai/environments/environment/__init__.py +1 -0
  5. synth_ai/environments/environment/artifacts/__init__.py +1 -0
  6. synth_ai/environments/environment/artifacts/base.py +50 -0
  7. synth_ai/environments/environment/core.py +22 -0
  8. synth_ai/environments/environment/db/__init__.py +1 -0
  9. synth_ai/environments/environment/db/sqlite.py +45 -0
  10. synth_ai/environments/environment/registry.py +24 -0
  11. synth_ai/environments/environment/resources/sqlite.py +46 -0
  12. synth_ai/environments/environment/results.py +1 -0
  13. synth_ai/environments/environment/rewards/__init__.py +1 -0
  14. synth_ai/environments/environment/rewards/core.py +28 -0
  15. synth_ai/environments/environment/shared_engine.py +26 -0
  16. synth_ai/environments/environment/tools/__init__.py +34 -0
  17. synth_ai/environments/examples/__init__.py +1 -0
  18. synth_ai/environments/examples/crafter_classic/__init__.py +8 -0
  19. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +58 -0
  20. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +152 -0
  21. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +1194 -0
  22. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +51 -0
  23. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +872 -0
  24. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1412 -0
  25. synth_ai/environments/examples/crafter_classic/agent_demos/test_crafter_react_agent.py +1110 -0
  26. synth_ai/environments/examples/crafter_classic/config_logging.py +111 -0
  27. synth_ai/environments/examples/crafter_classic/engine.py +502 -0
  28. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +63 -0
  29. synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +5 -0
  30. synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +74 -0
  31. synth_ai/environments/examples/crafter_classic/environment.py +255 -0
  32. synth_ai/environments/examples/crafter_classic/taskset.py +228 -0
  33. synth_ai/environments/examples/enron/agent_demos/test_synth_react.py +535 -0
  34. synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +156 -0
  35. synth_ai/environments/examples/enron/art_helpers/local_email_db.py +280 -0
  36. synth_ai/environments/examples/enron/art_helpers/types_enron.py +24 -0
  37. synth_ai/environments/examples/enron/engine.py +291 -0
  38. synth_ai/environments/examples/enron/environment.py +165 -0
  39. synth_ai/environments/examples/enron/taskset.py +112 -0
  40. synth_ai/environments/examples/enron/units/keyword_stats.py +111 -0
  41. synth_ai/environments/examples/enron/units/test_email_index.py +8 -0
  42. synth_ai/environments/examples/minigrid/__init__.py +48 -0
  43. synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +1188 -0
  44. synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +47 -0
  45. synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +562 -0
  46. synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +220 -0
  47. synth_ai/environments/examples/minigrid/agent_demos/test_minigrid_react_agent.py +393 -0
  48. synth_ai/environments/examples/minigrid/engine.py +589 -0
  49. synth_ai/environments/examples/minigrid/environment.py +274 -0
  50. synth_ai/environments/examples/minigrid/environment_mapping.py +242 -0
  51. synth_ai/environments/examples/minigrid/puzzle_loader.py +416 -0
  52. synth_ai/environments/examples/minigrid/taskset.py +583 -0
  53. synth_ai/environments/examples/minigrid/units/test_action_behavior.py +226 -0
  54. synth_ai/environments/examples/minigrid/units/test_debug_messages.py +83 -0
  55. synth_ai/environments/examples/minigrid/units/test_exploration.py +120 -0
  56. synth_ai/environments/examples/minigrid/units/test_minigrid_engine.py +214 -0
  57. synth_ai/environments/examples/minigrid/units/test_minigrid_environment.py +238 -0
  58. synth_ai/environments/examples/minigrid/units/test_minigrid_environment_mapping.py +301 -0
  59. synth_ai/environments/examples/minigrid/units/test_minigrid_taskset.py +210 -0
  60. synth_ai/environments/examples/nethack/__init__.py +7 -0
  61. synth_ai/environments/examples/nethack/achievements.py +337 -0
  62. synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +981 -0
  63. synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +74 -0
  64. synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +832 -0
  65. synth_ai/environments/examples/nethack/agent_demos/test_nethack_react_agent.py +1112 -0
  66. synth_ai/environments/examples/nethack/engine.py +738 -0
  67. synth_ai/environments/examples/nethack/environment.py +255 -0
  68. synth_ai/environments/examples/nethack/helpers/__init__.py +42 -0
  69. synth_ai/environments/examples/nethack/helpers/action_mapping.py +301 -0
  70. synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +401 -0
  71. synth_ai/environments/examples/nethack/helpers/observation_utils.py +433 -0
  72. synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +201 -0
  73. synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +268 -0
  74. synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +308 -0
  75. synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +430 -0
  76. synth_ai/environments/examples/nethack/taskset.py +323 -0
  77. synth_ai/environments/examples/nethack/units/test_nethack_engine.py +277 -0
  78. synth_ai/environments/examples/nethack/units/test_nethack_environment.py +281 -0
  79. synth_ai/environments/examples/nethack/units/test_nethack_taskset.py +213 -0
  80. synth_ai/environments/examples/nethack/units/test_recording.py +307 -0
  81. synth_ai/environments/examples/red/__init__.py +7 -0
  82. synth_ai/environments/examples/red/agent_demos/__init__.py +1 -0
  83. synth_ai/environments/examples/red/agent_demos/test_synth_react.py +1471 -0
  84. synth_ai/environments/examples/red/config_logging.py +110 -0
  85. synth_ai/environments/examples/red/engine.py +693 -0
  86. synth_ai/environments/examples/red/engine_helpers/__init__.py +1 -0
  87. synth_ai/environments/examples/red/engine_helpers/memory_map.py +28 -0
  88. synth_ai/environments/examples/red/engine_helpers/reward_components.py +275 -0
  89. synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +142 -0
  90. synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +56 -0
  91. synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +283 -0
  92. synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +149 -0
  93. synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +137 -0
  94. synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +56 -0
  95. synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +330 -0
  96. synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +120 -0
  97. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +558 -0
  98. synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +312 -0
  99. synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +147 -0
  100. synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +246 -0
  101. synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +367 -0
  102. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +139 -0
  103. synth_ai/environments/examples/red/environment.py +235 -0
  104. synth_ai/environments/examples/red/taskset.py +77 -0
  105. synth_ai/environments/examples/red/test_fixes.py +125 -0
  106. synth_ai/environments/examples/red/test_fixes_mock.py +148 -0
  107. synth_ai/environments/examples/red/units/__init__.py +1 -0
  108. synth_ai/environments/examples/red/units/test_basic_functionality.py +97 -0
  109. synth_ai/environments/examples/red/units/test_button_press_requirements.py +217 -0
  110. synth_ai/environments/examples/red/units/test_engine.py +192 -0
  111. synth_ai/environments/examples/red/units/test_environment.py +455 -0
  112. synth_ai/environments/examples/red/units/test_exploration_strategy.py +227 -0
  113. synth_ai/environments/examples/red/units/test_integration.py +217 -0
  114. synth_ai/environments/examples/red/units/test_memory_extraction.py +111 -0
  115. synth_ai/environments/examples/red/units/test_menu_bug_reproduction.py +1100 -0
  116. synth_ai/environments/examples/red/units/test_movement_debug.py +255 -0
  117. synth_ai/environments/examples/red/units/test_pokemon_mcts_debug.py +163 -0
  118. synth_ai/environments/examples/red/units/test_pokemon_mcts_verbose.py +117 -0
  119. synth_ai/environments/examples/red/units/test_red_basic.py +145 -0
  120. synth_ai/environments/examples/red/units/test_red_comprehensive.py +323 -0
  121. synth_ai/environments/examples/red/units/test_retry_movement.py +195 -0
  122. synth_ai/environments/examples/red/units/test_reward_components.py +186 -0
  123. synth_ai/environments/examples/red/units/test_rom_integration.py +260 -0
  124. synth_ai/environments/examples/red/units/test_taskset.py +116 -0
  125. synth_ai/environments/examples/red/units/test_tree.py +448 -0
  126. synth_ai/environments/examples/sokoban/__init__.py +1 -0
  127. synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +900 -0
  128. synth_ai/environments/examples/sokoban/agent_demos/test_dspy_react.py +1 -0
  129. synth_ai/environments/examples/sokoban/agent_demos/test_sokoban_react_agent.py +498 -0
  130. synth_ai/environments/examples/sokoban/agent_demos/test_synth_lats.py +1 -0
  131. synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_locally.py +748 -0
  132. synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_service.py +296 -0
  133. synth_ai/environments/examples/sokoban/engine.py +675 -0
  134. synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +1 -0
  135. synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +656 -0
  136. synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +17 -0
  137. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +3 -0
  138. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +129 -0
  139. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +370 -0
  140. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +331 -0
  141. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +305 -0
  142. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +66 -0
  143. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +114 -0
  144. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +122 -0
  145. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +394 -0
  146. synth_ai/environments/examples/sokoban/environment.py +228 -0
  147. synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +438 -0
  148. synth_ai/environments/examples/sokoban/puzzle_loader.py +311 -0
  149. synth_ai/environments/examples/sokoban/taskset.py +425 -0
  150. synth_ai/environments/examples/sokoban/units/astar_common.py +94 -0
  151. synth_ai/environments/examples/sokoban/units/test_building_task_set.py +49 -0
  152. synth_ai/environments/examples/sokoban/units/test_false_positive.py +120 -0
  153. synth_ai/environments/examples/sokoban/units/test_simple_run_through_environment.py +119 -0
  154. synth_ai/environments/examples/sokoban/units/test_sokoban_environment.py +98 -0
  155. synth_ai/environments/examples/sokoban/units/test_tree.py +364 -0
  156. synth_ai/environments/examples/tictactoe/__init__.py +1 -0
  157. synth_ai/environments/examples/tictactoe/agent_demos/test_synth_react.py +266 -0
  158. synth_ai/environments/examples/tictactoe/agent_demos/test_tictactoe_react_agent.py +470 -0
  159. synth_ai/environments/examples/tictactoe/engine.py +368 -0
  160. synth_ai/environments/examples/tictactoe/environment.py +239 -0
  161. synth_ai/environments/examples/tictactoe/taskset.py +214 -0
  162. synth_ai/environments/examples/tictactoe/units/test_tictactoe_engine.py +393 -0
  163. synth_ai/environments/examples/tictactoe/units/test_tictactoe_environment.py +493 -0
  164. synth_ai/environments/examples/tictactoe/units/test_tictactoe_taskset.py +191 -0
  165. synth_ai/environments/examples/verilog/__init__.py +10 -0
  166. synth_ai/environments/examples/verilog/agent_demos/test_synth_react.py +520 -0
  167. synth_ai/environments/examples/verilog/engine.py +328 -0
  168. synth_ai/environments/examples/verilog/environment.py +349 -0
  169. synth_ai/environments/examples/verilog/taskset.py +418 -0
  170. synth_ai/environments/examples/verilog/units/test_verilog_engine.py +466 -0
  171. synth_ai/environments/examples/verilog/units/test_verilog_environment.py +585 -0
  172. synth_ai/environments/examples/verilog/units/test_verilog_integration.py +383 -0
  173. synth_ai/environments/examples/verilog/units/test_verilog_taskset.py +457 -0
  174. synth_ai/environments/reproducibility/core.py +42 -0
  175. synth_ai/environments/reproducibility/tree.py +364 -0
  176. synth_ai/environments/service/app.py +78 -0
  177. synth_ai/environments/service/core_routes.py +775 -0
  178. synth_ai/environments/service/external_registry.py +57 -0
  179. synth_ai/environments/service/registry.py +9 -0
  180. synth_ai/environments/stateful/__init__.py +1 -0
  181. synth_ai/environments/stateful/core.py +28 -0
  182. synth_ai/environments/stateful/engine.py +21 -0
  183. synth_ai/environments/stateful/state.py +7 -0
  184. synth_ai/environments/tasks/api.py +19 -0
  185. synth_ai/environments/tasks/core.py +78 -0
  186. synth_ai/environments/tasks/filters.py +39 -0
  187. synth_ai/environments/tasks/utils.py +89 -0
  188. synth_ai/environments/v0_observability/history.py +3 -0
  189. synth_ai/environments/v0_observability/log.py +2 -0
  190. synth_ai/lm/caching/constants.py +1 -0
  191. synth_ai/{zyk/lms → lm}/caching/ephemeral.py +4 -8
  192. synth_ai/{zyk/lms → lm}/caching/handler.py +15 -15
  193. synth_ai/{zyk/lms → lm}/caching/initialize.py +2 -4
  194. synth_ai/{zyk/lms → lm}/caching/persistent.py +4 -10
  195. synth_ai/{zyk/lms → lm}/config.py +2 -1
  196. synth_ai/{zyk/lms → lm}/constants.py +2 -2
  197. synth_ai/{zyk/lms → lm}/core/all.py +10 -10
  198. synth_ai/{zyk/lms → lm}/core/main.py +57 -33
  199. synth_ai/{zyk/lms → lm}/core/vendor_clients.py +12 -10
  200. synth_ai/lm/cost/monitor.py +1 -0
  201. synth_ai/lm/cost/statefulness.py +1 -0
  202. synth_ai/lm/provider_support/__init__.py +8 -0
  203. synth_ai/lm/provider_support/anthropic.py +945 -0
  204. synth_ai/lm/provider_support/openai.py +1115 -0
  205. synth_ai/lm/provider_support/suppress_logging.py +31 -0
  206. synth_ai/{zyk/lms → lm}/structured_outputs/handler.py +58 -80
  207. synth_ai/{zyk/lms → lm}/structured_outputs/inject.py +6 -20
  208. synth_ai/{zyk/lms → lm}/structured_outputs/rehabilitate.py +6 -12
  209. synth_ai/{zyk/lms → lm}/vendors/core/anthropic_api.py +21 -30
  210. synth_ai/{zyk/lms → lm}/vendors/core/gemini_api.py +35 -32
  211. synth_ai/{zyk/lms → lm}/vendors/core/mistral_api.py +19 -28
  212. synth_ai/{zyk/lms → lm}/vendors/core/openai_api.py +26 -36
  213. synth_ai/{zyk/lms → lm}/vendors/openai_standard.py +29 -33
  214. synth_ai/{zyk/lms → lm}/vendors/retries.py +1 -1
  215. synth_ai/lm/vendors/supported/__init__.py +0 -0
  216. synth_ai/{zyk/lms → lm}/vendors/supported/custom_endpoint.py +131 -118
  217. synth_ai/{zyk/lms → lm}/vendors/supported/deepseek.py +4 -8
  218. synth_ai/{zyk/lms → lm}/vendors/supported/grok.py +6 -8
  219. synth_ai/{zyk/lms → lm}/vendors/supported/groq.py +1 -1
  220. synth_ai/{zyk/lms → lm}/vendors/supported/ollama.py +2 -2
  221. synth_ai/{zyk/lms → lm}/vendors/supported/openrouter.py +18 -16
  222. synth_ai/{zyk/lms → lm}/vendors/supported/together.py +1 -1
  223. synth_ai/tracing/__init__.py +0 -0
  224. synth_ai/tracing/abstractions.py +224 -0
  225. synth_ai/tracing/base_client.py +91 -0
  226. synth_ai/tracing/client_manager.py +131 -0
  227. synth_ai/tracing/config.py +140 -0
  228. synth_ai/tracing/context.py +146 -0
  229. synth_ai/tracing/decorators.py +679 -0
  230. synth_ai/tracing/events/__init__.py +0 -0
  231. synth_ai/tracing/events/manage.py +147 -0
  232. synth_ai/tracing/events/scope.py +86 -0
  233. synth_ai/tracing/events/store.py +227 -0
  234. synth_ai/tracing/immediate_client.py +152 -0
  235. synth_ai/tracing/local.py +18 -0
  236. synth_ai/tracing/log_client_base.py +74 -0
  237. synth_ai/tracing/retry_queue.py +187 -0
  238. synth_ai/tracing/trackers.py +515 -0
  239. synth_ai/tracing/upload.py +504 -0
  240. synth_ai/tracing/utils.py +9 -0
  241. synth_ai/zyk/__init__.py +28 -2
  242. synth_ai-0.2.1.dev0.dist-info/METADATA +349 -0
  243. synth_ai-0.2.1.dev0.dist-info/RECORD +261 -0
  244. {synth_ai-0.2.0.dist-info → synth_ai-0.2.1.dev0.dist-info}/WHEEL +1 -1
  245. synth_ai/zyk/lms/caching/constants.py +0 -1
  246. synth_ai/zyk/lms/cost/monitor.py +0 -1
  247. synth_ai/zyk/lms/cost/statefulness.py +0 -1
  248. synth_ai-0.2.0.dist-info/METADATA +0 -36
  249. synth_ai-0.2.0.dist-info/RECORD +0 -50
  250. /synth_ai/{zyk/lms/__init__.py → environments/reproducibility/helpers.py} +0 -0
  251. /synth_ai/{zyk/lms/caching → lm}/__init__.py +0 -0
  252. /synth_ai/{zyk/lms/core → lm/caching}/__init__.py +0 -0
  253. /synth_ai/{zyk/lms → lm}/caching/dbs.py +0 -0
  254. /synth_ai/{zyk/lms/cost → lm/core}/__init__.py +0 -0
  255. /synth_ai/{zyk/lms → lm}/core/exceptions.py +0 -0
  256. /synth_ai/{zyk/lms/structured_outputs → lm/cost}/__init__.py +0 -0
  257. /synth_ai/{zyk/lms/vendors → lm/structured_outputs}/__init__.py +0 -0
  258. /synth_ai/{zyk/lms → lm}/tools/__init__.py +0 -0
  259. /synth_ai/{zyk/lms → lm}/tools/base.py +0 -0
  260. /synth_ai/{zyk/lms/vendors/core → lm/vendors}/__init__.py +0 -0
  261. /synth_ai/{zyk/lms → lm}/vendors/base.py +0 -0
  262. /synth_ai/{zyk/lms/vendors/local → lm/vendors/core}/__init__.py +0 -0
  263. /synth_ai/{zyk/lms/vendors/supported → lm/vendors/local}/__init__.py +0 -0
  264. /synth_ai/{zyk/lms → lm}/vendors/local/ollama.py +0 -0
  265. {synth_ai-0.2.0.dist-info → synth_ai-0.2.1.dev0.dist-info/licenses}/LICENSE +0 -0
  266. {synth_ai-0.2.0.dist-info → synth_ai-0.2.1.dev0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,466 @@
1
+ import pytest
2
+ import tempfile
3
+ import shutil
4
+ from pathlib import Path
5
+ from unittest.mock import patch, MagicMock
6
+
7
+ # Add timeout to all async tests
8
+ pytestmark = pytest.mark.timeout(15)
9
+
10
+ from synth_ai.environments.examples.verilog.engine import (
11
+ VerilogEngine,
12
+ VerilogPublicState,
13
+ VerilogPrivateState,
14
+ VerilogCompileSuccessComponent,
15
+ VerilogSimulationPassComponent,
16
+ VerilogStepPenaltyComponent,
17
+ )
18
+ from synth_ai.environments.examples.verilog.taskset import (
19
+ VerilogTaskInstance,
20
+ VerilogTaskInstanceMetadata,
21
+ )
22
+ from synth_ai.environments.tasks.core import Impetus, Intent
23
+ from uuid import uuid4
24
+
25
+
26
@pytest.fixture
def mock_task_instance():
    """Yield a ``VerilogTaskInstance`` backed by a throwaway directory tree.

    A temporary root directory is created with ``pristine`` and ``snapshot``
    sub-directories.  ``pristine`` is seeded with a trivial ``TopModule.v``
    design and a ``test_tb.v`` testbench that displays ``ALL_TESTS_PASSED``.

    The temporary root is removed on teardown.  Setup runs inside the same
    ``try``/``finally`` so that a failure while building the task instance
    does not leave the directory behind.
    """
    temp_dir = tempfile.mkdtemp(prefix="test_verilog_")
    try:
        pristine_dir = Path(temp_dir) / "pristine"
        snapshot_dir = Path(temp_dir) / "snapshot"

        pristine_dir.mkdir(parents=True)
        snapshot_dir.mkdir(parents=True)

        # Design under test: a module that drives a constant zero.
        (pristine_dir / "TopModule.v").write_text("""module TopModule(
output zero
);
assign zero = 1'b0;
endmodule""")

        # Testbench: fails fatally unless the output is zero.
        (pristine_dir / "test_tb.v").write_text("""`timescale 1ns/1ps
module test_tb;
wire zero;
TopModule dut(.zero(zero));

initial begin
#10;
if (zero !== 1'b0) $fatal(1, "Test failed");
$display("ALL_TESTS_PASSED");
$finish;
end
endmodule""")

        metadata = VerilogTaskInstanceMetadata(
            problem_name="test_problem",
            difficulty="easy",
            description="Test problem",
            files_provided=["TopModule.v", "test_tb.v"],
        )

        task = VerilogTaskInstance(
            id=uuid4(),
            impetus=Impetus(instructions="Test task"),
            intent=Intent(rubric="Test goal", gold_trajectories=None, gold_state_diff={}),
            metadata=metadata,
            is_reproducible=True,
            initial_engine_snapshot=None,
            pristine_dir=str(pristine_dir),
            snapshot_dir=str(snapshot_dir),
        )

        yield task
    finally:
        # Runs on normal teardown and when setup above raises.
        shutil.rmtree(temp_dir, ignore_errors=True)
78
+
79
+
80
@pytest.fixture
def engine(mock_task_instance):
    """Build the ``VerilogEngine`` under test from the mock task instance."""
    verilog_engine = VerilogEngine(mock_task_instance)
    return verilog_engine
84
+
85
+
86
class TestVerilogEngine:
    """Unit tests exercising the individual operations of ``VerilogEngine``."""

    @pytest.mark.asyncio
    async def test_engine_initialization(self, engine):
        """A new engine holds its task, zero reward and three reward components."""
        assert engine.task_instance is not None
        assert engine._total_reward == 0.0
        assert engine.reward_stack is not None
        component_count = len(engine.reward_stack.components)
        assert component_count == 3

    @pytest.mark.asyncio
    async def test_reset_engine(self, engine):
        """Resetting returns fresh private/public state and creates the work dirs."""
        private_state, public_state = await engine._reset_engine()

        assert isinstance(private_state, VerilogPrivateState)
        assert isinstance(public_state, VerilogPublicState)
        assert private_state.reward_last == 0.0
        assert private_state.total_reward == 0.0
        assert not private_state.terminated
        assert not private_state.truncated
        assert len(public_state.files) >= 1
        assert engine.snapshot_dir.exists()
        assert engine.build_dir.exists()

    @pytest.mark.asyncio
    async def test_write_file(self, engine):
        """``write_file`` reports success and stores the content in the snapshot dir."""
        await engine._reset_engine()

        outcome = await engine.write_file("test.v", "module test(); endmodule")

        assert outcome["ok"] is True
        assert outcome["type"] == "write_file"
        written_path = engine.snapshot_dir / "test.v"
        assert written_path.exists()
        assert written_path.read_text() == "module test(); endmodule"

    @pytest.mark.asyncio
    async def test_write_file_nested_path(self, engine):
        """``write_file`` accepts a relative path that includes a sub-directory."""
        await engine._reset_engine()

        outcome = await engine.write_file("subdir/nested.v", "module nested(); endmodule")

        assert outcome["ok"] is True
        nested_path = engine.snapshot_dir / "subdir" / "nested.v"
        assert nested_path.exists()
        assert nested_path.read_text() == "module nested(); endmodule"

    @pytest.mark.asyncio
    async def test_get_file_contents(self, engine):
        """A file written through the engine shows up in ``_get_file_contents``."""
        await engine._reset_engine()

        # Put a new file in place first.
        await engine.write_file("new_test.v", "module new_test(); endmodule")

        contents_by_name = engine._get_file_contents()
        assert "new_test.v" in contents_by_name
        assert "module new_test();" in contents_by_name["new_test.v"]

    @pytest.mark.asyncio
    @patch("subprocess.run")
    async def test_compile_success(self, mock_run, engine):
        """A zero return code from the patched subprocess yields a successful compile."""
        await engine._reset_engine()

        # Patched subprocess reports a clean compile.
        mock_run.return_value = MagicMock(returncode=0, stdout="", stderr="")

        outcome = await engine.compile(sources=["TopModule.v"])

        assert outcome["ok"] is True
        assert outcome["type"] == "compile"
        assert outcome["returncode"] == 0
        assert "binary" in outcome

        # The single subprocess call must be an iverilog invocation with these flags.
        mock_run.assert_called_once()
        command = mock_run.call_args[0][0]
        for expected_token in ("iverilog", "-g2012", "-o"):
            assert expected_token in command

    @pytest.mark.asyncio
    @patch("subprocess.run")
    async def test_compile_failure(self, mock_run, engine):
        """A non-zero return code is reported as a failed compile with no binary."""
        await engine._reset_engine()

        # Patched subprocess reports a compiler error.
        mock_run.return_value = MagicMock(
            returncode=1, stdout="", stderr="Error: syntax error"
        )

        outcome = await engine.compile(sources=["invalid.v"])

        assert outcome["ok"] is False
        assert outcome["type"] == "compile"
        assert outcome["returncode"] == 1
        assert "syntax error" in outcome["stderr"]
        assert outcome["binary"] is None

    @pytest.mark.asyncio
    @patch("subprocess.run")
    async def test_simulate_success(self, mock_run, engine):
        """Output reporting zero mismatches counts as a passing simulation."""
        await engine._reset_engine()

        # Patched subprocess reports zero mismatches.
        mock_run.return_value = MagicMock(
            returncode=0,
            stdout="Simulation output\nMismatches: 0 in 10 samples\n",
            stderr="",
        )

        outcome = await engine.simulate()

        assert outcome["ok"] is True
        assert outcome["type"] == "simulate"
        assert outcome["returncode"] == 0
        assert outcome["passed"] is True
        assert "Mismatches: 0" in outcome["stdout"]

    @pytest.mark.asyncio
    @patch("subprocess.run")
    async def test_simulate_all_tests_passed(self, mock_run, engine):
        """Output containing ``ALL_TESTS_PASSED`` counts as a passing simulation."""
        await engine._reset_engine()

        # Patched subprocess prints the ALL_TESTS_PASSED marker.
        mock_run.return_value = MagicMock(
            returncode=0,
            stdout="Simulation running\nALL_TESTS_PASSED\n",
            stderr="",
        )

        outcome = await engine.simulate()

        assert outcome["ok"] is True
        assert outcome["passed"] is True

    @pytest.mark.asyncio
    @patch("subprocess.run")
    async def test_simulate_failure(self, mock_run, engine):
        """Output reporting mismatches runs fine but is not marked as passed."""
        await engine._reset_engine()

        # Patched subprocess exits cleanly but reports mismatches.
        mock_run.return_value = MagicMock(
            returncode=0,
            stdout="Simulation output\nMismatches: 5 in 10 samples\n",
            stderr="",
        )

        outcome = await engine.simulate()

        assert outcome["ok"] is True
        assert outcome["passed"] is False
        assert "Mismatches: 5" in outcome["stdout"]

    @pytest.mark.asyncio
    async def test_submit(self, engine):
        """``submit`` returns a successful result flagged as submitted."""
        await engine._reset_engine()

        outcome = await engine.submit()

        assert outcome["ok"] is True
        assert outcome["type"] == "submit"
        assert outcome["submitted"] is True

    @pytest.mark.asyncio
    async def test_step_engine_compile_success(self, engine):
        """Stepping with a successful compile result gives a positive reward."""
        await engine._reset_engine()

        compile_action = {
            "ok": True,
            "type": "compile",
            "returncode": 0,
            "stdout": "Compilation successful",
        }

        private_state, public_state = await engine._step_engine(compile_action)

        assert private_state.reward_last > 0  # Should get compile success reward
        assert public_state.last_compile_output == "Compilation successful"
        assert not public_state.task_completed

    @pytest.mark.asyncio
    async def test_step_engine_simulate_success(self, engine):
        """Stepping with a passing simulation completes and terminates the task."""
        await engine._reset_engine()

        simulate_action = {
            "ok": True,
            "type": "simulate",
            "returncode": 0,
            "stdout": "ALL_TESTS_PASSED",
            "passed": True,
        }

        private_state, public_state = await engine._step_engine(simulate_action)

        assert private_state.reward_last > 0.5  # Should get large simulation success reward
        assert public_state.last_simulate_output == "ALL_TESTS_PASSED"
        assert public_state.task_completed is True
        assert private_state.terminated is True

    @pytest.mark.asyncio
    async def test_step_penalty(self, engine):
        """A step that earns no positive reward ends up with a negative reward."""
        await engine._reset_engine()

        write_action = {"ok": True, "type": "write_file"}

        private_state, public_state = await engine._step_engine(write_action)

        assert private_state.reward_last < 0  # Should be negative due to step penalty
        assert private_state.total_reward < 0
314
+
315
+
316
class TestVerilogRewardComponents:
    """Unit tests for the individual Verilog reward components."""

    @pytest.mark.asyncio
    async def test_compile_success_component(self):
        """The compile component scores 0.1 only for a compile with return code 0."""
        component = VerilogCompileSuccessComponent()
        state = VerilogPublicState(files={}, build_dir="", task_completed=False)

        expectations = (
            ({"type": "compile", "returncode": 0}, 0.1),  # successful compilation
            ({"type": "compile", "returncode": 1}, 0.0),  # failed compilation
            ({"type": "write_file"}, 0.0),  # non-compile action
        )
        for action, expected_reward in expectations:
            reward = await component.score(state, action)
            assert reward == expected_reward

    @pytest.mark.asyncio
    async def test_simulation_pass_component(self):
        """The simulation component scores 1.0 only for a simulate action that passed."""
        component = VerilogSimulationPassComponent()
        state = VerilogPublicState(files={}, build_dir="", task_completed=False)

        expectations = (
            ({"type": "simulate", "passed": True}, 1.0),  # successful simulation
            ({"type": "simulate", "passed": False}, 0.0),  # failed simulation
            ({"type": "write_file"}, 0.0),  # non-simulate action
        )
        for action, expected_reward in expectations:
            reward = await component.score(state, action)
            assert reward == expected_reward

    @pytest.mark.asyncio
    async def test_step_penalty_component(self):
        """The step-penalty component returns its configured penalty for the tested actions."""
        penalty = -0.05
        component = VerilogStepPenaltyComponent(penalty=penalty)
        state = VerilogPublicState(files={}, build_dir="", task_completed=False)

        # Both action types tested here should incur the penalty.
        for action in ({"type": "write_file"}, {"type": "compile"}):
            reward = await component.score(state, action)
            assert reward == penalty
376
+
377
+
378
class TestEngineIntegration:
    """End-to-end tests chaining several engine operations together."""

    @pytest.mark.asyncio
    @patch("subprocess.run")
    async def test_full_workflow_success(self, mock_run, engine):
        """Reset, write, compile and simulate, ending in a completed task."""

        # Stand-in for subprocess.run: answers by which tool the command names.
        def fake_run(cmd, **kwargs):
            proc = MagicMock()
            if "iverilog" in cmd:
                # Compilation succeeds silently.
                proc.returncode, proc.stdout, proc.stderr = 0, "", ""
            elif "vvp" in cmd:
                # Simulation succeeds and prints the pass marker.
                proc.returncode, proc.stdout, proc.stderr = 0, "ALL_TESTS_PASSED\n", ""
            return proc

        mock_run.side_effect = fake_run

        # Start from a clean engine state.
        private_state, public_state = await engine._reset_engine()
        assert private_state.total_reward == 0.0

        # Write the design file.
        write_outcome = await engine.write_file(
            "TopModule.v",
            """module TopModule(
output zero
);
assign zero = 1'b0;
endmodule""",
        )
        assert write_outcome["ok"] is True

        # Compile and feed the result into the engine.
        compile_outcome = await engine.compile()
        assert compile_outcome["ok"] is True

        private_state, public_state = await engine._step_engine(compile_outcome)
        reward_after_compile = private_state.reward_last
        assert reward_after_compile > 0  # Should get compile success reward

        # Simulate and feed the result into the engine.
        simulate_outcome = await engine.simulate()
        assert simulate_outcome["ok"] is True
        assert simulate_outcome["passed"] is True

        private_state, public_state = await engine._step_engine(simulate_outcome)
        reward_after_simulate = private_state.reward_last
        assert reward_after_simulate > 0.5  # Should get large simulation reward
        assert public_state.task_completed is True
        assert private_state.terminated is True

        # Total reward should be positive (compile + simulate - step penalties)
        assert private_state.total_reward > 0

    @pytest.mark.asyncio
    @patch("subprocess.run")
    async def test_compilation_failure_workflow(self, mock_run, engine):
        """A failed compile yields a negative step reward and leaves the task open."""
        # Every patched subprocess call reports a compiler error.
        mock_run.return_value = MagicMock(
            returncode=1, stdout="", stderr="Error: syntax error"
        )

        # Start from a clean engine state.
        await engine._reset_engine()

        # Write a file that is not valid Verilog.
        await engine.write_file("invalid.v", "invalid verilog code")

        # Compilation is expected to fail.
        compile_outcome = await engine.compile()
        assert compile_outcome["ok"] is False

        private_state, public_state = await engine._step_engine(compile_outcome)

        # Should only get step penalty, no compile success reward
        assert private_state.reward_last < 0
        assert not public_state.task_completed
        assert not private_state.terminated