synth-ai 0.1.9__py3-none-any.whl → 0.2.1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (266) hide show
  1. synth_ai/__init__.py +28 -2
  2. synth_ai/core/system.py +4 -0
  3. synth_ai/environments/__init__.py +35 -0
  4. synth_ai/environments/environment/__init__.py +1 -0
  5. synth_ai/environments/environment/artifacts/__init__.py +1 -0
  6. synth_ai/environments/environment/artifacts/base.py +50 -0
  7. synth_ai/environments/environment/core.py +22 -0
  8. synth_ai/environments/environment/db/__init__.py +1 -0
  9. synth_ai/environments/environment/db/sqlite.py +45 -0
  10. synth_ai/environments/environment/registry.py +24 -0
  11. synth_ai/environments/environment/resources/sqlite.py +46 -0
  12. synth_ai/environments/environment/results.py +1 -0
  13. synth_ai/environments/environment/rewards/__init__.py +1 -0
  14. synth_ai/environments/environment/rewards/core.py +28 -0
  15. synth_ai/environments/environment/shared_engine.py +26 -0
  16. synth_ai/environments/environment/tools/__init__.py +34 -0
  17. synth_ai/environments/examples/__init__.py +1 -0
  18. synth_ai/environments/examples/crafter_classic/__init__.py +8 -0
  19. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +58 -0
  20. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +152 -0
  21. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +1194 -0
  22. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +51 -0
  23. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +872 -0
  24. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1412 -0
  25. synth_ai/environments/examples/crafter_classic/agent_demos/test_crafter_react_agent.py +1110 -0
  26. synth_ai/environments/examples/crafter_classic/config_logging.py +111 -0
  27. synth_ai/environments/examples/crafter_classic/engine.py +502 -0
  28. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +63 -0
  29. synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +5 -0
  30. synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +74 -0
  31. synth_ai/environments/examples/crafter_classic/environment.py +255 -0
  32. synth_ai/environments/examples/crafter_classic/taskset.py +228 -0
  33. synth_ai/environments/examples/enron/agent_demos/test_synth_react.py +535 -0
  34. synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +156 -0
  35. synth_ai/environments/examples/enron/art_helpers/local_email_db.py +280 -0
  36. synth_ai/environments/examples/enron/art_helpers/types_enron.py +24 -0
  37. synth_ai/environments/examples/enron/engine.py +291 -0
  38. synth_ai/environments/examples/enron/environment.py +165 -0
  39. synth_ai/environments/examples/enron/taskset.py +112 -0
  40. synth_ai/environments/examples/enron/units/keyword_stats.py +111 -0
  41. synth_ai/environments/examples/enron/units/test_email_index.py +8 -0
  42. synth_ai/environments/examples/minigrid/__init__.py +48 -0
  43. synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +1188 -0
  44. synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +47 -0
  45. synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +562 -0
  46. synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +220 -0
  47. synth_ai/environments/examples/minigrid/agent_demos/test_minigrid_react_agent.py +393 -0
  48. synth_ai/environments/examples/minigrid/engine.py +589 -0
  49. synth_ai/environments/examples/minigrid/environment.py +274 -0
  50. synth_ai/environments/examples/minigrid/environment_mapping.py +242 -0
  51. synth_ai/environments/examples/minigrid/puzzle_loader.py +416 -0
  52. synth_ai/environments/examples/minigrid/taskset.py +583 -0
  53. synth_ai/environments/examples/minigrid/units/test_action_behavior.py +226 -0
  54. synth_ai/environments/examples/minigrid/units/test_debug_messages.py +83 -0
  55. synth_ai/environments/examples/minigrid/units/test_exploration.py +120 -0
  56. synth_ai/environments/examples/minigrid/units/test_minigrid_engine.py +214 -0
  57. synth_ai/environments/examples/minigrid/units/test_minigrid_environment.py +238 -0
  58. synth_ai/environments/examples/minigrid/units/test_minigrid_environment_mapping.py +301 -0
  59. synth_ai/environments/examples/minigrid/units/test_minigrid_taskset.py +210 -0
  60. synth_ai/environments/examples/nethack/__init__.py +7 -0
  61. synth_ai/environments/examples/nethack/achievements.py +337 -0
  62. synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +981 -0
  63. synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +74 -0
  64. synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +832 -0
  65. synth_ai/environments/examples/nethack/agent_demos/test_nethack_react_agent.py +1112 -0
  66. synth_ai/environments/examples/nethack/engine.py +738 -0
  67. synth_ai/environments/examples/nethack/environment.py +255 -0
  68. synth_ai/environments/examples/nethack/helpers/__init__.py +42 -0
  69. synth_ai/environments/examples/nethack/helpers/action_mapping.py +301 -0
  70. synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +401 -0
  71. synth_ai/environments/examples/nethack/helpers/observation_utils.py +433 -0
  72. synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +201 -0
  73. synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +268 -0
  74. synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +308 -0
  75. synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +430 -0
  76. synth_ai/environments/examples/nethack/taskset.py +323 -0
  77. synth_ai/environments/examples/nethack/units/test_nethack_engine.py +277 -0
  78. synth_ai/environments/examples/nethack/units/test_nethack_environment.py +281 -0
  79. synth_ai/environments/examples/nethack/units/test_nethack_taskset.py +213 -0
  80. synth_ai/environments/examples/nethack/units/test_recording.py +307 -0
  81. synth_ai/environments/examples/red/__init__.py +7 -0
  82. synth_ai/environments/examples/red/agent_demos/__init__.py +1 -0
  83. synth_ai/environments/examples/red/agent_demos/test_synth_react.py +1471 -0
  84. synth_ai/environments/examples/red/config_logging.py +110 -0
  85. synth_ai/environments/examples/red/engine.py +693 -0
  86. synth_ai/environments/examples/red/engine_helpers/__init__.py +1 -0
  87. synth_ai/environments/examples/red/engine_helpers/memory_map.py +28 -0
  88. synth_ai/environments/examples/red/engine_helpers/reward_components.py +275 -0
  89. synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +142 -0
  90. synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +56 -0
  91. synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +283 -0
  92. synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +149 -0
  93. synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +137 -0
  94. synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +56 -0
  95. synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +330 -0
  96. synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +120 -0
  97. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +558 -0
  98. synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +312 -0
  99. synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +147 -0
  100. synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +246 -0
  101. synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +367 -0
  102. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +139 -0
  103. synth_ai/environments/examples/red/environment.py +235 -0
  104. synth_ai/environments/examples/red/taskset.py +77 -0
  105. synth_ai/environments/examples/red/test_fixes.py +125 -0
  106. synth_ai/environments/examples/red/test_fixes_mock.py +148 -0
  107. synth_ai/environments/examples/red/units/__init__.py +1 -0
  108. synth_ai/environments/examples/red/units/test_basic_functionality.py +97 -0
  109. synth_ai/environments/examples/red/units/test_button_press_requirements.py +217 -0
  110. synth_ai/environments/examples/red/units/test_engine.py +192 -0
  111. synth_ai/environments/examples/red/units/test_environment.py +455 -0
  112. synth_ai/environments/examples/red/units/test_exploration_strategy.py +227 -0
  113. synth_ai/environments/examples/red/units/test_integration.py +217 -0
  114. synth_ai/environments/examples/red/units/test_memory_extraction.py +111 -0
  115. synth_ai/environments/examples/red/units/test_menu_bug_reproduction.py +1100 -0
  116. synth_ai/environments/examples/red/units/test_movement_debug.py +255 -0
  117. synth_ai/environments/examples/red/units/test_pokemon_mcts_debug.py +163 -0
  118. synth_ai/environments/examples/red/units/test_pokemon_mcts_verbose.py +117 -0
  119. synth_ai/environments/examples/red/units/test_red_basic.py +145 -0
  120. synth_ai/environments/examples/red/units/test_red_comprehensive.py +323 -0
  121. synth_ai/environments/examples/red/units/test_retry_movement.py +195 -0
  122. synth_ai/environments/examples/red/units/test_reward_components.py +186 -0
  123. synth_ai/environments/examples/red/units/test_rom_integration.py +260 -0
  124. synth_ai/environments/examples/red/units/test_taskset.py +116 -0
  125. synth_ai/environments/examples/red/units/test_tree.py +448 -0
  126. synth_ai/environments/examples/sokoban/__init__.py +1 -0
  127. synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +900 -0
  128. synth_ai/environments/examples/sokoban/agent_demos/test_dspy_react.py +1 -0
  129. synth_ai/environments/examples/sokoban/agent_demos/test_sokoban_react_agent.py +498 -0
  130. synth_ai/environments/examples/sokoban/agent_demos/test_synth_lats.py +1 -0
  131. synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_locally.py +748 -0
  132. synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_service.py +296 -0
  133. synth_ai/environments/examples/sokoban/engine.py +675 -0
  134. synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +1 -0
  135. synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +656 -0
  136. synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +17 -0
  137. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +3 -0
  138. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +129 -0
  139. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +370 -0
  140. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +331 -0
  141. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +305 -0
  142. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +66 -0
  143. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +114 -0
  144. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +122 -0
  145. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +394 -0
  146. synth_ai/environments/examples/sokoban/environment.py +228 -0
  147. synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +438 -0
  148. synth_ai/environments/examples/sokoban/puzzle_loader.py +311 -0
  149. synth_ai/environments/examples/sokoban/taskset.py +425 -0
  150. synth_ai/environments/examples/sokoban/units/astar_common.py +94 -0
  151. synth_ai/environments/examples/sokoban/units/test_building_task_set.py +49 -0
  152. synth_ai/environments/examples/sokoban/units/test_false_positive.py +120 -0
  153. synth_ai/environments/examples/sokoban/units/test_simple_run_through_environment.py +119 -0
  154. synth_ai/environments/examples/sokoban/units/test_sokoban_environment.py +98 -0
  155. synth_ai/environments/examples/sokoban/units/test_tree.py +364 -0
  156. synth_ai/environments/examples/tictactoe/__init__.py +1 -0
  157. synth_ai/environments/examples/tictactoe/agent_demos/test_synth_react.py +266 -0
  158. synth_ai/environments/examples/tictactoe/agent_demos/test_tictactoe_react_agent.py +470 -0
  159. synth_ai/environments/examples/tictactoe/engine.py +368 -0
  160. synth_ai/environments/examples/tictactoe/environment.py +239 -0
  161. synth_ai/environments/examples/tictactoe/taskset.py +214 -0
  162. synth_ai/environments/examples/tictactoe/units/test_tictactoe_engine.py +393 -0
  163. synth_ai/environments/examples/tictactoe/units/test_tictactoe_environment.py +493 -0
  164. synth_ai/environments/examples/tictactoe/units/test_tictactoe_taskset.py +191 -0
  165. synth_ai/environments/examples/verilog/__init__.py +10 -0
  166. synth_ai/environments/examples/verilog/agent_demos/test_synth_react.py +520 -0
  167. synth_ai/environments/examples/verilog/engine.py +328 -0
  168. synth_ai/environments/examples/verilog/environment.py +349 -0
  169. synth_ai/environments/examples/verilog/taskset.py +418 -0
  170. synth_ai/environments/examples/verilog/units/test_verilog_engine.py +466 -0
  171. synth_ai/environments/examples/verilog/units/test_verilog_environment.py +585 -0
  172. synth_ai/environments/examples/verilog/units/test_verilog_integration.py +383 -0
  173. synth_ai/environments/examples/verilog/units/test_verilog_taskset.py +457 -0
  174. synth_ai/environments/reproducibility/core.py +42 -0
  175. synth_ai/environments/reproducibility/tree.py +364 -0
  176. synth_ai/environments/service/app.py +78 -0
  177. synth_ai/environments/service/core_routes.py +775 -0
  178. synth_ai/environments/service/external_registry.py +57 -0
  179. synth_ai/environments/service/registry.py +9 -0
  180. synth_ai/environments/stateful/__init__.py +1 -0
  181. synth_ai/environments/stateful/core.py +28 -0
  182. synth_ai/environments/stateful/engine.py +21 -0
  183. synth_ai/environments/stateful/state.py +7 -0
  184. synth_ai/environments/tasks/api.py +19 -0
  185. synth_ai/environments/tasks/core.py +78 -0
  186. synth_ai/environments/tasks/filters.py +39 -0
  187. synth_ai/environments/tasks/utils.py +89 -0
  188. synth_ai/environments/v0_observability/history.py +3 -0
  189. synth_ai/environments/v0_observability/log.py +2 -0
  190. synth_ai/lm/caching/constants.py +1 -0
  191. synth_ai/{zyk/lms → lm}/caching/ephemeral.py +4 -8
  192. synth_ai/{zyk/lms → lm}/caching/handler.py +15 -15
  193. synth_ai/{zyk/lms → lm}/caching/initialize.py +2 -4
  194. synth_ai/{zyk/lms → lm}/caching/persistent.py +4 -10
  195. synth_ai/{zyk/lms → lm}/config.py +2 -1
  196. synth_ai/{zyk/lms → lm}/constants.py +2 -2
  197. synth_ai/{zyk/lms → lm}/core/all.py +10 -10
  198. synth_ai/{zyk/lms → lm}/core/main.py +57 -33
  199. synth_ai/{zyk/lms → lm}/core/vendor_clients.py +12 -10
  200. synth_ai/lm/cost/monitor.py +1 -0
  201. synth_ai/lm/cost/statefulness.py +1 -0
  202. synth_ai/lm/provider_support/__init__.py +8 -0
  203. synth_ai/lm/provider_support/anthropic.py +945 -0
  204. synth_ai/lm/provider_support/openai.py +1115 -0
  205. synth_ai/lm/provider_support/suppress_logging.py +31 -0
  206. synth_ai/{zyk/lms → lm}/structured_outputs/handler.py +58 -80
  207. synth_ai/{zyk/lms → lm}/structured_outputs/inject.py +6 -20
  208. synth_ai/{zyk/lms → lm}/structured_outputs/rehabilitate.py +6 -12
  209. synth_ai/{zyk/lms → lm}/vendors/core/anthropic_api.py +21 -30
  210. synth_ai/{zyk/lms → lm}/vendors/core/gemini_api.py +37 -32
  211. synth_ai/{zyk/lms → lm}/vendors/core/mistral_api.py +19 -28
  212. synth_ai/{zyk/lms → lm}/vendors/core/openai_api.py +26 -36
  213. synth_ai/{zyk/lms → lm}/vendors/openai_standard.py +29 -33
  214. synth_ai/{zyk/lms → lm}/vendors/retries.py +1 -1
  215. synth_ai/lm/vendors/supported/__init__.py +0 -0
  216. synth_ai/{zyk/lms → lm}/vendors/supported/custom_endpoint.py +131 -118
  217. synth_ai/{zyk/lms → lm}/vendors/supported/deepseek.py +4 -8
  218. synth_ai/{zyk/lms → lm}/vendors/supported/grok.py +6 -8
  219. synth_ai/{zyk/lms → lm}/vendors/supported/groq.py +1 -1
  220. synth_ai/{zyk/lms → lm}/vendors/supported/ollama.py +2 -2
  221. synth_ai/{zyk/lms → lm}/vendors/supported/openrouter.py +18 -16
  222. synth_ai/{zyk/lms → lm}/vendors/supported/together.py +1 -1
  223. synth_ai/tracing/__init__.py +0 -0
  224. synth_ai/tracing/abstractions.py +224 -0
  225. synth_ai/tracing/base_client.py +91 -0
  226. synth_ai/tracing/client_manager.py +131 -0
  227. synth_ai/tracing/config.py +140 -0
  228. synth_ai/tracing/context.py +146 -0
  229. synth_ai/tracing/decorators.py +679 -0
  230. synth_ai/tracing/events/__init__.py +0 -0
  231. synth_ai/tracing/events/manage.py +147 -0
  232. synth_ai/tracing/events/scope.py +86 -0
  233. synth_ai/tracing/events/store.py +227 -0
  234. synth_ai/tracing/immediate_client.py +152 -0
  235. synth_ai/tracing/local.py +18 -0
  236. synth_ai/tracing/log_client_base.py +74 -0
  237. synth_ai/tracing/retry_queue.py +187 -0
  238. synth_ai/tracing/trackers.py +515 -0
  239. synth_ai/tracing/upload.py +504 -0
  240. synth_ai/tracing/utils.py +9 -0
  241. synth_ai/zyk/__init__.py +28 -2
  242. synth_ai-0.2.1.dev0.dist-info/METADATA +349 -0
  243. synth_ai-0.2.1.dev0.dist-info/RECORD +261 -0
  244. synth_ai/zyk/lms/caching/constants.py +0 -1
  245. synth_ai/zyk/lms/cost/monitor.py +0 -1
  246. synth_ai/zyk/lms/cost/statefulness.py +0 -1
  247. synth_ai-0.1.9.dist-info/METADATA +0 -37
  248. synth_ai-0.1.9.dist-info/RECORD +0 -50
  249. /synth_ai/{zyk/lms/__init__.py → environments/reproducibility/helpers.py} +0 -0
  250. /synth_ai/{zyk/lms/caching → lm}/__init__.py +0 -0
  251. /synth_ai/{zyk/lms/core → lm/caching}/__init__.py +0 -0
  252. /synth_ai/{zyk/lms → lm}/caching/dbs.py +0 -0
  253. /synth_ai/{zyk/lms/cost → lm/core}/__init__.py +0 -0
  254. /synth_ai/{zyk/lms → lm}/core/exceptions.py +0 -0
  255. /synth_ai/{zyk/lms/structured_outputs → lm/cost}/__init__.py +0 -0
  256. /synth_ai/{zyk/lms/vendors → lm/structured_outputs}/__init__.py +0 -0
  257. /synth_ai/{zyk/lms → lm}/tools/__init__.py +0 -0
  258. /synth_ai/{zyk/lms → lm}/tools/base.py +0 -0
  259. /synth_ai/{zyk/lms/vendors/core → lm/vendors}/__init__.py +0 -0
  260. /synth_ai/{zyk/lms → lm}/vendors/base.py +0 -0
  261. /synth_ai/{zyk/lms/vendors/local → lm/vendors/core}/__init__.py +0 -0
  262. /synth_ai/{zyk/lms/vendors/supported → lm/vendors/local}/__init__.py +0 -0
  263. /synth_ai/{zyk/lms → lm}/vendors/local/ollama.py +0 -0
  264. {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/WHEEL +0 -0
  265. {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/licenses/LICENSE +0 -0
  266. {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,457 @@
1
+ import pytest
2
+ import asyncio
3
+ from pathlib import Path
4
+ from unittest.mock import patch
5
+
6
+ # Apply a 15-second timeout to every test in this module (sync and async)
7
+ pytestmark = pytest.mark.timeout(15)
8
+
9
+ from synth_ai.environments.examples.verilog.taskset import (
10
+ create_verilog_taskset,
11
+ _create_hf_task_instance,
12
+ VerilogTaskInstance,
13
+ VerilogTaskInstanceMetadata,
14
+ _cleanup_temp_dirs,
15
+ _temp_dirs,
16
+ )
17
+ from synth_ai.environments.tasks.core import TaskInstanceSet, SplitInfo, Impetus, Intent
18
+ from uuid import uuid4
19
+ from typing import cast
20
+
21
+
22
class TestVerilogTaskset:
    """Test suite for Verilog taskset creation.

    The dataset-backed tests replace ``load_dataset`` in the taskset module
    with a mock whose return value is an in-memory list of rows, so the rows
    seen by ``create_verilog_taskset`` are exactly the ones defined in each
    test.
    """

    # NOTE: patch() imports its target by dotted path, so the path must name
    # the module this file actually imports the taskset helpers from
    # (synth_ai.environments.examples.verilog.taskset), not the legacy
    # ``src.examples`` location.

    @pytest.mark.asyncio
    @patch("synth_ai.environments.examples.verilog.taskset.load_dataset")
    async def test_create_verilog_taskset_basic(self, mock_load_dataset):
        """Test basic taskset creation."""
        # Mock dataset
        mock_dataset = [
            {
                "problem_id": "test_001",
                "prompt": "Implement a simple AND gate with inputs a, b and output y.",
                "test": "`timescale 1ns/1ps\nmodule test_tb;\n // testbench code\nendmodule",
                "ref": "module RefModule(input a, b, output y);\n assign y = a & b;\nendmodule",
            },
            {
                "problem_id": "test_002",
                "prompt": "Implement a simple OR gate with inputs a, b and output y.",
                "test": "`timescale 1ns/1ps\nmodule test_tb2;\n // testbench code\nendmodule",
                "ref": "module RefModule(input a, b, output y);\n assign y = a | b;\nendmodule",
            },
        ]
        mock_load_dataset.return_value = mock_dataset

        taskset = await create_verilog_taskset(max_instances=2)

        assert isinstance(taskset, TaskInstanceSet)
        assert taskset.name == "VerilogEval v2 TaskSet"
        assert taskset.description == "VerilogEval v2 spec-to-RTL tasks from HuggingFace"
        assert len(taskset.instances) == 2

        # Check split info
        assert isinstance(taskset.split_info, SplitInfo)
        assert taskset.split_info._is_split_defined is True

        # Check instance properties
        instance = taskset.instances[0]
        assert isinstance(instance, VerilogTaskInstance)
        metadata = cast(VerilogTaskInstanceMetadata, instance.metadata)
        assert metadata.problem_name == "test_001"
        assert "AND gate" in metadata.description
        assert len(metadata.files_provided) == 3  # TopModule.v, testbench, RefModule.v

    @pytest.mark.asyncio
    @patch("synth_ai.environments.examples.verilog.taskset.load_dataset")
    async def test_create_verilog_taskset_max_instances(self, mock_load_dataset):
        """Test taskset creation with max_instances limit."""
        # Mock a dataset larger than the requested limit
        mock_dataset = [
            {
                "problem_id": f"test_{i:03d}",
                "prompt": f"Test {i}",
                "test": "",
                "ref": "",
            }
            for i in range(20)
        ]
        mock_load_dataset.return_value = mock_dataset

        taskset = await create_verilog_taskset(max_instances=5)

        assert len(taskset.instances) == 5
        # Should only create instances for first 5 items
        metadata0 = cast(VerilogTaskInstanceMetadata, taskset.instances[0].metadata)
        metadata4 = cast(VerilogTaskInstanceMetadata, taskset.instances[4].metadata)
        assert metadata0.problem_name == "test_000"
        assert metadata4.problem_name == "test_004"

    @pytest.mark.asyncio
    @patch("synth_ai.environments.examples.verilog.taskset.load_dataset")
    async def test_create_verilog_taskset_split_info(self, mock_load_dataset):
        """Test that split info is correctly calculated."""
        mock_dataset = [
            {
                "problem_id": f"test_{i:03d}",
                "prompt": f"Test {i}",
                "test": "",
                "ref": "",
            }
            for i in range(10)
        ]
        mock_load_dataset.return_value = mock_dataset

        taskset = await create_verilog_taskset(max_instances=10)

        # Should have 80% val (8 instances) and 20% test (2 instances)
        assert len(taskset.split_info.val_instance_ids) == 8
        assert len(taskset.split_info.test_instance_ids) == 2

        # Check that all instance IDs are accounted for
        all_ids = {inst.id for inst in taskset.instances}
        split_ids = taskset.split_info.val_instance_ids | taskset.split_info.test_instance_ids
        assert all_ids == split_ids

    def test_create_hf_task_instance(self):
        """Test creation of task instance from HuggingFace dataset item."""
        item = {
            "problem_id": "Prob001_zero",
            "prompt": "I would like you to implement a module named TopModule with output zero that always outputs LOW.",
            "test": "`timescale 1 ps/1 ps\nmodule tb();\n // testbench\nendmodule",
            "ref": "module RefModule(output zero);\n assign zero = 1'b0;\nendmodule",
        }

        instance = _create_hf_task_instance(item, 0)

        assert isinstance(instance, VerilogTaskInstance)
        metadata = cast(VerilogTaskInstanceMetadata, instance.metadata)
        assert metadata.problem_name == "Prob001_zero"
        assert "TopModule" in instance.impetus.instructions
        assert "always outputs LOW" in metadata.description
        assert metadata.difficulty == "medium"
        assert len(metadata.files_provided) == 3

        # Check that files were created
        pristine_dir = Path(instance.pristine_dir)
        assert (pristine_dir / "TopModule.v").exists()
        assert (pristine_dir / "Prob001_zero_tb.v").exists()
        assert (pristine_dir / "RefModule.v").exists()

        # Check file contents
        topmodule_content = (pristine_dir / "TopModule.v").read_text()
        assert "module TopModule();" in topmodule_content
        assert "TODO: Implement" in topmodule_content
        assert "always outputs LOW" in topmodule_content

        ref_content = (pristine_dir / "RefModule.v").read_text()
        assert "module RefModule" in ref_content
        assert "assign zero = 1'b0" in ref_content

    @pytest.mark.asyncio
    async def test_task_instance_serialization(self):
        """Test task instance serialization and deserialization."""
        item = {
            "problem_id": "test_serial",
            "prompt": "Test serialization",
            "test": "module test_tb(); endmodule",
            "ref": "module RefModule(); endmodule",
        }

        instance = _create_hf_task_instance(item, 0)

        # Test serialization
        serialized = await instance.serialize()
        assert isinstance(serialized, dict)
        assert serialized["metadata"]["problem_name"] == "test_serial"
        assert "id" in serialized
        assert isinstance(serialized["id"], str)  # UUID should be converted to string

        # Test deserialization: the round trip must preserve the problem name
        # and the instructions
        deserialized = await VerilogTaskInstance.deserialize(serialized)
        assert isinstance(deserialized, VerilogTaskInstance)
        deserialized_metadata = cast(VerilogTaskInstanceMetadata, deserialized.metadata)
        instance_metadata = cast(VerilogTaskInstanceMetadata, instance.metadata)
        assert deserialized_metadata.problem_name == instance_metadata.problem_name
        assert deserialized.impetus.instructions == instance.impetus.instructions
177
+
178
+
179
class TestVerilogTaskInstanceMetadata:
    """Checks for the VerilogTaskInstanceMetadata container."""

    def test_metadata_creation(self):
        """Build metadata with all four fields and read each one back."""
        meta = VerilogTaskInstanceMetadata(
            problem_name="test_problem",
            difficulty="hard",
            description="A test problem for unit testing",
            files_provided=["TopModule.v", "test_tb.v", "RefModule.v"],
        )

        # Scalar fields come back unchanged.
        assert meta.problem_name == "test_problem"
        assert meta.difficulty == "hard"
        assert meta.description == "A test problem for unit testing"

        # The file list keeps its three entries, including the stub module.
        assert len(meta.files_provided) == 3
        assert "TopModule.v" in meta.files_provided
196
+
197
+
198
class TestVerilogTaskInstance:
    """Checks for constructing, serializing and deserializing VerilogTaskInstance."""

    def test_task_instance_creation(self):
        """Construct an instance directly and read its attributes back."""
        task_metadata = VerilogTaskInstanceMetadata(
            problem_name="test",
            difficulty="easy",
            description="Test description",
            files_provided=["test.v"],
        )

        task = VerilogTaskInstance(
            id=uuid4(),
            impetus=Impetus(instructions="Test instructions"),
            intent=Intent(rubric={"goal": "Test goal"}, gold_trajectories=None, gold_state_diff={}),
            metadata=task_metadata,
            is_reproducible=True,
            initial_engine_snapshot=None,
            pristine_dir="/tmp/pristine",
            snapshot_dir="/tmp/snapshot",
        )

        stored_metadata = cast(VerilogTaskInstanceMetadata, task.metadata)
        assert stored_metadata.problem_name == "test"
        assert task.impetus.instructions == "Test instructions"
        # NOTE(review): the rubric is passed in as {"goal": "Test goal"} but is
        # compared against the bare string here -- confirm against Intent
        # whether this expectation is intended.
        assert task.intent.rubric == "Test goal"
        assert task.pristine_dir == "/tmp/pristine"
        assert task.snapshot_dir == "/tmp/snapshot"

    @pytest.mark.asyncio
    async def test_serialization_with_uuid(self):
        """A UUID id is emitted as a string and the payload deserializes again."""
        task_metadata = VerilogTaskInstanceMetadata(
            problem_name="test",
            difficulty="easy",
            description="Test",
            files_provided=["test.v"],
        )

        task = VerilogTaskInstance(
            id=uuid4(),
            impetus=Impetus(instructions="Test"),
            intent=Intent(rubric={"goal": "Test"}, gold_trajectories=None, gold_state_diff={}),
            metadata=task_metadata,
            is_reproducible=True,
            initial_engine_snapshot=None,
        )

        payload = await task.serialize()
        assert isinstance(payload["id"], str)

        # The string id produced above must be accepted on the way back in.
        restored = await VerilogTaskInstance.deserialize(payload)
        assert restored is not None

    @pytest.mark.asyncio
    async def test_deserialization_graceful_id_handling(self):
        """Deserialization accepts an id that is an arbitrary string."""
        # Constructed as in the other tests; the object itself is not used below.
        metadata = VerilogTaskInstanceMetadata(
            problem_name="test",
            difficulty="easy",
            description="Test",
            files_provided=["test.v"],
        )

        metadata_fields = {
            "problem_name": "test",
            "difficulty": "easy",
            "description": "Test",
            "files_provided": ["test.v"],
        }
        # Payload whose id is a plain string
        payload = {
            "id": "some-string-id",
            "impetus": {"instructions": "Test"},
            "intent": {"rubric": {"goal": "Test"}, "deterministic_eval_functions": []},
            "metadata": metadata_fields,
        }

        restored = await VerilogTaskInstance.deserialize(payload)
        restored_metadata = cast(VerilogTaskInstanceMetadata, restored.metadata)
        assert restored_metadata.problem_name == "test"

    @pytest.mark.asyncio
    async def test_deserialization_filters_constructor_fields(self):
        """Unknown top-level keys in the payload do not break deserialization."""
        known_fields = {
            "id": "test-id",
            "impetus": {"instructions": "Test"},
            "intent": {"rubric": {"goal": "Test"}, "deterministic_eval_functions": []},
            "metadata": {
                "problem_name": "test",
                "difficulty": "easy",
                "description": "Test",
                "files_provided": ["test.v"],
            },
        }
        # Keys with no matching constructor parameter are appended after the
        # known ones.
        payload = {
            **known_fields,
            "extra_field": "should_be_ignored",
            "another_extra": 123,
        }

        restored = await VerilogTaskInstance.deserialize(payload)
        restored_metadata = cast(VerilogTaskInstanceMetadata, restored.metadata)
        # Reaching this point without an exception is the real check.
        assert restored_metadata.problem_name == "test"
303
+
304
+
305
class TestTempDirectoryCleanup:
    """Checks for tracking and removal of task-instance temporary directories."""

    def test_temp_dirs_tracking(self):
        """Creating a task instance registers two new temp directories."""
        tracked_before = len(_temp_dirs)

        task = _create_hf_task_instance(
            {
                "problem_id": "cleanup_test",
                "prompt": "Test cleanup",
                "test": "module test(); endmodule",
                "ref": "module ref(); endmodule",
            },
            0,
        )

        # One entry each for the pristine and snapshot directories.
        assert len(_temp_dirs) == tracked_before + 2

        # Both directories must be present on disk.
        pristine_path = Path(task.pristine_dir)
        snapshot_path = Path(task.snapshot_dir)
        assert pristine_path.exists()
        assert snapshot_path.exists()

    def test_cleanup_temp_dirs(self):
        """_cleanup_temp_dirs removes the directories and empties the tracker."""
        # Creating a task is what produces the temp directories.
        task = _create_hf_task_instance(
            {
                "problem_id": "cleanup_test2",
                "prompt": "Test cleanup",
                "test": "module test(); endmodule",
                "ref": "module ref(); endmodule",
            },
            0,
        )
        pristine_path = Path(task.pristine_dir)
        snapshot_path = Path(task.snapshot_dir)

        # Precondition: both exist before cleanup.
        assert pristine_path.exists()
        assert snapshot_path.exists()

        _cleanup_temp_dirs()

        # Postcondition: both are gone and nothing is tracked any more.
        assert not pristine_path.exists()
        assert not snapshot_path.exists()
        assert len(_temp_dirs) == 0
355
+
356
+
357
class TestTasksetIntegration:
    """End-to-end checks of taskset creation with ``load_dataset`` patched out."""

    @pytest.mark.asyncio
    @patch("src.examples.verilog.taskset.load_dataset")
    async def test_full_taskset_workflow(self, mock_load_dataset):
        """Build a two-item taskset and inspect its instances, files and serialization."""
        # Two stand-in rows shaped like VerilogEval dataset items.
        zero_item = {
            "problem_id": "Prob001_zero",
            "prompt": "I would like you to implement a module named TopModule with the following interface. All input and output ports are one bit unless otherwise specified.\n\n - output zero\n\nThe module should always outputs a LOW.",
            "test": '`timescale 1 ps/1 ps\n`define OK 12\n`define INCORRECT 13\n\nmodule stimulus_gen (\n\tinput clk,\n\toutput reg[511:0] wavedrom_title,\n\toutput reg wavedrom_enable\n);\n\ntask wavedrom_start(input[511:0] title = "");\nendtask\n\nendmodule\n\nmodule tb();\n\nreg clk=0;\ninitial forever\n\t#5 clk = ~clk;\n\nlogic zero_ref;\nlogic zero_dut;\n\nRefModule good1 (\n\t.zero(zero_ref) );\n\t\nTopModule top_module1 (\n\t.zero(zero_dut) );\n\nendmodule',
            "ref": "module RefModule (\n output zero\n);\n\n assign zero = 1'b0;\n\nendmodule",
        }
        and_gate_item = {
            "problem_id": "Prob002_and_gate",
            "prompt": "Implement an AND gate with inputs a, b and output y.",
            "test": "`timescale 1ns/1ps\nmodule test_tb;\n reg a, b;\n wire y;\n TopModule dut(.a(a), .b(b), .y(y));\n RefModule ref(.a(a), .b(b), .y(y_ref));\nendmodule",
            "ref": "module RefModule(input a, b, output y);\n assign y = a & b;\nendmodule",
        }
        mock_load_dataset.return_value = [zero_item, and_gate_item]

        built = await create_verilog_taskset(max_instances=2)

        # Structure: two instances, with one id in each of the val and test splits.
        assert len(built.instances) == 2
        assert len(built.split_info.val_instance_ids) == 1
        assert len(built.split_info.test_instance_ids) == 1

        # First instance: the "zero" problem.
        first = built.instances[0]
        first_meta = cast(VerilogTaskInstanceMetadata, first.metadata)
        assert first_meta.problem_name == "Prob001_zero"
        assert "output zero" in first.impetus.instructions
        assert "always outputs a LOW" in first_meta.description

        # The pristine directory must hold the template, the testbench and the reference.
        workdir = Path(first.pristine_dir)
        for filename in ("TopModule.v", "Prob001_zero_tb.v", "RefModule.v"):
            assert (workdir / filename).exists()

        # Template contents.
        template_text = (workdir / "TopModule.v").read_text()
        for snippet in ("module TopModule();", "TODO: Implement", "output zero"):
            assert snippet in template_text

        # Reference contents.
        reference_text = (workdir / "RefModule.v").read_text()
        for snippet in ("module RefModule", "assign zero = 1'b0"):
            assert snippet in reference_text

        # Second instance: the AND gate problem.
        second = built.instances[1]
        second_meta = cast(VerilogTaskInstanceMetadata, second.metadata)
        assert second_meta.problem_name == "Prob002_and_gate"
        assert "AND gate" in second.impetus.instructions

        # Every instance serializes, concurrently, to a dict.
        pending = [task.serialize() for task in built.instances]
        dumped = await asyncio.gather(*pending)
        assert len(dumped) == 2
        assert all(isinstance(entry, dict) for entry in dumped)

    @pytest.mark.asyncio
    @patch("src.examples.verilog.taskset.load_dataset")
    async def test_empty_dataset_handling(self, mock_load_dataset):
        """An empty dataset yields a taskset with no instances and empty splits."""
        mock_load_dataset.return_value = []

        empty_taskset = await create_verilog_taskset(max_instances=5)

        assert len(empty_taskset.instances) == 0
        assert len(empty_taskset.split_info.val_instance_ids) == 0
        assert len(empty_taskset.split_info.test_instance_ids) == 0

    @pytest.mark.asyncio
    @patch("src.examples.verilog.taskset.load_dataset")
    async def test_single_instance_split(self, mock_load_dataset):
        """A one-item dataset still produces a taskset with exactly one instance."""
        only_item = {
            "problem_id": "single_test",
            "prompt": "Single test",
            "test": "module test(); endmodule",
            "ref": "module ref(); endmodule",
        }
        mock_load_dataset.return_value = [only_item]

        single_taskset = await create_verilog_taskset(max_instances=1)

        # Only the instance count is pinned here; which split the lone instance
        # lands in is deliberately not asserted, the call just has to succeed.
        assert len(single_taskset.instances) == 1
@@ -0,0 +1,42 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import TypeVar, Generic, Any
3
+
4
+
5
class IReproducibleEngine(ABC):
    """Interface for engines whose state can be serialized and later restored.

    Both hooks are abstract coroutines, so a subclass has to override the pair
    before it can be instantiated.
    """

    # ``Any`` is a placeholder: replace it with a more specific snapshot type
    # if a common one emerges.
    @abstractmethod
    async def _serialize_engine(self) -> Any:
        """Serialize the current state of the engine."""
        ...

    # The ``snapshot`` parameter is typed ``Any`` for the same placeholder reason.
    @classmethod
    @abstractmethod
    async def _deserialize_engine(cls, snapshot: Any) -> "IReproducibleEngine":
        """Create an engine instance from a serialized snapshot."""
        ...
23
+
24
+
25
# Covariant type parameter standing for the concrete engine class. The bound
# restricts it to classes implementing the IReproducibleEngine interface.
EngineType_co = TypeVar("EngineType_co", covariant=True, bound=IReproducibleEngine)


class ReproducibleEnvironment(Generic[EngineType_co]):
    """
    Mixin for environments that are reproducible because their engine can be
    serialized and deserialized.

    The environment is expected to expose an 'engine' attribute conforming to
    the IReproducibleEngine interface. That expectation is expressed through
    the type annotation below and the IReproducibleEngine ABC.
    """

    # Deliberately no runtime validation (e.g. hasattr checks): the engine's
    # _serialize_engine and _deserialize_engine methods are guaranteed by the
    # IReproducibleEngine contract rather than verified here.
    engine: EngineType_co