synth-ai 0.1.9__py3-none-any.whl → 0.2.1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (266) hide show
  1. synth_ai/__init__.py +28 -2
  2. synth_ai/core/system.py +4 -0
  3. synth_ai/environments/__init__.py +35 -0
  4. synth_ai/environments/environment/__init__.py +1 -0
  5. synth_ai/environments/environment/artifacts/__init__.py +1 -0
  6. synth_ai/environments/environment/artifacts/base.py +50 -0
  7. synth_ai/environments/environment/core.py +22 -0
  8. synth_ai/environments/environment/db/__init__.py +1 -0
  9. synth_ai/environments/environment/db/sqlite.py +45 -0
  10. synth_ai/environments/environment/registry.py +24 -0
  11. synth_ai/environments/environment/resources/sqlite.py +46 -0
  12. synth_ai/environments/environment/results.py +1 -0
  13. synth_ai/environments/environment/rewards/__init__.py +1 -0
  14. synth_ai/environments/environment/rewards/core.py +28 -0
  15. synth_ai/environments/environment/shared_engine.py +26 -0
  16. synth_ai/environments/environment/tools/__init__.py +34 -0
  17. synth_ai/environments/examples/__init__.py +1 -0
  18. synth_ai/environments/examples/crafter_classic/__init__.py +8 -0
  19. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +58 -0
  20. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +152 -0
  21. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +1194 -0
  22. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +51 -0
  23. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +872 -0
  24. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1412 -0
  25. synth_ai/environments/examples/crafter_classic/agent_demos/test_crafter_react_agent.py +1110 -0
  26. synth_ai/environments/examples/crafter_classic/config_logging.py +111 -0
  27. synth_ai/environments/examples/crafter_classic/engine.py +502 -0
  28. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +63 -0
  29. synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +5 -0
  30. synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +74 -0
  31. synth_ai/environments/examples/crafter_classic/environment.py +255 -0
  32. synth_ai/environments/examples/crafter_classic/taskset.py +228 -0
  33. synth_ai/environments/examples/enron/agent_demos/test_synth_react.py +535 -0
  34. synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +156 -0
  35. synth_ai/environments/examples/enron/art_helpers/local_email_db.py +280 -0
  36. synth_ai/environments/examples/enron/art_helpers/types_enron.py +24 -0
  37. synth_ai/environments/examples/enron/engine.py +291 -0
  38. synth_ai/environments/examples/enron/environment.py +165 -0
  39. synth_ai/environments/examples/enron/taskset.py +112 -0
  40. synth_ai/environments/examples/enron/units/keyword_stats.py +111 -0
  41. synth_ai/environments/examples/enron/units/test_email_index.py +8 -0
  42. synth_ai/environments/examples/minigrid/__init__.py +48 -0
  43. synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +1188 -0
  44. synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +47 -0
  45. synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +562 -0
  46. synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +220 -0
  47. synth_ai/environments/examples/minigrid/agent_demos/test_minigrid_react_agent.py +393 -0
  48. synth_ai/environments/examples/minigrid/engine.py +589 -0
  49. synth_ai/environments/examples/minigrid/environment.py +274 -0
  50. synth_ai/environments/examples/minigrid/environment_mapping.py +242 -0
  51. synth_ai/environments/examples/minigrid/puzzle_loader.py +416 -0
  52. synth_ai/environments/examples/minigrid/taskset.py +583 -0
  53. synth_ai/environments/examples/minigrid/units/test_action_behavior.py +226 -0
  54. synth_ai/environments/examples/minigrid/units/test_debug_messages.py +83 -0
  55. synth_ai/environments/examples/minigrid/units/test_exploration.py +120 -0
  56. synth_ai/environments/examples/minigrid/units/test_minigrid_engine.py +214 -0
  57. synth_ai/environments/examples/minigrid/units/test_minigrid_environment.py +238 -0
  58. synth_ai/environments/examples/minigrid/units/test_minigrid_environment_mapping.py +301 -0
  59. synth_ai/environments/examples/minigrid/units/test_minigrid_taskset.py +210 -0
  60. synth_ai/environments/examples/nethack/__init__.py +7 -0
  61. synth_ai/environments/examples/nethack/achievements.py +337 -0
  62. synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +981 -0
  63. synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +74 -0
  64. synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +832 -0
  65. synth_ai/environments/examples/nethack/agent_demos/test_nethack_react_agent.py +1112 -0
  66. synth_ai/environments/examples/nethack/engine.py +738 -0
  67. synth_ai/environments/examples/nethack/environment.py +255 -0
  68. synth_ai/environments/examples/nethack/helpers/__init__.py +42 -0
  69. synth_ai/environments/examples/nethack/helpers/action_mapping.py +301 -0
  70. synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +401 -0
  71. synth_ai/environments/examples/nethack/helpers/observation_utils.py +433 -0
  72. synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +201 -0
  73. synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +268 -0
  74. synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +308 -0
  75. synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +430 -0
  76. synth_ai/environments/examples/nethack/taskset.py +323 -0
  77. synth_ai/environments/examples/nethack/units/test_nethack_engine.py +277 -0
  78. synth_ai/environments/examples/nethack/units/test_nethack_environment.py +281 -0
  79. synth_ai/environments/examples/nethack/units/test_nethack_taskset.py +213 -0
  80. synth_ai/environments/examples/nethack/units/test_recording.py +307 -0
  81. synth_ai/environments/examples/red/__init__.py +7 -0
  82. synth_ai/environments/examples/red/agent_demos/__init__.py +1 -0
  83. synth_ai/environments/examples/red/agent_demos/test_synth_react.py +1471 -0
  84. synth_ai/environments/examples/red/config_logging.py +110 -0
  85. synth_ai/environments/examples/red/engine.py +693 -0
  86. synth_ai/environments/examples/red/engine_helpers/__init__.py +1 -0
  87. synth_ai/environments/examples/red/engine_helpers/memory_map.py +28 -0
  88. synth_ai/environments/examples/red/engine_helpers/reward_components.py +275 -0
  89. synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +142 -0
  90. synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +56 -0
  91. synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +283 -0
  92. synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +149 -0
  93. synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +137 -0
  94. synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +56 -0
  95. synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +330 -0
  96. synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +120 -0
  97. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +558 -0
  98. synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +312 -0
  99. synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +147 -0
  100. synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +246 -0
  101. synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +367 -0
  102. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +139 -0
  103. synth_ai/environments/examples/red/environment.py +235 -0
  104. synth_ai/environments/examples/red/taskset.py +77 -0
  105. synth_ai/environments/examples/red/test_fixes.py +125 -0
  106. synth_ai/environments/examples/red/test_fixes_mock.py +148 -0
  107. synth_ai/environments/examples/red/units/__init__.py +1 -0
  108. synth_ai/environments/examples/red/units/test_basic_functionality.py +97 -0
  109. synth_ai/environments/examples/red/units/test_button_press_requirements.py +217 -0
  110. synth_ai/environments/examples/red/units/test_engine.py +192 -0
  111. synth_ai/environments/examples/red/units/test_environment.py +455 -0
  112. synth_ai/environments/examples/red/units/test_exploration_strategy.py +227 -0
  113. synth_ai/environments/examples/red/units/test_integration.py +217 -0
  114. synth_ai/environments/examples/red/units/test_memory_extraction.py +111 -0
  115. synth_ai/environments/examples/red/units/test_menu_bug_reproduction.py +1100 -0
  116. synth_ai/environments/examples/red/units/test_movement_debug.py +255 -0
  117. synth_ai/environments/examples/red/units/test_pokemon_mcts_debug.py +163 -0
  118. synth_ai/environments/examples/red/units/test_pokemon_mcts_verbose.py +117 -0
  119. synth_ai/environments/examples/red/units/test_red_basic.py +145 -0
  120. synth_ai/environments/examples/red/units/test_red_comprehensive.py +323 -0
  121. synth_ai/environments/examples/red/units/test_retry_movement.py +195 -0
  122. synth_ai/environments/examples/red/units/test_reward_components.py +186 -0
  123. synth_ai/environments/examples/red/units/test_rom_integration.py +260 -0
  124. synth_ai/environments/examples/red/units/test_taskset.py +116 -0
  125. synth_ai/environments/examples/red/units/test_tree.py +448 -0
  126. synth_ai/environments/examples/sokoban/__init__.py +1 -0
  127. synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +900 -0
  128. synth_ai/environments/examples/sokoban/agent_demos/test_dspy_react.py +1 -0
  129. synth_ai/environments/examples/sokoban/agent_demos/test_sokoban_react_agent.py +498 -0
  130. synth_ai/environments/examples/sokoban/agent_demos/test_synth_lats.py +1 -0
  131. synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_locally.py +748 -0
  132. synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_service.py +296 -0
  133. synth_ai/environments/examples/sokoban/engine.py +675 -0
  134. synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +1 -0
  135. synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +656 -0
  136. synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +17 -0
  137. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +3 -0
  138. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +129 -0
  139. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +370 -0
  140. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +331 -0
  141. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +305 -0
  142. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +66 -0
  143. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +114 -0
  144. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +122 -0
  145. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +394 -0
  146. synth_ai/environments/examples/sokoban/environment.py +228 -0
  147. synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +438 -0
  148. synth_ai/environments/examples/sokoban/puzzle_loader.py +311 -0
  149. synth_ai/environments/examples/sokoban/taskset.py +425 -0
  150. synth_ai/environments/examples/sokoban/units/astar_common.py +94 -0
  151. synth_ai/environments/examples/sokoban/units/test_building_task_set.py +49 -0
  152. synth_ai/environments/examples/sokoban/units/test_false_positive.py +120 -0
  153. synth_ai/environments/examples/sokoban/units/test_simple_run_through_environment.py +119 -0
  154. synth_ai/environments/examples/sokoban/units/test_sokoban_environment.py +98 -0
  155. synth_ai/environments/examples/sokoban/units/test_tree.py +364 -0
  156. synth_ai/environments/examples/tictactoe/__init__.py +1 -0
  157. synth_ai/environments/examples/tictactoe/agent_demos/test_synth_react.py +266 -0
  158. synth_ai/environments/examples/tictactoe/agent_demos/test_tictactoe_react_agent.py +470 -0
  159. synth_ai/environments/examples/tictactoe/engine.py +368 -0
  160. synth_ai/environments/examples/tictactoe/environment.py +239 -0
  161. synth_ai/environments/examples/tictactoe/taskset.py +214 -0
  162. synth_ai/environments/examples/tictactoe/units/test_tictactoe_engine.py +393 -0
  163. synth_ai/environments/examples/tictactoe/units/test_tictactoe_environment.py +493 -0
  164. synth_ai/environments/examples/tictactoe/units/test_tictactoe_taskset.py +191 -0
  165. synth_ai/environments/examples/verilog/__init__.py +10 -0
  166. synth_ai/environments/examples/verilog/agent_demos/test_synth_react.py +520 -0
  167. synth_ai/environments/examples/verilog/engine.py +328 -0
  168. synth_ai/environments/examples/verilog/environment.py +349 -0
  169. synth_ai/environments/examples/verilog/taskset.py +418 -0
  170. synth_ai/environments/examples/verilog/units/test_verilog_engine.py +466 -0
  171. synth_ai/environments/examples/verilog/units/test_verilog_environment.py +585 -0
  172. synth_ai/environments/examples/verilog/units/test_verilog_integration.py +383 -0
  173. synth_ai/environments/examples/verilog/units/test_verilog_taskset.py +457 -0
  174. synth_ai/environments/reproducibility/core.py +42 -0
  175. synth_ai/environments/reproducibility/tree.py +364 -0
  176. synth_ai/environments/service/app.py +78 -0
  177. synth_ai/environments/service/core_routes.py +775 -0
  178. synth_ai/environments/service/external_registry.py +57 -0
  179. synth_ai/environments/service/registry.py +9 -0
  180. synth_ai/environments/stateful/__init__.py +1 -0
  181. synth_ai/environments/stateful/core.py +28 -0
  182. synth_ai/environments/stateful/engine.py +21 -0
  183. synth_ai/environments/stateful/state.py +7 -0
  184. synth_ai/environments/tasks/api.py +19 -0
  185. synth_ai/environments/tasks/core.py +78 -0
  186. synth_ai/environments/tasks/filters.py +39 -0
  187. synth_ai/environments/tasks/utils.py +89 -0
  188. synth_ai/environments/v0_observability/history.py +3 -0
  189. synth_ai/environments/v0_observability/log.py +2 -0
  190. synth_ai/lm/caching/constants.py +1 -0
  191. synth_ai/{zyk/lms → lm}/caching/ephemeral.py +4 -8
  192. synth_ai/{zyk/lms → lm}/caching/handler.py +15 -15
  193. synth_ai/{zyk/lms → lm}/caching/initialize.py +2 -4
  194. synth_ai/{zyk/lms → lm}/caching/persistent.py +4 -10
  195. synth_ai/{zyk/lms → lm}/config.py +2 -1
  196. synth_ai/{zyk/lms → lm}/constants.py +2 -2
  197. synth_ai/{zyk/lms → lm}/core/all.py +10 -10
  198. synth_ai/{zyk/lms → lm}/core/main.py +57 -33
  199. synth_ai/{zyk/lms → lm}/core/vendor_clients.py +12 -10
  200. synth_ai/lm/cost/monitor.py +1 -0
  201. synth_ai/lm/cost/statefulness.py +1 -0
  202. synth_ai/lm/provider_support/__init__.py +8 -0
  203. synth_ai/lm/provider_support/anthropic.py +945 -0
  204. synth_ai/lm/provider_support/openai.py +1115 -0
  205. synth_ai/lm/provider_support/suppress_logging.py +31 -0
  206. synth_ai/{zyk/lms → lm}/structured_outputs/handler.py +58 -80
  207. synth_ai/{zyk/lms → lm}/structured_outputs/inject.py +6 -20
  208. synth_ai/{zyk/lms → lm}/structured_outputs/rehabilitate.py +6 -12
  209. synth_ai/{zyk/lms → lm}/vendors/core/anthropic_api.py +21 -30
  210. synth_ai/{zyk/lms → lm}/vendors/core/gemini_api.py +37 -32
  211. synth_ai/{zyk/lms → lm}/vendors/core/mistral_api.py +19 -28
  212. synth_ai/{zyk/lms → lm}/vendors/core/openai_api.py +26 -36
  213. synth_ai/{zyk/lms → lm}/vendors/openai_standard.py +29 -33
  214. synth_ai/{zyk/lms → lm}/vendors/retries.py +1 -1
  215. synth_ai/lm/vendors/supported/__init__.py +0 -0
  216. synth_ai/{zyk/lms → lm}/vendors/supported/custom_endpoint.py +131 -118
  217. synth_ai/{zyk/lms → lm}/vendors/supported/deepseek.py +4 -8
  218. synth_ai/{zyk/lms → lm}/vendors/supported/grok.py +6 -8
  219. synth_ai/{zyk/lms → lm}/vendors/supported/groq.py +1 -1
  220. synth_ai/{zyk/lms → lm}/vendors/supported/ollama.py +2 -2
  221. synth_ai/{zyk/lms → lm}/vendors/supported/openrouter.py +18 -16
  222. synth_ai/{zyk/lms → lm}/vendors/supported/together.py +1 -1
  223. synth_ai/tracing/__init__.py +0 -0
  224. synth_ai/tracing/abstractions.py +224 -0
  225. synth_ai/tracing/base_client.py +91 -0
  226. synth_ai/tracing/client_manager.py +131 -0
  227. synth_ai/tracing/config.py +140 -0
  228. synth_ai/tracing/context.py +146 -0
  229. synth_ai/tracing/decorators.py +679 -0
  230. synth_ai/tracing/events/__init__.py +0 -0
  231. synth_ai/tracing/events/manage.py +147 -0
  232. synth_ai/tracing/events/scope.py +86 -0
  233. synth_ai/tracing/events/store.py +227 -0
  234. synth_ai/tracing/immediate_client.py +152 -0
  235. synth_ai/tracing/local.py +18 -0
  236. synth_ai/tracing/log_client_base.py +74 -0
  237. synth_ai/tracing/retry_queue.py +187 -0
  238. synth_ai/tracing/trackers.py +515 -0
  239. synth_ai/tracing/upload.py +504 -0
  240. synth_ai/tracing/utils.py +9 -0
  241. synth_ai/zyk/__init__.py +28 -2
  242. synth_ai-0.2.1.dev0.dist-info/METADATA +349 -0
  243. synth_ai-0.2.1.dev0.dist-info/RECORD +261 -0
  244. synth_ai/zyk/lms/caching/constants.py +0 -1
  245. synth_ai/zyk/lms/cost/monitor.py +0 -1
  246. synth_ai/zyk/lms/cost/statefulness.py +0 -1
  247. synth_ai-0.1.9.dist-info/METADATA +0 -37
  248. synth_ai-0.1.9.dist-info/RECORD +0 -50
  249. /synth_ai/{zyk/lms/__init__.py → environments/reproducibility/helpers.py} +0 -0
  250. /synth_ai/{zyk/lms/caching → lm}/__init__.py +0 -0
  251. /synth_ai/{zyk/lms/core → lm/caching}/__init__.py +0 -0
  252. /synth_ai/{zyk/lms → lm}/caching/dbs.py +0 -0
  253. /synth_ai/{zyk/lms/cost → lm/core}/__init__.py +0 -0
  254. /synth_ai/{zyk/lms → lm}/core/exceptions.py +0 -0
  255. /synth_ai/{zyk/lms/structured_outputs → lm/cost}/__init__.py +0 -0
  256. /synth_ai/{zyk/lms/vendors → lm/structured_outputs}/__init__.py +0 -0
  257. /synth_ai/{zyk/lms → lm}/tools/__init__.py +0 -0
  258. /synth_ai/{zyk/lms → lm}/tools/base.py +0 -0
  259. /synth_ai/{zyk/lms/vendors/core → lm/vendors}/__init__.py +0 -0
  260. /synth_ai/{zyk/lms → lm}/vendors/base.py +0 -0
  261. /synth_ai/{zyk/lms/vendors/local → lm/vendors/core}/__init__.py +0 -0
  262. /synth_ai/{zyk/lms/vendors/supported → lm/vendors/local}/__init__.py +0 -0
  263. /synth_ai/{zyk/lms → lm}/vendors/local/ollama.py +0 -0
  264. {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/WHEEL +0 -0
  265. {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/licenses/LICENSE +0 -0
  266. {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,418 @@
1
+ from synth_ai.environments.tasks.core import (
2
+ Task,
3
+ TaskInstance,
4
+ TaskInstanceMetadata,
5
+ TaskInstanceSet,
6
+ SplitInfo,
7
+ Impetus,
8
+ Intent,
9
+ )
10
+ from uuid import uuid4, UUID
11
+ from dataclasses import dataclass, asdict, fields
12
+ from typing import Optional
13
+ from pathlib import Path
14
+ import tempfile
15
+ import os
16
+ import shutil
17
+ import atexit
18
+ from datasets import load_dataset
19
+
20
# Paths of every temporary directory created by this module; emptied by
# _cleanup_temp_dirs(), which is registered below to run at interpreter exit.
_temp_dirs: list[str] = []


def _cleanup_temp_dirs():
    """Remove every tracked temporary directory, then forget them all.

    Removal is best-effort: ``ignore_errors=True`` makes ``shutil.rmtree``
    skip directories that are already gone or cannot be deleted, so a failed
    cleanup never interrupts interpreter shutdown. Unlike a blanket
    ``except Exception: pass``, unrelated programming errors are not hidden.
    """
    for temp_dir in _temp_dirs:
        shutil.rmtree(temp_dir, ignore_errors=True)
    # Forget the paths so a second call is a no-op
    _temp_dirs.clear()


# Register cleanup function to run at exit
atexit.register(_cleanup_temp_dirs)
37
+
38
# Module-level Task describing the overall Verilog objective; it carries no
# shared environment parameters.
verilog_task = Task(
    global_premises="Implement and verify Verilog hardware designs",
    global_constraints="Must pass testbench verification",
    global_objectives="Write correct Verilog code that passes all tests",
    shared_env_params={},
)
44
+
45
+
46
@dataclass
class VerilogTaskInstanceMetadata(TaskInstanceMetadata):
    """Descriptive metadata attached to a single Verilog task instance."""

    # Name / identifier of the problem this instance was built from
    problem_name: str
    # Difficulty label (the builders in this module use "easy" and "medium")
    difficulty: str
    # Human-readable description of the task
    description: str
    # Names of the files written into the instance's pristine directory
    files_provided: list[str]
52
+
53
+
54
@dataclass
class VerilogTaskInstance(TaskInstance):
    """Task instance whose working files live in directories on disk.

    ``pristine_dir`` and ``snapshot_dir`` hold directory paths as strings;
    both default to ``None``.
    """

    pristine_dir: Optional[str] = None
    snapshot_dir: Optional[str] = None

    async def serialize(self) -> dict:
        """Return this instance as a plain dict.

        A ``UUID`` id is converted to its string form, and the intent's
        ``deterministic_eval_functions`` entry (when present) is replaced by
        an empty list.
        """
        data = asdict(self)
        if isinstance(data.get("id"), UUID):
            data["id"] = str(data["id"])
        intent = data.get("intent")
        if intent is not None and "deterministic_eval_functions" in intent:
            intent["deterministic_eval_functions"] = []
        return data

    @classmethod
    async def deserialize(cls, data: dict) -> "VerilogTaskInstance":
        """Rebuild an instance from a dict such as ``serialize`` produces.

        Ids that are not valid UUIDs are kept unchanged. Missing impetus,
        intent and metadata fields are filled with defaults, keys that are
        not dataclass fields of ``cls`` are dropped, and the caller's ``data``
        (including its nested dicts) is never modified.
        """
        # Shallow copy: every replacement below lands on the copy, not on
        # the mapping the caller handed in.
        data = dict(data)

        if "id" in data:
            try:
                data["id"] = UUID(str(data["id"]))
            except (ValueError, TypeError, AttributeError):
                pass  # keep original value

        if isinstance(data.get("impetus"), dict):
            # Copy before adding defaults so the nested input dict is untouched
            impetus_data = dict(data["impetus"])
            impetus_data.setdefault("instructions", "Implement the Verilog module")
            data["impetus"] = Impetus(**impetus_data)

        if isinstance(data.get("intent"), dict):
            intent_data = dict(data["intent"])
            intent_data.setdefault("deterministic_eval_functions", [])
            # Provide default values for required fields if missing
            intent_data.setdefault("rubric", {"goal": "Pass all testbench tests"})
            intent_data.setdefault("gold_trajectories", None)
            intent_data.setdefault("gold_state_diff", {})
            data["intent"] = Intent(**intent_data)

        if isinstance(data.get("metadata"), dict):
            metadata_data = dict(data["metadata"])
            # Ensure required fields exist with defaults if missing
            metadata_data.setdefault("problem_name", "unknown")
            metadata_data.setdefault("difficulty", "medium")
            metadata_data.setdefault("description", "Verilog implementation task")
            metadata_data.setdefault("files_provided", [])
            data["metadata"] = VerilogTaskInstanceMetadata(**metadata_data)

        # Only pass keys the dataclass constructor actually accepts
        constructor_field_names = {f.name for f in fields(cls)}
        filtered_data = {k: v for k, v in data.items() if k in constructor_field_names}

        # Add default values for required fields if missing
        filtered_data.setdefault("is_reproducible", True)
        filtered_data.setdefault("initial_engine_snapshot", None)

        return cls(**filtered_data)
120
+
121
+
122
async def create_verilog_taskset(max_instances: int = 10) -> TaskInstanceSet:
    """Build a TaskInstanceSet from the HuggingFace VerilogEval v2 dataset.

    At most ``max_instances`` items of the dataset's ``test`` split are turned
    into task instances. The first 80% of the created instances form the
    validation split and the remainder forms the test split.
    """
    dataset = load_dataset("dakies/nvlabs-verilogeval-v2-spec-to-rtl", split="test")

    # Cap the number of instances for faster testing
    count = min(max_instances, len(dataset))  # type: ignore[arg-type]
    instances = [_create_hf_task_instance(dataset[idx], idx) for idx in range(count)]

    # Everything before the cutoff is validation, everything after is test
    cutoff = int(0.8 * len(instances))
    split_info = SplitInfo(
        val_instance_ids={inst.id for inst in instances[:cutoff]},
        test_instance_ids={inst.id for inst in instances[cutoff:]},
        _is_split_defined=True,
    )

    return TaskInstanceSet(
        name="VerilogEval v2 TaskSet",
        description="VerilogEval v2 spec-to-RTL tasks from HuggingFace",
        instances=instances,
        split_info=split_info,
    )
157
+
158
+
159
def _create_hf_task_instance(item, index: int) -> VerilogTaskInstance:
    """Create a VerilogTaskInstance from a HuggingFace dataset item.

    Reads the ``problem_id``, ``prompt``, ``test`` and ``ref`` entries of
    ``item`` and writes three files into a fresh temporary directory: a stub
    ``TopModule.v`` carrying the specification as a comment, the testbench,
    and the reference module. A second temporary directory is created for
    snapshots. Both directories are tracked in ``_temp_dirs`` for cleanup.

    ``index`` is only used to label the temporary directory name.
    """
    instance_id = uuid4()

    # mkdtemp creates the directory itself, so no separate mkdir is needed
    temp_dir = tempfile.mkdtemp(prefix=f"verilog_hf_{index}_{instance_id}_")
    _temp_dirs.append(temp_dir)  # Track for cleanup
    pristine_dir = Path(temp_dir)

    # Extract information from dataset item
    problem_id = item["problem_id"]
    prompt = item["prompt"]
    testbench = item["test"]
    ref_solution = item["ref"]
    spec = prompt.strip()

    # Verilog block comments do not nest: a "*/" inside the specification
    # would end the comment early and leave the rest of the text as invalid
    # code, so the terminator is broken up before embedding. The unmodified
    # specification is still delivered through the impetus and metadata.
    spec_in_comment = spec.replace("*/", "* /")

    # Incomplete module template (TopModule is the expected name in tests)
    module_content = (
        """module TopModule();
// TODO: Implement the module based on the specification below
/*
Specification:
"""
        + spec_in_comment
        + """
*/
endmodule"""
    )

    # Write files to pristine directory
    module_file = "TopModule.v"
    testbench_file = f"{problem_id}_tb.v"
    ref_file = "RefModule.v"

    (pristine_dir / module_file).write_text(module_content)
    (pristine_dir / testbench_file).write_text(testbench)
    (pristine_dir / ref_file).write_text(ref_solution)  # Include reference module

    files_provided = [module_file, testbench_file, ref_file]

    # Create task components
    impetus = Impetus(
        instructions=f"Problem: {problem_id}\n\n{spec}\n\nImplement the TopModule according to the specification. The testbench will verify your implementation."
    )

    intent = Intent(
        rubric={
            "goal": f"Implement correct TopModule for {problem_id} that passes testbench verification"
        },
        gold_trajectories=None,
        gold_state_diff={},
    )

    metadata = VerilogTaskInstanceMetadata(
        problem_name=problem_id,
        difficulty="medium",  # VerilogEval doesn't specify difficulty levels
        description=spec,  # Full description
        files_provided=files_provided,
    )

    # Create snapshot directory and track for cleanup
    snapshot_dir = tempfile.mkdtemp(prefix=f"verilog_snapshot_{instance_id}_")
    _temp_dirs.append(snapshot_dir)

    return VerilogTaskInstance(
        id=instance_id,
        impetus=impetus,
        intent=intent,
        metadata=metadata,
        is_reproducible=True,
        initial_engine_snapshot=None,
        pristine_dir=str(pristine_dir),
        snapshot_dir=snapshot_dir,
    )
233
+
234
+
235
def _create_adder_task() -> VerilogTaskInstance:
    """Create a simple 4-bit adder task.

    Writes an incomplete ``adder4.v`` and its testbench ``adder4_tb.v`` into a
    fresh temporary directory and creates a second temporary directory for
    snapshots. Both directories are tracked in ``_temp_dirs`` for cleanup.
    """
    instance_id = uuid4()

    # mkdtemp creates the directory itself, so no separate mkdir is needed
    temp_dir = tempfile.mkdtemp(prefix=f"verilog_adder_{instance_id}_")
    _temp_dirs.append(temp_dir)  # Track for cleanup
    pristine_dir = Path(temp_dir)

    # Adder testbench
    adder_tb_content = """`timescale 1ns/1ps
module adder4_tb;
reg [3:0] a, b;
wire [4:0] sum;

adder4 dut(.a(a), .b(b), .sum(sum));

initial begin
a = 4'b0000; b = 4'b0000; #10;
if (sum != 5'b00000) $fatal(1, "Test failed: 0 + 0 != 0");

a = 4'b0001; b = 4'b0001; #10;
if (sum != 5'b00010) $fatal(1, "Test failed: 1 + 1 != 2");

a = 4'b1111; b = 4'b0001; #10;
if (sum != 5'b10000) $fatal(1, "Test failed: 15 + 1 != 16");

$display("ALL_TESTS_PASSED");
$finish;
end
endmodule"""

    # Incomplete adder module (for student to complete)
    adder_content = """module adder4(
input [3:0] a,
input [3:0] b,
output [4:0] sum
);
// TODO: Implement 4-bit adder
// assign sum = ?;
endmodule"""

    (pristine_dir / "adder4_tb.v").write_text(adder_tb_content)
    (pristine_dir / "adder4.v").write_text(adder_content)

    impetus = Impetus(
        instructions="Implement a 4-bit adder module that takes two 4-bit inputs 'a' and 'b' and produces a 5-bit output 'sum'."
    )

    # The rubric is a {"goal": ...} mapping, matching the HuggingFace-based
    # builder and the default that deserialize() fills in.
    intent = Intent(
        rubric={"goal": "Implement correct 4-bit adder that passes testbench"},
        gold_trajectories=None,
        gold_state_diff={},
    )

    metadata = VerilogTaskInstanceMetadata(
        problem_name="adder4",
        difficulty="easy",
        description="4-bit adder implementation",
        files_provided=["adder4.v", "adder4_tb.v"],
    )

    # Create snapshot directory and track for cleanup
    snapshot_dir = tempfile.mkdtemp(prefix=f"verilog_snapshot_{instance_id}_")
    _temp_dirs.append(snapshot_dir)

    return VerilogTaskInstance(
        id=instance_id,
        impetus=impetus,
        intent=intent,
        metadata=metadata,
        is_reproducible=True,
        initial_engine_snapshot=None,
        pristine_dir=str(pristine_dir),
        snapshot_dir=snapshot_dir,
    )
314
+
315
+
316
def _create_and_gate_task() -> VerilogTaskInstance:
    """Create a simple AND gate task.

    Writes an incomplete ``and_gate.v`` and its testbench ``and_gate_tb.v``
    into a fresh temporary directory and creates a second temporary directory
    for snapshots. Both directories are tracked in ``_temp_dirs`` for cleanup.
    """
    instance_id = uuid4()

    # mkdtemp creates the directory itself, so no separate mkdir is needed
    temp_dir = tempfile.mkdtemp(prefix=f"verilog_and_{instance_id}_")
    _temp_dirs.append(temp_dir)  # Track for cleanup
    pristine_dir = Path(temp_dir)

    # AND gate testbench
    and_tb_content = """`timescale 1ns/1ps
module and_gate_tb;
reg a, b;
wire y;

and_gate dut(.a(a), .b(b), .y(y));

initial begin
a = 0; b = 0; #10;
if (y != 0) $fatal(1, "Test failed: 0 AND 0 != 0");

a = 0; b = 1; #10;
if (y != 0) $fatal(1, "Test failed: 0 AND 1 != 0");

a = 1; b = 0; #10;
if (y != 0) $fatal(1, "Test failed: 1 AND 0 != 0");

a = 1; b = 1; #10;
if (y != 1) $fatal(1, "Test failed: 1 AND 1 != 1");

$display("ALL_TESTS_PASSED");
$finish;
end
endmodule"""

    # Incomplete AND gate module
    and_content = """module and_gate(
input a,
input b,
output y
);
// TODO: Implement AND gate
// assign y = ?;
endmodule"""

    (pristine_dir / "and_gate_tb.v").write_text(and_tb_content)
    (pristine_dir / "and_gate.v").write_text(and_content)

    impetus = Impetus(
        instructions="Implement an AND gate module that takes two inputs 'a' and 'b' and produces output 'y'."
    )

    # The rubric is a {"goal": ...} mapping, matching the HuggingFace-based
    # builder and the default that deserialize() fills in.
    intent = Intent(
        rubric={"goal": "Implement correct AND gate that passes testbench"},
        gold_trajectories=None,
        gold_state_diff={},
    )

    metadata = VerilogTaskInstanceMetadata(
        problem_name="and_gate",
        difficulty="easy",
        description="Basic AND gate implementation",
        files_provided=["and_gate.v", "and_gate_tb.v"],
    )

    # Create snapshot directory and track for cleanup
    snapshot_dir = tempfile.mkdtemp(prefix=f"verilog_snapshot_{instance_id}_")
    _temp_dirs.append(snapshot_dir)

    return VerilogTaskInstance(
        id=instance_id,
        impetus=impetus,
        intent=intent,
        metadata=metadata,
        is_reproducible=True,
        initial_engine_snapshot=None,
        pristine_dir=str(pristine_dir),
        snapshot_dir=snapshot_dir,
    )
398
+
399
+
400
+ # Example usage
401
if __name__ == "__main__":
    import asyncio

    async def main():
        """Build the default task set, serialize it, and print a summary."""
        taskset = await create_verilog_taskset()

        # Serialize every instance concurrently
        pending = [inst.serialize() for inst in taskset.instances]
        serialized = await asyncio.gather(*pending)

        print(f"Created {len(serialized)} Verilog task instances")

        # One short report per task, numbered from 1
        for number, inst in enumerate(taskset.instances, start=1):
            meta = inst.metadata
            print(f"Task {number}: {meta.problem_name} ({meta.difficulty})")
            print(f" Description: {meta.description}")
            print(f" Files: {meta.files_provided}")
            print()

    asyncio.run(main())