synth-ai 0.2.0__py3-none-any.whl → 0.2.1.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synth_ai/__init__.py +28 -2
- synth_ai/core/system.py +4 -0
- synth_ai/environments/__init__.py +35 -0
- synth_ai/environments/environment/__init__.py +1 -0
- synth_ai/environments/environment/artifacts/__init__.py +1 -0
- synth_ai/environments/environment/artifacts/base.py +50 -0
- synth_ai/environments/environment/core.py +22 -0
- synth_ai/environments/environment/db/__init__.py +1 -0
- synth_ai/environments/environment/db/sqlite.py +45 -0
- synth_ai/environments/environment/registry.py +24 -0
- synth_ai/environments/environment/resources/sqlite.py +46 -0
- synth_ai/environments/environment/results.py +1 -0
- synth_ai/environments/environment/rewards/__init__.py +1 -0
- synth_ai/environments/environment/rewards/core.py +28 -0
- synth_ai/environments/environment/shared_engine.py +26 -0
- synth_ai/environments/environment/tools/__init__.py +34 -0
- synth_ai/environments/examples/__init__.py +1 -0
- synth_ai/environments/examples/crafter_classic/__init__.py +8 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +58 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +152 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +1194 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +51 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +872 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1412 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/test_crafter_react_agent.py +1110 -0
- synth_ai/environments/examples/crafter_classic/config_logging.py +111 -0
- synth_ai/environments/examples/crafter_classic/engine.py +502 -0
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +63 -0
- synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +5 -0
- synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +74 -0
- synth_ai/environments/examples/crafter_classic/environment.py +255 -0
- synth_ai/environments/examples/crafter_classic/taskset.py +228 -0
- synth_ai/environments/examples/enron/agent_demos/test_synth_react.py +535 -0
- synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +156 -0
- synth_ai/environments/examples/enron/art_helpers/local_email_db.py +280 -0
- synth_ai/environments/examples/enron/art_helpers/types_enron.py +24 -0
- synth_ai/environments/examples/enron/engine.py +291 -0
- synth_ai/environments/examples/enron/environment.py +165 -0
- synth_ai/environments/examples/enron/taskset.py +112 -0
- synth_ai/environments/examples/enron/units/keyword_stats.py +111 -0
- synth_ai/environments/examples/enron/units/test_email_index.py +8 -0
- synth_ai/environments/examples/minigrid/__init__.py +48 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +1188 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +47 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +562 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +220 -0
- synth_ai/environments/examples/minigrid/agent_demos/test_minigrid_react_agent.py +393 -0
- synth_ai/environments/examples/minigrid/engine.py +589 -0
- synth_ai/environments/examples/minigrid/environment.py +274 -0
- synth_ai/environments/examples/minigrid/environment_mapping.py +242 -0
- synth_ai/environments/examples/minigrid/puzzle_loader.py +416 -0
- synth_ai/environments/examples/minigrid/taskset.py +583 -0
- synth_ai/environments/examples/minigrid/units/test_action_behavior.py +226 -0
- synth_ai/environments/examples/minigrid/units/test_debug_messages.py +83 -0
- synth_ai/environments/examples/minigrid/units/test_exploration.py +120 -0
- synth_ai/environments/examples/minigrid/units/test_minigrid_engine.py +214 -0
- synth_ai/environments/examples/minigrid/units/test_minigrid_environment.py +238 -0
- synth_ai/environments/examples/minigrid/units/test_minigrid_environment_mapping.py +301 -0
- synth_ai/environments/examples/minigrid/units/test_minigrid_taskset.py +210 -0
- synth_ai/environments/examples/nethack/__init__.py +7 -0
- synth_ai/environments/examples/nethack/achievements.py +337 -0
- synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +981 -0
- synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +74 -0
- synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +832 -0
- synth_ai/environments/examples/nethack/agent_demos/test_nethack_react_agent.py +1112 -0
- synth_ai/environments/examples/nethack/engine.py +738 -0
- synth_ai/environments/examples/nethack/environment.py +255 -0
- synth_ai/environments/examples/nethack/helpers/__init__.py +42 -0
- synth_ai/environments/examples/nethack/helpers/action_mapping.py +301 -0
- synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +401 -0
- synth_ai/environments/examples/nethack/helpers/observation_utils.py +433 -0
- synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +201 -0
- synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +268 -0
- synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +308 -0
- synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +430 -0
- synth_ai/environments/examples/nethack/taskset.py +323 -0
- synth_ai/environments/examples/nethack/units/test_nethack_engine.py +277 -0
- synth_ai/environments/examples/nethack/units/test_nethack_environment.py +281 -0
- synth_ai/environments/examples/nethack/units/test_nethack_taskset.py +213 -0
- synth_ai/environments/examples/nethack/units/test_recording.py +307 -0
- synth_ai/environments/examples/red/__init__.py +7 -0
- synth_ai/environments/examples/red/agent_demos/__init__.py +1 -0
- synth_ai/environments/examples/red/agent_demos/test_synth_react.py +1471 -0
- synth_ai/environments/examples/red/config_logging.py +110 -0
- synth_ai/environments/examples/red/engine.py +693 -0
- synth_ai/environments/examples/red/engine_helpers/__init__.py +1 -0
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +28 -0
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +275 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +142 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +56 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +283 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +149 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +137 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +56 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +330 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +120 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +558 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +312 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +147 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +246 -0
- synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +367 -0
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +139 -0
- synth_ai/environments/examples/red/environment.py +235 -0
- synth_ai/environments/examples/red/taskset.py +77 -0
- synth_ai/environments/examples/red/test_fixes.py +125 -0
- synth_ai/environments/examples/red/test_fixes_mock.py +148 -0
- synth_ai/environments/examples/red/units/__init__.py +1 -0
- synth_ai/environments/examples/red/units/test_basic_functionality.py +97 -0
- synth_ai/environments/examples/red/units/test_button_press_requirements.py +217 -0
- synth_ai/environments/examples/red/units/test_engine.py +192 -0
- synth_ai/environments/examples/red/units/test_environment.py +455 -0
- synth_ai/environments/examples/red/units/test_exploration_strategy.py +227 -0
- synth_ai/environments/examples/red/units/test_integration.py +217 -0
- synth_ai/environments/examples/red/units/test_memory_extraction.py +111 -0
- synth_ai/environments/examples/red/units/test_menu_bug_reproduction.py +1100 -0
- synth_ai/environments/examples/red/units/test_movement_debug.py +255 -0
- synth_ai/environments/examples/red/units/test_pokemon_mcts_debug.py +163 -0
- synth_ai/environments/examples/red/units/test_pokemon_mcts_verbose.py +117 -0
- synth_ai/environments/examples/red/units/test_red_basic.py +145 -0
- synth_ai/environments/examples/red/units/test_red_comprehensive.py +323 -0
- synth_ai/environments/examples/red/units/test_retry_movement.py +195 -0
- synth_ai/environments/examples/red/units/test_reward_components.py +186 -0
- synth_ai/environments/examples/red/units/test_rom_integration.py +260 -0
- synth_ai/environments/examples/red/units/test_taskset.py +116 -0
- synth_ai/environments/examples/red/units/test_tree.py +448 -0
- synth_ai/environments/examples/sokoban/__init__.py +1 -0
- synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +900 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_dspy_react.py +1 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_sokoban_react_agent.py +498 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_synth_lats.py +1 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_locally.py +748 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_service.py +296 -0
- synth_ai/environments/examples/sokoban/engine.py +675 -0
- synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +1 -0
- synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +656 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +17 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +3 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +129 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +370 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +331 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +305 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +66 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +114 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +122 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +394 -0
- synth_ai/environments/examples/sokoban/environment.py +228 -0
- synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +438 -0
- synth_ai/environments/examples/sokoban/puzzle_loader.py +311 -0
- synth_ai/environments/examples/sokoban/taskset.py +425 -0
- synth_ai/environments/examples/sokoban/units/astar_common.py +94 -0
- synth_ai/environments/examples/sokoban/units/test_building_task_set.py +49 -0
- synth_ai/environments/examples/sokoban/units/test_false_positive.py +120 -0
- synth_ai/environments/examples/sokoban/units/test_simple_run_through_environment.py +119 -0
- synth_ai/environments/examples/sokoban/units/test_sokoban_environment.py +98 -0
- synth_ai/environments/examples/sokoban/units/test_tree.py +364 -0
- synth_ai/environments/examples/tictactoe/__init__.py +1 -0
- synth_ai/environments/examples/tictactoe/agent_demos/test_synth_react.py +266 -0
- synth_ai/environments/examples/tictactoe/agent_demos/test_tictactoe_react_agent.py +470 -0
- synth_ai/environments/examples/tictactoe/engine.py +368 -0
- synth_ai/environments/examples/tictactoe/environment.py +239 -0
- synth_ai/environments/examples/tictactoe/taskset.py +214 -0
- synth_ai/environments/examples/tictactoe/units/test_tictactoe_engine.py +393 -0
- synth_ai/environments/examples/tictactoe/units/test_tictactoe_environment.py +493 -0
- synth_ai/environments/examples/tictactoe/units/test_tictactoe_taskset.py +191 -0
- synth_ai/environments/examples/verilog/__init__.py +10 -0
- synth_ai/environments/examples/verilog/agent_demos/test_synth_react.py +520 -0
- synth_ai/environments/examples/verilog/engine.py +328 -0
- synth_ai/environments/examples/verilog/environment.py +349 -0
- synth_ai/environments/examples/verilog/taskset.py +418 -0
- synth_ai/environments/examples/verilog/units/test_verilog_engine.py +466 -0
- synth_ai/environments/examples/verilog/units/test_verilog_environment.py +585 -0
- synth_ai/environments/examples/verilog/units/test_verilog_integration.py +383 -0
- synth_ai/environments/examples/verilog/units/test_verilog_taskset.py +457 -0
- synth_ai/environments/reproducibility/core.py +42 -0
- synth_ai/environments/reproducibility/tree.py +364 -0
- synth_ai/environments/service/app.py +78 -0
- synth_ai/environments/service/core_routes.py +775 -0
- synth_ai/environments/service/external_registry.py +57 -0
- synth_ai/environments/service/registry.py +9 -0
- synth_ai/environments/stateful/__init__.py +1 -0
- synth_ai/environments/stateful/core.py +28 -0
- synth_ai/environments/stateful/engine.py +21 -0
- synth_ai/environments/stateful/state.py +7 -0
- synth_ai/environments/tasks/api.py +19 -0
- synth_ai/environments/tasks/core.py +78 -0
- synth_ai/environments/tasks/filters.py +39 -0
- synth_ai/environments/tasks/utils.py +89 -0
- synth_ai/environments/v0_observability/history.py +3 -0
- synth_ai/environments/v0_observability/log.py +2 -0
- synth_ai/lm/caching/constants.py +1 -0
- synth_ai/{zyk/lms → lm}/caching/ephemeral.py +4 -8
- synth_ai/{zyk/lms → lm}/caching/handler.py +15 -15
- synth_ai/{zyk/lms → lm}/caching/initialize.py +2 -4
- synth_ai/{zyk/lms → lm}/caching/persistent.py +4 -10
- synth_ai/{zyk/lms → lm}/config.py +2 -1
- synth_ai/{zyk/lms → lm}/constants.py +2 -2
- synth_ai/{zyk/lms → lm}/core/all.py +10 -10
- synth_ai/{zyk/lms → lm}/core/main.py +57 -33
- synth_ai/{zyk/lms → lm}/core/vendor_clients.py +12 -10
- synth_ai/lm/cost/monitor.py +1 -0
- synth_ai/lm/cost/statefulness.py +1 -0
- synth_ai/lm/provider_support/__init__.py +8 -0
- synth_ai/lm/provider_support/anthropic.py +945 -0
- synth_ai/lm/provider_support/openai.py +1115 -0
- synth_ai/lm/provider_support/suppress_logging.py +31 -0
- synth_ai/{zyk/lms → lm}/structured_outputs/handler.py +58 -80
- synth_ai/{zyk/lms → lm}/structured_outputs/inject.py +6 -20
- synth_ai/{zyk/lms → lm}/structured_outputs/rehabilitate.py +6 -12
- synth_ai/{zyk/lms → lm}/vendors/core/anthropic_api.py +21 -30
- synth_ai/{zyk/lms → lm}/vendors/core/gemini_api.py +35 -32
- synth_ai/{zyk/lms → lm}/vendors/core/mistral_api.py +19 -28
- synth_ai/{zyk/lms → lm}/vendors/core/openai_api.py +26 -36
- synth_ai/{zyk/lms → lm}/vendors/openai_standard.py +29 -33
- synth_ai/{zyk/lms → lm}/vendors/retries.py +1 -1
- synth_ai/lm/vendors/supported/__init__.py +0 -0
- synth_ai/{zyk/lms → lm}/vendors/supported/custom_endpoint.py +131 -118
- synth_ai/{zyk/lms → lm}/vendors/supported/deepseek.py +4 -8
- synth_ai/{zyk/lms → lm}/vendors/supported/grok.py +6 -8
- synth_ai/{zyk/lms → lm}/vendors/supported/groq.py +1 -1
- synth_ai/{zyk/lms → lm}/vendors/supported/ollama.py +2 -2
- synth_ai/{zyk/lms → lm}/vendors/supported/openrouter.py +18 -16
- synth_ai/{zyk/lms → lm}/vendors/supported/together.py +1 -1
- synth_ai/tracing/__init__.py +0 -0
- synth_ai/tracing/abstractions.py +224 -0
- synth_ai/tracing/base_client.py +91 -0
- synth_ai/tracing/client_manager.py +131 -0
- synth_ai/tracing/config.py +140 -0
- synth_ai/tracing/context.py +146 -0
- synth_ai/tracing/decorators.py +679 -0
- synth_ai/tracing/events/__init__.py +0 -0
- synth_ai/tracing/events/manage.py +147 -0
- synth_ai/tracing/events/scope.py +86 -0
- synth_ai/tracing/events/store.py +227 -0
- synth_ai/tracing/immediate_client.py +152 -0
- synth_ai/tracing/local.py +18 -0
- synth_ai/tracing/log_client_base.py +74 -0
- synth_ai/tracing/retry_queue.py +187 -0
- synth_ai/tracing/trackers.py +515 -0
- synth_ai/tracing/upload.py +504 -0
- synth_ai/tracing/utils.py +9 -0
- synth_ai/zyk/__init__.py +28 -2
- synth_ai-0.2.1.dev0.dist-info/METADATA +349 -0
- synth_ai-0.2.1.dev0.dist-info/RECORD +261 -0
- {synth_ai-0.2.0.dist-info → synth_ai-0.2.1.dev0.dist-info}/WHEEL +1 -1
- synth_ai/zyk/lms/caching/constants.py +0 -1
- synth_ai/zyk/lms/cost/monitor.py +0 -1
- synth_ai/zyk/lms/cost/statefulness.py +0 -1
- synth_ai-0.2.0.dist-info/METADATA +0 -36
- synth_ai-0.2.0.dist-info/RECORD +0 -50
- /synth_ai/{zyk/lms/__init__.py → environments/reproducibility/helpers.py} +0 -0
- /synth_ai/{zyk/lms/caching → lm}/__init__.py +0 -0
- /synth_ai/{zyk/lms/core → lm/caching}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/caching/dbs.py +0 -0
- /synth_ai/{zyk/lms/cost → lm/core}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/core/exceptions.py +0 -0
- /synth_ai/{zyk/lms/structured_outputs → lm/cost}/__init__.py +0 -0
- /synth_ai/{zyk/lms/vendors → lm/structured_outputs}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/tools/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/tools/base.py +0 -0
- /synth_ai/{zyk/lms/vendors/core → lm/vendors}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/vendors/base.py +0 -0
- /synth_ai/{zyk/lms/vendors/local → lm/vendors/core}/__init__.py +0 -0
- /synth_ai/{zyk/lms/vendors/supported → lm/vendors/local}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/vendors/local/ollama.py +0 -0
- {synth_ai-0.2.0.dist-info → synth_ai-0.2.1.dev0.dist-info/licenses}/LICENSE +0 -0
- {synth_ai-0.2.0.dist-info → synth_ai-0.2.1.dev0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,457 @@
|
|
1
|
+
import pytest
|
2
|
+
import asyncio
|
3
|
+
from pathlib import Path
|
4
|
+
from unittest.mock import patch
|
5
|
+
|
6
|
+
# Apply a 15-second timeout to every test in this module (sync and async alike)
|
7
|
+
pytestmark = pytest.mark.timeout(15)
|
8
|
+
|
9
|
+
from synth_ai.environments.examples.verilog.taskset import (
|
10
|
+
create_verilog_taskset,
|
11
|
+
_create_hf_task_instance,
|
12
|
+
VerilogTaskInstance,
|
13
|
+
VerilogTaskInstanceMetadata,
|
14
|
+
_cleanup_temp_dirs,
|
15
|
+
_temp_dirs,
|
16
|
+
)
|
17
|
+
from synth_ai.environments.tasks.core import TaskInstanceSet, SplitInfo, Impetus, Intent
|
18
|
+
from uuid import uuid4
|
19
|
+
from typing import cast
|
20
|
+
|
21
|
+
|
22
|
+
class TestVerilogTaskset:
    """Test suite for Verilog taskset creation.

    Tests that call ``create_verilog_taskset`` patch ``load_dataset`` on the
    taskset module and feed it in-memory rows through the mock's return value.
    """

    @staticmethod
    def _placeholder_rows(count):
        """Return ``count`` minimal dataset rows with ids ``test_000``, ``test_001``, ..."""
        return [
            {
                "problem_id": f"test_{i:03d}",
                "prompt": f"Test {i}",
                "test": "",
                "ref": "",
            }
            for i in range(count)
        ]

    # NOTE: the patch target must be the module that ``create_verilog_taskset``
    # is imported from; patching a differently named module path leaves the
    # name the function actually looks up untouched.
    @pytest.mark.asyncio
    @patch("synth_ai.environments.examples.verilog.taskset.load_dataset")
    async def test_create_verilog_taskset_basic(self, mock_load_dataset):
        """Test basic taskset creation."""
        # Mock dataset
        mock_dataset = [
            {
                "problem_id": "test_001",
                "prompt": "Implement a simple AND gate with inputs a, b and output y.",
                "test": "`timescale 1ns/1ps\nmodule test_tb;\n // testbench code\nendmodule",
                "ref": "module RefModule(input a, b, output y);\n assign y = a & b;\nendmodule",
            },
            {
                "problem_id": "test_002",
                "prompt": "Implement a simple OR gate with inputs a, b and output y.",
                "test": "`timescale 1ns/1ps\nmodule test_tb2;\n // testbench code\nendmodule",
                "ref": "module RefModule(input a, b, output y);\n assign y = a | b;\nendmodule",
            },
        ]
        mock_load_dataset.return_value = mock_dataset

        taskset = await create_verilog_taskset(max_instances=2)

        assert isinstance(taskset, TaskInstanceSet)
        assert taskset.name == "VerilogEval v2 TaskSet"
        assert taskset.description == "VerilogEval v2 spec-to-RTL tasks from HuggingFace"
        assert len(taskset.instances) == 2

        # Check split info
        assert isinstance(taskset.split_info, SplitInfo)
        assert taskset.split_info._is_split_defined is True

        # Check instance properties
        instance = taskset.instances[0]
        assert isinstance(instance, VerilogTaskInstance)
        metadata = cast(VerilogTaskInstanceMetadata, instance.metadata)
        assert metadata.problem_name == "test_001"
        assert "AND gate" in metadata.description
        assert len(metadata.files_provided) == 3  # TopModule.v, testbench, RefModule.v

    @pytest.mark.asyncio
    @patch("synth_ai.environments.examples.verilog.taskset.load_dataset")
    async def test_create_verilog_taskset_max_instances(self, mock_load_dataset):
        """Test taskset creation with max_instances limit."""
        # Mock a dataset larger than the requested limit
        mock_load_dataset.return_value = self._placeholder_rows(20)

        taskset = await create_verilog_taskset(max_instances=5)

        assert len(taskset.instances) == 5
        # Should only create instances for first 5 items
        metadata0 = cast(VerilogTaskInstanceMetadata, taskset.instances[0].metadata)
        metadata4 = cast(VerilogTaskInstanceMetadata, taskset.instances[4].metadata)
        assert metadata0.problem_name == "test_000"
        assert metadata4.problem_name == "test_004"

    @pytest.mark.asyncio
    @patch("synth_ai.environments.examples.verilog.taskset.load_dataset")
    async def test_create_verilog_taskset_split_info(self, mock_load_dataset):
        """Test that split info is correctly calculated."""
        mock_load_dataset.return_value = self._placeholder_rows(10)

        taskset = await create_verilog_taskset(max_instances=10)

        # Should have 80% val (8 instances) and 20% test (2 instances)
        assert len(taskset.split_info.val_instance_ids) == 8
        assert len(taskset.split_info.test_instance_ids) == 2

        # Check that all instance IDs are accounted for
        all_ids = {inst.id for inst in taskset.instances}
        split_ids = taskset.split_info.val_instance_ids | taskset.split_info.test_instance_ids
        assert all_ids == split_ids

    def test_create_hf_task_instance(self):
        """Test creation of task instance from HuggingFace dataset item."""
        item = {
            "problem_id": "Prob001_zero",
            "prompt": "I would like you to implement a module named TopModule with output zero that always outputs LOW.",
            "test": "`timescale 1 ps/1 ps\nmodule tb();\n // testbench\nendmodule",
            "ref": "module RefModule(output zero);\n assign zero = 1'b0;\nendmodule",
        }

        instance = _create_hf_task_instance(item, 0)

        assert isinstance(instance, VerilogTaskInstance)
        metadata = cast(VerilogTaskInstanceMetadata, instance.metadata)
        assert metadata.problem_name == "Prob001_zero"
        assert "TopModule" in instance.impetus.instructions
        assert "always outputs LOW" in metadata.description
        assert metadata.difficulty == "medium"
        assert len(metadata.files_provided) == 3

        # Check that files were created
        pristine_dir = Path(instance.pristine_dir)
        assert (pristine_dir / "TopModule.v").exists()
        assert (pristine_dir / "Prob001_zero_tb.v").exists()
        assert (pristine_dir / "RefModule.v").exists()

        # Check file contents
        topmodule_content = (pristine_dir / "TopModule.v").read_text()
        assert "module TopModule();" in topmodule_content
        assert "TODO: Implement" in topmodule_content
        assert "always outputs LOW" in topmodule_content

        ref_content = (pristine_dir / "RefModule.v").read_text()
        assert "module RefModule" in ref_content
        assert "assign zero = 1'b0" in ref_content

    @pytest.mark.asyncio
    async def test_task_instance_serialization(self):
        """Test task instance serialization and deserialization."""
        item = {
            "problem_id": "test_serial",
            "prompt": "Test serialization",
            "test": "module test_tb(); endmodule",
            "ref": "module RefModule(); endmodule",
        }

        instance = _create_hf_task_instance(item, 0)

        # Test serialization
        serialized = await instance.serialize()
        assert isinstance(serialized, dict)
        assert serialized["metadata"]["problem_name"] == "test_serial"
        assert "id" in serialized
        assert isinstance(serialized["id"], str)  # UUID should be converted to string

        # Test deserialization
        deserialized = await VerilogTaskInstance.deserialize(serialized)
        assert isinstance(deserialized, VerilogTaskInstance)
        deserialized_metadata = cast(VerilogTaskInstanceMetadata, deserialized.metadata)
        instance_metadata = cast(VerilogTaskInstanceMetadata, instance.metadata)
        assert deserialized_metadata.problem_name == instance_metadata.problem_name
        assert deserialized.impetus.instructions == instance.impetus.instructions
|
177
|
+
|
178
|
+
|
179
|
+
class TestVerilogTaskInstanceMetadata:
    """Checks that VerilogTaskInstanceMetadata exposes the values it is built with."""

    def test_metadata_creation(self):
        """Populate every field, then read each one back."""
        scalar_fields = {
            "problem_name": "test_problem",
            "difficulty": "hard",
            "description": "A test problem for unit testing",
        }
        provided = ["TopModule.v", "test_tb.v", "RefModule.v"]

        meta = VerilogTaskInstanceMetadata(files_provided=provided, **scalar_fields)

        # Each scalar attribute should round-trip unchanged.
        for field_name, expected_value in scalar_fields.items():
            assert getattr(meta, field_name) == expected_value

        assert len(meta.files_provided) == 3
        assert "TopModule.v" in meta.files_provided
|
196
|
+
|
197
|
+
|
198
|
+
class TestVerilogTaskInstance:
    """Test suite for VerilogTaskInstance class.

    Covers direct construction plus ``serialize`` / ``deserialize`` handling
    of IDs and of unexpected keys in the serialized payload.
    """

    def test_task_instance_creation(self):
        """Test basic task instance creation."""
        metadata = VerilogTaskInstanceMetadata(
            problem_name="test",
            difficulty="easy",
            description="Test description",
            files_provided=["test.v"],
        )

        instance = VerilogTaskInstance(
            id=uuid4(),
            impetus=Impetus(instructions="Test instructions"),
            intent=Intent(rubric={"goal": "Test goal"}, gold_trajectories=None, gold_state_diff={}),
            metadata=metadata,
            is_reproducible=True,
            initial_engine_snapshot=None,
            pristine_dir="/tmp/pristine",
            snapshot_dir="/tmp/snapshot",
        )

        metadata_check = cast(VerilogTaskInstanceMetadata, instance.metadata)
        assert metadata_check.problem_name == "test"
        assert instance.impetus.instructions == "Test instructions"
        # NOTE(review): the rubric is passed in above as {"goal": "Test goal"}
        # but compared to a bare string here — confirm what Intent stores.
        assert instance.intent.rubric == "Test goal"
        assert instance.pristine_dir == "/tmp/pristine"
        assert instance.snapshot_dir == "/tmp/snapshot"

    @pytest.mark.asyncio
    async def test_serialization_with_uuid(self):
        """Test serialization properly handles UUID conversion."""
        metadata = VerilogTaskInstanceMetadata(
            problem_name="test",
            difficulty="easy",
            description="Test",
            files_provided=["test.v"],
        )

        instance = VerilogTaskInstance(
            id=uuid4(),
            impetus=Impetus(instructions="Test"),
            intent=Intent(rubric={"goal": "Test"}, gold_trajectories=None, gold_state_diff={}),
            metadata=metadata,
            is_reproducible=True,
            initial_engine_snapshot=None,
        )

        serialized = await instance.serialize()
        assert isinstance(serialized["id"], str)

        # Test deserialization can handle string ID
        deserialized = await VerilogTaskInstance.deserialize(serialized)
        assert deserialized is not None

    @pytest.mark.asyncio
    async def test_deserialization_graceful_id_handling(self):
        """Test deserialization gracefully handles various ID formats."""
        # Test with a string ID that is not a UUID
        data = {
            "id": "some-string-id",
            "impetus": {"instructions": "Test"},
            "intent": {"rubric": {"goal": "Test"}, "deterministic_eval_functions": []},
            "metadata": {
                "problem_name": "test",
                "difficulty": "easy",
                "description": "Test",
                "files_provided": ["test.v"],
            },
        }

        instance = await VerilogTaskInstance.deserialize(data)
        metadata_check = cast(VerilogTaskInstanceMetadata, instance.metadata)
        assert metadata_check.problem_name == "test"

    @pytest.mark.asyncio
    async def test_deserialization_filters_constructor_fields(self):
        """Test deserialization only uses valid constructor fields."""
        data = {
            "id": "test-id",
            "impetus": {"instructions": "Test"},
            "intent": {"rubric": {"goal": "Test"}, "deterministic_eval_functions": []},
            "metadata": {
                "problem_name": "test",
                "difficulty": "easy",
                "description": "Test",
                "files_provided": ["test.v"],
            },
            "extra_field": "should_be_ignored",
            "another_extra": 123,
        }

        instance = await VerilogTaskInstance.deserialize(data)
        metadata_check = cast(VerilogTaskInstanceMetadata, instance.metadata)
        assert metadata_check.problem_name == "test"
        # Extra fields should be filtered out and not cause errors
|
303
|
+
|
304
|
+
|
305
|
+
class TestTempDirectoryCleanup:
    """Exercises the bookkeeping of temporary task directories in ``_temp_dirs``."""

    def test_temp_dirs_tracking(self):
        """Creating a task instance should register its directories."""
        tracked_before = len(_temp_dirs)

        task = _create_hf_task_instance(
            {
                "problem_id": "cleanup_test",
                "prompt": "Test cleanup",
                "test": "module test(); endmodule",
                "ref": "module ref(); endmodule",
            },
            0,
        )

        # Two new entries are expected: the pristine and snapshot directories.
        assert len(_temp_dirs) == tracked_before + 2

        # Both directories must be present on disk.
        created = [Path(task.pristine_dir), Path(task.snapshot_dir)]
        for location in created:
            assert location.exists()

    def test_cleanup_temp_dirs(self):
        """Calling ``_cleanup_temp_dirs`` should delete and untrack the directories."""
        # Task creation is what produces the temporary directories.
        task = _create_hf_task_instance(
            {
                "problem_id": "cleanup_test2",
                "prompt": "Test cleanup",
                "test": "module test(); endmodule",
                "ref": "module ref(); endmodule",
            },
            0,
        )
        created = [Path(task.pristine_dir), Path(task.snapshot_dir)]

        # Precondition: both directories are there before cleanup.
        for location in created:
            assert location.exists()

        _cleanup_temp_dirs()

        # Postcondition: both are gone and nothing remains tracked.
        for location in created:
            assert not location.exists()
        assert len(_temp_dirs) == 0
|
355
|
+
|
356
|
+
|
357
|
+
class TestTasksetIntegration:
    """End-to-end checks of taskset creation against a mocked dataset loader."""

    # NOTE(review): every test patches "src.examples.verilog.taskset.load_dataset";
    # confirm this path matches the module create_verilog_taskset is imported from.

    @pytest.mark.asyncio
    @patch("src.examples.verilog.taskset.load_dataset")
    async def test_full_taskset_workflow(self, mock_load_dataset):
        """Build a two-item taskset and inspect instances, files and serialization."""
        # Two VerilogEval-style records: a constant-zero module and an AND gate.
        zero_item = {
            "problem_id": "Prob001_zero",
            "prompt": "I would like you to implement a module named TopModule with the following interface. All input and output ports are one bit unless otherwise specified.\n\n - output zero\n\nThe module should always outputs a LOW.",
            "test": '`timescale 1 ps/1 ps\n`define OK 12\n`define INCORRECT 13\n\nmodule stimulus_gen (\n\tinput clk,\n\toutput reg[511:0] wavedrom_title,\n\toutput reg wavedrom_enable\n);\n\ntask wavedrom_start(input[511:0] title = "");\nendtask\n\nendmodule\n\nmodule tb();\n\nreg clk=0;\ninitial forever\n\t#5 clk = ~clk;\n\nlogic zero_ref;\nlogic zero_dut;\n\nRefModule good1 (\n\t.zero(zero_ref) );\n\t\nTopModule top_module1 (\n\t.zero(zero_dut) );\n\nendmodule',
            "ref": "module RefModule (\n output zero\n);\n\n assign zero = 1'b0;\n\nendmodule",
        }
        and_item = {
            "problem_id": "Prob002_and_gate",
            "prompt": "Implement an AND gate with inputs a, b and output y.",
            "test": "`timescale 1ns/1ps\nmodule test_tb;\n reg a, b;\n wire y;\n TopModule dut(.a(a), .b(b), .y(y));\n RefModule ref(.a(a), .b(b), .y(y_ref));\nendmodule",
            "ref": "module RefModule(input a, b, output y);\n assign y = a & b;\nendmodule",
        }
        mock_load_dataset.return_value = [zero_item, and_item]

        taskset = await create_verilog_taskset(max_instances=2)

        # Overall shape: two instances, one id in each of the val/test splits.
        # (The split ratios themselves live in create_verilog_taskset.)
        split = taskset.split_info
        assert len(taskset.instances) == 2
        assert len(split.val_instance_ids) == 1
        assert len(split.test_instance_ids) == 1

        first, second = taskset.instances

        # --- first instance: the constant-zero problem ---
        first_meta = cast(VerilogTaskInstanceMetadata, first.metadata)
        assert first_meta.problem_name == "Prob001_zero"
        assert "output zero" in first.impetus.instructions
        assert "always outputs a LOW" in first_meta.description

        # The pristine directory must hold the template, testbench and reference.
        workdir = Path(first.pristine_dir)
        top_path = workdir / "TopModule.v"
        tb_path = workdir / "Prob001_zero_tb.v"
        ref_path = workdir / "RefModule.v"
        for expected_file in (top_path, tb_path, ref_path):
            assert expected_file.exists()

        # Template: empty TopModule stub carrying a TODO and the prompt text.
        template_text = top_path.read_text()
        for fragment in ("module TopModule();", "TODO: Implement", "output zero"):
            assert fragment in template_text

        # Reference: the RefModule source taken from the dataset item.
        reference_text = ref_path.read_text()
        assert "module RefModule" in reference_text
        assert "assign zero = 1'b0" in reference_text

        # --- second instance: the AND gate problem ---
        second_meta = cast(VerilogTaskInstanceMetadata, second.metadata)
        assert second_meta.problem_name == "Prob002_and_gate"
        assert "AND gate" in second.impetus.instructions

        # Every instance must serialize to a plain dict.
        pending = [inst.serialize() for inst in taskset.instances]
        serialized_instances = await asyncio.gather(*pending)
        assert len(serialized_instances) == 2
        for payload in serialized_instances:
            assert isinstance(payload, dict)

    @pytest.mark.asyncio
    @patch("src.examples.verilog.taskset.load_dataset")
    async def test_empty_dataset_handling(self, mock_load_dataset):
        """An empty dataset yields a taskset with no instances and empty splits."""
        mock_load_dataset.return_value = []

        taskset = await create_verilog_taskset(max_instances=5)
        split = taskset.split_info

        assert len(taskset.instances) == 0
        assert len(split.val_instance_ids) == 0
        assert len(split.test_instance_ids) == 0

    @pytest.mark.asyncio
    @patch("src.examples.verilog.taskset.load_dataset")
    async def test_single_instance_split(self, mock_load_dataset):
        """A one-item dataset must still produce exactly one instance."""
        only_item = {
            "problem_id": "single_test",
            "prompt": "Single test",
            "test": "module test(); endmodule",
            "ref": "module ref(); endmodule",
        }
        mock_load_dataset.return_value = [only_item]

        taskset = await create_verilog_taskset(max_instances=1)

        # Only the instance count is checked; which split (if any) receives the
        # single id is not asserted here.
        # NOTE(review): the original intent was that the lone instance lands in
        # the validation split - confirm against create_verilog_taskset.
        assert len(taskset.instances) == 1
|
@@ -0,0 +1,42 @@
|
|
1
|
+
from abc import ABC, abstractmethod
|
2
|
+
from typing import TypeVar, Generic, Any
|
3
|
+
|
4
|
+
|
5
|
+
class IReproducibleEngine(ABC):
    """
    Interface for engines whose state can be captured and later restored.

    Subclasses must provide both coroutine hooks declared here; the class
    cannot be instantiated until they do.
    """

    @abstractmethod
    async def _serialize_engine(self) -> Any:
        """Return a snapshot of the engine's current state.

        The return type stays ``Any`` until a common snapshot type emerges.
        """

    @classmethod
    @abstractmethod
    async def _deserialize_engine(cls, snapshot: Any) -> "IReproducibleEngine":
        """Build an engine instance from a previously serialized snapshot.

        ``snapshot`` is typed ``Any`` for the same reason as the serializer's
        return value.
        """
|
23
|
+
|
24
|
+
|
25
|
+
# Type variable for the engine, ensuring it adheres to the IReproducibleEngine interface.
# It is declared covariant and bound to IReproducibleEngine, and serves as the
# type parameter of the generic ReproducibleEnvironment class.
EngineType_co = TypeVar("EngineType_co", bound=IReproducibleEngine, covariant=True)
|
27
|
+
|
28
|
+
|
29
|
+
class ReproducibleEnvironment(Generic[EngineType_co]):
    """
    Mixin for environments that are reproducible by way of their engine.

    The class declares a single ``engine`` attribute whose type is the
    generic parameter, which is bound to IReproducibleEngine. It adds no
    methods and no behavior of its own: the requirement that the engine
    offers serialization and deserialization is expressed through the type
    annotation and the IReproducibleEngine ABC.
    """

    # Annotation only - no value is assigned at class level.
    engine: EngineType_co
    # Nothing is validated at runtime here (no hasattr-style checks). The
    # _serialize_engine / _deserialize_engine methods are expected to exist
    # on the engine because IReproducibleEngine declares them abstract.
|