synth-ai 0.2.0__py3-none-any.whl → 0.2.1.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synth_ai/__init__.py +28 -2
- synth_ai/core/system.py +4 -0
- synth_ai/environments/__init__.py +35 -0
- synth_ai/environments/environment/__init__.py +1 -0
- synth_ai/environments/environment/artifacts/__init__.py +1 -0
- synth_ai/environments/environment/artifacts/base.py +50 -0
- synth_ai/environments/environment/core.py +22 -0
- synth_ai/environments/environment/db/__init__.py +1 -0
- synth_ai/environments/environment/db/sqlite.py +45 -0
- synth_ai/environments/environment/registry.py +24 -0
- synth_ai/environments/environment/resources/sqlite.py +46 -0
- synth_ai/environments/environment/results.py +1 -0
- synth_ai/environments/environment/rewards/__init__.py +1 -0
- synth_ai/environments/environment/rewards/core.py +28 -0
- synth_ai/environments/environment/shared_engine.py +26 -0
- synth_ai/environments/environment/tools/__init__.py +34 -0
- synth_ai/environments/examples/__init__.py +1 -0
- synth_ai/environments/examples/crafter_classic/__init__.py +8 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +58 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +152 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +1194 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +51 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +872 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1412 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/test_crafter_react_agent.py +1110 -0
- synth_ai/environments/examples/crafter_classic/config_logging.py +111 -0
- synth_ai/environments/examples/crafter_classic/engine.py +502 -0
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +63 -0
- synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +5 -0
- synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +74 -0
- synth_ai/environments/examples/crafter_classic/environment.py +255 -0
- synth_ai/environments/examples/crafter_classic/taskset.py +228 -0
- synth_ai/environments/examples/enron/agent_demos/test_synth_react.py +535 -0
- synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +156 -0
- synth_ai/environments/examples/enron/art_helpers/local_email_db.py +280 -0
- synth_ai/environments/examples/enron/art_helpers/types_enron.py +24 -0
- synth_ai/environments/examples/enron/engine.py +291 -0
- synth_ai/environments/examples/enron/environment.py +165 -0
- synth_ai/environments/examples/enron/taskset.py +112 -0
- synth_ai/environments/examples/enron/units/keyword_stats.py +111 -0
- synth_ai/environments/examples/enron/units/test_email_index.py +8 -0
- synth_ai/environments/examples/minigrid/__init__.py +48 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +1188 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +47 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +562 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +220 -0
- synth_ai/environments/examples/minigrid/agent_demos/test_minigrid_react_agent.py +393 -0
- synth_ai/environments/examples/minigrid/engine.py +589 -0
- synth_ai/environments/examples/minigrid/environment.py +274 -0
- synth_ai/environments/examples/minigrid/environment_mapping.py +242 -0
- synth_ai/environments/examples/minigrid/puzzle_loader.py +416 -0
- synth_ai/environments/examples/minigrid/taskset.py +583 -0
- synth_ai/environments/examples/minigrid/units/test_action_behavior.py +226 -0
- synth_ai/environments/examples/minigrid/units/test_debug_messages.py +83 -0
- synth_ai/environments/examples/minigrid/units/test_exploration.py +120 -0
- synth_ai/environments/examples/minigrid/units/test_minigrid_engine.py +214 -0
- synth_ai/environments/examples/minigrid/units/test_minigrid_environment.py +238 -0
- synth_ai/environments/examples/minigrid/units/test_minigrid_environment_mapping.py +301 -0
- synth_ai/environments/examples/minigrid/units/test_minigrid_taskset.py +210 -0
- synth_ai/environments/examples/nethack/__init__.py +7 -0
- synth_ai/environments/examples/nethack/achievements.py +337 -0
- synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +981 -0
- synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +74 -0
- synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +832 -0
- synth_ai/environments/examples/nethack/agent_demos/test_nethack_react_agent.py +1112 -0
- synth_ai/environments/examples/nethack/engine.py +738 -0
- synth_ai/environments/examples/nethack/environment.py +255 -0
- synth_ai/environments/examples/nethack/helpers/__init__.py +42 -0
- synth_ai/environments/examples/nethack/helpers/action_mapping.py +301 -0
- synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +401 -0
- synth_ai/environments/examples/nethack/helpers/observation_utils.py +433 -0
- synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +201 -0
- synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +268 -0
- synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +308 -0
- synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +430 -0
- synth_ai/environments/examples/nethack/taskset.py +323 -0
- synth_ai/environments/examples/nethack/units/test_nethack_engine.py +277 -0
- synth_ai/environments/examples/nethack/units/test_nethack_environment.py +281 -0
- synth_ai/environments/examples/nethack/units/test_nethack_taskset.py +213 -0
- synth_ai/environments/examples/nethack/units/test_recording.py +307 -0
- synth_ai/environments/examples/red/__init__.py +7 -0
- synth_ai/environments/examples/red/agent_demos/__init__.py +1 -0
- synth_ai/environments/examples/red/agent_demos/test_synth_react.py +1471 -0
- synth_ai/environments/examples/red/config_logging.py +110 -0
- synth_ai/environments/examples/red/engine.py +693 -0
- synth_ai/environments/examples/red/engine_helpers/__init__.py +1 -0
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +28 -0
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +275 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +142 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +56 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +283 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +149 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +137 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +56 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +330 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +120 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +558 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +312 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +147 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +246 -0
- synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +367 -0
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +139 -0
- synth_ai/environments/examples/red/environment.py +235 -0
- synth_ai/environments/examples/red/taskset.py +77 -0
- synth_ai/environments/examples/red/test_fixes.py +125 -0
- synth_ai/environments/examples/red/test_fixes_mock.py +148 -0
- synth_ai/environments/examples/red/units/__init__.py +1 -0
- synth_ai/environments/examples/red/units/test_basic_functionality.py +97 -0
- synth_ai/environments/examples/red/units/test_button_press_requirements.py +217 -0
- synth_ai/environments/examples/red/units/test_engine.py +192 -0
- synth_ai/environments/examples/red/units/test_environment.py +455 -0
- synth_ai/environments/examples/red/units/test_exploration_strategy.py +227 -0
- synth_ai/environments/examples/red/units/test_integration.py +217 -0
- synth_ai/environments/examples/red/units/test_memory_extraction.py +111 -0
- synth_ai/environments/examples/red/units/test_menu_bug_reproduction.py +1100 -0
- synth_ai/environments/examples/red/units/test_movement_debug.py +255 -0
- synth_ai/environments/examples/red/units/test_pokemon_mcts_debug.py +163 -0
- synth_ai/environments/examples/red/units/test_pokemon_mcts_verbose.py +117 -0
- synth_ai/environments/examples/red/units/test_red_basic.py +145 -0
- synth_ai/environments/examples/red/units/test_red_comprehensive.py +323 -0
- synth_ai/environments/examples/red/units/test_retry_movement.py +195 -0
- synth_ai/environments/examples/red/units/test_reward_components.py +186 -0
- synth_ai/environments/examples/red/units/test_rom_integration.py +260 -0
- synth_ai/environments/examples/red/units/test_taskset.py +116 -0
- synth_ai/environments/examples/red/units/test_tree.py +448 -0
- synth_ai/environments/examples/sokoban/__init__.py +1 -0
- synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +900 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_dspy_react.py +1 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_sokoban_react_agent.py +498 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_synth_lats.py +1 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_locally.py +748 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_service.py +296 -0
- synth_ai/environments/examples/sokoban/engine.py +675 -0
- synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +1 -0
- synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +656 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +17 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +3 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +129 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +370 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +331 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +305 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +66 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +114 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +122 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +394 -0
- synth_ai/environments/examples/sokoban/environment.py +228 -0
- synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +438 -0
- synth_ai/environments/examples/sokoban/puzzle_loader.py +311 -0
- synth_ai/environments/examples/sokoban/taskset.py +425 -0
- synth_ai/environments/examples/sokoban/units/astar_common.py +94 -0
- synth_ai/environments/examples/sokoban/units/test_building_task_set.py +49 -0
- synth_ai/environments/examples/sokoban/units/test_false_positive.py +120 -0
- synth_ai/environments/examples/sokoban/units/test_simple_run_through_environment.py +119 -0
- synth_ai/environments/examples/sokoban/units/test_sokoban_environment.py +98 -0
- synth_ai/environments/examples/sokoban/units/test_tree.py +364 -0
- synth_ai/environments/examples/tictactoe/__init__.py +1 -0
- synth_ai/environments/examples/tictactoe/agent_demos/test_synth_react.py +266 -0
- synth_ai/environments/examples/tictactoe/agent_demos/test_tictactoe_react_agent.py +470 -0
- synth_ai/environments/examples/tictactoe/engine.py +368 -0
- synth_ai/environments/examples/tictactoe/environment.py +239 -0
- synth_ai/environments/examples/tictactoe/taskset.py +214 -0
- synth_ai/environments/examples/tictactoe/units/test_tictactoe_engine.py +393 -0
- synth_ai/environments/examples/tictactoe/units/test_tictactoe_environment.py +493 -0
- synth_ai/environments/examples/tictactoe/units/test_tictactoe_taskset.py +191 -0
- synth_ai/environments/examples/verilog/__init__.py +10 -0
- synth_ai/environments/examples/verilog/agent_demos/test_synth_react.py +520 -0
- synth_ai/environments/examples/verilog/engine.py +328 -0
- synth_ai/environments/examples/verilog/environment.py +349 -0
- synth_ai/environments/examples/verilog/taskset.py +418 -0
- synth_ai/environments/examples/verilog/units/test_verilog_engine.py +466 -0
- synth_ai/environments/examples/verilog/units/test_verilog_environment.py +585 -0
- synth_ai/environments/examples/verilog/units/test_verilog_integration.py +383 -0
- synth_ai/environments/examples/verilog/units/test_verilog_taskset.py +457 -0
- synth_ai/environments/reproducibility/core.py +42 -0
- synth_ai/environments/reproducibility/tree.py +364 -0
- synth_ai/environments/service/app.py +78 -0
- synth_ai/environments/service/core_routes.py +775 -0
- synth_ai/environments/service/external_registry.py +57 -0
- synth_ai/environments/service/registry.py +9 -0
- synth_ai/environments/stateful/__init__.py +1 -0
- synth_ai/environments/stateful/core.py +28 -0
- synth_ai/environments/stateful/engine.py +21 -0
- synth_ai/environments/stateful/state.py +7 -0
- synth_ai/environments/tasks/api.py +19 -0
- synth_ai/environments/tasks/core.py +78 -0
- synth_ai/environments/tasks/filters.py +39 -0
- synth_ai/environments/tasks/utils.py +89 -0
- synth_ai/environments/v0_observability/history.py +3 -0
- synth_ai/environments/v0_observability/log.py +2 -0
- synth_ai/lm/caching/constants.py +1 -0
- synth_ai/{zyk/lms → lm}/caching/ephemeral.py +4 -8
- synth_ai/{zyk/lms → lm}/caching/handler.py +15 -15
- synth_ai/{zyk/lms → lm}/caching/initialize.py +2 -4
- synth_ai/{zyk/lms → lm}/caching/persistent.py +4 -10
- synth_ai/{zyk/lms → lm}/config.py +2 -1
- synth_ai/{zyk/lms → lm}/constants.py +2 -2
- synth_ai/{zyk/lms → lm}/core/all.py +10 -10
- synth_ai/{zyk/lms → lm}/core/main.py +57 -33
- synth_ai/{zyk/lms → lm}/core/vendor_clients.py +12 -10
- synth_ai/lm/cost/monitor.py +1 -0
- synth_ai/lm/cost/statefulness.py +1 -0
- synth_ai/lm/provider_support/__init__.py +8 -0
- synth_ai/lm/provider_support/anthropic.py +945 -0
- synth_ai/lm/provider_support/openai.py +1115 -0
- synth_ai/lm/provider_support/suppress_logging.py +31 -0
- synth_ai/{zyk/lms → lm}/structured_outputs/handler.py +58 -80
- synth_ai/{zyk/lms → lm}/structured_outputs/inject.py +6 -20
- synth_ai/{zyk/lms → lm}/structured_outputs/rehabilitate.py +6 -12
- synth_ai/{zyk/lms → lm}/vendors/core/anthropic_api.py +21 -30
- synth_ai/{zyk/lms → lm}/vendors/core/gemini_api.py +35 -32
- synth_ai/{zyk/lms → lm}/vendors/core/mistral_api.py +19 -28
- synth_ai/{zyk/lms → lm}/vendors/core/openai_api.py +26 -36
- synth_ai/{zyk/lms → lm}/vendors/openai_standard.py +29 -33
- synth_ai/{zyk/lms → lm}/vendors/retries.py +1 -1
- synth_ai/lm/vendors/supported/__init__.py +0 -0
- synth_ai/{zyk/lms → lm}/vendors/supported/custom_endpoint.py +131 -118
- synth_ai/{zyk/lms → lm}/vendors/supported/deepseek.py +4 -8
- synth_ai/{zyk/lms → lm}/vendors/supported/grok.py +6 -8
- synth_ai/{zyk/lms → lm}/vendors/supported/groq.py +1 -1
- synth_ai/{zyk/lms → lm}/vendors/supported/ollama.py +2 -2
- synth_ai/{zyk/lms → lm}/vendors/supported/openrouter.py +18 -16
- synth_ai/{zyk/lms → lm}/vendors/supported/together.py +1 -1
- synth_ai/tracing/__init__.py +0 -0
- synth_ai/tracing/abstractions.py +224 -0
- synth_ai/tracing/base_client.py +91 -0
- synth_ai/tracing/client_manager.py +131 -0
- synth_ai/tracing/config.py +140 -0
- synth_ai/tracing/context.py +146 -0
- synth_ai/tracing/decorators.py +679 -0
- synth_ai/tracing/events/__init__.py +0 -0
- synth_ai/tracing/events/manage.py +147 -0
- synth_ai/tracing/events/scope.py +86 -0
- synth_ai/tracing/events/store.py +227 -0
- synth_ai/tracing/immediate_client.py +152 -0
- synth_ai/tracing/local.py +18 -0
- synth_ai/tracing/log_client_base.py +74 -0
- synth_ai/tracing/retry_queue.py +187 -0
- synth_ai/tracing/trackers.py +515 -0
- synth_ai/tracing/upload.py +504 -0
- synth_ai/tracing/utils.py +9 -0
- synth_ai/zyk/__init__.py +28 -2
- synth_ai-0.2.1.dev0.dist-info/METADATA +349 -0
- synth_ai-0.2.1.dev0.dist-info/RECORD +261 -0
- {synth_ai-0.2.0.dist-info → synth_ai-0.2.1.dev0.dist-info}/WHEEL +1 -1
- synth_ai/zyk/lms/caching/constants.py +0 -1
- synth_ai/zyk/lms/cost/monitor.py +0 -1
- synth_ai/zyk/lms/cost/statefulness.py +0 -1
- synth_ai-0.2.0.dist-info/METADATA +0 -36
- synth_ai-0.2.0.dist-info/RECORD +0 -50
- /synth_ai/{zyk/lms/__init__.py → environments/reproducibility/helpers.py} +0 -0
- /synth_ai/{zyk/lms/caching → lm}/__init__.py +0 -0
- /synth_ai/{zyk/lms/core → lm/caching}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/caching/dbs.py +0 -0
- /synth_ai/{zyk/lms/cost → lm/core}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/core/exceptions.py +0 -0
- /synth_ai/{zyk/lms/structured_outputs → lm/cost}/__init__.py +0 -0
- /synth_ai/{zyk/lms/vendors → lm/structured_outputs}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/tools/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/tools/base.py +0 -0
- /synth_ai/{zyk/lms/vendors/core → lm/vendors}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/vendors/base.py +0 -0
- /synth_ai/{zyk/lms/vendors/local → lm/vendors/core}/__init__.py +0 -0
- /synth_ai/{zyk/lms/vendors/supported → lm/vendors/local}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/vendors/local/ollama.py +0 -0
- {synth_ai-0.2.0.dist-info → synth_ai-0.2.1.dev0.dist-info/licenses}/LICENSE +0 -0
- {synth_ai-0.2.0.dist-info → synth_ai-0.2.1.dev0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,466 @@
|
|
1
|
+
import pytest
|
2
|
+
import tempfile
|
3
|
+
import shutil
|
4
|
+
from pathlib import Path
|
5
|
+
from unittest.mock import patch, MagicMock
|
6
|
+
|
7
|
+
# Apply a 15-second timeout to every test in this module (the `timeout` mark comes from the pytest-timeout plugin)
|
8
|
+
pytestmark = pytest.mark.timeout(15)
|
9
|
+
|
10
|
+
from synth_ai.environments.examples.verilog.engine import (
|
11
|
+
VerilogEngine,
|
12
|
+
VerilogPublicState,
|
13
|
+
VerilogPrivateState,
|
14
|
+
VerilogCompileSuccessComponent,
|
15
|
+
VerilogSimulationPassComponent,
|
16
|
+
VerilogStepPenaltyComponent,
|
17
|
+
)
|
18
|
+
from synth_ai.environments.examples.verilog.taskset import (
|
19
|
+
VerilogTaskInstance,
|
20
|
+
VerilogTaskInstanceMetadata,
|
21
|
+
)
|
22
|
+
from synth_ai.environments.tasks.core import Impetus, Intent
|
23
|
+
from uuid import uuid4
|
24
|
+
|
25
|
+
|
26
|
+
@pytest.fixture
def mock_task_instance():
    """Yield a ``VerilogTaskInstance`` backed by a throwaway directory tree.

    A temporary directory is created containing ``pristine`` and ``snapshot``
    sub-directories.  ``pristine`` is seeded with a trivial ``TopModule.v``
    and a ``test_tb.v`` testbench that prints ``ALL_TESTS_PASSED``.

    Yields:
        VerilogTaskInstance: task whose ``pristine_dir`` and ``snapshot_dir``
        are string paths inside the temporary tree.

    The temporary tree is deleted when the fixture is torn down.
    """
    temp_dir = tempfile.mkdtemp(prefix="test_verilog_")
    # try/finally: the directory must be removed even when building the task
    # fails before ``yield`` is reached (otherwise it would be leaked).
    try:
        pristine_dir = Path(temp_dir) / "pristine"
        snapshot_dir = Path(temp_dir) / "snapshot"

        pristine_dir.mkdir(parents=True)
        snapshot_dir.mkdir(parents=True)

        # Create test files
        (pristine_dir / "TopModule.v").write_text("""module TopModule(
output zero
);
assign zero = 1'b0;
endmodule""")

        (pristine_dir / "test_tb.v").write_text("""`timescale 1ns/1ps
module test_tb;
wire zero;
TopModule dut(.zero(zero));

initial begin
#10;
if (zero !== 1'b0) $fatal(1, "Test failed");
$display("ALL_TESTS_PASSED");
$finish;
end
endmodule""")

        metadata = VerilogTaskInstanceMetadata(
            problem_name="test_problem",
            difficulty="easy",
            description="Test problem",
            files_provided=["TopModule.v", "test_tb.v"],
        )

        task = VerilogTaskInstance(
            id=uuid4(),
            impetus=Impetus(instructions="Test task"),
            intent=Intent(rubric="Test goal", gold_trajectories=None, gold_state_diff={}),
            metadata=metadata,
            is_reproducible=True,
            initial_engine_snapshot=None,
            pristine_dir=str(pristine_dir),
            snapshot_dir=str(snapshot_dir),
        )

        yield task
    finally:
        # Best-effort cleanup; errors while deleting are deliberately ignored.
        shutil.rmtree(temp_dir, ignore_errors=True)
|
78
|
+
|
79
|
+
|
80
|
+
@pytest.fixture
def engine(mock_task_instance):
    """Provide a ``VerilogEngine`` built from the ``mock_task_instance`` fixture."""
    verilog_engine = VerilogEngine(mock_task_instance)
    return verilog_engine
|
84
|
+
|
85
|
+
|
86
|
+
class TestVerilogEngine:
    """Unit tests for ``VerilogEngine``: reset, file I/O, compile, simulate, submit, step."""

    @staticmethod
    def _fake_process(returncode, stdout="", stderr=""):
        """Build a ``MagicMock`` exposing ``returncode``, ``stdout`` and ``stderr``.

        Used as the return value of the patched ``subprocess.run``.
        """
        proc = MagicMock()
        proc.returncode = returncode
        proc.stdout = stdout
        proc.stderr = stderr
        return proc

    @pytest.mark.asyncio
    async def test_engine_initialization(self, engine):
        """A new engine holds a task, zero total reward and three reward components."""
        assert engine.task_instance is not None
        assert engine._total_reward == 0.0
        assert engine.reward_stack is not None
        component_count = len(engine.reward_stack.components)
        assert component_count == 3

    @pytest.mark.asyncio
    async def test_reset_engine(self, engine):
        """Resetting returns fresh private/public state and creates the working dirs."""
        private_state, public_state = await engine._reset_engine()

        assert isinstance(private_state, VerilogPrivateState)
        assert isinstance(public_state, VerilogPublicState)
        assert private_state.reward_last == 0.0
        assert private_state.total_reward == 0.0
        assert not private_state.terminated
        assert not private_state.truncated
        assert len(public_state.files) >= 1
        assert engine.snapshot_dir.exists()
        assert engine.build_dir.exists()

    @pytest.mark.asyncio
    async def test_write_file(self, engine):
        """``write_file`` reports success and the content lands in the snapshot dir."""
        await engine._reset_engine()

        outcome = await engine.write_file("test.v", "module test(); endmodule")

        assert outcome["ok"] is True
        assert outcome["type"] == "write_file"
        assert (engine.snapshot_dir / "test.v").exists()
        written_text = (engine.snapshot_dir / "test.v").read_text()
        assert written_text == "module test(); endmodule"

    @pytest.mark.asyncio
    async def test_write_file_nested_path(self, engine):
        """``write_file`` accepts a relative path that includes a sub-directory."""
        await engine._reset_engine()

        outcome = await engine.write_file("subdir/nested.v", "module nested(); endmodule")

        assert outcome["ok"] is True
        target = engine.snapshot_dir / "subdir" / "nested.v"
        assert target.exists()
        assert target.read_text() == "module nested(); endmodule"

    @pytest.mark.asyncio
    async def test_get_file_contents(self, engine):
        """A file written through the engine shows up in ``_get_file_contents``."""
        await engine._reset_engine()

        # Seed the snapshot with a file we can look for afterwards.
        await engine.write_file("new_test.v", "module new_test(); endmodule")

        contents_by_name = engine._get_file_contents()
        assert "new_test.v" in contents_by_name
        assert "module new_test();" in contents_by_name["new_test.v"]

    @pytest.mark.asyncio
    @patch("subprocess.run")
    async def test_compile_success(self, mock_run, engine):
        """A zero return code from the patched subprocess yields a successful compile."""
        await engine._reset_engine()

        # Patched subprocess reports a clean compile.
        mock_run.return_value = self._fake_process(0, stdout="", stderr="")

        outcome = await engine.compile(sources=["TopModule.v"])

        assert outcome["ok"] is True
        assert outcome["type"] == "compile"
        assert outcome["returncode"] == 0
        assert "binary" in outcome

        # The single subprocess call must be an iverilog invocation with these flags.
        mock_run.assert_called_once()
        command = mock_run.call_args[0][0]
        assert "iverilog" in command
        assert "-g2012" in command
        assert "-o" in command

    @pytest.mark.asyncio
    @patch("subprocess.run")
    async def test_compile_failure(self, mock_run, engine):
        """A non-zero return code surfaces as a failed compile with stderr and no binary."""
        await engine._reset_engine()

        # Patched subprocess reports a compiler error.
        mock_run.return_value = self._fake_process(1, stdout="", stderr="Error: syntax error")

        outcome = await engine.compile(sources=["invalid.v"])

        assert outcome["ok"] is False
        assert outcome["type"] == "compile"
        assert outcome["returncode"] == 1
        assert "syntax error" in outcome["stderr"]
        assert outcome["binary"] is None

    @pytest.mark.asyncio
    @patch("subprocess.run")
    async def test_simulate_success(self, mock_run, engine):
        """Output reporting zero mismatches is treated as a passing simulation."""
        await engine._reset_engine()

        # Patched subprocess prints a zero-mismatch summary.
        mock_run.return_value = self._fake_process(
            0,
            stdout="Simulation output\nMismatches: 0 in 10 samples\n",
            stderr="",
        )

        outcome = await engine.simulate()

        assert outcome["ok"] is True
        assert outcome["type"] == "simulate"
        assert outcome["returncode"] == 0
        assert outcome["passed"] is True
        assert "Mismatches: 0" in outcome["stdout"]

    @pytest.mark.asyncio
    @patch("subprocess.run")
    async def test_simulate_all_tests_passed(self, mock_run, engine):
        """Output containing ``ALL_TESTS_PASSED`` is treated as a passing simulation."""
        await engine._reset_engine()

        # Patched subprocess prints the ALL_TESTS_PASSED marker.
        mock_run.return_value = self._fake_process(
            0,
            stdout="Simulation running\nALL_TESTS_PASSED\n",
            stderr="",
        )

        outcome = await engine.simulate()

        assert outcome["ok"] is True
        assert outcome["passed"] is True

    @pytest.mark.asyncio
    @patch("subprocess.run")
    async def test_simulate_failure(self, mock_run, engine):
        """Reported mismatches give ``passed`` False while the call itself stays ok."""
        await engine._reset_engine()

        # Patched subprocess exits 0 but prints a non-zero mismatch count.
        mock_run.return_value = self._fake_process(
            0,
            stdout="Simulation output\nMismatches: 5 in 10 samples\n",
            stderr="",
        )

        outcome = await engine.simulate()

        assert outcome["ok"] is True
        assert outcome["passed"] is False
        assert "Mismatches: 5" in outcome["stdout"]

    @pytest.mark.asyncio
    async def test_submit(self, engine):
        """``submit`` returns an ok result flagged as submitted."""
        await engine._reset_engine()

        outcome = await engine.submit()

        assert outcome["ok"] is True
        assert outcome["type"] == "submit"
        assert outcome["submitted"] is True

    @pytest.mark.asyncio
    async def test_step_engine_compile_success(self, engine):
        """Stepping with a successful compile result gives positive reward, task still open."""
        await engine._reset_engine()

        compile_outcome = {
            "ok": True,
            "type": "compile",
            "returncode": 0,
            "stdout": "Compilation successful",
        }

        private_state, public_state = await engine._step_engine(compile_outcome)

        # Positive reward expected for a successful compile.
        assert private_state.reward_last > 0
        assert public_state.last_compile_output == "Compilation successful"
        assert not public_state.task_completed

    @pytest.mark.asyncio
    async def test_step_engine_simulate_success(self, engine):
        """Stepping with a passing simulation completes the task and terminates."""
        await engine._reset_engine()

        simulate_outcome = {
            "ok": True,
            "type": "simulate",
            "returncode": 0,
            "stdout": "ALL_TESTS_PASSED",
            "passed": True,
        }

        private_state, public_state = await engine._step_engine(simulate_outcome)

        # A passing simulation should earn the large reward.
        assert private_state.reward_last > 0.5
        assert public_state.last_simulate_output == "ALL_TESTS_PASSED"
        assert public_state.task_completed is True
        assert private_state.terminated is True

    @pytest.mark.asyncio
    async def test_step_penalty(self, engine):
        """A step that earns nothing ends up with negative last and total reward."""
        await engine._reset_engine()

        write_outcome = {"ok": True, "type": "write_file"}

        private_state, public_state = await engine._step_engine(write_outcome)

        # Only the step penalty applies, so both values are below zero.
        assert private_state.reward_last < 0
        assert private_state.total_reward < 0
|
314
|
+
|
315
|
+
|
316
|
+
class TestVerilogRewardComponents:
    """Tests for the individual Verilog reward components' ``score`` values."""

    @pytest.mark.asyncio
    async def test_compile_success_component(self):
        """Compile component: 0.1 for returncode 0, otherwise 0.0."""
        component = VerilogCompileSuccessComponent()
        state = VerilogPublicState(files={}, build_dir="", task_completed=False)

        # (action, expected reward): successful compile, failed compile, unrelated action.
        cases = [
            ({"type": "compile", "returncode": 0}, 0.1),
            ({"type": "compile", "returncode": 1}, 0.0),
            ({"type": "write_file"}, 0.0),
        ]
        for action, expected in cases:
            reward = await component.score(state, action)
            assert reward == expected

    @pytest.mark.asyncio
    async def test_simulation_pass_component(self):
        """Simulation component: 1.0 when ``passed`` is True, otherwise 0.0."""
        component = VerilogSimulationPassComponent()
        state = VerilogPublicState(files={}, build_dir="", task_completed=False)

        # (action, expected reward): passing sim, failing sim, unrelated action.
        cases = [
            ({"type": "simulate", "passed": True}, 1.0),
            ({"type": "simulate", "passed": False}, 0.0),
            ({"type": "write_file"}, 0.0),
        ]
        for action, expected in cases:
            reward = await component.score(state, action)
            assert reward == expected

    @pytest.mark.asyncio
    async def test_step_penalty_component(self):
        """Step-penalty component returns its configured penalty for the actions tried."""
        penalty = -0.05
        component = VerilogStepPenaltyComponent(penalty=penalty)
        state = VerilogPublicState(files={}, build_dir="", task_completed=False)

        # The penalty is expected regardless of the action type.
        for action in ({"type": "write_file"}, {"type": "compile"}):
            reward = await component.score(state, action)
            assert reward == penalty
|
376
|
+
|
377
|
+
|
378
|
+
class TestEngineIntegration:
    """End-to-end tests chaining reset, write, compile, simulate and step calls."""

    @pytest.mark.asyncio
    @patch("subprocess.run")
    async def test_full_workflow_success(self, mock_run, engine):
        """Reset, write, compile and simulate, all succeeding, ending terminated."""

        # Stand-in for subprocess.run: picks its answer from the tool named in cmd.
        def mock_subprocess(cmd, **kwargs):
            proc = MagicMock()
            if "iverilog" in cmd:
                # Clean compile.
                simulated_stdout = ""
            elif "vvp" in cmd:
                # Passing simulation.
                simulated_stdout = "ALL_TESTS_PASSED\n"
            else:
                # Unrecognised command: hand back the unconfigured mock.
                return proc
            proc.returncode = 0
            proc.stdout = simulated_stdout
            proc.stderr = ""
            return proc

        mock_run.side_effect = mock_subprocess

        # Start from a clean engine state.
        private_state, public_state = await engine._reset_engine()
        assert private_state.total_reward == 0.0

        # Write the design file.
        write_outcome = await engine.write_file(
            "TopModule.v",
            """module TopModule(
output zero
);
assign zero = 1'b0;
endmodule""",
        )
        assert write_outcome["ok"] is True

        # Compile, then feed the result through a step.
        compile_outcome = await engine.compile()
        assert compile_outcome["ok"] is True

        private_state, public_state = await engine._step_engine(compile_outcome)
        reward_after_compile = private_state.reward_last
        assert reward_after_compile > 0  # compile success reward expected

        # Simulate, then feed the result through a step.
        simulate_outcome = await engine.simulate()
        assert simulate_outcome["ok"] is True
        assert simulate_outcome["passed"] is True

        private_state, public_state = await engine._step_engine(simulate_outcome)
        reward_after_simulate = private_state.reward_last
        assert reward_after_simulate > 0.5  # large simulation reward expected
        assert public_state.task_completed is True
        assert private_state.terminated is True

        # Compile + simulate rewards should outweigh the step penalties.
        assert private_state.total_reward > 0

    @pytest.mark.asyncio
    @patch("subprocess.run")
    async def test_compilation_failure_workflow(self, mock_run, engine):
        """A failing compile yields a negative step reward and leaves the task open."""
        # Patched subprocess always reports a compiler error.
        failing_proc = MagicMock()
        failing_proc.returncode = 1
        failing_proc.stdout = ""
        failing_proc.stderr = "Error: syntax error"
        mock_run.return_value = failing_proc

        # Start from a clean engine state.
        await engine._reset_engine()

        # Put a deliberately invalid source file in place.
        await engine.write_file("invalid.v", "invalid verilog code")

        # Try to compile it.
        compile_outcome = await engine.compile()
        assert compile_outcome["ok"] is False

        private_state, public_state = await engine._step_engine(compile_outcome)

        # Only the step penalty applies; there is no compile success reward.
        assert private_state.reward_last < 0
        assert not public_state.task_completed
        assert not private_state.terminated
|