synth-ai 0.1.9__py3-none-any.whl → 0.2.1.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synth_ai/__init__.py +28 -2
- synth_ai/core/system.py +4 -0
- synth_ai/environments/__init__.py +35 -0
- synth_ai/environments/environment/__init__.py +1 -0
- synth_ai/environments/environment/artifacts/__init__.py +1 -0
- synth_ai/environments/environment/artifacts/base.py +50 -0
- synth_ai/environments/environment/core.py +22 -0
- synth_ai/environments/environment/db/__init__.py +1 -0
- synth_ai/environments/environment/db/sqlite.py +45 -0
- synth_ai/environments/environment/registry.py +24 -0
- synth_ai/environments/environment/resources/sqlite.py +46 -0
- synth_ai/environments/environment/results.py +1 -0
- synth_ai/environments/environment/rewards/__init__.py +1 -0
- synth_ai/environments/environment/rewards/core.py +28 -0
- synth_ai/environments/environment/shared_engine.py +26 -0
- synth_ai/environments/environment/tools/__init__.py +34 -0
- synth_ai/environments/examples/__init__.py +1 -0
- synth_ai/environments/examples/crafter_classic/__init__.py +8 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +58 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +152 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +1194 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +51 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +872 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1412 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/test_crafter_react_agent.py +1110 -0
- synth_ai/environments/examples/crafter_classic/config_logging.py +111 -0
- synth_ai/environments/examples/crafter_classic/engine.py +502 -0
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +63 -0
- synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +5 -0
- synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +74 -0
- synth_ai/environments/examples/crafter_classic/environment.py +255 -0
- synth_ai/environments/examples/crafter_classic/taskset.py +228 -0
- synth_ai/environments/examples/enron/agent_demos/test_synth_react.py +535 -0
- synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +156 -0
- synth_ai/environments/examples/enron/art_helpers/local_email_db.py +280 -0
- synth_ai/environments/examples/enron/art_helpers/types_enron.py +24 -0
- synth_ai/environments/examples/enron/engine.py +291 -0
- synth_ai/environments/examples/enron/environment.py +165 -0
- synth_ai/environments/examples/enron/taskset.py +112 -0
- synth_ai/environments/examples/enron/units/keyword_stats.py +111 -0
- synth_ai/environments/examples/enron/units/test_email_index.py +8 -0
- synth_ai/environments/examples/minigrid/__init__.py +48 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +1188 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +47 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +562 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +220 -0
- synth_ai/environments/examples/minigrid/agent_demos/test_minigrid_react_agent.py +393 -0
- synth_ai/environments/examples/minigrid/engine.py +589 -0
- synth_ai/environments/examples/minigrid/environment.py +274 -0
- synth_ai/environments/examples/minigrid/environment_mapping.py +242 -0
- synth_ai/environments/examples/minigrid/puzzle_loader.py +416 -0
- synth_ai/environments/examples/minigrid/taskset.py +583 -0
- synth_ai/environments/examples/minigrid/units/test_action_behavior.py +226 -0
- synth_ai/environments/examples/minigrid/units/test_debug_messages.py +83 -0
- synth_ai/environments/examples/minigrid/units/test_exploration.py +120 -0
- synth_ai/environments/examples/minigrid/units/test_minigrid_engine.py +214 -0
- synth_ai/environments/examples/minigrid/units/test_minigrid_environment.py +238 -0
- synth_ai/environments/examples/minigrid/units/test_minigrid_environment_mapping.py +301 -0
- synth_ai/environments/examples/minigrid/units/test_minigrid_taskset.py +210 -0
- synth_ai/environments/examples/nethack/__init__.py +7 -0
- synth_ai/environments/examples/nethack/achievements.py +337 -0
- synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +981 -0
- synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +74 -0
- synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +832 -0
- synth_ai/environments/examples/nethack/agent_demos/test_nethack_react_agent.py +1112 -0
- synth_ai/environments/examples/nethack/engine.py +738 -0
- synth_ai/environments/examples/nethack/environment.py +255 -0
- synth_ai/environments/examples/nethack/helpers/__init__.py +42 -0
- synth_ai/environments/examples/nethack/helpers/action_mapping.py +301 -0
- synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +401 -0
- synth_ai/environments/examples/nethack/helpers/observation_utils.py +433 -0
- synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +201 -0
- synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +268 -0
- synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +308 -0
- synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +430 -0
- synth_ai/environments/examples/nethack/taskset.py +323 -0
- synth_ai/environments/examples/nethack/units/test_nethack_engine.py +277 -0
- synth_ai/environments/examples/nethack/units/test_nethack_environment.py +281 -0
- synth_ai/environments/examples/nethack/units/test_nethack_taskset.py +213 -0
- synth_ai/environments/examples/nethack/units/test_recording.py +307 -0
- synth_ai/environments/examples/red/__init__.py +7 -0
- synth_ai/environments/examples/red/agent_demos/__init__.py +1 -0
- synth_ai/environments/examples/red/agent_demos/test_synth_react.py +1471 -0
- synth_ai/environments/examples/red/config_logging.py +110 -0
- synth_ai/environments/examples/red/engine.py +693 -0
- synth_ai/environments/examples/red/engine_helpers/__init__.py +1 -0
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +28 -0
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +275 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +142 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +56 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +283 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +149 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +137 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +56 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +330 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +120 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +558 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +312 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +147 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +246 -0
- synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +367 -0
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +139 -0
- synth_ai/environments/examples/red/environment.py +235 -0
- synth_ai/environments/examples/red/taskset.py +77 -0
- synth_ai/environments/examples/red/test_fixes.py +125 -0
- synth_ai/environments/examples/red/test_fixes_mock.py +148 -0
- synth_ai/environments/examples/red/units/__init__.py +1 -0
- synth_ai/environments/examples/red/units/test_basic_functionality.py +97 -0
- synth_ai/environments/examples/red/units/test_button_press_requirements.py +217 -0
- synth_ai/environments/examples/red/units/test_engine.py +192 -0
- synth_ai/environments/examples/red/units/test_environment.py +455 -0
- synth_ai/environments/examples/red/units/test_exploration_strategy.py +227 -0
- synth_ai/environments/examples/red/units/test_integration.py +217 -0
- synth_ai/environments/examples/red/units/test_memory_extraction.py +111 -0
- synth_ai/environments/examples/red/units/test_menu_bug_reproduction.py +1100 -0
- synth_ai/environments/examples/red/units/test_movement_debug.py +255 -0
- synth_ai/environments/examples/red/units/test_pokemon_mcts_debug.py +163 -0
- synth_ai/environments/examples/red/units/test_pokemon_mcts_verbose.py +117 -0
- synth_ai/environments/examples/red/units/test_red_basic.py +145 -0
- synth_ai/environments/examples/red/units/test_red_comprehensive.py +323 -0
- synth_ai/environments/examples/red/units/test_retry_movement.py +195 -0
- synth_ai/environments/examples/red/units/test_reward_components.py +186 -0
- synth_ai/environments/examples/red/units/test_rom_integration.py +260 -0
- synth_ai/environments/examples/red/units/test_taskset.py +116 -0
- synth_ai/environments/examples/red/units/test_tree.py +448 -0
- synth_ai/environments/examples/sokoban/__init__.py +1 -0
- synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +900 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_dspy_react.py +1 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_sokoban_react_agent.py +498 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_synth_lats.py +1 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_locally.py +748 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_service.py +296 -0
- synth_ai/environments/examples/sokoban/engine.py +675 -0
- synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +1 -0
- synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +656 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +17 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +3 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +129 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +370 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +331 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +305 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +66 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +114 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +122 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +394 -0
- synth_ai/environments/examples/sokoban/environment.py +228 -0
- synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +438 -0
- synth_ai/environments/examples/sokoban/puzzle_loader.py +311 -0
- synth_ai/environments/examples/sokoban/taskset.py +425 -0
- synth_ai/environments/examples/sokoban/units/astar_common.py +94 -0
- synth_ai/environments/examples/sokoban/units/test_building_task_set.py +49 -0
- synth_ai/environments/examples/sokoban/units/test_false_positive.py +120 -0
- synth_ai/environments/examples/sokoban/units/test_simple_run_through_environment.py +119 -0
- synth_ai/environments/examples/sokoban/units/test_sokoban_environment.py +98 -0
- synth_ai/environments/examples/sokoban/units/test_tree.py +364 -0
- synth_ai/environments/examples/tictactoe/__init__.py +1 -0
- synth_ai/environments/examples/tictactoe/agent_demos/test_synth_react.py +266 -0
- synth_ai/environments/examples/tictactoe/agent_demos/test_tictactoe_react_agent.py +470 -0
- synth_ai/environments/examples/tictactoe/engine.py +368 -0
- synth_ai/environments/examples/tictactoe/environment.py +239 -0
- synth_ai/environments/examples/tictactoe/taskset.py +214 -0
- synth_ai/environments/examples/tictactoe/units/test_tictactoe_engine.py +393 -0
- synth_ai/environments/examples/tictactoe/units/test_tictactoe_environment.py +493 -0
- synth_ai/environments/examples/tictactoe/units/test_tictactoe_taskset.py +191 -0
- synth_ai/environments/examples/verilog/__init__.py +10 -0
- synth_ai/environments/examples/verilog/agent_demos/test_synth_react.py +520 -0
- synth_ai/environments/examples/verilog/engine.py +328 -0
- synth_ai/environments/examples/verilog/environment.py +349 -0
- synth_ai/environments/examples/verilog/taskset.py +418 -0
- synth_ai/environments/examples/verilog/units/test_verilog_engine.py +466 -0
- synth_ai/environments/examples/verilog/units/test_verilog_environment.py +585 -0
- synth_ai/environments/examples/verilog/units/test_verilog_integration.py +383 -0
- synth_ai/environments/examples/verilog/units/test_verilog_taskset.py +457 -0
- synth_ai/environments/reproducibility/core.py +42 -0
- synth_ai/environments/reproducibility/tree.py +364 -0
- synth_ai/environments/service/app.py +78 -0
- synth_ai/environments/service/core_routes.py +775 -0
- synth_ai/environments/service/external_registry.py +57 -0
- synth_ai/environments/service/registry.py +9 -0
- synth_ai/environments/stateful/__init__.py +1 -0
- synth_ai/environments/stateful/core.py +28 -0
- synth_ai/environments/stateful/engine.py +21 -0
- synth_ai/environments/stateful/state.py +7 -0
- synth_ai/environments/tasks/api.py +19 -0
- synth_ai/environments/tasks/core.py +78 -0
- synth_ai/environments/tasks/filters.py +39 -0
- synth_ai/environments/tasks/utils.py +89 -0
- synth_ai/environments/v0_observability/history.py +3 -0
- synth_ai/environments/v0_observability/log.py +2 -0
- synth_ai/lm/caching/constants.py +1 -0
- synth_ai/{zyk/lms → lm}/caching/ephemeral.py +4 -8
- synth_ai/{zyk/lms → lm}/caching/handler.py +15 -15
- synth_ai/{zyk/lms → lm}/caching/initialize.py +2 -4
- synth_ai/{zyk/lms → lm}/caching/persistent.py +4 -10
- synth_ai/{zyk/lms → lm}/config.py +2 -1
- synth_ai/{zyk/lms → lm}/constants.py +2 -2
- synth_ai/{zyk/lms → lm}/core/all.py +10 -10
- synth_ai/{zyk/lms → lm}/core/main.py +57 -33
- synth_ai/{zyk/lms → lm}/core/vendor_clients.py +12 -10
- synth_ai/lm/cost/monitor.py +1 -0
- synth_ai/lm/cost/statefulness.py +1 -0
- synth_ai/lm/provider_support/__init__.py +8 -0
- synth_ai/lm/provider_support/anthropic.py +945 -0
- synth_ai/lm/provider_support/openai.py +1115 -0
- synth_ai/lm/provider_support/suppress_logging.py +31 -0
- synth_ai/{zyk/lms → lm}/structured_outputs/handler.py +58 -80
- synth_ai/{zyk/lms → lm}/structured_outputs/inject.py +6 -20
- synth_ai/{zyk/lms → lm}/structured_outputs/rehabilitate.py +6 -12
- synth_ai/{zyk/lms → lm}/vendors/core/anthropic_api.py +21 -30
- synth_ai/{zyk/lms → lm}/vendors/core/gemini_api.py +37 -32
- synth_ai/{zyk/lms → lm}/vendors/core/mistral_api.py +19 -28
- synth_ai/{zyk/lms → lm}/vendors/core/openai_api.py +26 -36
- synth_ai/{zyk/lms → lm}/vendors/openai_standard.py +29 -33
- synth_ai/{zyk/lms → lm}/vendors/retries.py +1 -1
- synth_ai/lm/vendors/supported/__init__.py +0 -0
- synth_ai/{zyk/lms → lm}/vendors/supported/custom_endpoint.py +131 -118
- synth_ai/{zyk/lms → lm}/vendors/supported/deepseek.py +4 -8
- synth_ai/{zyk/lms → lm}/vendors/supported/grok.py +6 -8
- synth_ai/{zyk/lms → lm}/vendors/supported/groq.py +1 -1
- synth_ai/{zyk/lms → lm}/vendors/supported/ollama.py +2 -2
- synth_ai/{zyk/lms → lm}/vendors/supported/openrouter.py +18 -16
- synth_ai/{zyk/lms → lm}/vendors/supported/together.py +1 -1
- synth_ai/tracing/__init__.py +0 -0
- synth_ai/tracing/abstractions.py +224 -0
- synth_ai/tracing/base_client.py +91 -0
- synth_ai/tracing/client_manager.py +131 -0
- synth_ai/tracing/config.py +140 -0
- synth_ai/tracing/context.py +146 -0
- synth_ai/tracing/decorators.py +679 -0
- synth_ai/tracing/events/__init__.py +0 -0
- synth_ai/tracing/events/manage.py +147 -0
- synth_ai/tracing/events/scope.py +86 -0
- synth_ai/tracing/events/store.py +227 -0
- synth_ai/tracing/immediate_client.py +152 -0
- synth_ai/tracing/local.py +18 -0
- synth_ai/tracing/log_client_base.py +74 -0
- synth_ai/tracing/retry_queue.py +187 -0
- synth_ai/tracing/trackers.py +515 -0
- synth_ai/tracing/upload.py +504 -0
- synth_ai/tracing/utils.py +9 -0
- synth_ai/zyk/__init__.py +28 -2
- synth_ai-0.2.1.dev0.dist-info/METADATA +349 -0
- synth_ai-0.2.1.dev0.dist-info/RECORD +261 -0
- synth_ai/zyk/lms/caching/constants.py +0 -1
- synth_ai/zyk/lms/cost/monitor.py +0 -1
- synth_ai/zyk/lms/cost/statefulness.py +0 -1
- synth_ai-0.1.9.dist-info/METADATA +0 -37
- synth_ai-0.1.9.dist-info/RECORD +0 -50
- /synth_ai/{zyk/lms/__init__.py → environments/reproducibility/helpers.py} +0 -0
- /synth_ai/{zyk/lms/caching → lm}/__init__.py +0 -0
- /synth_ai/{zyk/lms/core → lm/caching}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/caching/dbs.py +0 -0
- /synth_ai/{zyk/lms/cost → lm/core}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/core/exceptions.py +0 -0
- /synth_ai/{zyk/lms/structured_outputs → lm/cost}/__init__.py +0 -0
- /synth_ai/{zyk/lms/vendors → lm/structured_outputs}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/tools/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/tools/base.py +0 -0
- /synth_ai/{zyk/lms/vendors/core → lm/vendors}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/vendors/base.py +0 -0
- /synth_ai/{zyk/lms/vendors/local → lm/vendors/core}/__init__.py +0 -0
- /synth_ai/{zyk/lms/vendors/supported → lm/vendors/local}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/vendors/local/ollama.py +0 -0
- {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/WHEEL +0 -0
- {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,455 @@
|
|
1
|
+
import pytest
|
2
|
+
from unittest.mock import Mock, patch, AsyncMock
|
3
|
+
|
4
|
+
# Add imports for the new dataclasses
|
5
|
+
from synth_ai.environments.examples.red.engine import (
|
6
|
+
GameWorldState,
|
7
|
+
PlayerProgressState,
|
8
|
+
GameSystemState,
|
9
|
+
PokemonData,
|
10
|
+
)
|
11
|
+
from synth_ai.environments.examples.red.environment import (
|
12
|
+
PokemonRedEnvironment,
|
13
|
+
PokemonRedPublicState,
|
14
|
+
PokemonRedPrivateState,
|
15
|
+
PressButtonTool,
|
16
|
+
PokemonRedObservationCallable,
|
17
|
+
)
|
18
|
+
from synth_ai.environments.environment.tools import EnvToolCall, ToolResult
|
19
|
+
from synth_ai.environments.examples.red.taskset import INSTANCE as DEFAULT_TASK
|
20
|
+
|
21
|
+
|
22
|
+
class TestPokemonRedEnvironment:
    """Tests for the PokemonRedEnvironment wrapper, run against a mocked engine."""

    # `patch` must target the name in the module where it is looked up. The
    # environment is imported from the `synth_ai.environments` package, so the
    # engine class has to be replaced there (the former `src.examples...` path
    # is not importable from the installed package).
    ENGINE_TARGET = "synth_ai.environments.examples.red.environment.PokemonRedEngine"

    @pytest.fixture
    def mock_engine(self):
        """Create a mock engine whose `_reset_engine` resolves to an initial state pair."""
        engine = Mock()
        engine._reset_engine = AsyncMock(
            return_value=(
                PokemonRedPrivateState(
                    reward_last_step=0.0,
                    total_reward=0.0,
                    terminated=False,
                    truncated=False,
                    step_count=0,
                ),
                create_test_public_state(
                    map_id=3,
                    player_x=10,
                    player_y=8,
                    badges=0,
                    in_battle=False,
                    party_level=10,
                    party_hp_current=35,
                    party_hp_max=35,
                    party_xp=1000,
                    step_count=0,
                ),
            )
        )
        engine._step_engine = AsyncMock()
        engine._serialize_engine = AsyncMock()
        engine._create_states = Mock()
        return engine

    @patch(ENGINE_TARGET)
    def test_environment_initialization(self, mock_engine_class, mock_engine):
        """Test that constructing the environment wires up engine, task and tool"""
        mock_engine_class.return_value = mock_engine

        env = PokemonRedEnvironment()

        assert env.name == "PokemonRed"
        assert env.task_instance == DEFAULT_TASK
        assert env.engine == mock_engine
        assert isinstance(env._press_button_tool, PressButtonTool)

    @patch(ENGINE_TARGET)
    @pytest.mark.asyncio
    async def test_initialize(self, mock_engine_class, mock_engine):
        """Test that initialize() resets the engine once and returns an observation"""
        mock_engine_class.return_value = mock_engine

        env = PokemonRedEnvironment()
        obs = await env.initialize()

        mock_engine._reset_engine.assert_called_once()
        assert "position" in obs
        assert "badges_earned" in obs
        assert obs["badges_earned"] == 0
        assert obs["party_level"] == 10

    @patch(ENGINE_TARGET)
    @pytest.mark.asyncio
    async def test_terminate(self, mock_engine_class, mock_engine):
        """Test environment termination"""
        mock_engine_class.return_value = mock_engine
        mock_engine._create_states.return_value = (
            PokemonRedPrivateState(
                reward_last_step=0.0,
                total_reward=10.5,
                terminated=True,
                truncated=False,
                step_count=42,
            ),
            create_test_public_state(
                map_id=3,
                player_x=10,
                player_y=8,
                badges=1,
                in_battle=False,
                party_level=12,
                party_hp_current=30,
                party_hp_max=35,
                party_xp=1500,
                step_count=42,
            ),
        )

        env = PokemonRedEnvironment()
        obs = await env.terminate()

        assert obs["terminated"] is True
        assert "message" in obs

    def test_validate_tool_calls_single_call(self):
        """Test tool call validation with single call"""
        with patch(self.ENGINE_TARGET):
            env = PokemonRedEnvironment()

            call = EnvToolCall(tool="press_button", args={"button": "A"})
            validated = env.validate_tool_calls(call)

            assert validated == call

    def test_validate_tool_calls_list(self):
        """Test tool call validation with list"""
        with patch(self.ENGINE_TARGET):
            env = PokemonRedEnvironment()

            call = EnvToolCall(tool="press_button", args={"button": "A"})
            validated = env.validate_tool_calls([call])

            assert validated == call

    def test_validate_tool_calls_nested_list(self):
        """Test tool call validation with nested list"""
        with patch(self.ENGINE_TARGET):
            env = PokemonRedEnvironment()

            call = EnvToolCall(tool="press_button", args={"button": "A"})
            validated = env.validate_tool_calls([[call]])

            assert validated == call

    def test_validate_tool_calls_invalid_tool(self):
        """Test tool call validation with invalid tool"""
        with patch(self.ENGINE_TARGET):
            env = PokemonRedEnvironment()

            call = EnvToolCall(tool="invalid_tool", args={})
            with pytest.raises(ValueError, match="Unknown tool: invalid_tool"):
                env.validate_tool_calls(call)

    def test_validate_tool_calls_empty_list(self):
        """Test tool call validation with empty list"""
        with patch(self.ENGINE_TARGET):
            env = PokemonRedEnvironment()

            with pytest.raises(ValueError, match="empty list"):
                env.validate_tool_calls([])

    def test_validate_tool_calls_wrong_type(self):
        """Test tool call validation with wrong type"""
        with patch(self.ENGINE_TARGET):
            env = PokemonRedEnvironment()

            with pytest.raises(TypeError):
                env.validate_tool_calls("not_a_call")

    @patch(ENGINE_TARGET)
    @pytest.mark.asyncio
    async def test_step_successful(self, mock_engine_class, mock_engine):
        """Test successful step execution"""
        mock_engine_class.return_value = mock_engine

        # Mock successful tool execution
        tool_result = ToolResult(
            ok=True,
            payload={
                "private": PokemonRedPrivateState(
                    reward_last_step=0.1,
                    total_reward=0.1,
                    terminated=False,
                    truncated=False,
                    step_count=1,
                ),
                "public": create_test_public_state(
                    map_id=3,
                    player_x=11,
                    player_y=8,
                    badges=0,
                    in_battle=False,
                    party_level=10,
                    party_hp_current=35,
                    party_hp_max=35,
                    party_xp=1000,
                    step_count=1,
                ),
            },
        )

        env = PokemonRedEnvironment()
        # Replace the real tool so step() receives the canned result above.
        env._press_button_tool = AsyncMock(return_value=tool_result)

        call = EnvToolCall(tool="press_button", args={"button": "RIGHT"})
        obs = await env.step(call)

        assert obs["position"] == "Map03:(11,8)"
        assert obs["step_count"] == 1
        assert obs["total_reward"] == 0.1

    @patch(ENGINE_TARGET)
    @pytest.mark.asyncio
    async def test_step_failed_tool(self, mock_engine_class, mock_engine):
        """Test step with failed tool execution"""
        mock_engine_class.return_value = mock_engine
        mock_engine._create_states.return_value = (
            PokemonRedPrivateState(
                reward_last_step=0.0,
                total_reward=0.0,
                terminated=False,
                truncated=False,
                step_count=0,
            ),
            create_test_public_state(
                map_id=3,
                player_x=10,
                player_y=8,
                badges=0,
                in_battle=False,
                party_level=10,
                party_hp_current=35,
                party_hp_max=35,
                party_xp=1000,
                step_count=0,
                error_info="Button press failed",
            ),
        )

        # Mock failed tool execution
        tool_result = ToolResult(ok=False, error="Invalid button", payload={"public": {}})

        env = PokemonRedEnvironment()
        env._press_button_tool = AsyncMock(return_value=tool_result)

        call = EnvToolCall(tool="press_button", args={"button": "INVALID"})
        obs = await env.step(call)

        # Should still return valid observation
        assert "position" in obs

    @patch(ENGINE_TARGET)
    @pytest.mark.asyncio
    async def test_checkpoint(self, mock_engine_class, mock_engine):
        """Test environment checkpointing"""
        mock_engine_class.return_value = mock_engine
        mock_engine._serialize_engine.return_value = Mock(model_dump=lambda: {"test": "data"})
        mock_engine._create_states.return_value = (
            PokemonRedPrivateState(
                reward_last_step=0.0,
                total_reward=5.0,
                terminated=False,
                truncated=False,
                step_count=20,
            ),
            create_test_public_state(
                map_id=4,
                player_x=15,
                player_y=12,
                badges=1,
                in_battle=False,
                party_level=11,
                party_hp_current=40,
                party_hp_max=40,
                party_xp=1200,
                step_count=20,
            ),
        )

        env = PokemonRedEnvironment()
        obs = await env.checkpoint()

        assert "engine_snapshot_data" in obs
        assert obs["step_count"] == 20
        assert obs["total_reward"] == 5.0

    @pytest.mark.asyncio
    async def test_observation_callable(self):
        """Test observation callable functionality"""
        obs_callable = PokemonRedObservationCallable()

        priv_state = PokemonRedPrivateState(
            reward_last_step=0.1,
            total_reward=2.5,
            terminated=False,
            truncated=False,
            step_count=25,
        )

        pub_state = create_test_public_state(
            map_id=5,
            player_x=20,
            player_y=15,
            badges=3,  # bitfield 0b11 -> two badge bits set
            in_battle=True,
            party_level=15,
            party_hp_current=25,
            party_hp_max=50,
            party_xp=5000,
            step_count=25,
            error_info="Test error",
        )

        obs = await obs_callable.get_observation(pub_state, priv_state)

        assert obs["position"] == "Map05:(20,15)"
        assert obs["badges_earned"] == 2  # bin(3).count('1')
        assert obs["badges_bitfield"] == 3
        assert obs["hp_status"] == "HP: 25/50 (50%)"
        assert obs["party_level"] == 15
        assert obs["in_battle"] is True
        assert obs["step_count"] == 25
        assert obs["total_reward"] == 2.5
        assert obs["error"] == "Test error"
|
327
|
+
|
328
|
+
|
329
|
+
class TestPressButtonTool:
    """Exercise PressButtonTool against a mocked engine"""

    @pytest.fixture
    def mock_engine(self):
        """Build a Mock engine whose async `_step_engine` yields a one-step state pair"""
        stub = Mock()
        private_state = PokemonRedPrivateState(
            reward_last_step=0.0,
            total_reward=0.0,
            terminated=False,
            truncated=False,
            step_count=1,
        )
        public_state = create_test_public_state(
            map_id=3,
            player_x=10,
            player_y=8,
            badges=0,
            in_battle=False,
            party_level=10,
            party_hp_current=35,
            party_hp_max=35,
            party_xp=1000,
            step_count=1,
        )
        stub._step_engine = AsyncMock(return_value=(private_state, public_state))
        return stub

    @pytest.mark.asyncio
    async def test_press_button_tool_success(self, mock_engine):
        """A well-formed press_button call succeeds and forwards its args to the engine"""
        press_button = PressButtonTool(mock_engine)
        request = EnvToolCall(tool="press_button", args={"button": "A", "frames": 2})

        outcome = await press_button(request)

        assert outcome.ok is True
        assert "public" in outcome.payload
        assert "private" in outcome.payload
        mock_engine._step_engine.assert_called_once_with({"button": "A", "frames": 2})

    @pytest.mark.asyncio
    async def test_press_button_tool_invalid_args(self, mock_engine):
        """A call lacking the `button` argument yields a failed result with an error"""
        press_button = PressButtonTool(mock_engine)
        fallback_states = (Mock(), Mock())
        mock_engine._create_states.return_value = fallback_states

        # Only `frames` is supplied; the required `button` key is absent.
        request = EnvToolCall(tool="press_button", args={"frames": 1})
        outcome = await press_button(request)

        assert outcome.ok is False
        assert outcome.error is not None

    @pytest.mark.asyncio
    async def test_press_button_tool_engine_error(self, mock_engine):
        """An exception raised by the engine step is reported in the tool result"""
        press_button = PressButtonTool(mock_engine)
        mock_engine._step_engine.side_effect = Exception("Engine error")
        fallback_states = (Mock(), Mock())
        mock_engine._create_states.return_value = fallback_states

        request = EnvToolCall(tool="press_button", args={"button": "A"})
        outcome = await press_button(request)

        assert outcome.ok is False
        assert "Engine error" in outcome.error
|
399
|
+
|
400
|
+
|
401
|
+
# Helper function to create properly structured PokemonRedPublicState
|
402
|
+
def create_test_public_state(
    map_id: int = 3,
    player_x: int = 10,
    player_y: int = 8,
    badges: int = 0,
    in_battle: bool = False,
    party_level: int = 10,
    party_hp_current: int = 35,
    party_hp_max: int = 35,
    party_xp: int = 1000,
    step_count: int = 0,
    error_info: "str | None" = None,
) -> PokemonRedPublicState:
    """Create a properly structured PokemonRedPublicState for testing.

    Args:
        map_id: Map identifier stored on the world component.
        player_x: Player x coordinate stored on the world component.
        player_y: Player y coordinate stored on the world component.
        badges: Value used for both ``badges`` and ``badge_count`` on the
            progress component.
        in_battle: Battle flag stored on the system component.
        party_level: Level of the single party member; a value of 0 or less
            produces an empty party.
        party_hp_current: Current HP of the party member.
        party_hp_max: Maximum HP of the party member.
        party_xp: Experience points of the party member.
        step_count: Step counter stored on the progress component.
        error_info: Optional error text passed through to the public state.

    Returns:
        A ``PokemonRedPublicState`` with an empty inventory and fixed values
        for every field not exposed as a parameter.
    """
    # Create structured components
    world = GameWorldState(map_id=map_id, player_x=player_x, player_y=player_y)

    progress = PlayerProgressState(
        badges=badges,
        badge_count=badges,  # badge_count should match badges
        money=3000,
        step_count=step_count,
    )

    system = GameSystemState(
        in_battle=in_battle,
        battle_outcome=0,
        menu_state=1,
        text_box_active=False,
        warp_flag=207,
    )

    # Create party if stats are provided
    party = []
    if party_level > 0:
        # Guard the division so a zero max HP reports 0% rather than raising.
        hp_percentage = party_hp_current / party_hp_max * 100.0 if party_hp_max > 0 else 0.0
        party.append(
            PokemonData(
                species_id=25,  # Pikachu
                level=party_level,
                hp_current=party_hp_current,
                hp_max=party_hp_max,
                xp=party_xp,
                hp_percentage=hp_percentage,
            )
        )

    return PokemonRedPublicState(
        world=world,
        progress=progress,
        party=party,
        inventory=[],
        system=system,
        error_info=error_info,
    )
|
@@ -0,0 +1,227 @@
|
|
1
|
+
import pytest
|
2
|
+
import asyncio
|
3
|
+
import uuid
|
4
|
+
|
5
|
+
from synth_ai.environments.examples.red.environment import (
|
6
|
+
PokemonRedEnvironment,
|
7
|
+
PokemonRedPublicState,
|
8
|
+
PokemonRedPrivateState,
|
9
|
+
)
|
10
|
+
from synth_ai.environments.environment.shared_engine import (
|
11
|
+
GetObservationCallable,
|
12
|
+
InternalObservation,
|
13
|
+
)
|
14
|
+
from synth_ai.environments.examples.red.taskset import PokemonRedTaskInstance
|
15
|
+
from synth_ai.environments.tasks.core import Impetus, Intent, TaskInstanceMetadata
|
16
|
+
from synth_ai.environments.environment.tools import EnvToolCall
|
17
|
+
|
18
|
+
|
19
|
+
class PressButtonCall(EnvToolCall):
    """Convenience ``EnvToolCall`` preconfigured for the ``press_button`` tool."""

    def __init__(self, button: str, frames: int = 1):
        """Initialise the call with *button* and *frames* as its arguments."""
        call_args = {"button": button, "frames": frames}
        super().__init__(tool="press_button", args=call_args)
|
24
|
+
|
25
|
+
|
26
|
+
class ExplorationObservationCallable(GetObservationCallable):
    """Observation builder used by the exploration test."""

    def __init__(self):
        # Nothing in this class assigns the buffer after construction; it is
        # passed through unchanged in every observation.
        self.screen_buffer = None

    async def get_observation(
        self, pub: PokemonRedPublicState, priv: PokemonRedPrivateState
    ) -> InternalObservation:
        """Bundle both states with a one-line text summary.

        Raises:
            RuntimeError: If either ``pub`` or ``priv`` is ``None``.
        """
        if any(state is None for state in (pub, priv)):
            raise RuntimeError("Missing public or private state in get_observation")

        step = pub.step_count
        position = f"({pub.player_x}, {pub.player_y})"
        summary = f"Step: {step}, Position: {position}, Map: {pub.map_id}"

        observation = {"public": pub, "private": priv}
        observation["formatted_obs"] = summary
        observation["screen_buffer"] = self.screen_buffer
        return observation
|
48
|
+
|
49
|
+
|
50
|
+
def _classify_buttons(results):
    """Split per-button results into (effective, ineffective, errored) name lists.

    The three lists are mutually exclusive: a button whose step reported an
    error is listed only as errored, because its effect was never observed.
    """
    effective, ineffective, errored = [], [], []
    for btn, result in results.items():
        if "error" in result:
            errored.append(btn)
        elif result.get("effective", False):
            effective.append(btn)
        else:
            ineffective.append(btn)
    return effective, ineffective, errored


def _print_exploration_summary(results):
    """Print the per-category button lists and the derived recommendations."""
    effective_buttons, ineffective_buttons, error_buttons = _classify_buttons(results)

    print("\n" + "=" * 80)
    print("EXPLORATION RESULTS AND RECOMMENDATIONS")
    print("=" * 80)

    print(f"\n✅ EFFECTIVE BUTTONS ({len(effective_buttons)}): {', '.join(effective_buttons)}")
    for btn in effective_buttons:
        changes = results[btn]["state_changes"]
        print(f" {btn}: {', '.join(changes)}")

    print(
        f"\n❌ INEFFECTIVE BUTTONS ({len(ineffective_buttons)}): {', '.join(ineffective_buttons)}"
    )

    if error_buttons:
        print(f"\n🚫 ERROR BUTTONS ({len(error_buttons)}): {', '.join(error_buttons)}")

    # Recommendations
    print("\n💡 RECOMMENDATIONS:")
    if effective_buttons:
        print(f" - Agent should prioritize: {', '.join(effective_buttons[:3])}")
        print(" - These buttons cause state changes and may lead to progress")
    else:
        print(" - No buttons caused state changes in this initial position")
        print(" - May need to investigate game state or save file")

    if "LEFT" in effective_buttons or "RIGHT" in effective_buttons:
        print(" - Movement is working - agent should explore the area")

    # Only claim 'A' is ineffective when it was actually pressed without error.
    if "A" in ineffective_buttons:
        print(" - 'A' button ineffective at this position - agent needs to move first")


async def test_exploration_when_stuck():
    """
    Test what happens when we try different buttons in the initial game state.

    Presses each of the eight buttons once, in a fixed order, compares the
    public state before and after each press, prints a report, and returns
    the collected results.

    Returns:
        dict mapping button name to either ``{"error": ...}`` when the step
        reported an error, or a dict with the keys ``position_changed``,
        ``map_changed``, ``state_changes`` and ``effective``.

    NOTE(review): the ``test_`` prefix means pytest may also try to collect
    this coroutine directly, although it carries no asyncio marker and returns
    a value; it is invoked from ``test_exploration_strategy``. Confirm whether
    direct collection is intended.
    """
    print("\n" + "=" * 80)
    print("EXPLORATION STRATEGY TEST - FINDING AVAILABLE ACTIONS")
    print("=" * 80)

    # Create a task instance
    task_metadata = TaskInstanceMetadata()
    inst = PokemonRedTaskInstance(
        id=uuid.uuid4(),
        impetus=Impetus(instructions="Explore available actions in initial state."),
        intent=Intent(
            rubric={"goal": "Find working actions"},
            gold_trajectories=None,
            gold_state_diff={},
        ),
        metadata=task_metadata,
        is_reproducible=True,
        initial_engine_snapshot=None,
    )

    exploration_obs = ExplorationObservationCallable()
    env = PokemonRedEnvironment(inst, custom_step_obs=exploration_obs)

    try:
        # Initialize environment
        print("\n[DEBUG] Initializing environment...")
        obs_payload = await env.initialize()

        if "error" in obs_payload:
            pytest.fail(f"Environment initialization failed: {obs_payload['error']}")

        print("[DEBUG] Environment initialized successfully")

        # Get initial state
        initial_pub = obs_payload["public"]
        initial_position = (initial_pub.player_x, initial_pub.player_y)
        initial_map_id = initial_pub.map_id

        print(f"[DEBUG] Initial position: {initial_position}")
        print(f"[DEBUG] Initial map ID: {initial_map_id}")

        # Test all available buttons systematically
        buttons_to_test = ["A", "B", "UP", "DOWN", "LEFT", "RIGHT", "START", "SELECT"]

        results = {}

        for button in buttons_to_test:
            print(f"\n--- Testing {button} button ---")

            # Get state before button press
            before_pub = obs_payload["public"]
            before_position = (before_pub.player_x, before_pub.player_y)
            before_map = before_pub.map_id

            print(f"Before {button}: pos={before_position}, map={before_map}")

            # Press the button
            step_result = await env.step([[PressButtonCall(button)]])

            if "error" in step_result:
                print(f"[ERROR] {button} button failed: {step_result['error']}")
                results[button] = {"error": step_result["error"]}
                # obs_payload is left untouched so the next button is compared
                # against the last successfully observed state.
                continue

            # Check state after button press
            after_pub = step_result["public"]
            after_position = (after_pub.player_x, after_pub.player_y)
            after_map = after_pub.map_id

            print(f"After {button}: pos={after_position}, map={after_map}")

            # Analyze what changed
            position_changed = after_position != before_position
            map_changed = after_map != before_map

            state_changes = []
            if position_changed:
                state_changes.append(f"position: {before_position} -> {after_position}")
            if map_changed:
                state_changes.append(f"map: {before_map} -> {after_map}")

            # Check other state attributes, only when both states expose them
            if hasattr(before_pub, "party_level") and hasattr(after_pub, "party_level"):
                if before_pub.party_level != after_pub.party_level:
                    state_changes.append(
                        f"party_level: {before_pub.party_level} -> {after_pub.party_level}"
                    )

            if hasattr(before_pub, "badges") and hasattr(after_pub, "badges"):
                if before_pub.badges != after_pub.badges:
                    state_changes.append(f"badges: {before_pub.badges} -> {after_pub.badges}")

            results[button] = {
                "position_changed": position_changed,
                "map_changed": map_changed,
                "state_changes": state_changes,
                "effective": len(state_changes) > 0,
            }

            if state_changes:
                print(f"[SUCCESS] {button} caused changes: {', '.join(state_changes)}")
            else:
                print(f"[NO EFFECT] {button} had no visible effect")

            # Update obs_payload for next test
            obs_payload = step_result

        # Analysis and recommendations
        _print_exploration_summary(results)

        return results

    except Exception as e:
        # Log for the diagnostic output, then let the failure propagate.
        print(f"[ERROR] Test failed with exception: {e}")
        raise
|
214
|
+
|
215
|
+
|
216
|
+
@pytest.mark.asyncio
async def test_exploration_strategy():
    """Pytest entry point: run the exploration diagnostic to completion."""
    # The per-button results are reported via stdout; they are not inspected here.
    await test_exploration_when_stuck()

    # The test always passes but provides diagnostic information
    assert True, "Exploration strategy test completed - see output for recommendations"
|
223
|
+
|
224
|
+
|
225
|
+
if __name__ == "__main__":
    # Allow running the diagnostic as a plain script, outside of pytest.
    entry_point = test_exploration_strategy()
    asyncio.run(entry_point)
|