synth-ai 0.1.9__py3-none-any.whl → 0.2.1.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synth_ai/__init__.py +28 -2
- synth_ai/core/system.py +4 -0
- synth_ai/environments/__init__.py +35 -0
- synth_ai/environments/environment/__init__.py +1 -0
- synth_ai/environments/environment/artifacts/__init__.py +1 -0
- synth_ai/environments/environment/artifacts/base.py +50 -0
- synth_ai/environments/environment/core.py +22 -0
- synth_ai/environments/environment/db/__init__.py +1 -0
- synth_ai/environments/environment/db/sqlite.py +45 -0
- synth_ai/environments/environment/registry.py +24 -0
- synth_ai/environments/environment/resources/sqlite.py +46 -0
- synth_ai/environments/environment/results.py +1 -0
- synth_ai/environments/environment/rewards/__init__.py +1 -0
- synth_ai/environments/environment/rewards/core.py +28 -0
- synth_ai/environments/environment/shared_engine.py +26 -0
- synth_ai/environments/environment/tools/__init__.py +34 -0
- synth_ai/environments/examples/__init__.py +1 -0
- synth_ai/environments/examples/crafter_classic/__init__.py +8 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +58 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +152 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +1194 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +51 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +872 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1412 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/test_crafter_react_agent.py +1110 -0
- synth_ai/environments/examples/crafter_classic/config_logging.py +111 -0
- synth_ai/environments/examples/crafter_classic/engine.py +502 -0
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +63 -0
- synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +5 -0
- synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +74 -0
- synth_ai/environments/examples/crafter_classic/environment.py +255 -0
- synth_ai/environments/examples/crafter_classic/taskset.py +228 -0
- synth_ai/environments/examples/enron/agent_demos/test_synth_react.py +535 -0
- synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +156 -0
- synth_ai/environments/examples/enron/art_helpers/local_email_db.py +280 -0
- synth_ai/environments/examples/enron/art_helpers/types_enron.py +24 -0
- synth_ai/environments/examples/enron/engine.py +291 -0
- synth_ai/environments/examples/enron/environment.py +165 -0
- synth_ai/environments/examples/enron/taskset.py +112 -0
- synth_ai/environments/examples/enron/units/keyword_stats.py +111 -0
- synth_ai/environments/examples/enron/units/test_email_index.py +8 -0
- synth_ai/environments/examples/minigrid/__init__.py +48 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +1188 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +47 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +562 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +220 -0
- synth_ai/environments/examples/minigrid/agent_demos/test_minigrid_react_agent.py +393 -0
- synth_ai/environments/examples/minigrid/engine.py +589 -0
- synth_ai/environments/examples/minigrid/environment.py +274 -0
- synth_ai/environments/examples/minigrid/environment_mapping.py +242 -0
- synth_ai/environments/examples/minigrid/puzzle_loader.py +416 -0
- synth_ai/environments/examples/minigrid/taskset.py +583 -0
- synth_ai/environments/examples/minigrid/units/test_action_behavior.py +226 -0
- synth_ai/environments/examples/minigrid/units/test_debug_messages.py +83 -0
- synth_ai/environments/examples/minigrid/units/test_exploration.py +120 -0
- synth_ai/environments/examples/minigrid/units/test_minigrid_engine.py +214 -0
- synth_ai/environments/examples/minigrid/units/test_minigrid_environment.py +238 -0
- synth_ai/environments/examples/minigrid/units/test_minigrid_environment_mapping.py +301 -0
- synth_ai/environments/examples/minigrid/units/test_minigrid_taskset.py +210 -0
- synth_ai/environments/examples/nethack/__init__.py +7 -0
- synth_ai/environments/examples/nethack/achievements.py +337 -0
- synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +981 -0
- synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +74 -0
- synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +832 -0
- synth_ai/environments/examples/nethack/agent_demos/test_nethack_react_agent.py +1112 -0
- synth_ai/environments/examples/nethack/engine.py +738 -0
- synth_ai/environments/examples/nethack/environment.py +255 -0
- synth_ai/environments/examples/nethack/helpers/__init__.py +42 -0
- synth_ai/environments/examples/nethack/helpers/action_mapping.py +301 -0
- synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +401 -0
- synth_ai/environments/examples/nethack/helpers/observation_utils.py +433 -0
- synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +201 -0
- synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +268 -0
- synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +308 -0
- synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +430 -0
- synth_ai/environments/examples/nethack/taskset.py +323 -0
- synth_ai/environments/examples/nethack/units/test_nethack_engine.py +277 -0
- synth_ai/environments/examples/nethack/units/test_nethack_environment.py +281 -0
- synth_ai/environments/examples/nethack/units/test_nethack_taskset.py +213 -0
- synth_ai/environments/examples/nethack/units/test_recording.py +307 -0
- synth_ai/environments/examples/red/__init__.py +7 -0
- synth_ai/environments/examples/red/agent_demos/__init__.py +1 -0
- synth_ai/environments/examples/red/agent_demos/test_synth_react.py +1471 -0
- synth_ai/environments/examples/red/config_logging.py +110 -0
- synth_ai/environments/examples/red/engine.py +693 -0
- synth_ai/environments/examples/red/engine_helpers/__init__.py +1 -0
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +28 -0
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +275 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +142 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +56 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +283 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +149 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +137 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +56 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +330 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +120 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +558 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +312 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +147 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +246 -0
- synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +367 -0
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +139 -0
- synth_ai/environments/examples/red/environment.py +235 -0
- synth_ai/environments/examples/red/taskset.py +77 -0
- synth_ai/environments/examples/red/test_fixes.py +125 -0
- synth_ai/environments/examples/red/test_fixes_mock.py +148 -0
- synth_ai/environments/examples/red/units/__init__.py +1 -0
- synth_ai/environments/examples/red/units/test_basic_functionality.py +97 -0
- synth_ai/environments/examples/red/units/test_button_press_requirements.py +217 -0
- synth_ai/environments/examples/red/units/test_engine.py +192 -0
- synth_ai/environments/examples/red/units/test_environment.py +455 -0
- synth_ai/environments/examples/red/units/test_exploration_strategy.py +227 -0
- synth_ai/environments/examples/red/units/test_integration.py +217 -0
- synth_ai/environments/examples/red/units/test_memory_extraction.py +111 -0
- synth_ai/environments/examples/red/units/test_menu_bug_reproduction.py +1100 -0
- synth_ai/environments/examples/red/units/test_movement_debug.py +255 -0
- synth_ai/environments/examples/red/units/test_pokemon_mcts_debug.py +163 -0
- synth_ai/environments/examples/red/units/test_pokemon_mcts_verbose.py +117 -0
- synth_ai/environments/examples/red/units/test_red_basic.py +145 -0
- synth_ai/environments/examples/red/units/test_red_comprehensive.py +323 -0
- synth_ai/environments/examples/red/units/test_retry_movement.py +195 -0
- synth_ai/environments/examples/red/units/test_reward_components.py +186 -0
- synth_ai/environments/examples/red/units/test_rom_integration.py +260 -0
- synth_ai/environments/examples/red/units/test_taskset.py +116 -0
- synth_ai/environments/examples/red/units/test_tree.py +448 -0
- synth_ai/environments/examples/sokoban/__init__.py +1 -0
- synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +900 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_dspy_react.py +1 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_sokoban_react_agent.py +498 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_synth_lats.py +1 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_locally.py +748 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_service.py +296 -0
- synth_ai/environments/examples/sokoban/engine.py +675 -0
- synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +1 -0
- synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +656 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +17 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +3 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +129 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +370 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +331 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +305 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +66 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +114 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +122 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +394 -0
- synth_ai/environments/examples/sokoban/environment.py +228 -0
- synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +438 -0
- synth_ai/environments/examples/sokoban/puzzle_loader.py +311 -0
- synth_ai/environments/examples/sokoban/taskset.py +425 -0
- synth_ai/environments/examples/sokoban/units/astar_common.py +94 -0
- synth_ai/environments/examples/sokoban/units/test_building_task_set.py +49 -0
- synth_ai/environments/examples/sokoban/units/test_false_positive.py +120 -0
- synth_ai/environments/examples/sokoban/units/test_simple_run_through_environment.py +119 -0
- synth_ai/environments/examples/sokoban/units/test_sokoban_environment.py +98 -0
- synth_ai/environments/examples/sokoban/units/test_tree.py +364 -0
- synth_ai/environments/examples/tictactoe/__init__.py +1 -0
- synth_ai/environments/examples/tictactoe/agent_demos/test_synth_react.py +266 -0
- synth_ai/environments/examples/tictactoe/agent_demos/test_tictactoe_react_agent.py +470 -0
- synth_ai/environments/examples/tictactoe/engine.py +368 -0
- synth_ai/environments/examples/tictactoe/environment.py +239 -0
- synth_ai/environments/examples/tictactoe/taskset.py +214 -0
- synth_ai/environments/examples/tictactoe/units/test_tictactoe_engine.py +393 -0
- synth_ai/environments/examples/tictactoe/units/test_tictactoe_environment.py +493 -0
- synth_ai/environments/examples/tictactoe/units/test_tictactoe_taskset.py +191 -0
- synth_ai/environments/examples/verilog/__init__.py +10 -0
- synth_ai/environments/examples/verilog/agent_demos/test_synth_react.py +520 -0
- synth_ai/environments/examples/verilog/engine.py +328 -0
- synth_ai/environments/examples/verilog/environment.py +349 -0
- synth_ai/environments/examples/verilog/taskset.py +418 -0
- synth_ai/environments/examples/verilog/units/test_verilog_engine.py +466 -0
- synth_ai/environments/examples/verilog/units/test_verilog_environment.py +585 -0
- synth_ai/environments/examples/verilog/units/test_verilog_integration.py +383 -0
- synth_ai/environments/examples/verilog/units/test_verilog_taskset.py +457 -0
- synth_ai/environments/reproducibility/core.py +42 -0
- synth_ai/environments/reproducibility/tree.py +364 -0
- synth_ai/environments/service/app.py +78 -0
- synth_ai/environments/service/core_routes.py +775 -0
- synth_ai/environments/service/external_registry.py +57 -0
- synth_ai/environments/service/registry.py +9 -0
- synth_ai/environments/stateful/__init__.py +1 -0
- synth_ai/environments/stateful/core.py +28 -0
- synth_ai/environments/stateful/engine.py +21 -0
- synth_ai/environments/stateful/state.py +7 -0
- synth_ai/environments/tasks/api.py +19 -0
- synth_ai/environments/tasks/core.py +78 -0
- synth_ai/environments/tasks/filters.py +39 -0
- synth_ai/environments/tasks/utils.py +89 -0
- synth_ai/environments/v0_observability/history.py +3 -0
- synth_ai/environments/v0_observability/log.py +2 -0
- synth_ai/lm/caching/constants.py +1 -0
- synth_ai/{zyk/lms → lm}/caching/ephemeral.py +4 -8
- synth_ai/{zyk/lms → lm}/caching/handler.py +15 -15
- synth_ai/{zyk/lms → lm}/caching/initialize.py +2 -4
- synth_ai/{zyk/lms → lm}/caching/persistent.py +4 -10
- synth_ai/{zyk/lms → lm}/config.py +2 -1
- synth_ai/{zyk/lms → lm}/constants.py +2 -2
- synth_ai/{zyk/lms → lm}/core/all.py +10 -10
- synth_ai/{zyk/lms → lm}/core/main.py +57 -33
- synth_ai/{zyk/lms → lm}/core/vendor_clients.py +12 -10
- synth_ai/lm/cost/monitor.py +1 -0
- synth_ai/lm/cost/statefulness.py +1 -0
- synth_ai/lm/provider_support/__init__.py +8 -0
- synth_ai/lm/provider_support/anthropic.py +945 -0
- synth_ai/lm/provider_support/openai.py +1115 -0
- synth_ai/lm/provider_support/suppress_logging.py +31 -0
- synth_ai/{zyk/lms → lm}/structured_outputs/handler.py +58 -80
- synth_ai/{zyk/lms → lm}/structured_outputs/inject.py +6 -20
- synth_ai/{zyk/lms → lm}/structured_outputs/rehabilitate.py +6 -12
- synth_ai/{zyk/lms → lm}/vendors/core/anthropic_api.py +21 -30
- synth_ai/{zyk/lms → lm}/vendors/core/gemini_api.py +37 -32
- synth_ai/{zyk/lms → lm}/vendors/core/mistral_api.py +19 -28
- synth_ai/{zyk/lms → lm}/vendors/core/openai_api.py +26 -36
- synth_ai/{zyk/lms → lm}/vendors/openai_standard.py +29 -33
- synth_ai/{zyk/lms → lm}/vendors/retries.py +1 -1
- synth_ai/lm/vendors/supported/__init__.py +0 -0
- synth_ai/{zyk/lms → lm}/vendors/supported/custom_endpoint.py +131 -118
- synth_ai/{zyk/lms → lm}/vendors/supported/deepseek.py +4 -8
- synth_ai/{zyk/lms → lm}/vendors/supported/grok.py +6 -8
- synth_ai/{zyk/lms → lm}/vendors/supported/groq.py +1 -1
- synth_ai/{zyk/lms → lm}/vendors/supported/ollama.py +2 -2
- synth_ai/{zyk/lms → lm}/vendors/supported/openrouter.py +18 -16
- synth_ai/{zyk/lms → lm}/vendors/supported/together.py +1 -1
- synth_ai/tracing/__init__.py +0 -0
- synth_ai/tracing/abstractions.py +224 -0
- synth_ai/tracing/base_client.py +91 -0
- synth_ai/tracing/client_manager.py +131 -0
- synth_ai/tracing/config.py +140 -0
- synth_ai/tracing/context.py +146 -0
- synth_ai/tracing/decorators.py +679 -0
- synth_ai/tracing/events/__init__.py +0 -0
- synth_ai/tracing/events/manage.py +147 -0
- synth_ai/tracing/events/scope.py +86 -0
- synth_ai/tracing/events/store.py +227 -0
- synth_ai/tracing/immediate_client.py +152 -0
- synth_ai/tracing/local.py +18 -0
- synth_ai/tracing/log_client_base.py +74 -0
- synth_ai/tracing/retry_queue.py +187 -0
- synth_ai/tracing/trackers.py +515 -0
- synth_ai/tracing/upload.py +504 -0
- synth_ai/tracing/utils.py +9 -0
- synth_ai/zyk/__init__.py +28 -2
- synth_ai-0.2.1.dev0.dist-info/METADATA +349 -0
- synth_ai-0.2.1.dev0.dist-info/RECORD +261 -0
- synth_ai/zyk/lms/caching/constants.py +0 -1
- synth_ai/zyk/lms/cost/monitor.py +0 -1
- synth_ai/zyk/lms/cost/statefulness.py +0 -1
- synth_ai-0.1.9.dist-info/METADATA +0 -37
- synth_ai-0.1.9.dist-info/RECORD +0 -50
- /synth_ai/{zyk/lms/__init__.py → environments/reproducibility/helpers.py} +0 -0
- /synth_ai/{zyk/lms/caching → lm}/__init__.py +0 -0
- /synth_ai/{zyk/lms/core → lm/caching}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/caching/dbs.py +0 -0
- /synth_ai/{zyk/lms/cost → lm/core}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/core/exceptions.py +0 -0
- /synth_ai/{zyk/lms/structured_outputs → lm/cost}/__init__.py +0 -0
- /synth_ai/{zyk/lms/vendors → lm/structured_outputs}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/tools/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/tools/base.py +0 -0
- /synth_ai/{zyk/lms/vendors/core → lm/vendors}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/vendors/base.py +0 -0
- /synth_ai/{zyk/lms/vendors/local → lm/vendors/core}/__init__.py +0 -0
- /synth_ai/{zyk/lms/vendors/supported → lm/vendors/local}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/vendors/local/ollama.py +0 -0
- {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/WHEEL +0 -0
- {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,217 @@
|
|
1
|
+
import pytest
|
2
|
+
from synth_ai.environments.examples.red.environment import PokemonRedEnvironment
|
3
|
+
from synth_ai.environments.examples.red.taskset import INSTANCE as POKEMON_TASK
|
4
|
+
from synth_ai.environments.environment.tools import EnvToolCall
|
5
|
+
|
6
|
+
|
7
|
+
class TestPokemonRedIntegration:
    """Integration tests that drive ``PokemonRedEnvironment`` end to end.

    Every test builds a fresh environment from ``POKEMON_TASK``, initializes
    it, and then checks the observation dictionaries returned by
    ``initialize`` / ``step`` / ``checkpoint`` / ``terminate``.
    The tests are described as running against the real ROM, so they are
    presumably only meaningful where that ROM is available -- TODO confirm.
    """

    # Address read as the badge-flags byte in the memory test below.
    _BADGE_FLAGS_ADDR = 0xD356

    # Observation keys required right after ``initialize`` and the type each
    # value must have. Kept as one table so the key check and the type check
    # cannot drift apart.
    _OBSERVATION_SCHEMA = (
        ("position", str),
        ("badges_earned", int),
        ("badges_bitfield", int),
        ("hp_status", str),
        ("party_level", int),
        ("party_xp", int),
        ("in_battle", bool),
        ("step_count", int),
        ("reward_last_step", float),
        ("total_reward", float),
        ("terminated", bool),
    )

    @staticmethod
    def _press(button, frames=1):
        """Return a ``press_button`` tool call for ``button`` held ``frames`` frames."""
        return EnvToolCall(
            tool="press_button",
            args={"button": button, "frames": frames},
        )

    @pytest.mark.asyncio
    async def test_full_workflow_real(self):
        """Run initialize -> several steps -> checkpoint -> terminate."""
        env = PokemonRedEnvironment(POKEMON_TASK)

        # Initialization must expose position and an integer badge count.
        # The badge count itself is not pinned: it depends on the loaded state.
        obs = await env.initialize()
        assert "position" in obs
        assert "badges_earned" in obs
        assert isinstance(obs["badges_earned"], int)

        # A short mixed sequence of presses with differing frame counts.
        actions = [
            self._press("RIGHT", 1),
            self._press("UP", 2),
            self._press("A", 1),
        ]
        for action in actions:
            obs = await env.step(action)
            assert "step_count" in obs
            assert "total_reward" in obs
            assert isinstance(obs["step_count"], int)
            assert isinstance(obs["total_reward"], float)

        # Checkpointing must include the engine snapshot payload.
        checkpoint_obs = await env.checkpoint()
        assert "engine_snapshot_data" in checkpoint_obs

        # Termination must flag the final observation as terminated.
        final_obs = await env.terminate()
        assert final_obs["terminated"] is True

    @pytest.mark.asyncio
    async def test_button_sequence_real(self):
        """Press every basic button once and check the step counter advances."""
        env = PokemonRedEnvironment(POKEMON_TASK)
        await env.initialize()

        buttons = ["A", "B", "UP", "DOWN", "LEFT", "RIGHT", "START", "SELECT"]

        for expected_steps, button in enumerate(buttons, start=1):
            obs = await env.step(self._press(button))

            assert obs["step_count"] == expected_steps
            for key in ("position", "badges_earned", "hp_status", "party_level"):
                assert key in obs

    @pytest.mark.asyncio
    async def test_multiple_frame_actions_real(self):
        """A multi-frame press must still count as a single step."""
        env = PokemonRedEnvironment(POKEMON_TASK)
        await env.initialize()

        # Holding a button for five frames is one step, not five.
        obs = await env.step(self._press("RIGHT", 5))
        assert obs["step_count"] == 1
        assert "position" in obs

        # A second multi-frame press advances the counter by exactly one.
        obs = await env.step(self._press("A", 3))
        assert obs["step_count"] == 2

    @pytest.mark.asyncio
    async def test_state_consistency_real(self):
        """Observations keep their core keys across repeated steps."""
        env = PokemonRedEnvironment(POKEMON_TASK)

        # The initial observation must already carry the core keys. These were
        # previously read into unused locals; assert on them explicitly.
        initial_obs = await env.initialize()
        for key in ("position", "badges_earned", "hp_status"):
            assert key in initial_obs

        for _ in range(5):
            obs = await env.step(self._press("A"))

            # State should remain valid even if unchanged.
            assert "position" in obs
            assert "badges_earned" in obs
            assert "hp_status" in obs
            assert isinstance(obs["badges_earned"], int)

    @pytest.mark.asyncio
    async def test_reward_accumulation_real(self):
        """Total reward moves away from its initial value as steps are taken."""
        env = PokemonRedEnvironment(POKEMON_TASK)

        obs = await env.initialize()
        initial_reward = obs["total_reward"]

        for expected_steps in range(1, 4):
            obs = await env.step(self._press("DOWN"))

            # NOTE(review): this assumes every step changes the running total
            # (e.g. via a per-step penalty) -- confirm against the engine.
            assert obs["total_reward"] != initial_reward
            assert isinstance(obs["total_reward"], float)
            assert obs["step_count"] == expected_steps

    @pytest.mark.asyncio
    async def test_checkpointing_real(self):
        """A checkpoint taken after one step exposes a well-typed snapshot."""
        env = PokemonRedEnvironment(POKEMON_TASK)

        await env.initialize()
        await env.step(self._press("RIGHT"))

        checkpoint_obs = await env.checkpoint()

        assert "engine_snapshot_data" in checkpoint_obs
        snapshot = checkpoint_obs["engine_snapshot_data"]
        for key in ("state_data", "total_reward", "step_count"):
            assert key in snapshot
        assert isinstance(snapshot["total_reward"], float)
        assert isinstance(snapshot["step_count"], int)

    @pytest.mark.asyncio
    async def test_invalid_button_handling_real(self):
        """An unknown button name still yields a usable observation."""
        env = PokemonRedEnvironment(POKEMON_TASK)
        await env.initialize()

        # The step is expected to return normally rather than raise.
        obs = await env.step(self._press("INVALID"))
        assert "position" in obs
        assert "step_count" in obs

    @pytest.mark.asyncio
    async def test_observation_format_real(self):
        """The initial observation has every required key with the right type."""
        env = PokemonRedEnvironment(POKEMON_TASK)

        obs = await env.initialize()

        # Presence first, so a missing key is reported by name.
        for key, _ in self._OBSERVATION_SCHEMA:
            assert key in obs, f"Missing key: {key}"

        # Then the value types.
        for key, expected_type in self._OBSERVATION_SCHEMA:
            assert isinstance(obs[key], expected_type), (
                f"{key} should be {expected_type.__name__}, "
                f"got {type(obs[key]).__name__}"
            )

    @pytest.mark.asyncio
    async def test_rom_memory_integration_real(self):
        """Emulator memory is reachable and readable before and after a step."""
        env = PokemonRedEnvironment(POKEMON_TASK)
        await env.initialize()

        # The engine and its emulator must be exposed on the environment.
        assert env.engine is not None
        assert env.engine.emulator is not None
        assert hasattr(env.engine.emulator, "memory")

        # Two back-to-back reads of the same address must agree.
        memory = env.engine.emulator.memory
        first_read = memory[self._BADGE_FLAGS_ADDR]
        second_read = memory[self._BADGE_FLAGS_ADDR]
        assert first_read == second_read

        # The same memory handle must remain usable after the emulator advances.
        await env.step(self._press("A"))

        read_after_step = memory[self._BADGE_FLAGS_ADDR]
        assert isinstance(read_after_step, int)
|
@@ -0,0 +1,111 @@
|
|
1
|
+
from synth_ai.environments.examples.red.engine_helpers.state_extraction import (
|
2
|
+
extract_game_state,
|
3
|
+
get_badge_count,
|
4
|
+
format_position,
|
5
|
+
format_hp_status,
|
6
|
+
get_byte,
|
7
|
+
get_word,
|
8
|
+
get_3byte_int,
|
9
|
+
)
|
10
|
+
from synth_ai.environments.examples.red.engine_helpers.memory_map import *
|
11
|
+
|
12
|
+
|
13
|
+
class TestMemoryExtraction:
    """Unit tests for the state-extraction helpers.

    The helpers are exercised against plain ``bytearray`` buffers; the address
    constants (``MAP_ID``, ``PARTY_XP``, ...) come from the ``memory_map``
    module imported at the top of the file.
    """

    def test_get_byte(self):
        """``get_byte`` returns the byte at an offset, and 0 when out of range."""
        values = [0x00, 0x42, 0xFF, 0x80]
        memory = bytearray(values)
        for offset, expected in enumerate(values):
            assert get_byte(memory, offset) == expected

        # An offset past the end of the buffer yields 0 instead of raising.
        assert get_byte(memory, 100) == 0

    def test_get_word(self):
        """``get_word`` reads 16 bits with the low byte first."""
        memory = bytearray([0x34, 0x12, 0xFF, 0x00])
        assert get_word(memory, 0) == 0x1234
        assert get_word(memory, 2) == 0x00FF

    def test_get_3byte_int(self):
        """``get_3byte_int`` reads 24 bits with the low byte first."""
        memory = bytearray([0x56, 0x34, 0x12, 0x00])
        assert get_3byte_int(memory, 0) == 0x123456

    def test_extract_game_state(self):
        """``extract_game_state`` decodes a populated 64 KiB memory image."""
        memory = bytearray(0x10000)

        # (address, value) pairs written in order into the mock memory image.
        writes = [
            (MAP_ID, 0x03),
            (PLAYER_X, 10),
            (PLAYER_Y, 8),
            (BADGE_FLAGS, 0x01),  # lowest badge bit set
            (IN_BATTLE_FLAG, 0),  # not in battle
            (PARTY_COUNT, 1),
            (PARTY_LEVELS, 12),
            (PARTY_HP_CURRENT, 35),  # current HP, low byte
            (PARTY_HP_CURRENT + 1, 0),  # current HP, high byte
            (PARTY_HP_MAX, 42),  # max HP, low byte
            (PARTY_HP_MAX + 1, 0),  # max HP, high byte
            (PARTY_XP, 0x40),  # XP, low byte
            (PARTY_XP + 1, 0x42),  # XP, mid byte
            (PARTY_XP + 2, 0x0F),  # XP, high byte
        ]
        for address, value in writes:
            memory[address] = value

        state = extract_game_state(memory)

        assert state["map_id"] == 0x03
        assert state["player_x"] == 10
        assert state["player_y"] == 8
        assert state["badges"] == 0x01
        # Truthiness check instead of ``== False`` (PEP 8); accepts False or 0.
        assert not state["in_battle"]
        assert state["party_level"] == 12
        assert state["party_hp_current"] == 35
        assert state["party_hp_max"] == 42
        assert state["party_xp"] == 0x0F4240  # 1000000 in decimal

    def test_get_badge_count(self):
        """``get_badge_count`` returns the number of set bits in the bitfield."""
        cases = [
            (0x00, 0),  # no bits set
            (0x01, 1),  # one bit
            (0x03, 2),  # two lowest bits
            (0xFF, 8),  # all eight bits
            (0x55, 4),  # alternating bits
        ]
        for bitfield, expected in cases:
            assert get_badge_count(bitfield) == expected

    def test_format_position(self):
        """``format_position`` renders the map id as two hex digits plus (x,y)."""
        assert format_position(10, 8, 3) == "Map03:(10,8)"
        assert format_position(0, 0, 255) == "MapFF:(0,0)"

    def test_format_hp_status(self):
        """``format_hp_status`` renders current/max with a percentage."""
        assert format_hp_status(35, 50) == "HP: 35/50 (70%)"
        assert format_hp_status(0, 35) == "HP: 0/35 (0%)"
        assert format_hp_status(35, 35) == "HP: 35/35 (100%)"
        # A max of zero cannot produce a percentage and is reported as unknown.
        assert format_hp_status(10, 0) == "HP: Unknown"

    def test_memory_addresses_valid(self):
        """Every memory-map constant lies within 0x8000-0xFFFF."""
        addresses = [
            BADGE_FLAGS,
            MAP_ID,
            PLAYER_X,
            PLAYER_Y,
            IN_BATTLE_FLAG,
            BATTLE_OUTCOME,
            PARTY_LEVELS,
            PARTY_HP_CURRENT,
            PARTY_HP_MAX,
            PARTY_XP,
            INVENTORY_COUNT,
            INVENTORY_START,
            MENU_STATE,
            WARP_FLAG,
        ]

        for addr in addresses:
            assert 0x8000 <= addr <= 0xFFFF, f"Address {hex(addr)} outside Game Boy RAM range"
|