synth-ai 0.1.9__py3-none-any.whl → 0.2.1.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synth_ai/__init__.py +28 -2
- synth_ai/core/system.py +4 -0
- synth_ai/environments/__init__.py +35 -0
- synth_ai/environments/environment/__init__.py +1 -0
- synth_ai/environments/environment/artifacts/__init__.py +1 -0
- synth_ai/environments/environment/artifacts/base.py +50 -0
- synth_ai/environments/environment/core.py +22 -0
- synth_ai/environments/environment/db/__init__.py +1 -0
- synth_ai/environments/environment/db/sqlite.py +45 -0
- synth_ai/environments/environment/registry.py +24 -0
- synth_ai/environments/environment/resources/sqlite.py +46 -0
- synth_ai/environments/environment/results.py +1 -0
- synth_ai/environments/environment/rewards/__init__.py +1 -0
- synth_ai/environments/environment/rewards/core.py +28 -0
- synth_ai/environments/environment/shared_engine.py +26 -0
- synth_ai/environments/environment/tools/__init__.py +34 -0
- synth_ai/environments/examples/__init__.py +1 -0
- synth_ai/environments/examples/crafter_classic/__init__.py +8 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +58 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +152 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +1194 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +51 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +872 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1412 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/test_crafter_react_agent.py +1110 -0
- synth_ai/environments/examples/crafter_classic/config_logging.py +111 -0
- synth_ai/environments/examples/crafter_classic/engine.py +502 -0
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +63 -0
- synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +5 -0
- synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +74 -0
- synth_ai/environments/examples/crafter_classic/environment.py +255 -0
- synth_ai/environments/examples/crafter_classic/taskset.py +228 -0
- synth_ai/environments/examples/enron/agent_demos/test_synth_react.py +535 -0
- synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +156 -0
- synth_ai/environments/examples/enron/art_helpers/local_email_db.py +280 -0
- synth_ai/environments/examples/enron/art_helpers/types_enron.py +24 -0
- synth_ai/environments/examples/enron/engine.py +291 -0
- synth_ai/environments/examples/enron/environment.py +165 -0
- synth_ai/environments/examples/enron/taskset.py +112 -0
- synth_ai/environments/examples/enron/units/keyword_stats.py +111 -0
- synth_ai/environments/examples/enron/units/test_email_index.py +8 -0
- synth_ai/environments/examples/minigrid/__init__.py +48 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +1188 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +47 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +562 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +220 -0
- synth_ai/environments/examples/minigrid/agent_demos/test_minigrid_react_agent.py +393 -0
- synth_ai/environments/examples/minigrid/engine.py +589 -0
- synth_ai/environments/examples/minigrid/environment.py +274 -0
- synth_ai/environments/examples/minigrid/environment_mapping.py +242 -0
- synth_ai/environments/examples/minigrid/puzzle_loader.py +416 -0
- synth_ai/environments/examples/minigrid/taskset.py +583 -0
- synth_ai/environments/examples/minigrid/units/test_action_behavior.py +226 -0
- synth_ai/environments/examples/minigrid/units/test_debug_messages.py +83 -0
- synth_ai/environments/examples/minigrid/units/test_exploration.py +120 -0
- synth_ai/environments/examples/minigrid/units/test_minigrid_engine.py +214 -0
- synth_ai/environments/examples/minigrid/units/test_minigrid_environment.py +238 -0
- synth_ai/environments/examples/minigrid/units/test_minigrid_environment_mapping.py +301 -0
- synth_ai/environments/examples/minigrid/units/test_minigrid_taskset.py +210 -0
- synth_ai/environments/examples/nethack/__init__.py +7 -0
- synth_ai/environments/examples/nethack/achievements.py +337 -0
- synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +981 -0
- synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +74 -0
- synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +832 -0
- synth_ai/environments/examples/nethack/agent_demos/test_nethack_react_agent.py +1112 -0
- synth_ai/environments/examples/nethack/engine.py +738 -0
- synth_ai/environments/examples/nethack/environment.py +255 -0
- synth_ai/environments/examples/nethack/helpers/__init__.py +42 -0
- synth_ai/environments/examples/nethack/helpers/action_mapping.py +301 -0
- synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +401 -0
- synth_ai/environments/examples/nethack/helpers/observation_utils.py +433 -0
- synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +201 -0
- synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +268 -0
- synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +308 -0
- synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +430 -0
- synth_ai/environments/examples/nethack/taskset.py +323 -0
- synth_ai/environments/examples/nethack/units/test_nethack_engine.py +277 -0
- synth_ai/environments/examples/nethack/units/test_nethack_environment.py +281 -0
- synth_ai/environments/examples/nethack/units/test_nethack_taskset.py +213 -0
- synth_ai/environments/examples/nethack/units/test_recording.py +307 -0
- synth_ai/environments/examples/red/__init__.py +7 -0
- synth_ai/environments/examples/red/agent_demos/__init__.py +1 -0
- synth_ai/environments/examples/red/agent_demos/test_synth_react.py +1471 -0
- synth_ai/environments/examples/red/config_logging.py +110 -0
- synth_ai/environments/examples/red/engine.py +693 -0
- synth_ai/environments/examples/red/engine_helpers/__init__.py +1 -0
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +28 -0
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +275 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +142 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +56 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +283 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +149 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +137 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +56 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +330 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +120 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +558 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +312 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +147 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +246 -0
- synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +367 -0
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +139 -0
- synth_ai/environments/examples/red/environment.py +235 -0
- synth_ai/environments/examples/red/taskset.py +77 -0
- synth_ai/environments/examples/red/test_fixes.py +125 -0
- synth_ai/environments/examples/red/test_fixes_mock.py +148 -0
- synth_ai/environments/examples/red/units/__init__.py +1 -0
- synth_ai/environments/examples/red/units/test_basic_functionality.py +97 -0
- synth_ai/environments/examples/red/units/test_button_press_requirements.py +217 -0
- synth_ai/environments/examples/red/units/test_engine.py +192 -0
- synth_ai/environments/examples/red/units/test_environment.py +455 -0
- synth_ai/environments/examples/red/units/test_exploration_strategy.py +227 -0
- synth_ai/environments/examples/red/units/test_integration.py +217 -0
- synth_ai/environments/examples/red/units/test_memory_extraction.py +111 -0
- synth_ai/environments/examples/red/units/test_menu_bug_reproduction.py +1100 -0
- synth_ai/environments/examples/red/units/test_movement_debug.py +255 -0
- synth_ai/environments/examples/red/units/test_pokemon_mcts_debug.py +163 -0
- synth_ai/environments/examples/red/units/test_pokemon_mcts_verbose.py +117 -0
- synth_ai/environments/examples/red/units/test_red_basic.py +145 -0
- synth_ai/environments/examples/red/units/test_red_comprehensive.py +323 -0
- synth_ai/environments/examples/red/units/test_retry_movement.py +195 -0
- synth_ai/environments/examples/red/units/test_reward_components.py +186 -0
- synth_ai/environments/examples/red/units/test_rom_integration.py +260 -0
- synth_ai/environments/examples/red/units/test_taskset.py +116 -0
- synth_ai/environments/examples/red/units/test_tree.py +448 -0
- synth_ai/environments/examples/sokoban/__init__.py +1 -0
- synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +900 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_dspy_react.py +1 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_sokoban_react_agent.py +498 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_synth_lats.py +1 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_locally.py +748 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_service.py +296 -0
- synth_ai/environments/examples/sokoban/engine.py +675 -0
- synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +1 -0
- synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +656 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +17 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +3 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +129 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +370 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +331 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +305 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +66 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +114 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +122 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +394 -0
- synth_ai/environments/examples/sokoban/environment.py +228 -0
- synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +438 -0
- synth_ai/environments/examples/sokoban/puzzle_loader.py +311 -0
- synth_ai/environments/examples/sokoban/taskset.py +425 -0
- synth_ai/environments/examples/sokoban/units/astar_common.py +94 -0
- synth_ai/environments/examples/sokoban/units/test_building_task_set.py +49 -0
- synth_ai/environments/examples/sokoban/units/test_false_positive.py +120 -0
- synth_ai/environments/examples/sokoban/units/test_simple_run_through_environment.py +119 -0
- synth_ai/environments/examples/sokoban/units/test_sokoban_environment.py +98 -0
- synth_ai/environments/examples/sokoban/units/test_tree.py +364 -0
- synth_ai/environments/examples/tictactoe/__init__.py +1 -0
- synth_ai/environments/examples/tictactoe/agent_demos/test_synth_react.py +266 -0
- synth_ai/environments/examples/tictactoe/agent_demos/test_tictactoe_react_agent.py +470 -0
- synth_ai/environments/examples/tictactoe/engine.py +368 -0
- synth_ai/environments/examples/tictactoe/environment.py +239 -0
- synth_ai/environments/examples/tictactoe/taskset.py +214 -0
- synth_ai/environments/examples/tictactoe/units/test_tictactoe_engine.py +393 -0
- synth_ai/environments/examples/tictactoe/units/test_tictactoe_environment.py +493 -0
- synth_ai/environments/examples/tictactoe/units/test_tictactoe_taskset.py +191 -0
- synth_ai/environments/examples/verilog/__init__.py +10 -0
- synth_ai/environments/examples/verilog/agent_demos/test_synth_react.py +520 -0
- synth_ai/environments/examples/verilog/engine.py +328 -0
- synth_ai/environments/examples/verilog/environment.py +349 -0
- synth_ai/environments/examples/verilog/taskset.py +418 -0
- synth_ai/environments/examples/verilog/units/test_verilog_engine.py +466 -0
- synth_ai/environments/examples/verilog/units/test_verilog_environment.py +585 -0
- synth_ai/environments/examples/verilog/units/test_verilog_integration.py +383 -0
- synth_ai/environments/examples/verilog/units/test_verilog_taskset.py +457 -0
- synth_ai/environments/reproducibility/core.py +42 -0
- synth_ai/environments/reproducibility/tree.py +364 -0
- synth_ai/environments/service/app.py +78 -0
- synth_ai/environments/service/core_routes.py +775 -0
- synth_ai/environments/service/external_registry.py +57 -0
- synth_ai/environments/service/registry.py +9 -0
- synth_ai/environments/stateful/__init__.py +1 -0
- synth_ai/environments/stateful/core.py +28 -0
- synth_ai/environments/stateful/engine.py +21 -0
- synth_ai/environments/stateful/state.py +7 -0
- synth_ai/environments/tasks/api.py +19 -0
- synth_ai/environments/tasks/core.py +78 -0
- synth_ai/environments/tasks/filters.py +39 -0
- synth_ai/environments/tasks/utils.py +89 -0
- synth_ai/environments/v0_observability/history.py +3 -0
- synth_ai/environments/v0_observability/log.py +2 -0
- synth_ai/lm/caching/constants.py +1 -0
- synth_ai/{zyk/lms → lm}/caching/ephemeral.py +4 -8
- synth_ai/{zyk/lms → lm}/caching/handler.py +15 -15
- synth_ai/{zyk/lms → lm}/caching/initialize.py +2 -4
- synth_ai/{zyk/lms → lm}/caching/persistent.py +4 -10
- synth_ai/{zyk/lms → lm}/config.py +2 -1
- synth_ai/{zyk/lms → lm}/constants.py +2 -2
- synth_ai/{zyk/lms → lm}/core/all.py +10 -10
- synth_ai/{zyk/lms → lm}/core/main.py +57 -33
- synth_ai/{zyk/lms → lm}/core/vendor_clients.py +12 -10
- synth_ai/lm/cost/monitor.py +1 -0
- synth_ai/lm/cost/statefulness.py +1 -0
- synth_ai/lm/provider_support/__init__.py +8 -0
- synth_ai/lm/provider_support/anthropic.py +945 -0
- synth_ai/lm/provider_support/openai.py +1115 -0
- synth_ai/lm/provider_support/suppress_logging.py +31 -0
- synth_ai/{zyk/lms → lm}/structured_outputs/handler.py +58 -80
- synth_ai/{zyk/lms → lm}/structured_outputs/inject.py +6 -20
- synth_ai/{zyk/lms → lm}/structured_outputs/rehabilitate.py +6 -12
- synth_ai/{zyk/lms → lm}/vendors/core/anthropic_api.py +21 -30
- synth_ai/{zyk/lms → lm}/vendors/core/gemini_api.py +37 -32
- synth_ai/{zyk/lms → lm}/vendors/core/mistral_api.py +19 -28
- synth_ai/{zyk/lms → lm}/vendors/core/openai_api.py +26 -36
- synth_ai/{zyk/lms → lm}/vendors/openai_standard.py +29 -33
- synth_ai/{zyk/lms → lm}/vendors/retries.py +1 -1
- synth_ai/lm/vendors/supported/__init__.py +0 -0
- synth_ai/{zyk/lms → lm}/vendors/supported/custom_endpoint.py +131 -118
- synth_ai/{zyk/lms → lm}/vendors/supported/deepseek.py +4 -8
- synth_ai/{zyk/lms → lm}/vendors/supported/grok.py +6 -8
- synth_ai/{zyk/lms → lm}/vendors/supported/groq.py +1 -1
- synth_ai/{zyk/lms → lm}/vendors/supported/ollama.py +2 -2
- synth_ai/{zyk/lms → lm}/vendors/supported/openrouter.py +18 -16
- synth_ai/{zyk/lms → lm}/vendors/supported/together.py +1 -1
- synth_ai/tracing/__init__.py +0 -0
- synth_ai/tracing/abstractions.py +224 -0
- synth_ai/tracing/base_client.py +91 -0
- synth_ai/tracing/client_manager.py +131 -0
- synth_ai/tracing/config.py +140 -0
- synth_ai/tracing/context.py +146 -0
- synth_ai/tracing/decorators.py +679 -0
- synth_ai/tracing/events/__init__.py +0 -0
- synth_ai/tracing/events/manage.py +147 -0
- synth_ai/tracing/events/scope.py +86 -0
- synth_ai/tracing/events/store.py +227 -0
- synth_ai/tracing/immediate_client.py +152 -0
- synth_ai/tracing/local.py +18 -0
- synth_ai/tracing/log_client_base.py +74 -0
- synth_ai/tracing/retry_queue.py +187 -0
- synth_ai/tracing/trackers.py +515 -0
- synth_ai/tracing/upload.py +504 -0
- synth_ai/tracing/utils.py +9 -0
- synth_ai/zyk/__init__.py +28 -2
- synth_ai-0.2.1.dev0.dist-info/METADATA +349 -0
- synth_ai-0.2.1.dev0.dist-info/RECORD +261 -0
- synth_ai/zyk/lms/caching/constants.py +0 -1
- synth_ai/zyk/lms/cost/monitor.py +0 -1
- synth_ai/zyk/lms/cost/statefulness.py +0 -1
- synth_ai-0.1.9.dist-info/METADATA +0 -37
- synth_ai-0.1.9.dist-info/RECORD +0 -50
- /synth_ai/{zyk/lms/__init__.py → environments/reproducibility/helpers.py} +0 -0
- /synth_ai/{zyk/lms/caching → lm}/__init__.py +0 -0
- /synth_ai/{zyk/lms/core → lm/caching}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/caching/dbs.py +0 -0
- /synth_ai/{zyk/lms/cost → lm/core}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/core/exceptions.py +0 -0
- /synth_ai/{zyk/lms/structured_outputs → lm/cost}/__init__.py +0 -0
- /synth_ai/{zyk/lms/vendors → lm/structured_outputs}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/tools/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/tools/base.py +0 -0
- /synth_ai/{zyk/lms/vendors/core → lm/vendors}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/vendors/base.py +0 -0
- /synth_ai/{zyk/lms/vendors/local → lm/vendors/core}/__init__.py +0 -0
- /synth_ai/{zyk/lms/vendors/supported → lm/vendors/local}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/vendors/local/ollama.py +0 -0
- {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/WHEEL +0 -0
- {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,217 @@
|
|
1
|
+
import pytest
|
2
|
+
import asyncio
|
3
|
+
import uuid
|
4
|
+
|
5
|
+
from synth_ai.environments.examples.red.environment import (
|
6
|
+
PokemonRedEnvironment,
|
7
|
+
PokemonRedPublicState,
|
8
|
+
PokemonRedPrivateState,
|
9
|
+
)
|
10
|
+
from synth_ai.environments.environment.shared_engine import (
|
11
|
+
GetObservationCallable,
|
12
|
+
InternalObservation,
|
13
|
+
)
|
14
|
+
from synth_ai.environments.examples.red.taskset import PokemonRedTaskInstance
|
15
|
+
from synth_ai.environments.tasks.core import Impetus, Intent, TaskInstanceMetadata
|
16
|
+
from synth_ai.environments.environment.tools import EnvToolCall
|
17
|
+
|
18
|
+
|
19
|
+
class PressButtonCall(EnvToolCall):
    """Convenience wrapper that builds a ``press_button`` tool call.

    Args:
        button: Name of the button to press (for example ``"A"`` or ``"LEFT"``).
        frames: Value forwarded as the ``frames`` argument of the call; defaults to 1.
    """

    def __init__(self, button: str, frames: int = 1):
        call_args = {"button": button, "frames": frames}
        super().__init__(tool="press_button", args=call_args)
|
24
|
+
|
25
|
+
|
26
|
+
class ButtonTestObservationCallable(GetObservationCallable):
    """Observation callable used by the systematic button-press diagnostics.

    Besides returning the public/private state, it tries to grab a copy of the
    emulator screen from whichever caller on the stack owns an ``engine``.
    """

    def __init__(self):
        # Last successfully captured screen; stays None until a capture succeeds
        # and keeps its previous value when a later capture fails.
        self.screen_buffer = None

    async def get_observation(
        self, pub: PokemonRedPublicState, priv: PokemonRedPrivateState
    ) -> InternalObservation:
        """Build the observation dict for one step.

        Args:
            pub: Public state; ``step_count``, ``player_x``, ``player_y`` and
                ``map_id`` are read to build the summary line.
            priv: Private state, passed through unchanged.

        Returns:
            A dict with the keys ``public``, ``private``, ``formatted_obs`` and
            ``screen_buffer``.

        Raises:
            RuntimeError: If either ``pub`` or ``priv`` is ``None``.
        """
        if pub is None or priv is None:
            raise RuntimeError("Missing public or private state in get_observation")

        self._capture_screen_buffer()

        formatted_obs = (
            f"Step: {pub.step_count}, Position: ({pub.player_x}, {pub.player_y}), Map: {pub.map_id}"
        )

        return {
            "public": pub,
            "private": priv,
            "formatted_obs": formatted_obs,
            "screen_buffer": self.screen_buffer,
        }

    def _capture_screen_buffer(self) -> None:
        """Best-effort copy of the emulator screen into ``self.screen_buffer``.

        Walks up the call stack looking for a frame whose ``self`` has an
        ``engine`` attribute. Any failure is printed and otherwise ignored so
        that observation building never fails because of the screen capture.
        """
        frame = None
        try:
            import inspect

            frame = inspect.currentframe()
            env = None

            while frame:
                candidate = frame.f_locals.get("self") if "self" in frame.f_locals else None
                if "self" in frame.f_locals and hasattr(candidate, "engine"):
                    env = candidate
                    break
                frame = frame.f_back

            engine = env.engine if env and hasattr(env, "engine") else None
            emulator = engine.emulator if engine and hasattr(engine, "emulator") else None
            if emulator and hasattr(emulator, "screen"):
                self.screen_buffer = emulator.screen.ndarray.copy()
        except Exception as e:
            print(f"[DEBUG] Failed to extract screen buffer: {e}")
        finally:
            # Drop the frame reference explicitly: holding frame objects in
            # locals creates reference cycles (see the inspect module docs).
            del frame
|
69
|
+
|
70
|
+
|
71
|
+
async def test_single_vs_multiple_presses():
    """Measure how many presses of each button are needed to change the game state.

    For every button, a fresh environment is created for each press count from
    1 to ``_MAX_PRESS_COUNT``; the button is pressed that many times and the
    player position / map id are compared with the initial values. A summary
    is printed at the end.

    Returns:
        dict mapping button name -> {press_count -> result dict}. A result dict
        either describes the trial (``initial_position``, ``final_position``,
        ``initial_map``, ``final_map``, ``position_changed``, ``map_changed``,
        ``effective``) or has a single ``error`` key when the trial failed.
    """
    print("\n" + "=" * 80)
    print("SYSTEMATIC BUTTON PRESS REQUIREMENT ANALYSIS")
    print("=" * 80)

    # Buttons under test, paired with a label used only in the printed banner.
    test_scenarios = [
        ("LEFT", "movement"),
        ("RIGHT", "movement"),
        ("UP", "movement"),
        ("DOWN", "movement"),
        ("A", "interaction"),
        ("B", "cancel/back"),
    ]

    results = {}

    for button, action_type in test_scenarios:
        print(f"\n{'=' * 60}")
        print(f"TESTING {button} BUTTON ({action_type})")
        print(f"{'=' * 60}")

        button_results = {}

        for press_count in range(1, _MAX_PRESS_COUNT + 1):
            print(f"\nTesting {press_count} press(es) of {button}...")

            # Fresh environment per trial so earlier presses cannot leak in.
            inst = _build_button_task_instance(button, press_count)
            env = PokemonRedEnvironment(inst, custom_step_obs=ButtonTestObservationCallable())

            try:
                button_results[press_count] = await _run_press_trial(env, button, press_count)
            except Exception as e:
                # Diagnostic run: record the failure and keep going.
                print(f" [ERROR] Test failed: {e}")
                button_results[press_count] = {"error": str(e)}

        results[button] = button_results

    _report_button_analysis(results)

    return results


# Highest number of consecutive presses tried for each button.
_MAX_PRESS_COUNT = 5


def _build_button_task_instance(button, press_count):
    """Create a throwaway task instance describing one button/press-count trial."""
    return PokemonRedTaskInstance(
        id=uuid.uuid4(),
        impetus=Impetus(instructions=f"Test {button} button with {press_count} presses."),
        intent=Intent(
            rubric={"goal": f"Test {button}"},
            gold_trajectories=None,
            gold_state_diff={},
        ),
        metadata=TaskInstanceMetadata(),
        is_reproducible=True,
        initial_engine_snapshot=None,
    )


async def _run_press_trial(env, button, press_count):
    """Initialize ``env``, press ``button`` ``press_count`` times and summarize the effect.

    Returns a result dict; initialization or step failures reported by the
    environment are returned as ``{"error": ...}`` so that a half-finished
    trial is never reported as a valid measurement.
    """
    obs_payload = await env.initialize()
    if "error" in obs_payload:
        print(f"[ERROR] Init failed: {obs_payload['error']}")
        return {"error": str(obs_payload["error"])}

    initial_pub = obs_payload["public"]
    initial_position = (initial_pub.player_x, initial_pub.player_y)
    initial_map_id = initial_pub.map_id

    print(f" Initial state: pos={initial_position}, map={initial_map_id}")

    final_position = initial_position
    final_map_id = initial_map_id

    for press_num in range(press_count):
        step_result = await env.step([[PressButtonCall(button)]])
        if "error" in step_result:
            print(f" [ERROR] Step {press_num + 1} failed: {step_result['error']}")
            return {"error": str(step_result["error"])}

        new_pub = step_result["public"]
        final_position = (new_pub.player_x, new_pub.player_y)
        final_map_id = new_pub.map_id

    position_changed = final_position != initial_position
    map_changed = final_map_id != initial_map_id

    result = {
        "initial_position": initial_position,
        "final_position": final_position,
        "initial_map": initial_map_id,
        "final_map": final_map_id,
        "position_changed": position_changed,
        "map_changed": map_changed,
        "effective": position_changed or map_changed,
    }

    print(f" Result: pos={final_position}, map={final_map_id}")
    print(f" Effect: {'YES' if result['effective'] else 'NO'}")

    return result


def _report_button_analysis(results):
    """Print, per button, the smallest effective press count and each trial's outcome."""
    print("\n" + "=" * 80)
    print("ANALYSIS AND RECOMMENDATIONS")
    print("=" * 80)

    for button, button_results in results.items():
        print(f"\n{button} BUTTON:")

        # Smallest press count whose trial changed position or map; failed or
        # missing trials have no "effective" key and are skipped.
        min_effective_presses = next(
            (
                count
                for count in range(1, _MAX_PRESS_COUNT + 1)
                if button_results.get(count, {}).get("effective", False)
            ),
            None,
        )

        if min_effective_presses:
            print(f" ✓ Minimum effective presses: {min_effective_presses}")
            print(f" ✓ Recommendation: Use {min_effective_presses} presses for {button}")
        else:
            print(" ✗ No effective movement detected with up to 5 presses")

        for press_count, result in button_results.items():
            if "error" not in result:
                effect_str = "EFFECTIVE" if result.get("effective") else "no effect"
                print(f" {press_count} press(es): {effect_str}")
|
204
|
+
|
205
|
+
|
206
|
+
@pytest.mark.asyncio
async def test_button_press_requirements():
    """Run the button-press diagnostic under pytest.

    Individual trial outcomes are reported on stdout rather than asserted;
    the only check is that the diagnostic actually produced a report.
    """
    results = await test_single_vs_multiple_presses()

    assert results, "Button press diagnostic produced no results"
|
213
|
+
|
214
|
+
|
215
|
+
if __name__ == "__main__":
    # Script entry point: run the diagnostic without going through pytest.
    entry_coroutine = test_button_press_requirements()
    asyncio.run(entry_coroutine)
|
@@ -0,0 +1,192 @@
|
|
1
|
+
import pytest
|
2
|
+
from synth_ai.environments.examples.red.engine import (
|
3
|
+
PokemonRedEngine,
|
4
|
+
BUTTON_MAP,
|
5
|
+
PokemonRedEngineSnapshot,
|
6
|
+
)
|
7
|
+
from synth_ai.environments.examples.red.taskset import (
|
8
|
+
INSTANCE as DEFAULT_TASK,
|
9
|
+
)
|
10
|
+
|
11
|
+
|
12
|
+
class TestPokemonRedEngine:
    """Engine-level tests that exercise ``PokemonRedEngine`` against the REAL ROM."""

    @pytest.fixture
    def task_instance(self):
        """Provide the default task instance exported by the taskset module."""
        return DEFAULT_TASK

    def test_button_map_completeness(self):
        """Every expected button is mapped to a non-empty string."""
        for name in ("A", "B", "UP", "DOWN", "LEFT", "RIGHT", "START", "SELECT"):
            assert name in BUTTON_MAP

        # Each mapping value should be a non-empty string (PyBoy event name).
        for mapped in BUTTON_MAP.values():
            assert isinstance(mapped, str)
            assert mapped

    def test_engine_initialization(self, task_instance):
        """A freshly built engine starts with zeroed counters and a live emulator."""
        engine = PokemonRedEngine(task_instance)

        assert engine.task_instance == task_instance
        assert engine._total_reward == 0.0
        assert engine._step_count == 0
        assert engine._previous_state is None
        # A real PyBoy instance is expected here.
        assert engine.emulator is not None

    def test_rom_path_resolution(self, task_instance):
        """The resolved ROM path exists and has the expected file name."""
        rom_path = PokemonRedEngine(task_instance)._get_rom_path()

        assert rom_path.exists()
        assert rom_path.name == "pokemon_red.gb"

    @pytest.mark.asyncio
    async def test_press_button_real(self, task_instance):
        """Valid button presses complete without raising."""
        engine = PokemonRedEngine(task_instance)

        # One single-frame press followed by one multi-frame press.
        for name, frames in (("A", 1), ("RIGHT", 3)):
            engine._press_button(name, frames)

    @pytest.mark.asyncio
    async def test_press_button_invalid(self, task_instance):
        """An unknown button name is rejected with ``ValueError``."""
        engine = PokemonRedEngine(task_instance)

        with pytest.raises(ValueError, match="Invalid button: INVALID"):
            engine._press_button("INVALID")

    @pytest.mark.asyncio
    async def test_extract_current_state_real(self, task_instance):
        """State extraction returns all expected keys with the expected types."""
        state = PokemonRedEngine(task_instance)._extract_current_state()

        required_keys = (
            "map_id",
            "player_x",
            "player_y",
            "badges",
            "party_hp_current",
            "party_hp_max",
            "party_level",
            "party_xp",
            "in_battle",
            "battle_outcome",
            "inventory_count",
            "menu_state",
            "warp_flag",
        )
        for key in required_keys:
            assert key in state

        for int_key in ("map_id", "player_x", "player_y", "badges"):
            assert isinstance(state[int_key], int)
        assert isinstance(state["in_battle"], bool)

    @pytest.mark.asyncio
    async def test_reset_engine_real(self, task_instance):
        """Resetting the engine zeroes rewards/steps and yields integer coordinates."""
        engine = PokemonRedEngine(task_instance)

        priv, pub = await engine._reset_engine()

        assert engine._total_reward == 0.0
        assert engine._step_count == 0
        assert priv.reward_last_step == 0.0
        assert priv.total_reward == 0.0
        assert not priv.terminated

        # Public state should carry real values.
        for attr in ("map_id", "player_x", "player_y"):
            assert isinstance(getattr(pub, attr), int)

    @pytest.mark.asyncio
    async def test_step_engine_real(self, task_instance):
        """A single step advances the counters and returns typed state."""
        engine = PokemonRedEngine(task_instance)
        await engine._reset_engine()

        priv, pub = await engine._step_engine({"button": "A", "frames": 1})

        assert engine._step_count == 1
        assert priv.step_count == 1
        assert isinstance(priv.reward_last_step, float)
        assert priv.total_reward == engine._total_reward

        # Should have actual game state.
        for attr in ("map_id", "badges", "party_hp_current"):
            assert isinstance(getattr(pub, attr), int)

    @pytest.mark.asyncio
    async def test_button_sequence_real(self, task_instance):
        """Stepping through every button keeps the step counters in sync."""
        engine = PokemonRedEngine(task_instance)
        await engine._reset_engine()

        buttons = ["A", "B", "UP", "DOWN", "LEFT", "RIGHT", "START", "SELECT"]

        for expected_steps, button in enumerate(buttons, start=1):
            priv, pub = await engine._step_engine({"button": button, "frames": 1})

            assert engine._step_count == expected_steps
            assert priv.step_count == expected_steps

            # Game state should remain consistent.
            for attr in ("map_id", "player_x", "player_y"):
                assert isinstance(getattr(pub, attr), int)

    @pytest.mark.asyncio
    async def test_serialization_real(self, task_instance):
        """Serializing after a few steps captures reward, step count and save state."""
        engine = PokemonRedEngine(task_instance)
        await engine._reset_engine()

        # Take a few steps so the snapshot is not the initial state.
        for button in ("A", "RIGHT"):
            await engine._step_engine({"button": button, "frames": 1})

        snapshot = await engine._serialize_engine()

        assert isinstance(snapshot, PokemonRedEngineSnapshot)
        assert snapshot.total_reward == engine._total_reward
        assert snapshot.step_count == engine._step_count
        assert "_save_state_bytes" in snapshot.state_data

    @pytest.mark.asyncio
    async def test_rom_memory_access(self, task_instance):
        """Emulator memory can be indexed and yields integers."""
        engine = PokemonRedEngine(task_instance)

        assert engine.emulator is not None
        assert hasattr(engine.emulator, "memory")

        memory = engine.emulator.memory
        # Addresses read by this test: badge flags, player x, player y.
        for address in (0xD356, 0xD362, 0xD361):
            # Should be a valid integer (even if zero initially).
            assert isinstance(memory[address], int)
|