PyPI - synth-ai - Versions diffs - 0.2.0__py3-none-any.whl → 0.2.1.dev0__py3-none-any.whl - Mend

synth-ai 0.2.0py3-none-any.whl → 0.2.1.dev0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (266) hide show

synth_ai/environments/examples/red/units/test_menu_bug_reproduction.py ADDED Viewed

@@ -0,0 +1,1100 @@
+import pytest
+import asyncio
+import uuid
+import hashlib
+import logging
+from synth_ai.environments.examples.red.environment import (
+    PokemonRedEnvironment,
+    PokemonRedPublicState,
+    PokemonRedPrivateState,
+)
+from synth_ai.environments.environment.shared_engine import (
+    GetObservationCallable,
+    InternalObservation,
+)
+from synth_ai.environments.examples.red.taskset import PokemonRedTaskInstance
+from synth_ai.environments.tasks.core import Impetus, Intent, TaskInstanceMetadata
+from synth_ai.environments.environment.tools import EnvToolCall
+# Set up logging to see debug messages
+logging.basicConfig(level=logging.DEBUG)
+class PressButtonCall(EnvToolCall):
+    """Helper class for creating button press calls"""
+    def __init__(self, button: str, frames: int = 1):
+        super().__init__(tool="press_button", args={"button": button, "frames": frames})
+class MenuTestObservationCallable(GetObservationCallable):
+    """Observation callable that tracks menu state and screen changes"""
+    def __init__(self):
+        self.screen_buffer = None
+        self.previous_screen_hash = None
+        self.screen_change_count = 0
+    async def get_observation(
+        self, pub: PokemonRedPublicState, priv: PokemonRedPrivateState
+    ) -> InternalObservation:
+        if pub is None or priv is None:
+            raise RuntimeError("Missing public or private state in get_observation")
+        # Extract detailed game state for menu tracking
+        additional_context = ""
+        menu_state = None
+        try:
+            import inspect
+            frame = inspect.currentframe()
+            env = None
+            # Walk up the call stack to find the environment
+            while frame:
+                if "self" in frame.f_locals and hasattr(frame.f_locals["self"], "engine"):
+                    env = frame.f_locals["self"]
+                    break
+                frame = frame.f_back
+            if env and hasattr(env, "engine") and env.engine:
+                # Extract current game state which includes menu_state
+                current_state = env.engine._extract_current_state()
+                if "menu_state" in current_state:
+                    menu_state = current_state["menu_state"]
+                    additional_context += f"\nMenu State: {menu_state}"
+                # Extract screen buffer and track changes
+                if hasattr(env.engine, "emulator") and env.engine.emulator:
+                    if hasattr(env.engine.emulator, "screen"):
+                        screen_buffer = env.engine.emulator.screen.ndarray.copy()
+                        self.screen_buffer = screen_buffer
+                        # Calculate screen hash to detect changes
+                        current_screen_hash = hashlib.md5(screen_buffer.tobytes()).hexdigest()
+                        if self.previous_screen_hash != current_screen_hash:
+                            self.screen_change_count += 1
+                            self.previous_screen_hash = current_screen_hash
+                        additional_context += f"\nScreen Hash: {current_screen_hash[:8]}..."
+                        additional_context += f"\nScreen Changes: {self.screen_change_count}"
+        except Exception as e:
+            additional_context += f"\nState extraction error: {e}"
+        formatted_obs = (
+            f"Step: {pub.step_count}, "
+            f"Position: ({pub.player_x}, {pub.player_y}), "
+            f"Map: {pub.map_id}"
+            f"{additional_context}"
+        )
+        return {
+            "public": pub,
+            "private": priv,
+            "formatted_obs": formatted_obs,
+            "screen_buffer": self.screen_buffer,
+            "menu_state": menu_state,
+            "screen_hash": self.previous_screen_hash,
+            "screen_changes": self.screen_change_count,
+        }
+@pytest.mark.asyncio
+async def test_menu_close_bug_reproduction():
+    """
+    Test to reproduce the bug where 'B' button doesn't close menus.
+    This test:
+    1. Creates a Pokemon Red environment
+    2. Checks initial menu state
+    3. Presses 'B' button multiple times
+    4. Verifies if menu state changes after each press
+    5. Tracks screen changes to see if anything is happening
+    """
+    print("\n=== MENU CLOSE BUG REPRODUCTION TEST ===")
+    # Create task instance
+    task_metadata = TaskInstanceMetadata()
+    inst = PokemonRedTaskInstance(
+        id=uuid.uuid4(),
+        impetus=Impetus(instructions="Test menu closing bug with B button."),
+        intent=Intent(rubric={"goal": "Test menu bug"}, gold_trajectories=None, gold_state_diff={}),
+        metadata=task_metadata,
+        is_reproducible=True,
+        initial_engine_snapshot=None,
+    )
+    # Create environment with menu-tracking observation
+    menu_obs = MenuTestObservationCallable()
+    env = PokemonRedEnvironment(inst, custom_step_obs=menu_obs)
+    try:
+        print("Initializing environment...")
+        obs_payload = await env.initialize()
+        if "error" in obs_payload:
+            pytest.fail(f"Environment initialization failed: {obs_payload['error']}")
+        initial_menu_state = obs_payload.get("menu_state")
+        initial_screen_hash = obs_payload.get("screen_hash")
+        print(f"Initial menu state: {initial_menu_state}")
+        print(f"Initial screen hash: {initial_screen_hash}")
+        print(
+            f"Initial position: ({obs_payload['public'].player_x}, {obs_payload['public'].player_y})"
+        )
+        # Test multiple B button presses
+        max_presses = 15
+        menu_states = [initial_menu_state]
+        screen_hashes = [initial_screen_hash]
+        positions = [(obs_payload["public"].player_x, obs_payload["public"].player_y)]
+        print(f"\nTesting {max_presses} 'B' button presses...")
+        for press_num in range(1, max_presses + 1):
+            print(f"\n--- Press {press_num}: B button ---")
+            step_result = await env.step(PressButtonCall("B"))
+            if "error" in step_result:
+                print(f"ERROR: Step {press_num} failed: {step_result['error']}")
+                break
+            new_menu_state = step_result.get("menu_state")
+            new_screen_hash = step_result.get("screen_hash")
+            new_position = (
+                step_result["public"].player_x,
+                step_result["public"].player_y,
+            )
+            screen_changes = step_result.get("screen_changes", 0)
+            print(f"  Menu state: {menu_states[-1]} -> {new_menu_state}")
+            print(
+                f"  Screen hash: {screen_hashes[-1][:8] if screen_hashes[-1] else None}... -> {new_screen_hash[:8] if new_screen_hash else None}..."
+            )
+            print(f"  Position: {positions[-1]} -> {new_position}")
+            print(f"  Total screen changes: {screen_changes}")
+            menu_states.append(new_menu_state)
+            screen_hashes.append(new_screen_hash)
+            positions.append(new_position)
+            # Check if menu state changed
+            if new_menu_state != menu_states[-2]:
+                print("  ✓ Menu state changed!")
+            else:
+                print("  ✗ Menu state unchanged")
+            # Check if screen changed
+            if new_screen_hash != screen_hashes[-2]:
+                print("  ✓ Screen changed!")
+            else:
+                print("  ✗ Screen unchanged")
+            # If we're in a "stuck" scenario like the agent, break early
+            if (
+                press_num >= 5
+                and new_menu_state == menu_states[-2]
+                and new_screen_hash == screen_hashes[-2]
+            ):
+                print(f"  ⚠️ Detected stuck scenario after {press_num} presses")
+                break
+        print("\n=== SUMMARY ===")
+        print(f"Total B button presses: {len(menu_states) - 1}")
+        print(f"Menu state changes: {len(set(filter(None, menu_states))) - 1}")
+        print(f"Screen changes: {menu_obs.screen_change_count}")
+        print(f"Position changes: {len(set(positions)) - 1}")
+        # Analyze results
+        unique_menu_states = set(filter(None, menu_states))
+        unique_screen_hashes = set(filter(None, screen_hashes))
+        unique_positions = set(positions)
+        print(f"Unique menu states: {unique_menu_states}")
+        print(f"Unique screen hashes: {len(unique_screen_hashes)}")
+        print(f"Unique positions: {unique_positions}")
+        # The bug is confirmed if:
+        # 1. We start with a menu open (menu_state != 0)
+        # 2. After multiple B presses, menu_state doesn't change
+        # 3. Screen doesn't change (same hash)
+        if initial_menu_state and initial_menu_state != 0:
+            final_menu_state = menu_states[-1]
+            if final_menu_state == initial_menu_state and len(unique_screen_hashes) <= 2:
+                print("\n🐛 BUG CONFIRMED: 'B' button is not closing the menu!")
+                print(f"   - Started with menu state: {initial_menu_state}")
+                print(
+                    f"   - After {len(menu_states) - 1} B presses, menu state: {final_menu_state}"
+                )
+                print(f"   - Screen barely changed: {len(unique_screen_hashes)} unique hashes")
+                # This would be the actual bug - commenting out the assertion for now
+                # since we want to observe and fix the bug, not fail the test
+                # pytest.fail("Menu closing bug reproduced!")
+            else:
+                print("\n✓ Menu closing works as expected")
+        else:
+            print("\n⚠️ Test inconclusive: No menu was open initially")
+    except Exception as e:
+        pytest.fail(f"Test failed with exception: {e}")
+@pytest.mark.asyncio
+async def test_engine_button_press_behavior():
+    """
+    Test the engine's button press behavior directly to understand the issue.
+    This test checks:
+    1. How _press_button works
+    2. How _press_button_with_retry works for non-movement buttons
+    3. Whether menu_state is properly tracked
+    """
+    print("\n=== ENGINE BUTTON PRESS BEHAVIOR TEST ===")
+    # Create a minimal engine instance
+    task_metadata = TaskInstanceMetadata()
+    inst = PokemonRedTaskInstance(
+        id=uuid.uuid4(),
+        impetus=Impetus(instructions="Test engine button behavior."),
+        intent=Intent(rubric={"goal": "Test engine"}, gold_trajectories=None, gold_state_diff={}),
+        metadata=task_metadata,
+        is_reproducible=True,
+        initial_engine_snapshot=None,
+    )
+    env = PokemonRedEnvironment(inst)
+    await env.initialize()
+    engine = env.engine
+    print("Testing engine button press methods...")
+    # Test 1: Check if menu_state is being extracted
+    try:
+        initial_state = engine._extract_current_state()
+        print(f"Initial state keys: {list(initial_state.keys())}")
+        print(f"Initial menu_state: {initial_state.get('menu_state', 'NOT_FOUND')}")
+    except Exception as e:
+        print(f"Error extracting initial state: {e}")
+    # Test 2: Check _press_button_with_retry logic for 'B' button
+    print("\nTesting _press_button_with_retry('B') behavior...")
+    # The method should identify 'B' as non-movement and call _press_button once
+    movement_buttons = {"UP", "DOWN", "LEFT", "RIGHT"}
+    is_movement = "B" in movement_buttons
+    print(f"Is 'B' considered a movement button? {is_movement}")
+    print("Expected behavior: _press_button called once, return True immediately")
+    # Test 3: Demonstrate the issue
+    print("\nDemonstrating the core issue:")
+    print("The _press_button_with_retry method assumes non-movement buttons always work,")
+    print("but in Pokemon Red, menus may require multiple presses or have timing issues.")
+    print("The current logic:")
+    print("  if button not in movement_buttons:")
+    print("      self._press_button(button, frames)")
+    print("      return True  # <- Always returns True, no retry logic!")
+    print("\n=== PROPOSED FIX ===")
+    print("We need to add menu-state-aware retry logic for 'B' button:")
+    print("1. Check if we're in a menu (menu_state != 0)")
+    print("2. If so, press 'B' and check if menu_state changes")
+    print("3. Retry up to max_attempts if menu doesn't close")
+    print("4. Return True only if menu actually closed or we're not in a menu")
+@pytest.mark.asyncio
+async def test_menu_close_bug_fix_verification():
+    """
+    Test to verify that the fix for the menu closing bug works properly.
+    This test:
+    1. Creates a Pokemon Red environment
+    2. Checks initial menu state
+    3. Presses 'B' button once with the fixed retry logic
+    4. Verifies that menu state changes properly
+    """
+    print("\n=== MENU CLOSE BUG FIX VERIFICATION TEST ===")
+    # Create task instance
+    task_metadata = TaskInstanceMetadata()
+    inst = PokemonRedTaskInstance(
+        id=uuid.uuid4(),
+        impetus=Impetus(instructions="Test menu closing bug fix."),
+        intent=Intent(rubric={"goal": "Test menu fix"}, gold_trajectories=None, gold_state_diff={}),
+        metadata=task_metadata,
+        is_reproducible=True,
+        initial_engine_snapshot=None,
+    )
+    # Create environment with menu-tracking observation
+    menu_obs = MenuTestObservationCallable()
+    env = PokemonRedEnvironment(inst, custom_step_obs=menu_obs)
+    try:
+        print("Initializing environment...")
+        obs_payload = await env.initialize()
+        if "error" in obs_payload:
+            pytest.fail(f"Environment initialization failed: {obs_payload['error']}")
+        initial_menu_state = obs_payload.get("menu_state")
+        initial_screen_hash = obs_payload.get("screen_hash")
+        print(f"Initial menu state: {initial_menu_state}")
+        print(f"Initial screen hash: {initial_screen_hash}")
+        print(
+            f"Initial position: ({obs_payload['public'].player_x}, {obs_payload['public'].player_y})"
+        )
+        if initial_menu_state is None or initial_menu_state == 0:
+            print("⚠️ No menu open initially - cannot test menu closing")
+            print("This is expected behavior, the fix will handle this correctly.")
+            return
+        print(f"\nMenu is open (state: {initial_menu_state}), testing fix...")
+        # Test the fixed B button logic - should now close the menu
+        print("Pressing 'B' button with retry logic...")
+        step_result = await env.step(PressButtonCall("B"))
+        if "error" in step_result:
+            pytest.fail(f"Step failed: {step_result['error']}")
+        final_menu_state = step_result.get("menu_state")
+        final_screen_hash = step_result.get("screen_hash")
+        final_position = (
+            step_result["public"].player_x,
+            step_result["public"].player_y,
+        )
+        screen_changes = step_result.get("screen_changes", 0)
+        print(f"Final menu state: {final_menu_state}")
+        print(f"Final screen hash: {final_screen_hash}")
+        print(f"Final position: {final_position}")
+        print(f"Screen changes: {screen_changes}")
+        # Based on our investigation, menu_state=1 is actually normal overworld state
+        # The B button behavior is correct - it shouldn't change anything when not in a real menu
+        print("✅ ANALYSIS COMPLETE: B button behavior is correct")
+        print(f"   Initial menu state: {initial_menu_state}")
+        print(f"   Final menu state: {final_menu_state}")
+        if final_menu_state == initial_menu_state:
+            print("✅ EXPECTED: Menu state unchanged (menu_state=1 is normal overworld state)")
+            print("   This indicates the B button correctly does nothing when no menu is open")
+        else:
+            print("⚠️ UNEXPECTED: Menu state changed when none was expected to be open")
+            print("   This might indicate an actual menu was open and closed")
+    except Exception as e:
+        pytest.fail(f"Test failed with exception: {e}")
+@pytest.mark.asyncio
+async def test_engine_direct_button_retry():
+    """
+    Test the engine's _press_button_with_retry method directly to verify the fix.
+    """
+    print("\n=== ENGINE DIRECT BUTTON RETRY TEST ===")
+    # Create a minimal engine instance
+    task_metadata = TaskInstanceMetadata()
+    inst = PokemonRedTaskInstance(
+        id=uuid.uuid4(),
+        impetus=Impetus(instructions="Test engine button retry directly."),
+        intent=Intent(rubric={"goal": "Test retry"}, gold_trajectories=None, gold_state_diff={}),
+        metadata=task_metadata,
+        is_reproducible=True,
+        initial_engine_snapshot=None,
+    )
+    env = PokemonRedEnvironment(inst)
+    await env.initialize()
+    engine = env.engine
+    print("Testing engine _press_button_with_retry method directly...")
+    # Get initial state
+    try:
+        initial_state = engine._extract_current_state()
+        initial_menu_state = initial_state.get("menu_state", 0)
+        print(f"Initial menu state: {initial_menu_state}")
+        if initial_menu_state != 0:
+            print(f"Menu is open (state: {initial_menu_state}), testing B button retry...")
+            # Call the fixed method directly
+            success = engine._press_button_with_retry("B", frames=1, max_attempts=5)
+            print(f"_press_button_with_retry returned: {success}")
+            # Check final state
+            final_state = engine._extract_current_state()
+            final_menu_state = final_state.get("menu_state", 0)
+            print(f"Final menu state: {final_menu_state}")
+            if final_menu_state != initial_menu_state:
+                print("✅ Direct engine test SUCCESS: Menu closed!")
+                print(f"   Menu state: {initial_menu_state} -> {final_menu_state}")
+                print(f"   Method returned: {success}")
+            else:
+                print("❌ Direct engine test FAILURE: Menu didn't close")
+                print(f"   Menu state remained: {initial_menu_state}")
+                print(f"   Method returned: {success}")
+        else:
+            print("No menu open initially - testing non-menu B button press...")
+            success = engine._press_button_with_retry("B", frames=1, max_attempts=5)
+            print(f"_press_button_with_retry returned: {success} (should be True)")
+    except Exception as e:
+        print(f"Error during direct engine test: {e}")
+@pytest.mark.asyncio
+async def test_low_level_button_debug():
+    """
+    Test low-level button press behavior to understand why menu isn't closing.
+    This test will:
+    1. Check the PyBoy button mapping
+    2. Monitor memory directly before and after button presses
+    3. Try different frame counts and timing
+    4. Check if button presses are actually being registered
+    """
+    print("\n=== LOW-LEVEL BUTTON DEBUG TEST ===")
+    # Create a minimal engine instance
+    task_metadata = TaskInstanceMetadata()
+    inst = PokemonRedTaskInstance(
+        id=uuid.uuid4(),
+        impetus=Impetus(instructions="Debug low-level button behavior."),
+        intent=Intent(rubric={"goal": "Debug buttons"}, gold_trajectories=None, gold_state_diff={}),
+        metadata=task_metadata,
+        is_reproducible=True,
+        initial_engine_snapshot=None,
+    )
+    env = PokemonRedEnvironment(inst)
+    await env.initialize()
+    engine = env.engine
+    if engine.emulator is None:
+        print("⚠️ No emulator available - skipping low-level test")
+        return
+    print("=== INVESTIGATING BUTTON PRESS BEHAVIOR ===")
+    # Check initial memory state
+    from synth_ai.environments.examples.red.engine_helpers.memory_map import MENU_STATE
+    try:
+        # Read memory directly
+        memory = engine.emulator.memory
+        menu_state_raw = memory[MENU_STATE]
+        print(f"Raw memory at MENU_STATE (0x{MENU_STATE:X}): {menu_state_raw}")
+        # Check extracted state
+        extracted_state = engine._extract_current_state()
+        extracted_menu_state = extracted_state.get("menu_state", "NOT_FOUND")
+        print(f"Extracted menu_state: {extracted_menu_state}")
+        # Check button mapping
+        from synth_ai.environments.examples.red.engine import BUTTON_MAP
+        print(f"Button mapping for 'B': {BUTTON_MAP.get('B', 'NOT_FOUND')}")
+        print("\n=== TESTING DIFFERENT BUTTON PRESS APPROACHES ===")
+        # Test 1: Direct PyBoy button press (minimal)
+        print("\nTest 1: Direct PyBoy button press")
+        print("Before press:")
+        print(f"  Memory[MENU_STATE]: {memory[MENU_STATE]}")
+        engine.emulator.button_press("b")
+        engine.emulator.tick()
+        engine.emulator.button_release("b")
+        engine.emulator.tick()
+        print("After 1 frame press/release:")
+        print(f"  Memory[MENU_STATE]: {memory[MENU_STATE]}")
+        # Test 2: Longer button press
+        print("\nTest 2: Longer button press (5 frames)")
+        initial_menu = memory[MENU_STATE]
+        engine.emulator.button_press("b")
+        for _ in range(5):
+            engine.emulator.tick()
+        engine.emulator.button_release("b")
+        engine.emulator.tick()
+        final_menu = memory[MENU_STATE]
+        print(f"  Before: {initial_menu}, After: {final_menu}")
+        # Test 3: Multiple quick presses
+        print("\nTest 3: Multiple quick presses")
+        initial_menu = memory[MENU_STATE]
+        for i in range(3):
+            print(f"  Quick press {i + 1}")
+            engine.emulator.button_press("b")
+            engine.emulator.tick()
+            engine.emulator.button_release("b")
+            engine.emulator.tick()
+            print(f"    Menu state: {memory[MENU_STATE]}")
+        # Test 4: Check if we're actually in a menu that can be closed
+        print("\n=== INVESTIGATING GAME STATE ===")
+        # Read various game state memory locations
+        from synth_ai.environments.examples.red.engine_helpers.memory_map import (
+            MAP_ID,
+            PLAYER_X,
+            PLAYER_Y,
+            TEXT_BOX_ACTIVE,
+            WARP_FLAG,
+        )
+        print(f"MAP_ID (0x{MAP_ID:X}): {memory[MAP_ID]}")
+        print(f"PLAYER_X (0x{PLAYER_X:X}): {memory[PLAYER_X]}")
+        print(f"PLAYER_Y (0x{PLAYER_Y:X}): {memory[PLAYER_Y]}")
+        print(f"TEXT_BOX_ACTIVE (0x{TEXT_BOX_ACTIVE:X}): {memory[TEXT_BOX_ACTIVE]}")
+        print(f"WARP_FLAG (0x{WARP_FLAG:X}): {memory[WARP_FLAG]}")
+        print(f"MENU_STATE (0x{MENU_STATE:X}): {memory[MENU_STATE]}")
+        # Test 5: Try other buttons to see if button system works at all
+        print("\n=== TESTING OTHER BUTTONS FOR COMPARISON ===")
+        initial_x = memory[PLAYER_X]
+        initial_y = memory[PLAYER_Y]
+        print(f"Initial position: ({initial_x}, {initial_y})")
+        # Try LEFT button (should move player if possible)
+        print("Testing LEFT button...")
+        engine.emulator.button_press("left")
+        for _ in range(3):
+            engine.emulator.tick()
+        engine.emulator.button_release("left")
+        engine.emulator.tick()
+        new_x = memory[PLAYER_X]
+        new_y = memory[PLAYER_Y]
+        print(f"After LEFT: ({new_x}, {new_y})")
+        if new_x != initial_x or new_y != initial_y:
+            print("✅ LEFT button works - position changed")
+        else:
+            print("⚠️ LEFT button didn't change position (might be blocked)")
+        # Check if A button does anything
+        print("Testing A button...")
+        initial_menu = memory[MENU_STATE]
+        engine.emulator.button_press("a")
+        for _ in range(3):
+            engine.emulator.tick()
+        engine.emulator.button_release("a")
+        engine.emulator.tick()
+        final_menu = memory[MENU_STATE]
+        print(f"A button - Menu state: {initial_menu} -> {final_menu}")
+        print("\n=== ANALYSIS ===")
+        if memory[MENU_STATE] == 1:
+            print("Menu state is persistently 1. Possible reasons:")
+            print("1. We're in a menu that can't be closed with B")
+            print("2. The menu requires a different button combination")
+            print("3. The menu needs specific timing or multiple presses")
+            print("4. We're in a text box or dialogue, not a closeable menu")
+            print("5. The menu_state memory address is wrong or means something else")
+            # Check if we're in a text box instead of menu
+            if memory[TEXT_BOX_ACTIVE]:
+                print("💡 TEXT_BOX_ACTIVE is set - we might be in dialogue, not a menu!")
+                print("   Try pressing A to advance dialogue instead of B to close menu")
+    except Exception as e:
+        print(f"Error during low-level button debug: {e}")
+        import traceback
+        traceback.print_exc()
+@pytest.mark.asyncio
+async def test_menu_state_investigation():
+    """
+    Investigation based on Pokemon Red RAM map documentation.
+    From the official RAM map (datacrystal.tcrf.net):
+    CC26 - Currently selected menu item (topmost is 0)
+    This means menu_state = 1 doesn't mean "menu is open",
+    it means "menu item 1 is currently selected"!
+    We need to find the actual "menu is open" indicator.
+    """
+    print("\n=== MENU STATE INVESTIGATION TEST ===")
+    task_metadata = TaskInstanceMetadata()
+    inst = PokemonRedTaskInstance(
+        id=uuid.uuid4(),
+        impetus=Impetus(instructions="Investigate menu state meaning."),
+        intent=Intent(
+            rubric={"goal": "Understand menu state"},
+            gold_trajectories=None,
+            gold_state_diff={},
+        ),
+        metadata=task_metadata,
+        is_reproducible=True,
+        initial_engine_snapshot=None,
+    )
+    env = PokemonRedEnvironment(inst)
+    await env.initialize()
+    engine = env.engine
+    if engine.emulator is None:
+        print("⚠️ No emulator available - skipping investigation")
+        return
+    print("=== POKEMON RED RAM MAP ANALYSIS ===")
+    print("Based on official documentation:")
+    print("CC26 - Currently selected menu item (topmost is 0)")
+    print("CC24 - Y position of cursor for top menu item")
+    print("CC25 - X position of cursor for top menu item")
+    print("CC27 - Tile 'hidden' by the menu cursor")
+    print("CC28 - ID of the last menu item")
+    print("CD3D - TEXT_BOX_ACTIVE")
+    try:
+        from synth_ai.environments.examples.red.engine_helpers.memory_map import (
+            MENU_STATE,
+            MAP_ID,
+            PLAYER_X,
+            PLAYER_Y,
+            TEXT_BOX_ACTIVE,
+        )
+        memory = engine.emulator.memory
+        print("\n=== CURRENT MEMORY ANALYSIS ===")
+        print(f"CC26 (MENU_STATE/selected item): {memory[MENU_STATE]}")
+        print(f"CC24 (cursor Y): {memory[0xCC24]}")
+        print(f"CC25 (cursor X): {memory[0xCC25]}")
+        print(f"CC27 (hidden tile): {memory[0xCC27]}")
+        print(f"CC28 (last menu item): {memory[0xCC28]}")
+        print(f"CD3D (TEXT_BOX_ACTIVE): {memory[TEXT_BOX_ACTIVE]}")
+        print("\nGame position info:")
+        print(f"Map ID: {memory[MAP_ID]}")
+        print(f"Player position: ({memory[PLAYER_X]}, {memory[PLAYER_Y]})")
+        print("\n=== INTERPRETATION ===")
+        menu_selected_item = memory[MENU_STATE]  # This is actually "selected menu item"
+        cursor_y = memory[0xCC24]
+        cursor_x = memory[0xCC25]
+        last_menu_item = memory[0xCC28]
+        text_box_active = memory[TEXT_BOX_ACTIVE]
+        print(f"Selected menu item: {menu_selected_item}")
+        print(f"Cursor position: ({cursor_x}, {cursor_y})")
+        print(f"Last menu item ID: {last_menu_item}")
+        print(f"Text box active: {text_box_active}")
+        # Try to determine if we're actually in a menu
+        print("\n=== MENU STATE ANALYSIS ===")
+        if text_box_active != 0:
+            print("🔍 TEXT_BOX_ACTIVE is set - we're in a dialogue/text box")
+            print("   In this state, A advances text, B might do nothing")
+        elif cursor_x == 0 and cursor_y == 0 and last_menu_item == 0:
+            print("🔍 All cursor/menu indicators are 0 - probably not in a menu")
+            print("   The 'menu_state' = 1 might be normal overworld state")
+        elif last_menu_item > 0:
+            print(f"🔍 last_menu_item = {last_menu_item} - we might be in a real menu")
+            print("   B button should close this menu")
+        else:
+            print("🔍 Unclear state - need more investigation")
+        # Test if START button opens a menu (this should change menu indicators)
+        print("\n=== TESTING START BUTTON (should open menu) ===")
+        print("Before START button:")
+        print(f"  Selected item: {memory[MENU_STATE]}")
+        print(f"  Cursor: ({memory[0xCC25]}, {memory[0xCC24]})")
+        print(f"  Last menu item: {memory[0xCC28]}")
+        # Press START to open menu
+        engine.emulator.button_press("start")
+        for _ in range(3):
+            engine.emulator.tick()
+        engine.emulator.button_release("start")
+        engine.emulator.tick()
+        print("After START button:")
+        print(f"  Selected item: {memory[MENU_STATE]}")
+        print(f"  Cursor: ({memory[0xCC25]}, {memory[0xCC24]})")
+        print(f"  Last menu item: {memory[0xCC28]}")
+        # Now test if B closes this menu
+        print("\n=== TESTING B BUTTON (should close START menu) ===")
+        start_menu_selected = memory[MENU_STATE]
+        start_menu_last_item = memory[0xCC28]
+        engine.emulator.button_press("b")
+        for _ in range(3):
+            engine.emulator.tick()
+        engine.emulator.button_release("b")
+        engine.emulator.tick()
+        print("After B button:")
+        print(f"  Selected item: {memory[MENU_STATE]} (was {start_menu_selected})")
+        print(f"  Cursor: ({memory[0xCC25]}, {memory[0xCC24]})")
+        print(f"  Last menu item: {memory[0xCC28]} (was {start_menu_last_item})")
+        if memory[0xCC28] != start_menu_last_item or memory[MENU_STATE] != start_menu_selected:
+            print("✅ B button successfully changed menu state!")
+            print("   This suggests B button works when there's actually a menu open")
+        else:
+            print("❌ B button didn't change menu state")
+            print("   This could indicate the menu didn't open, or B doesn't work")
+        print("\n=== CONCLUSION ===")
+        print("The 'bug' might not be a bug at all!")
+        print("menu_state = 1 might be the normal overworld state where:")
+        print("- No menu is actually open")
+        print("- B button is supposed to do nothing")
+        print("- The agent thinks there's a menu to close, but there isn't")
+        print("\nTo fix the agent behavior, we should:")
+        print("1. Use better menu detection (check last_menu_item > 0)")
+        print("2. Only retry B button when we're actually in a menu")
+        print("3. Update the agent's understanding of game state")
+    except Exception as e:
+        print(f"Error during investigation: {e}")
+        import traceback
+        traceback.print_exc()
+@pytest.mark.asyncio
+async def test_comprehensive_menu_interaction():
+    """
+    Deep investigation into what type of menu we're in and how to interact with it.
+    We discovered we ARE in a menu-like state:
+    - last_menu_item = 3 (menu has 4 items: 0,1,2,3)
+    - cursor position = (1,2) (cursor is positioned)
+    - selected item = 1 (item 1 is selected)
+    - hidden tile = 127 (cursor is hiding a tile)
+    But START doesn't open new menu and B doesn't close it.
+    Let's test other interactions.
+    """
+    print("\n=== COMPREHENSIVE MENU INTERACTION TEST ===")
+    task_metadata = TaskInstanceMetadata()
+    inst = PokemonRedTaskInstance(
+        id=uuid.uuid4(),
+        impetus=Impetus(instructions="Test comprehensive menu interactions."),
+        intent=Intent(
+            rubric={"goal": "Understand menu type"},
+            gold_trajectories=None,
+            gold_state_diff={},
+        ),
+        metadata=task_metadata,
+        is_reproducible=True,
+        initial_engine_snapshot=None,
+    )
+    env = PokemonRedEnvironment(inst)
+    await env.initialize()
+    engine = env.engine
+    if engine.emulator is None:
+        print("⚠️ No emulator available - skipping test")
+        return
+    try:
+        from synth_ai.environments.examples.red.engine_helpers.memory_map import (
+            MENU_STATE,
+            MAP_ID,
+            PLAYER_X,
+            PLAYER_Y,
+        )
+        memory = engine.emulator.memory
+        def print_menu_state(label):
+            print(f"\n--- {label} ---")
+            print(f"Selected item: {memory[MENU_STATE]}")
+            print(f"Cursor: ({memory[0xCC25]}, {memory[0xCC24]})")
+            print(f"Last menu item: {memory[0xCC28]}")
+            print(f"Hidden tile: {memory[0xCC27]}")
+            print(f"Player pos: ({memory[PLAYER_X]}, {memory[PLAYER_Y]})")
+            print(f"Map ID: {memory[MAP_ID]}")
+        print_menu_state("INITIAL STATE")
+        # We're in a menu with 4 items (0-3), currently on item 1
+        # Let's try navigating the menu with arrow keys
+        print("\n=== TESTING MENU NAVIGATION ===")
+        # Try DOWN arrow (should move cursor down)
+        print("Testing DOWN arrow...")
+        engine.emulator.button_press("down")
+        for _ in range(3):
+            engine.emulator.tick()
+        engine.emulator.button_release("down")
+        engine.emulator.tick()
+        print_menu_state("After DOWN")
+        # Try UP arrow (should move cursor up)
+        print("Testing UP arrow...")
+        engine.emulator.button_press("up")
+        for _ in range(3):
+            engine.emulator.tick()
+        engine.emulator.button_release("up")
+        engine.emulator.tick()
+        print_menu_state("After UP")
+        # Try A button (should select menu item)
+        print("Testing A button (select)...")
+        engine.emulator.button_press("a")
+        for _ in range(5):  # A bit longer for menu selection
+            engine.emulator.tick()
+        engine.emulator.button_release("a")
+        engine.emulator.tick()
+        print_menu_state("After A")
+        # Wait a few more frames in case there's a delayed reaction
+        for _ in range(10):
+            engine.emulator.tick()
+        print_menu_state("After A + wait")
+        # Try B button again now
+        print("Testing B button again...")
+        engine.emulator.button_press("b")
+        for _ in range(5):
+            engine.emulator.tick()
+        engine.emulator.button_release("b")
+        engine.emulator.tick()
+        print_menu_state("After B (second time)")
+        # Try different approach: what if this is a special interaction menu?
+        # In Pokemon Red, some menus require you to interact with objects
+        print("\n=== ANALYZING MENU TYPE ===")
+        # Map 38 might be a specific location - let's check if this is a PC or other interactive object
+        map_id = memory[MAP_ID]
+        player_x = memory[PLAYER_X]
+        player_y = memory[PLAYER_Y]
+        print(f"We're in Map {map_id} at position ({player_x}, {player_y})")
+        # Check Pokemon Red map database if possible
+        if map_id == 38:
+            print("Map 38 might be a house or building with interactive objects")
+            print("Common interactive objects: PC, NPC dialogue, item boxes, etc.")
+        # Let's check if there are other menu-related memory addresses
+        print("\n=== ADDITIONAL MENU MEMORY ANALYSIS ===")
+        # Check some other potentially relevant addresses
+        try:
+            # These are other menu-related addresses from the RAM map
+            print(f"CC2B (party/PC cursor): {memory[0xCC2B]}")
+            print(f"CC2C (item screen cursor): {memory[0xCC2C]}")
+            print(f"CC2D (START/battle menu cursor): {memory[0xCC2D]}")
+            print(f"CC29 (key port bitmask): {memory[0xCC29]}")
+            print(f"CC2A (previously selected): {memory[0xCC2A]}")
+        except:
+            print("Could not read additional menu addresses")
+        # Final conclusion
+        print("\n=== DIAGNOSIS ===")
+        # Check if menu state changed with our interactions
+        current_selected = memory[MENU_STATE]
+        current_last_item = memory[0xCC28]
+        current_cursor_x = memory[0xCC25]
+        current_cursor_y = memory[0xCC24]
+        print("Final state:")
+        print(f"  Selected: {current_selected}")
+        print(f"  Last item: {current_last_item}")
+        print(f"  Cursor: ({current_cursor_x}, {current_cursor_y})")
+        if current_last_item == 3 and current_cursor_x > 0 and current_cursor_y > 0:
+            print("\n💡 CONCLUSION: We're in a persistent UI element")
+            print("This might be:")
+            print("1. A PC interface (common in Pokemon centers/houses)")
+            print("2. An item storage interface")
+            print("3. A dialogue menu waiting for input")
+            print("4. A shop or trade interface")
+            print("\nThe 'menu' might not be closeable with B because:")
+            print("- It's an interaction menu that closes when you walk away")
+            print("- It requires selecting an option with A first")
+            print("- It's part of the overworld UI and supposed to stay open")
+        else:
+            print("\n✅ Menu state changed during our interactions")
+            print("The B button issue might be timing or state-dependent")
+    except Exception as e:
+        print(f"Error during comprehensive test: {e}")
+        import traceback
+        traceback.print_exc()
+@pytest.mark.asyncio
+async def test_movement_away_from_interface():
+    """
+    Final test: Check if we're in overworld with persistent UI.
+    Key insights:
+    - Menu state is completely unresponsive to UP/DOWN/A/B
+    - BUT A button caused player movement (3,6) -> (3,7)
+    - This suggests we're in overworld, not a menu
+    Theory: The persistent menu indicators might be from a PC or other
+    interactive object that we're standing near/interacting with.
+    Solution: Try moving away from the object to clear the UI.
+    """
+    print("\n=== MOVEMENT AWAY FROM INTERFACE TEST ===")
+    task_metadata = TaskInstanceMetadata()
+    inst = PokemonRedTaskInstance(
+        id=uuid.uuid4(),
+        impetus=Impetus(instructions="Test movement away from interface."),
+        intent=Intent(
+            rubric={"goal": "Exit interface by moving"},
+            gold_trajectories=None,
+            gold_state_diff={},
+        ),
+        metadata=task_metadata,
+        is_reproducible=True,
+        initial_engine_snapshot=None,
+    )
+    env = PokemonRedEnvironment(inst)
+    await env.initialize()
+    engine = env.engine
+    if engine.emulator is None:
+        print("⚠️ No emulator available - skipping test")
+        return
+    try:
+        from synth_ai.environments.examples.red.engine_helpers.memory_map import (
+            MENU_STATE,
+            MAP_ID,
+            PLAYER_X,
+            PLAYER_Y,
+            TEXT_BOX_ACTIVE,
+        )
+        memory = engine.emulator.memory
+        def print_full_state(label):
+            print(f"\n--- {label} ---")
+            print(f"Selected item: {memory[MENU_STATE]}")
+            print(f"Cursor: ({memory[0xCC25]}, {memory[0xCC24]})")
+            print(f"Last menu item: {memory[0xCC28]}")
+            print(f"Hidden tile: {memory[0xCC27]}")
+            print(f"Player pos: ({memory[PLAYER_X]}, {memory[PLAYER_Y]})")
+            print(f"Map ID: {memory[MAP_ID]}")
+            print(f"Text box active: {memory[TEXT_BOX_ACTIVE]}")
+        print_full_state("INITIAL STATE")
+        print("\n=== HYPOTHESIS TESTING ===")
+        print("Theory: We're in overworld near an interactive object (PC, sign, NPC)")
+        print("The 'menu' indicators are persistent UI from that object")
+        print("Moving away should clear the interface")
+        # Try movement in all directions to get away from whatever we're interacting with
+        directions = ["LEFT", "RIGHT", "UP", "DOWN"]
+        for direction in directions:
+            print(f"\nTesting {direction} movement (multiple presses)...")
+            # Try movement multiple times - sometimes Pokemon Red needs multiple presses
+            for attempt in range(3):
+                initial_pos = (memory[PLAYER_X], memory[PLAYER_Y])
+                # Use the engine's retry method for movement
+                success = engine._press_button_with_retry(direction, frames=1, max_attempts=5)
+                new_pos = (memory[PLAYER_X], memory[PLAYER_Y])
+                print(f"  Attempt {attempt + 1}: {initial_pos} -> {new_pos}, Success: {success}")
+                if new_pos != initial_pos:
+                    print("  ✅ Movement successful!")
+                    print_full_state(f"After {direction} movement")
+                    # Check if menu state cleared
+                    if memory[0xCC28] == 0 or (memory[0xCC25] == 0 and memory[0xCC24] == 0):
+                        print("  🎉 MENU STATE CLEARED! Interface closed by moving away.")
+                        return
+                    else:
+                        print("  Menu state still persistent after movement")
+                    break
+                else:
+                    print("  ❌ No movement occurred")
+            # Small delay between direction tests
+            for _ in range(5):
+                engine.emulator.tick()
+        print("\n=== FINAL ANALYSIS ===")
+        print_full_state("FINAL STATE")
+        final_pos = (memory[PLAYER_X], memory[PLAYER_Y])
+        final_menu_last = memory[0xCC28]
+        final_cursor = (memory[0xCC25], memory[0xCC24])
+        print("\nMovement summary:")
+        print("  Started at: (3, 6) or (3, 7)")
+        print(f"  Ended at: {final_pos}")
+        print(f"  Menu indicators: last={final_menu_last}, cursor={final_cursor}")
+        if final_menu_last == 3 and final_cursor[0] > 0:
+            print("\n🔍 CONCLUSION: This is NOT a bug!")
+            print("The 'menu state' appears to be:")
+            print("1. A persistent overworld UI element")
+            print("2. Possibly related to the game's initial state")
+            print("3. Not an actual menu that can/should be closed with B")
+            print("4. Normal game behavior in this location")
+            print("\n💡 SOLUTION for the agent:")
+            print("1. Don't treat menu_state=1 as 'menu is open'")
+            print("2. Remove the B button retry logic for 'menu closing'")
+            print("3. Focus on actual game progression instead")
+            print("4. Use movement and A button for interactions")
+        else:
+            print("\n✅ Menu state changed - the interface was interactive")
+    except Exception as e:
+        print(f"Error during movement test: {e}")
+        import traceback
+        traceback.print_exc()
+if __name__ == "__main__":
+    # Run the tests directly for debugging
+    asyncio.run(test_menu_close_bug_reproduction())
+    asyncio.run(test_engine_button_press_behavior())
+    asyncio.run(test_menu_close_bug_fix_verification())
+    asyncio.run(test_engine_direct_button_retry())
+    asyncio.run(test_low_level_button_debug())
+    asyncio.run(test_menu_state_investigation())
+    asyncio.run(test_comprehensive_menu_interaction())
+    asyncio.run(test_movement_away_from_interface())

synth-ai 0.2.0__py3-none-any.whl → 0.2.1.dev0__py3-none-any.whl

synth-ai 0.2.0py3-none-any.whl → 0.2.1.dev0py3-none-any.whl