PyPI - synth-ai - Versions diffs - 0.1.9__py3-none-any.whl → 0.2.1.dev0__py3-none-any.whl - Mend

synth-ai 0.1.9__py3-none-any.whl → 0.2.1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (266) hide show

synth_ai/environments/examples/minigrid/units/test_action_behavior.py ADDED Viewed

@@ -0,0 +1,226 @@
+"""Test actual action behavior to debug left/right turn issues."""
+import pytest
+from synth_ai.environments.examples.minigrid.environment import MiniGridEnvironment
+from synth_ai.environments.examples.minigrid.taskset import DEFAULT_MINIGRID_TASK
+@pytest.mark.asyncio
+async def test_initial_state():
+    """Check the start pose reported by a freshly initialized default environment."""
+    environment = MiniGridEnvironment(DEFAULT_MINIGRID_TASK)
+    first_obs = await environment.initialize()
+    rendered = first_obs["observation"]
+    # The text observation must report both the start cell and the start heading
+    for marker in ("Agent Position: (1, 1)", "Agent Direction: →"):
+        assert marker in rendered
+    print("✓ Initial state verified: position (1,1), direction →")
+@pytest.mark.asyncio
+async def test_right_turn_action():
+    """Send a single 'right' action and require the agent to end up facing down (↓)."""
+    env = MiniGridEnvironment(DEFAULT_MINIGRID_TASK)
+    await env.initialize()
+    result = await env.step({"tool": "minigrid_act", "args": {"action": "right"}})
+    text = result["observation"]
+    # Headings are probed in this fixed order; the first one found in the text wins
+    headings = (
+        ("↓", "↓ (down)"),
+        ("↑", "↑ (up)"),
+        ("←", "← (left)"),
+        ("→", "→ (right)"),
+    )
+    arrow, actual_dir = next(
+        ((sym, label) for sym, label in headings if f"Agent Direction: {sym}" in text),
+        (None, "unknown"),
+    )
+    # Only a downward heading counts as the expected clockwise turn
+    expected = arrow == "↓"
+    print(f"RIGHT action result: {actual_dir}")
+    print("Expected: ↓ (down) for clockwise turn")
+    print(f"✓ RIGHT turn working correctly: {expected}")
+    assert expected, f"RIGHT turn failed: expected ↓ (down), got {actual_dir}"
+@pytest.mark.asyncio
+async def test_left_turn_action():
+    """Send a single 'left' action and require the agent to end up facing up (↑)."""
+    env = MiniGridEnvironment(DEFAULT_MINIGRID_TASK)
+    await env.initialize()
+    left_call = {"tool": "minigrid_act", "args": {"action": "left"}}
+    text = (await env.step(left_call))["observation"]
+    # Arrow -> printable label; dict insertion order is the order headings are probed in
+    labels = {"↑": "↑ (up)", "↓": "↓ (down)", "←": "← (left)", "→": "→ (right)"}
+    actual_dir = "unknown"
+    expected = False
+    for arrow, label in labels.items():
+        if f"Agent Direction: {arrow}" in text:
+            actual_dir = label
+            # Up is the only heading accepted for a counter-clockwise turn
+            expected = arrow == "↑"
+            break
+    print(f"LEFT action result: {actual_dir}")
+    print("Expected: ↑ (up) for counter-clockwise turn")
+    print(f"✓ LEFT turn working correctly: {expected}")
+    assert expected, f"LEFT turn failed: expected ↑ (up), got {actual_dir}"
+@pytest.mark.asyncio
+async def test_full_rotation_sequence():
+    """Turn right four times and check the recorded headings form one clockwise lap."""
+    env = MiniGridEnvironment(DEFAULT_MINIGRID_TASK)
+    await env.initialize()
+    # The starting heading is recorded only when the checkpoint reports →
+    start_obs = await env.checkpoint()
+    directions = ["→"] if "Agent Direction: →" in start_obs["observation"] else []
+    right_call = {"tool": "minigrid_act", "args": {"action": "right"}}
+    probe_order = ("↓", "←", "↑", "→")
+    for _ in range(4):
+        turned = await env.step(right_call)
+        text = turned["observation"]
+        # Record the first heading found; a step with no recognizable heading adds nothing
+        heading = next((a for a in probe_order if f"Agent Direction: {a}" in text), None)
+        if heading is not None:
+            directions.append(heading)
+    print(f"Full rotation sequence: {' -> '.join(directions)}")
+    print("Expected clockwise: → -> ↓ -> ← -> ↑ -> →")
+    expected_sequence = ["→", "↓", "←", "↑", "→"]
+    assert directions == expected_sequence, f"Rotation sequence wrong: {directions}"
+@pytest.mark.asyncio
+async def test_forward_movement():
+    """Walk forward from the start cell until the wall stops the agent at (3, 1)."""
+    env = MiniGridEnvironment(DEFAULT_MINIGRID_TASK)
+    await env.initialize()
+    forward = {"tool": "minigrid_act", "args": {"action": "forward"}}
+    # Two unobstructed steps along the initial heading: (1,1) -> (2,1) -> (3,1)
+    moves = (
+        ("Agent Position: (2, 1)", "✓ Forward movement verified: (1,1) -> (2,1)"),
+        ("Agent Position: (3, 1)", "✓ Forward movement verified: (2,1) -> (3,1)"),
+    )
+    for position_marker, message in moves:
+        step_obs = await env.step(forward)
+        assert position_marker in step_obs["observation"]
+        print(message)
+    # A third step must leave the agent in place and report the wall
+    blocked_obs = await env.step(forward)
+    assert "Agent Position: (3, 1)" in blocked_obs["observation"]
+    assert blocked_obs.get("last_action_result") == "blocked_by_wall"
+    print("✓ Wall blocking verified: stayed at (3,1)")
+async def _run_turn_then_move_sequence():
+    """Drive forward, forward, right, forward, forward on the default task.
+    Returns True when the last step reports the agent at (3, 3) or a terminated
+    episode, and False when the turn or one of the follow-up moves did not have
+    the expected effect. The pose after the first two moves is asserted outright.
+    """
+    env = MiniGridEnvironment(DEFAULT_MINIGRID_TASK)
+    await env.initialize()
+    # Move to (3,1)
+    forward_call = {"tool": "minigrid_act", "args": {"action": "forward"}}
+    await env.step(forward_call)  # (1,1) -> (2,1)
+    await env.step(forward_call)  # (2,1) -> (3,1)
+    # Verify at (3,1) facing right
+    obs = await env.checkpoint()
+    assert "Agent Position: (3, 1)" in obs["observation"]
+    assert "Agent Direction: →" in obs["observation"]
+    print("✓ At position (3,1) facing right")
+    # Turn right (should face down toward goal)
+    right_call = {"tool": "minigrid_act", "args": {"action": "right"}}
+    obs = await env.step(right_call)
+    if "Agent Direction: ↓" in obs["observation"]:
+        direction_after_right = "↓ (down)"
+    elif "Agent Direction: ↑" in obs["observation"]:
+        direction_after_right = "↑ (up)"
+    else:
+        direction_after_right = "other"
+    facing_goal = direction_after_right == "↓ (down)"
+    print(f"After RIGHT turn at (3,1): facing {direction_after_right}")
+    print("Goal is at (3,3), so agent should face ↓ (down)")
+    print(f"✓ Facing toward goal: {facing_goal}")
+    if not facing_goal:
+        return False
+    # Facing down: take the two steps toward the goal
+    obs = await env.step(forward_call)
+    if "Agent Position: (3, 2)" not in obs["observation"]:
+        return False
+    print("✓ Successfully moved toward goal: (3,1) -> (3,2)")
+    obs = await env.step(forward_call)
+    # NOTE(review): a truthy "terminated" flag is also accepted in case the final
+    # observation no longer prints the position - confirm against the environment
+    at_goal = "Agent Position: (3, 3)" in obs["observation"]
+    if not at_goal and not obs.get("terminated", False):
+        return False
+    print("✓ SUCCESS: Reached goal at (3,3)!")
+    return True
+@pytest.mark.asyncio
+async def test_turn_then_move_sequence():
+    """Test the critical sequence: move to (3,1), turn right, then move to the goal."""
+    # Assert rather than return a bool: pytest does not act on a test's return value
+    # (and deprecates non-None returns), so a missed goal previously could not fail
+    reached_goal = await _run_turn_then_move_sequence()
+    assert reached_goal, "Turn-then-move sequence did not reach the goal at (3,3)"
+if __name__ == "__main__":
+    import asyncio
+    async def run_tests():
+        """Run each test coroutine in order; print a summary or the failure traceback."""
+        print("=== TESTING MINIGRID ACTION BEHAVIOR ===")
+        try:
+            await test_initial_state()
+            await test_right_turn_action()
+            await test_left_turn_action()
+            await test_full_rotation_sequence()
+            await test_forward_movement()
+            # Call the helper directly: the test wrapper asserts and returns None
+            success = await _run_turn_then_move_sequence()
+            print("\n=== SUMMARY ===")
+            print(f"Goal reached successfully: {success}")
+        except Exception as e:
+            print(f"❌ Test failed: {e}")
+            import traceback
+            traceback.print_exc()
+    asyncio.run(run_tests())

synth_ai/environments/examples/minigrid/units/test_debug_messages.py ADDED Viewed

@@ -0,0 +1,83 @@
+"""Test debug message functionality in MiniGrid."""
+import pytest
+import asyncio
+from synth_ai.environments.examples.minigrid.environment import MiniGridEnvironment
+from synth_ai.environments.examples.minigrid.taskset import DEFAULT_MINIGRID_TASK
+@pytest.mark.asyncio
+async def test_debug_messages_on_movement():
+    """A successful forward step must report its action, its result and a debug message."""
+    env = MiniGridEnvironment(DEFAULT_MINIGRID_TASK)
+    await env.initialize()
+    step_obs = await env.step({"tool": "minigrid_act", "args": {"action": "forward"}})
+    # All three debug fields have to be present before their values are inspected
+    for field in ("debug_message", "last_action", "last_action_result"):
+        assert field in step_obs
+    assert step_obs["last_action"] == "forward"
+    assert step_obs["last_action_result"] == "moved"
+    assert "Moved forward" in step_obs["debug_message"]
+@pytest.mark.asyncio
+async def test_debug_messages_on_blocked_movement():
+    """A forward step that cannot move the agent must be reported as blocked."""
+    env = MiniGridEnvironment(DEFAULT_MINIGRID_TASK)
+    await env.initialize()
+    forward = {"tool": "minigrid_act", "args": {"action": "forward"}}
+    # Two forward steps to reach the edge, then a third that should be blocked;
+    # only the observation from that last step is inspected
+    for _ in range(3):
+        last_obs = await env.step(forward)
+    assert last_obs["last_action_result"] in ("blocked_by_wall", "blocked_by_boundary")
+    assert "blocked" in last_obs["debug_message"].lower()
+@pytest.mark.asyncio
+async def test_debug_messages_on_turn():
+    """Turning left and then right must each report a 'turned' result and message."""
+    env = MiniGridEnvironment(DEFAULT_MINIGRID_TASK)
+    await env.initialize()
+    # Left first, then right, each checked against its own debug phrase
+    for action, phrase in (("left", "Turned left"), ("right", "Turned right")):
+        turn_obs = await env.step({"tool": "minigrid_act", "args": {"action": action}})
+        assert turn_obs["last_action"] == action
+        assert turn_obs["last_action_result"] == "turned"
+        assert phrase in turn_obs["debug_message"]
+@pytest.mark.asyncio
+async def test_debug_messages_in_observation_text():
+    """The text observation after a forward step must carry a 'Debug:' section."""
+    env = MiniGridEnvironment(DEFAULT_MINIGRID_TASK)
+    await env.initialize()
+    step_obs = await env.step({"tool": "minigrid_act", "args": {"action": "forward"}})
+    text = step_obs["observation"]
+    assert "Debug:" in text
+    # Either the result line is rendered in the text or the structured result is "moved"
+    result_line_shown = "Last action result:" in text
+    assert result_line_shown or step_obs["last_action_result"] == "moved"

synth_ai/environments/examples/minigrid/units/test_exploration.py ADDED Viewed

@@ -0,0 +1,120 @@
+"""Test exploration mechanics in MiniGrid."""
+import pytest
+import asyncio
+from synth_ai.environments.examples.minigrid.environment import MiniGridEnvironment
+from synth_ai.environments.examples.minigrid.taskset import DEFAULT_MINIGRID_TASK
+@pytest.mark.asyncio
+async def test_goal_not_always_visible():
+    """Document whether 'G' shows up in the initial grid rows; either outcome is accepted."""
+    env = MiniGridEnvironment(DEFAULT_MINIGRID_TASK)
+    first_obs = await env.initialize()
+    # Collect the non-blank lines after the "Grid:" header and stop at "Legend:",
+    # so that a 'G' appearing in the legend is not counted
+    rows = []
+    in_grid = False
+    for text_line in first_obs["observation"].split("\n"):
+        if "Grid:" in text_line:
+            in_grid = True
+            continue
+        if "Legend:" in text_line:
+            break
+        if in_grid and text_line.strip():
+            rows.append(text_line)
+    goal_in_grid = "G" in "\n".join(rows)
+    # No particular visibility is required (it depends on the seed/layout);
+    # this only checks that the lookup produced a bool
+    assert isinstance(goal_in_grid, bool)
+@pytest.mark.asyncio
+async def test_limited_visibility():
+    """The initial view must contain '?' cells and render exactly five grid rows."""
+    env = MiniGridEnvironment(DEFAULT_MINIGRID_TASK)
+    first_obs = await env.initialize()
+    text = first_obs["observation"]
+    # '?' marks areas the agent has not seen
+    assert "?" in text
+    visible_rows = []
+    inside = False
+    for entry in text.split("\n"):
+        if "Grid:" in entry:
+            # Header line itself is not a row; rows start on the next line
+            inside = True
+        elif "Legend:" in entry:
+            break
+        elif inside and entry.strip():
+            visible_rows.append(entry)
+    # In a 5x5 environment, the agent sees a 5x5 view
+    assert len(visible_rows) == 5
+@pytest.mark.asyncio
+async def test_exploration_reveals_new_areas():
+    """A forward step must change both the observation text and the position line."""
+    def position_line(text):
+        """Return the first line mentioning the agent position, or None if absent."""
+        return next((ln for ln in text.split("\n") if "Agent Position:" in ln), None)
+    env = MiniGridEnvironment(DEFAULT_MINIGRID_TASK)
+    before = await env.initialize()
+    after = await env.step({"tool": "minigrid_act", "args": {"action": "forward"}})
+    # The whole rendering differs once the agent has moved
+    assert before["observation"] != after["observation"]
+    # ...and so does the reported position (two missing lines compare equal and fail)
+    assert position_line(before["observation"]) != position_line(after["observation"])
+@pytest.mark.asyncio
+async def test_complete_exploration_finds_goal():
+    """Replay the known solution path and require termination with a positive reward.
+    The test passes as soon as a step reports ``terminated`` with ``total_reward > 0``
+    and fails explicitly if the whole path is consumed without terminating.
+    """
+    env = MiniGridEnvironment(DEFAULT_MINIGRID_TASK)
+    await env.initialize()
+    # Known solution path for the default task
+    solution_path = [
+        "forward",  # Move right to (2,1)
+        "forward",  # Move right to (3,1)
+        "right",  # Turn to face down
+        "forward",  # Move down to (3,2)
+        "forward",  # Move down to (3,3) - goal
+    ]
+    for action in solution_path:
+        obs = await env.step({"tool": "minigrid_act", "args": {"action": action}})
+        if obs.get("terminated", False):
+            # Should have found the goal
+            assert obs.get("total_reward", 0) > 0
+            return
+    # pytest.fail instead of `assert False`: an unconditional failure should not
+    # depend on assertions being enabled
+    pytest.fail("Failed to reach goal with known solution path")

synth_ai/environments/examples/minigrid/units/test_minigrid_engine.py ADDED Viewed

@@ -0,0 +1,214 @@
+"""Unit tests for MiniGrid engine."""
+import asyncio
+import pytest
+import numpy as np
+from synth_ai.environments.examples.minigrid.engine import (
+    MiniGridEngine,
+    MiniGridPublicState,
+    MiniGridPrivateState,
+    MiniGridStepPenaltyComponent,
+)
+from synth_ai.environments.examples.minigrid.taskset import DEFAULT_MINIGRID_TASK
+@pytest.mark.asyncio
+async def test_engine_initialization():
+    """Check engine attributes before reset and the state pair produced by the reset."""
+    engine = MiniGridEngine(DEFAULT_MINIGRID_TASK)
+    # Before any reset: configuration is readable and the engine is not initialized
+    assert engine.env_name == "MiniGrid-Empty-5x5-v0"
+    assert engine.seed == 42
+    assert engine.total_reward == 0.0
+    assert not engine._initialized
+    private_state, public_state = await engine._reset_engine()
+    assert engine._initialized
+    assert isinstance(private_state, MiniGridPrivateState)
+    assert isinstance(public_state, MiniGridPublicState)
+    # Private side of a fresh episode: not finished, nothing earned yet
+    assert private_state.terminated is False
+    assert private_state.truncated is False
+    assert private_state.total_reward == 0.0
+    # Public side: grid shape, default start cell, step counter and mission text
+    assert public_state.grid_array.shape == (5, 5, 3)
+    assert public_state.agent_pos == (1, 1)
+    assert public_state.step_count == 0
+    assert public_state.mission == "get to the green goal square"
+@pytest.mark.asyncio
+async def test_engine_step():
+    """Step the engine forward once, then turn left, checking counters and rewards.
+    Action 2 is the forward action and action 0 the left turn; the left turn is
+    expected to decrement ``agent_dir`` modulo 4.
+    """
+    engine = MiniGridEngine(DEFAULT_MINIGRID_TASK)
+    await engine._reset_engine()
+    # Test moving forward
+    priv, pub = await engine._step_engine(2)  # Forward action
+    assert isinstance(priv, MiniGridPrivateState)
+    assert isinstance(pub, MiniGridPublicState)
+    assert pub.step_count == 1
+    # Rewards are floats, so compare with a tolerance rather than exact equality
+    assert priv.reward_last == pytest.approx(-0.01)  # Step penalty
+    assert priv.total_reward == pytest.approx(-0.01)
+    # Test turning
+    initial_dir = pub.agent_dir
+    priv, pub = await engine._step_engine(0)  # Turn left
+    assert pub.agent_dir == (initial_dir - 1) % 4
+    assert pub.step_count == 2
+@pytest.mark.asyncio
+async def test_invalid_actions():
+    """Out-of-range and non-integer actions must raise ValueError('Invalid action...')."""
+    engine = MiniGridEngine(DEFAULT_MINIGRID_TASK)
+    await engine._reset_engine()
+    # Below the range, above the range, and a string instead of an action index
+    for bad_action in (-1, 7, "forward"):
+        with pytest.raises(ValueError, match="Invalid action"):
+            await engine._step_engine(bad_action)
+@pytest.mark.asyncio
+async def test_grid_to_array():
+    """The grid conversion must yield a (5, 5, 3) uint8 array that marks the agent cell."""
+    engine = MiniGridEngine(DEFAULT_MINIGRID_TASK)
+    await engine._reset_engine()
+    cells = engine._grid_to_array()
+    assert isinstance(cells, np.ndarray)
+    assert cells.shape == (5, 5, 3)
+    assert cells.dtype == np.uint8
+    # The cell is looked up as [pos[1], pos[0]] - presumably an (x, y) position
+    # indexing a row-first array; confirm against the engine
+    position = engine.env.unwrapped.agent_pos
+    agent_cell = cells[position[1], position[0]]
+    # NOTE(review): 9 is treated as the agent object type here; upstream MiniGrid's
+    # OBJECT_TO_IDX uses 10 for "agent" and 9 for "lava" - confirm the engine's encoding
+    assert agent_cell[0] == 9
+@pytest.mark.asyncio
+async def test_state_diff():
+    """Diffing the states before and after one forward step must expose the changes."""
+    engine = MiniGridEngine(DEFAULT_MINIGRID_TASK)
+    before_priv, before_pub = await engine._reset_engine()
+    after_priv, after_pub = await engine._step_engine(2)  # Forward
+    public_changes = after_pub.diff(before_pub)
+    assert "step_count" in public_changes
+    assert public_changes["step_count"] == 1
+    # The position entry is only required when the step actually moved the agent
+    if before_pub.agent_pos != after_pub.agent_pos:
+        assert "agent_pos" in public_changes
+    private_changes = after_priv.diff(before_priv)
+    for key in ("reward_last", "total_reward"):
+        assert key in private_changes
+@pytest.mark.asyncio
+async def test_reward_components():
+    """The step-penalty component must score a step at -0.01.
+    A hand-built public state is scored with action 2; ``MiniGridPublicState`` comes
+    from the module-level import, so no function-local re-import is needed.
+    """
+    component = MiniGridStepPenaltyComponent()
+    # Create a dummy state
+    state = MiniGridPublicState(
+        grid_array=np.zeros((5, 5, 3)),
+        agent_pos=(1, 1),
+        agent_dir=0,
+        step_count=1,
+        max_steps=100,
+        mission="test",
+    )
+    reward = await component.score(state, 2)
+    # Float reward: compare with a tolerance rather than exact equality
+    assert reward == pytest.approx(-0.01)
+@pytest.mark.asyncio
+async def test_serialization():
+    """After two steps the serialized snapshot must carry the engine's key fields."""
+    engine = MiniGridEngine(DEFAULT_MINIGRID_TASK)
+    await engine._reset_engine()
+    # Take some steps so the snapshot is not of a pristine engine
+    for action in (2, 1):
+        await engine._step_engine(action)
+    snapshot = await engine._serialize_engine()
+    payload = snapshot.engine_snapshot
+    assert payload["env_name"] == "MiniGrid-Empty-5x5-v0"
+    assert payload["seed"] == 42
+    assert payload["initialized"] is True
+    assert "total_reward" in payload
+@pytest.mark.asyncio
+async def test_get_available_actions():
+    """The engine must list seven (index, label) actions, spot-checked at 0, 2 and 3."""
+    engine = MiniGridEngine(DEFAULT_MINIGRID_TASK)
+    available = engine.get_available_actions()
+    assert len(available) == 7
+    # Each checked entry pairs its own index with a human-readable label
+    for index, label in ((0, "turn left"), (2, "move forward"), (3, "pickup")):
+        assert available[index] == (index, label)
+@pytest.mark.asyncio
+async def test_different_environments():
+    """Build a DoorKey 5x5 task by hand and check the mission and grid shape after reset."""
+    from synth_ai.environments.examples.minigrid.taskset import (
+        MiniGridTaskInstance,
+        MiniGridTaskInstanceMetadata,
+    )
+    from synth_ai.environments.tasks.api import Impetus, Intent
+    from uuid import uuid4
+    # Metadata selecting the DoorKey variant instead of the default empty room
+    door_key_metadata = MiniGridTaskInstanceMetadata(
+        env_name="MiniGrid-DoorKey-5x5-v0",
+        grid_size=(5, 5),
+        difficulty="medium",
+        has_key=True,
+        has_door=True,
+    )
+    door_key_task = MiniGridTaskInstance(
+        id=uuid4(),
+        impetus=Impetus(instructions="Test"),
+        intent=Intent(rubric={"goal": "Test"}, gold_trajectories=None, gold_state_diff={}),
+        metadata=door_key_metadata,
+        is_reproducible=True,
+        initial_engine_snapshot=None,
+    )
+    engine = MiniGridEngine(door_key_task)
+    _, public_state = await engine._reset_engine()
+    assert public_state.mission == "open the door then get to the goal"
+    assert public_state.grid_array.shape == (5, 5, 3)
+if __name__ == "__main__":
+    # pytest.main() runs the session synchronously and returns an exit code, not a
+    # coroutine, so it must not be wrapped in asyncio.run(); propagate the code instead
+    raise SystemExit(pytest.main([__file__, "-v"]))

synth-ai 0.1.9__py3-none-any.whl → 0.2.1.dev0__py3-none-any.whl

synth-ai 0.1.9__py3-none-any.whl → 0.2.1.dev0__py3-none-any.whl