PyPI - synth-ai - Versions diffs - 0.1.9__py3-none-any.whl → 0.2.1.dev0__py3-none-any.whl - Mend

synth-ai 0.1.9__py3-none-any.whl → 0.2.1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (266) hide show

synth_ai/environments/examples/tictactoe/taskset.py ADDED Viewed

@@ -0,0 +1,214 @@
+from __future__ import annotations
+import random
+from dataclasses import dataclass
+from typing import List
+import numpy as np
+from uuid import uuid4
+from synth_ai.environments.tasks.core import (
+    TaskInstance,
+    TaskInstanceMetadata,
+    TaskInstanceSet,
+    Impetus,
+    Intent,
+    SplitInfo,
+)
+from .engine import COORD_TO_IDX, WIN_PATTERNS, PLAYER_MARKS
+@dataclass
+class TicTacToeTaskInstanceMetadata(TaskInstanceMetadata):
+    """Metadata describing the starting position of one TicTacToe task.
+
+    The fields record who moves next, which opening moves produced the
+    position, and the evaluation values computed when the task was generated.
+    """
+    # Player the task is played as: "X" or "O"
+    starting_player: str  # "X" or "O"
+    # Coordinates (e.g. "A1") of the moves already played, in play order
+    opening_moves: List[str]  # Pre-made moves to create position
+    # Evaluated result from starting_player's point of view
+    optimal_outcome: str  # "win", "draw", "loss" for starting player
+    # How many opening moves were played before the task starts
+    position_complexity: int  # Number of pre-moves made
+    # Move-count estimate stored alongside the position
+    shortest_win_length: int  # Min moves to force win/draw
+@dataclass
+class TicTacToeTaskInstance(TaskInstance):
+    """TicTacToe task instance that converts to and from a plain dictionary."""
+    async def serialize(self) -> dict:
+        """Flatten this instance into a dictionary.
+
+        Returns:
+            A dict with the keys "id" (as a string), "impetus", "intent",
+            "metadata", "is_reproducible" and "initial_engine_snapshot".
+        """
+        payload = {"id": str(self.id)}
+        payload["impetus"] = {"instructions": self.impetus.instructions}
+        goal = self.intent
+        payload["intent"] = {
+            "rubric": goal.rubric,
+            "gold_trajectories": goal.gold_trajectories,
+            "gold_state_diff": goal.gold_state_diff,
+        }
+        meta = self.metadata
+        payload["metadata"] = {
+            "starting_player": meta.starting_player,
+            "opening_moves": meta.opening_moves,
+            "optimal_outcome": meta.optimal_outcome,
+            "position_complexity": meta.position_complexity,
+            "shortest_win_length": meta.shortest_win_length,
+        }
+        payload["is_reproducible"] = self.is_reproducible
+        payload["initial_engine_snapshot"] = self.initial_engine_snapshot
+        return payload
+    @classmethod
+    async def deserialize(cls, data: dict) -> "TicTacToeTaskInstance":
+        """Rebuild an instance from a dictionary shaped like `serialize` output.
+
+        Args:
+            data: Mapping with the keys written by `serialize`; "id" must be
+                a UUID string.
+
+        Returns:
+            A new instance of `cls` populated from `data`.
+        """
+        from uuid import UUID
+        meta_fields = (
+            "starting_player",
+            "opening_moves",
+            "optimal_outcome",
+            "position_complexity",
+            "shortest_win_length",
+        )
+        meta_src = data["metadata"]
+        metadata = TicTacToeTaskInstanceMetadata(**{name: meta_src[name] for name in meta_fields})
+        instance_id = UUID(data["id"])
+        impetus = Impetus(instructions=data["impetus"]["instructions"])
+        intent_src = data["intent"]
+        intent = Intent(
+            rubric=intent_src["rubric"],
+            gold_trajectories=intent_src["gold_trajectories"],
+            gold_state_diff=intent_src["gold_state_diff"],
+        )
+        return cls(
+            id=instance_id,
+            impetus=impetus,
+            intent=intent,
+            metadata=metadata,
+            is_reproducible=data["is_reproducible"],
+            initial_engine_snapshot=data["initial_engine_snapshot"],
+        )
+def _evaluate_position(board: np.ndarray, player: int) -> str:
+    """Return the perfect-play outcome of `board` for `player`, who is to move.
+
+    Cells hold 0 when empty and the marks 1 / 2 for the two players; the
+    opponent's mark is computed as ``3 - player``.
+
+    Args:
+        board: Sequence of 9 cell marks, indexed like the WIN_PATTERNS triples.
+        player: Mark (1 or 2) of the side being evaluated; it is treated as
+            the side that moves next.
+
+    Returns:
+        "win", "draw" or "loss" from `player`'s point of view.
+    """
+    cells = tuple(int(mark) for mark in board)
+    # An already completed line decides the result immediately
+    for pattern in WIN_PATTERNS:
+        values = [cells[i] for i in pattern]
+        if values.count(player) == 3:
+            return "win"
+        if values.count(3 - player) == 3:
+            return "loss"
+    # Negamax over the remaining moves; memoised because many move orders
+    # reach the same position
+    memo = {}
+    def solve(state, mover):
+        """Score `state` for `mover` (about to play): 1 win, 0 draw, -1 loss."""
+        key = (state, mover)
+        if key in memo:
+            return memo[key]
+        opponent = 3 - mover
+        # Only the side that has just moved can have completed a new line
+        if any(all(state[i] == opponent for i in pattern) for pattern in WIN_PATTERNS):
+            score = -1
+        elif 0 not in state:
+            score = 0
+        else:
+            score = -1
+            for idx, mark in enumerate(state):
+                if mark != 0:
+                    continue
+                child = state[:idx] + (mover,) + state[idx + 1 :]
+                score = max(score, -solve(child, opponent))
+                if score == 1:
+                    break  # a forced win cannot be improved on
+        memo[key] = score
+        return score
+    return {1: "win", 0: "draw", -1: "loss"}[solve(cells, player)]
+def _count_shortest_win(board: np.ndarray, player: int) -> int:
+    """Estimate how many moves are needed to reach a win/draw.
+
+    This is a heuristic, not a search: it halves the number of empty cells
+    among the first nine board entries and never returns less than 1.
+    `player` is accepted but not used.
+    """
+    remaining = 0
+    for idx in range(9):
+        if board[idx] == 0:
+            remaining += 1
+    half = remaining // 2
+    return half if half > 1 else 1
+async def create_tictactoe_taskset() -> TaskInstanceSet:
+    """Build the procedural TicTacToe taskset.
+
+    Instances are generated in four groups that differ in how many random
+    opening moves (0 to 3) are played before the task starts. The instances
+    are shuffled, and validation / test splits are derived from that
+    pre-move count.
+
+    Returns:
+        A TaskInstanceSet with the shuffled instances and their split info.
+    """
+    # (group label, random pre-moves, number of instances)
+    position_plan = (
+        ("opening", 0, 10),  # Fresh games
+        ("early", 1, 15),  # After 1 move
+        ("mid", 2, 15),  # After 2 moves
+        ("complex", 3, 10),  # After 3 moves
+    )
+    coord_pool = list(COORD_TO_IDX.keys())
+    other_side = {"X": "O", "O": "X"}
+    tasks = []
+    for _label, pre_moves, how_many in position_plan:
+        for _ in range(how_many):
+            grid = np.zeros(9, dtype=int)
+            played = []
+            free_coords = coord_pool.copy()
+            to_move = "X"
+            # Play random alternating moves until the requested count is reached
+            while len(played) < pre_moves and free_coords:
+                coord = random.choice(free_coords)
+                played.append(coord)
+                free_coords.remove(coord)
+                grid[COORD_TO_IDX[coord]] = PLAYER_MARKS[to_move]
+                to_move = other_side[to_move]
+            # Whoever is next to move is the player the task is played as
+            outcome = _evaluate_position(grid, PLAYER_MARKS[to_move])
+            win_length = _count_shortest_win(grid, PLAYER_MARKS[to_move])
+            task_metadata = TicTacToeTaskInstanceMetadata(
+                starting_player=to_move,
+                opening_moves=played,
+                optimal_outcome=outcome,
+                position_complexity=pre_moves,
+                shortest_win_length=win_length,
+            )
+            if played:
+                history_sentence = f"The game has already had {len(played)} moves."
+            else:
+                history_sentence = "This is a fresh game."
+            instruction_parts = [
+                f"You are playing TicTacToe as {to_move}. ",
+                "The game is played on a 3x3 grid with cells labeled A1-A3, B1-B3, C1-C3. ",
+                history_sentence,
+                f" You must place your mark ({to_move}) in an empty cell. ",
+                "Win by getting three of your marks in a row (horizontally, vertically, or diagonally).",
+            ]
+            task_impetus = Impetus(instructions="".join(instruction_parts))
+            task_intent = Intent(
+                rubric={"goal": f"Win the game as {to_move}, or at least force a draw"},
+                gold_trajectories=None,
+                gold_state_diff={"optimal_outcome": outcome},
+            )
+            tasks.append(
+                TicTacToeTaskInstance(
+                    id=uuid4(),
+                    impetus=task_impetus,
+                    intent=task_intent,
+                    metadata=task_metadata,
+                    is_reproducible=True,
+                    initial_engine_snapshot=None,
+                )
+            )
+    random.shuffle(tasks)
+    # Splits by pre-move count: exactly 1 -> validation, 2 or more -> test
+    val_ids = {task.id for task in tasks if task.metadata.position_complexity == 1}
+    test_ids = {task.id for task in tasks if task.metadata.position_complexity >= 2}
+    # If either split came out empty, fall back to slicing the shuffled list
+    if not val_ids or not test_ids:
+        total = len(tasks)
+        val_cut = int(total * 0.15)
+        test_cut = int(total * 0.30)
+        val_ids = {task.id for task in tasks[:val_cut]}
+        test_ids = {task.id for task in tasks[val_cut:test_cut]}
+    splits = SplitInfo(
+        val_instance_ids=val_ids, test_instance_ids=test_ids, _is_split_defined=True
+    )
+    return TaskInstanceSet(
+        name="TicTacToe Procedural TaskSet",
+        description="Procedurally generated TicTacToe tasks with varying starting positions.",
+        instances=tasks,
+        split_info=splits,
+    )
+# Make taskset available as module attribute.
+# NOTE: this binds the coroutine function itself; it is not called or awaited
+# here, so users of `taskset` must call and await it to get a TaskInstanceSet.
+taskset = create_tictactoe_taskset

synth_ai/environments/examples/tictactoe/units/test_tictactoe_engine.py ADDED Viewed

@@ -0,0 +1,393 @@
+import pytest
+import numpy as np
+from uuid import uuid4
+from synth_ai.environments.tasks.core import TaskInstance, Impetus, Intent
+from synth_ai.environments.examples.tictactoe.engine import (
+    TicTacToeEngine,
+    TicTacToePublicState,
+    TicTacToePrivateState,
+    TicTacToeWinComponent,
+    TicTacToeDrawComponent,
+    TicTacToeIllegalMoveComponent,
+    COORD_TO_IDX,
+    IDX_TO_COORD,
+    WIN_PATTERNS,
+    PLAYER_MARKS,
+    MARK_TO_PLAYER,
+)
+from synth_ai.environments.examples.tictactoe.taskset import (
+    TicTacToeTaskInstance,
+    TicTacToeTaskInstanceMetadata,
+)
+@pytest.fixture
+def simple_task_instance():
+    """Provide a task instance for a fresh game: X to play, no opening moves."""
+    fresh_game_metadata = TicTacToeTaskInstanceMetadata(
+        starting_player="X",
+        opening_moves=[],
+        optimal_outcome="draw",
+        position_complexity=0,
+        shortest_win_length=5,
+    )
+    instance_fields = {
+        "id": uuid4(),
+        "impetus": Impetus(instructions="Test TicTacToe game"),
+        "intent": Intent(rubric={"goal": "Test game"}, gold_trajectories=None, gold_state_diff={}),
+        "metadata": fresh_game_metadata,
+        "is_reproducible": True,
+        "initial_engine_snapshot": None,
+    }
+    return TicTacToeTaskInstance(**instance_fields)
+@pytest.fixture
+def task_with_premoves():
+    """Create a task instance whose position follows two opening moves.
+
+    The opening moves are A1 then B2. Moves alternate starting with X, so X
+    is to move again afterwards; `starting_player` records that player,
+    matching how the taskset generator fills the field.
+    """
+    metadata = TicTacToeTaskInstanceMetadata(
+        # X played A1, O played B2, so the side to move is X
+        starting_player="X",
+        opening_moves=["A1", "B2"],
+        # A corner opening answered in the centre is a draw with correct play
+        optimal_outcome="draw",
+        position_complexity=2,
+        shortest_win_length=3,
+    )
+    return TicTacToeTaskInstance(
+        id=uuid4(),
+        impetus=Impetus(instructions="Test TicTacToe with premoves"),
+        intent=Intent(rubric={"goal": "Test game"}, gold_trajectories=None, gold_state_diff={}),
+        metadata=metadata,
+        is_reproducible=True,
+        initial_engine_snapshot=None,
+    )
+class TestTicTacToeEngine:
+    """Engine tests: construction, stepping, results, rendering and persistence."""
+    @pytest.mark.asyncio
+    async def test_engine_initialization(self, simple_task_instance):
+        """A newly built engine has X to move on an empty, unfinished board."""
+        game = TicTacToeEngine(simple_task_instance)
+        assert game.current_player == "X"
+        assert game.move_count == 0
+        assert game.winner is None
+        assert not game.terminated
+        assert game.total_reward == 0.0
+        assert np.all(game.board == 0)
+    @pytest.mark.asyncio
+    async def test_engine_with_premoves(self, task_with_premoves):
+        """The task's opening moves are already on the board after construction."""
+        game = TicTacToeEngine(task_with_premoves)
+        # Opening moves are A1 then B2; marks alternate starting with X
+        expected_marks = {"A1": "X", "B2": "O"}
+        for coord, mark in expected_marks.items():
+            assert game.board[COORD_TO_IDX[coord]] == PLAYER_MARKS[mark]
+        assert game.current_player == "X"  # Two moves played, so X is up again
+        assert game.move_count == 2
+    @pytest.mark.asyncio
+    async def test_reset_engine(self, simple_task_instance):
+        """Resetting after a move returns states for a fresh game."""
+        game = TicTacToeEngine(simple_task_instance)
+        # Dirty the engine with one move before resetting it
+        await game._step_engine("B2")
+        private_state, public_state = await game._reset_engine()
+        assert public_state.current_player == "X"
+        assert public_state.move_count == 0
+        assert public_state.winner is None
+        assert not public_state.terminated
+        assert np.all(public_state.board == 0)
+        assert private_state.total_reward == 0.0
+    @pytest.mark.asyncio
+    async def test_valid_moves(self, simple_task_instance):
+        """Legal moves place the mover's mark and hand the turn over."""
+        game = TicTacToeEngine(simple_task_instance)
+        # X opens in B2
+        _, public_state = await game._step_engine("B2")
+        assert public_state.last_move == "B2"
+        assert public_state.board[COORD_TO_IDX["B2"]] == PLAYER_MARKS["X"]
+        assert public_state.current_player == "O"
+        assert public_state.move_count == 1
+        assert not public_state.terminated
+        # O replies in A1
+        _, public_state = await game._step_engine("A1")
+        assert public_state.last_move == "A1"
+        assert public_state.board[COORD_TO_IDX["A1"]] == PLAYER_MARKS["O"]
+        assert public_state.current_player == "X"
+        assert public_state.move_count == 2
+    @pytest.mark.asyncio
+    async def test_invalid_moves(self, simple_task_instance):
+        """Occupied cells and unknown coordinates end the game with a -1.0 reward."""
+        # Case 1: play into a cell that is already taken
+        game = TicTacToeEngine(simple_task_instance)
+        await game._step_engine("B2")
+        private_state, public_state = await game._step_engine("B2")
+        assert public_state.terminated
+        assert private_state.reward_last == -1.0  # Illegal move penalty
+        # Case 2: a coordinate that is not on the board, on a new engine
+        game = TicTacToeEngine(simple_task_instance)
+        private_state, public_state = await game._step_engine("Z9")
+        assert public_state.terminated
+        assert private_state.reward_last == -1.0
+    @pytest.mark.asyncio
+    async def test_win_detection_row(self, simple_task_instance):
+        """X wins by completing A1-A2-A3 and receives the win reward."""
+        game = TicTacToeEngine(simple_task_instance)
+        # Alternating play: X takes A1 and A2, O takes B1 and B2
+        for coord in ("A1", "B1", "A2", "B2"):
+            await game._step_engine(coord)
+        private_state, public_state = await game._step_engine("A3")  # X completes the line
+        assert public_state.winner == "X"
+        assert public_state.terminated
+        assert private_state.reward_last == 1.0  # Win reward
+    @pytest.mark.asyncio
+    async def test_win_detection_column(self, simple_task_instance):
+        """X wins by completing A1-B1-C1."""
+        game = TicTacToeEngine(simple_task_instance)
+        # Alternating play: X takes A1 and B1, O takes A2 and B2
+        for coord in ("A1", "A2", "B1", "B2"):
+            await game._step_engine(coord)
+        _, public_state = await game._step_engine("C1")  # X completes the line
+        assert public_state.winner == "X"
+        assert public_state.terminated
+    @pytest.mark.asyncio
+    async def test_win_detection_diagonal(self, simple_task_instance):
+        """X wins by completing the A1-B2-C3 diagonal."""
+        game = TicTacToeEngine(simple_task_instance)
+        # Alternating play: X takes A1 and B2, O takes A2 and B1
+        for coord in ("A1", "A2", "B2", "B1"):
+            await game._step_engine(coord)
+        _, public_state = await game._step_engine("C3")  # X completes the diagonal
+        assert public_state.winner == "X"
+        assert public_state.terminated
+    @pytest.mark.asyncio
+    async def test_draw_detection(self, simple_task_instance):
+        """Filling all nine cells without a line yields a draw and zero reward."""
+        game = TicTacToeEngine(simple_task_instance)
+        # Nine scripted moves; the assertions below expect this game to be drawn
+        script = ("A1", "B2", "A2", "A3", "B3", "B1", "C1", "C3", "C2")
+        for coord in script:
+            private_state, public_state = await game._step_engine(coord)
+        assert public_state.winner == "draw"
+        assert public_state.terminated
+        assert public_state.move_count == 9
+        assert private_state.reward_last == 0.0  # Draw reward
+    @pytest.mark.asyncio
+    async def test_board_text_representation(self, simple_task_instance):
+        """The rendered board shows a column header and one labelled line per row."""
+        game = TicTacToeEngine(simple_task_instance)
+        for coord in ("B2", "A1"):
+            await game._step_engine(coord)
+        _, public_state = game.get_current_states_for_observation()
+        rendered = public_state.board_text
+        # Header line, then rows 1-3 with O in A1 and X in B2
+        for fragment in ("  A B C", "1 O    ", "2   X  ", "3      "):
+            assert fragment in rendered
+    @pytest.mark.asyncio
+    async def test_serialization(self, simple_task_instance):
+        """A serialized engine restores to the same player, move count and board."""
+        game = TicTacToeEngine(simple_task_instance)
+        # Put some state into the engine first
+        for coord in ("B2", "A1"):
+            await game._step_engine(coord)
+        snapshot = await game._serialize_engine()
+        expected_fields = (("current_player", "X"), ("move_count", 2), ("last_move", "A1"))
+        for field_name, expected in expected_fields:
+            assert snapshot.engine_snapshot[field_name] == expected
+        # Round-trip through the snapshot
+        restored = await TicTacToeEngine._deserialize_engine(snapshot)
+        assert restored.current_player == game.current_player
+        assert restored.move_count == game.move_count
+        assert np.array_equal(restored.board, game.board)
+    @pytest.mark.asyncio
+    async def test_state_diff(self, simple_task_instance):
+        """State diffs report the fields that changed between two states."""
+        game = TicTacToeEngine(simple_task_instance)
+        private_before, public_before = await game._reset_engine()
+        private_after, public_after = await game._step_engine("B2")
+        # Public state: one move changes all four of these fields
+        public_changes = public_after.diff(public_before)
+        for field_name in ("board", "current_player", "last_move", "move_count"):
+            assert field_name in public_changes
+        # Private state: reward_last might be 0.0 in both states, so this first
+        # diff is computed but not asserted on
+        private_changes = private_after.diff(private_before)
+        # Force a difference to check that the diff picks it up
+        private_after.reward_last = 1.0
+        private_changes = private_after.diff(private_before)
+        assert "reward_last" in private_changes
+class TestRewardComponents:
+    """Tests for the win, draw and illegal-move reward components."""
+    @pytest.mark.asyncio
+    async def test_win_component(self):
+        """The win component scores +1.0 for an X win, -1.0 for an O win, else 0.0."""
+        win_component = TicTacToeWinComponent(player_mark="X")
+        # Finished game recorded as a win for X
+        finished = TicTacToePublicState(
+            board=np.zeros(9),
+            current_player="O",
+            last_move="A3",
+            winner="X",
+            move_count=5,
+            max_moves=9,
+            terminated=True,
+        )
+        assert await win_component.score(finished, "A3") == 1.0
+        # Re-score the same state with O as winner, then with no winner yet
+        for winner, expected in (("O", -1.0), (None, 0.0)):
+            finished.winner = winner
+            assert await win_component.score(finished, "A3") == expected
+    @pytest.mark.asyncio
+    async def test_draw_component(self):
+        """The draw component scores 0.0 for a draw and for a decided game."""
+        draw_component = TicTacToeDrawComponent()
+        full_board = TicTacToePublicState(
+            board=np.ones(9),
+            current_player="X",
+            last_move="C3",
+            winner="draw",
+            move_count=9,
+            max_moves=9,
+            terminated=True,
+        )
+        assert await draw_component.score(full_board, "C3") == 0.0
+        # Same state, but recorded as a win instead of a draw
+        full_board.winner = "X"
+        assert await draw_component.score(full_board, "C3") == 0.0
+    @pytest.mark.asyncio
+    async def test_illegal_move_component(self):
+        """The illegal-move component scores -1.0 once when flagged, then clears."""
+        penalty_component = TicTacToeIllegalMoveComponent()
+        in_progress = TicTacToePublicState(
+            board=np.zeros(9),
+            current_player="X",
+            last_move="A1",
+            winner=None,
+            move_count=1,
+            max_moves=9,
+            terminated=False,
+        )
+        # Nothing flagged: no penalty
+        assert await penalty_component.score(in_progress, "A1") == 0.0
+        # Flag an illegal attempt: penalty applies
+        penalty_component.illegal_move_attempted = True
+        assert await penalty_component.score(in_progress, "A1") == -1.0
+        assert not penalty_component.illegal_move_attempted  # Should reset
+class TestConstants:
+    """Checks on the engine's coordinate, win-pattern and player-mark tables."""
+    def test_coordinate_mappings(self):
+        """COORD_TO_IDX and IDX_TO_COORD are nine-entry inverses of each other."""
+        for table in (COORD_TO_IDX, IDX_TO_COORD):
+            assert len(table) == 9
+        # Every coordinate must map back to itself through its index
+        assert all(IDX_TO_COORD[idx] == coord for coord, idx in COORD_TO_IDX.items())
+        # Spot checks: first cell, centre, last cell
+        for coord, expected_idx in (("A1", 0), ("B2", 4), ("C3", 8)):
+            assert COORD_TO_IDX[coord] == expected_idx
+    def test_win_patterns(self):
+        """WIN_PATTERNS holds exactly the eight winning index triples."""
+        assert len(WIN_PATTERNS) == 8  # 3 rows, 3 cols, 2 diagonals
+        expected_lines = (
+            [0, 1, 2], [3, 4, 5], [6, 7, 8],  # rows
+            [0, 3, 6], [1, 4, 7], [2, 5, 8],  # columns
+            [0, 4, 8], [2, 4, 6],  # diagonals
+        )
+        for line in expected_lines:
+            assert line in WIN_PATTERNS
+    def test_player_mappings(self):
+        """Marks are X=1 and O=2, and marks 0/1/2 map back to " ", "X", "O"."""
+        for symbol, mark in (("X", 1), ("O", 2)):
+            assert PLAYER_MARKS[symbol] == mark
+        for mark, symbol in ((0, " "), (1, "X"), (2, "O")):
+            assert MARK_TO_PLAYER[mark] == symbol

synth-ai 0.1.9__py3-none-any.whl → 0.2.1.dev0__py3-none-any.whl

synth-ai 0.1.9__py3-none-any.whl → 0.2.1.dev0__py3-none-any.whl