PyPI - thoughtflow - Versions diffs - 0.0.1__py3-none-any.whl → 0.0.2__py3-none-any.whl - Mend

thoughtflow 0.0.1py3-none-any.whl → 0.0.2py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

thoughtflow/__init__.py +54 -5
thoughtflow/_util.py +108 -0
thoughtflow/adapters/__init__.py +43 -0
thoughtflow/adapters/anthropic.py +119 -0
thoughtflow/adapters/base.py +140 -0
thoughtflow/adapters/local.py +133 -0
thoughtflow/adapters/openai.py +118 -0
thoughtflow/agent.py +147 -0
thoughtflow/eval/__init__.py +34 -0
thoughtflow/eval/harness.py +200 -0
thoughtflow/eval/replay.py +137 -0
thoughtflow/memory/__init__.py +27 -0
thoughtflow/memory/base.py +142 -0
thoughtflow/message.py +140 -0
thoughtflow/py.typed +2 -0
thoughtflow/tools/__init__.py +27 -0
thoughtflow/tools/base.py +145 -0
thoughtflow/tools/registry.py +122 -0
thoughtflow/trace/__init__.py +34 -0
thoughtflow/trace/events.py +183 -0
thoughtflow/trace/schema.py +111 -0
thoughtflow/trace/session.py +141 -0
thoughtflow-0.0.2.dist-info/METADATA +215 -0
thoughtflow-0.0.2.dist-info/RECORD +26 -0
{thoughtflow-0.0.1.dist-info → thoughtflow-0.0.2.dist-info}/WHEEL +1 -2
{thoughtflow-0.0.1.dist-info → thoughtflow-0.0.2.dist-info/licenses}/LICENSE +1 -1
thoughtflow/jtools1.py +0 -25
thoughtflow/jtools2.py +0 -27
thoughtflow-0.0.1.dist-info/METADATA +0 -17
thoughtflow-0.0.1.dist-info/RECORD +0 -8
thoughtflow-0.0.1.dist-info/top_level.txt +0 -1

thoughtflow/adapters/openai.py ADDED Viewed

@@ -0,0 +1,118 @@
+"""
+OpenAI adapter for ThoughtFlow.
+Provides integration with OpenAI's API (GPT-4, GPT-3.5, etc.)
+Requires: pip install thoughtflow[openai]
+"""
+from __future__ import annotations
+from typing import TYPE_CHECKING, Any
+from thoughtflow.adapters.base import Adapter, AdapterConfig, AdapterResponse
+if TYPE_CHECKING:
+    from thoughtflow.message import MessageList
+class OpenAIAdapter(Adapter):
+    """ThoughtFlow adapter backed by the OpenAI Python SDK.
+    The SDK is an optional dependency: it is imported only when ``client`` is
+    first accessed, so constructing the adapter never requires it.
+    Example:
+        >>> adapter = OpenAIAdapter(api_key="sk-...")
+        >>> response = adapter.complete([
+        ...     {"role": "user", "content": "Hello!"}
+        ... ])
+        >>> print(response.content)
+    Note:
+        ``complete()`` is still a placeholder that raises
+        ``NotImplementedError``; the example shows the intended usage.
+    Attributes:
+        config: Adapter configuration (read by ``client``; expected to be
+            stored by ``Adapter.__init__`` - confirm against the base class).
+        client: OpenAI client instance (created lazily and then cached).
+    """
+    # Default model name; not referenced anywhere else inside this class.
+    DEFAULT_MODEL = "gpt-4o"
+    def __init__(
+        self,
+        api_key: str | None = None,
+        config: AdapterConfig | None = None,
+        **kwargs: Any,
+    ) -> None:
+        """Set up the adapter without touching the OpenAI SDK.
+        Args:
+            api_key: OpenAI API key. Only used when ``config`` is omitted.
+                The original documentation states it can also be supplied
+                via the OPENAI_API_KEY env var (not handled in this class).
+            config: Full adapter configuration. When given it is used as-is
+                and both ``api_key`` and ``**kwargs`` are ignored.
+            **kwargs: Additional options forwarded to ``AdapterConfig`` when
+                ``config`` is omitted.
+        """
+        effective = (
+            config
+            if config is not None
+            else AdapterConfig(api_key=api_key, **kwargs)
+        )
+        super().__init__(effective)
+        # Populated on first access of the ``client`` property.
+        self._client = None
+    @property
+    def client(self) -> Any:
+        """Return the OpenAI client, building it on first use.
+        Returns:
+            OpenAI client instance (the same object on every later access).
+        Raises:
+            ImportError: If openai package is not installed.
+        """
+        if self._client is not None:
+            return self._client
+        try:
+            from openai import OpenAI
+        except ImportError as exc:
+            install_hint = (
+                "OpenAI package not installed. "
+                "Install with: pip install thoughtflow[openai]"
+            )
+            raise ImportError(install_hint) from exc
+        cfg = self.config
+        self._client = OpenAI(
+            api_key=cfg.api_key,
+            base_url=cfg.base_url,
+            timeout=cfg.timeout,
+            max_retries=cfg.max_retries,
+        )
+        return self._client
+    def complete(
+        self,
+        messages: MessageList,
+        params: dict[str, Any] | None = None,
+    ) -> AdapterResponse:
+        """Generate a completion using OpenAI's API (not implemented yet).
+        Args:
+            messages: List of message dicts.
+            params: Optional parameters (model, temperature, max_tokens, etc.)
+        Returns:
+            AdapterResponse with the generated content (once implemented).
+        Raises:
+            NotImplementedError: Always; this is a placeholder implementation.
+        """
+        # TODO: Implement actual OpenAI API call
+        message = (
+            "OpenAIAdapter.complete() is not yet implemented. "
+            "This is a placeholder for the ThoughtFlow alpha release."
+        )
+        raise NotImplementedError(message)
+    def get_capabilities(self) -> dict[str, Any]:
+        """Describe the features this adapter advertises.
+        Returns:
+            A new dict on every call mapping each feature name to ``True``.
+        """
+        advertised = ("streaming", "tool_calling", "vision", "json_mode", "seed")
+        return dict.fromkeys(advertised, True)

thoughtflow/agent.py ADDED Viewed

@@ -0,0 +1,147 @@
+"""
+Core Agent contract for ThoughtFlow.
+The Agent is the fundamental primitive - something that can be called
+with messages and parameters. Everything else is composition.
+Example:
+    >>> adapter = OpenAIAdapter(api_key="...")
+    >>> agent = Agent(adapter)
+    >>> response = agent.call([{"role": "user", "content": "Hello"}])
+"""
+from __future__ import annotations
+from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable
+if TYPE_CHECKING:
+    from thoughtflow.adapters.base import Adapter
+    from thoughtflow.message import MessageList
+    from thoughtflow.trace.session import Session
+@runtime_checkable
+class AgentProtocol(Protocol):
+    """Protocol defining the Agent contract.
+    Any object implementing `call(msg_list, params)` is an Agent.
+    Note:
+        Because the protocol is ``@runtime_checkable``, it can be used with
+        ``isinstance()``. That runtime check only verifies that a ``call``
+        attribute exists; it does not validate the signature or return type.
+    """
+    def call(
+        self,
+        msg_list: MessageList,
+        params: dict[str, Any] | None = None,
+    ) -> str:
+        """Call the agent with a message list.
+        Args:
+            msg_list: List of messages in the conversation.
+            params: Optional parameters (temperature, max_tokens, etc.)
+        Returns:
+            The agent's response as a string.
+        """
+        # Protocol member: intentionally has no implementation.
+        ...
+class Agent:
+    """Base Agent implementation.
+    An Agent wraps an adapter and provides a simple `call` interface.
+    This is the core primitive of ThoughtFlow - explicit, composable, testable.
+    Note:
+        In this release `call` is a placeholder that always raises
+        ``NotImplementedError``; the example below shows the intended usage.
+    Attributes:
+        adapter: The provider adapter to use for completions.
+    Example:
+        >>> from thoughtflow import Agent
+        >>> from thoughtflow.adapters import OpenAIAdapter
+        >>>
+        >>> agent = Agent(OpenAIAdapter(api_key="..."))
+        >>> response = agent.call([
+        ...     {"role": "system", "content": "You are helpful."},
+        ...     {"role": "user", "content": "Hello!"}
+        ... ])
+    """
+    def __init__(self, adapter: Adapter) -> None:
+        """Initialize the Agent with an adapter.
+        Args:
+            adapter: The provider adapter for making LLM calls. It is stored
+                as-is; no validation is performed here.
+        """
+        self.adapter = adapter
+    def call(
+        self,
+        msg_list: MessageList,
+        params: dict[str, Any] | None = None,
+        session: Session | None = None,
+    ) -> str:
+        """Call the agent with a message list.
+        Args:
+            msg_list: List of message dicts with 'role' and 'content' keys.
+            params: Optional parameters (temperature, max_tokens, seed, etc.)
+            session: Optional Session for tracing the call. Accepted for
+                forward compatibility but not used yet (see the TODOs below).
+        Returns:
+            The model's response as a string (once implemented).
+        Raises:
+            NotImplementedError: Always; this is a placeholder implementation
+                and none of the arguments are inspected.
+        """
+        # TODO: Implement actual adapter call
+        # TODO: Add session tracing
+        raise NotImplementedError(
+            "Agent.call() is not yet implemented. "
+            "This is a placeholder for the ThoughtFlow alpha release."
+        )
+class TracedAgent:
+    """Agent wrapper that automatically traces all calls.
+    Wraps any Agent and records inputs, outputs, timing, and metadata
+    to a Session object for debugging, evaluation, and replay.
+    Note:
+        The recording described above is the intended design. In this
+        release `call` always raises ``NotImplementedError`` and nothing is
+        written to the session.
+    Example:
+        >>> from thoughtflow.trace import Session
+        >>> session = Session()
+        >>> traced = TracedAgent(agent, session)
+        >>> response = traced.call(messages)
+        >>> print(session.events)  # See all recorded events
+    """
+    def __init__(self, agent: Agent, session: Session) -> None:
+        """Initialize TracedAgent.
+        Args:
+            agent: The underlying agent to wrap.
+            session: The session to record traces to.
+        """
+        # Both collaborators are stored as-is; nothing is validated here.
+        self.agent = agent
+        self.session = session
+    def call(
+        self,
+        msg_list: MessageList,
+        params: dict[str, Any] | None = None,
+    ) -> str:
+        """Call the agent and trace the execution.
+        Args:
+            msg_list: List of messages.
+            params: Optional parameters.
+        Returns:
+            The agent's response (once implemented).
+        Raises:
+            NotImplementedError: Always; this is a placeholder implementation
+                and the wrapped agent is not invoked.
+        """
+        # TODO: Implement tracing wrapper
+        raise NotImplementedError(
+            "TracedAgent.call() is not yet implemented. "
+            "This is a placeholder for the ThoughtFlow alpha release."
+        )

thoughtflow/eval/__init__.py ADDED Viewed

@@ -0,0 +1,34 @@
+"""
+Evaluation utilities for ThoughtFlow.
+Deterministic evaluation is a first-class constraint in ThoughtFlow.
+This module provides utilities for:
+- Record/replay workflows
+- Golden tests (expected response shape/constraints)
+- Prompt/version pinning
+- Stable metrics extraction from traces
+Example:
+    >>> from thoughtflow.eval import Replay, Harness
+    >>> from thoughtflow.trace import Session
+    >>>
+    >>> # Record a session
+    >>> session = Session()
+    >>> response = agent.call(messages, session=session)
+    >>> Replay.save(session, "golden.json")
+    >>>
+    >>> # Replay and compare
+    >>> replay = Replay.load("golden.json")
+    >>> result = replay.run(agent)
+    >>> assert result.success
+"""
+from __future__ import annotations
+from thoughtflow.eval.replay import Replay
+from thoughtflow.eval.harness import Harness, TestCase, TestResult
+__all__ = [
+    "Replay",
+    "Harness",
+    "TestCase",
+    "TestResult",
+]

thoughtflow/eval/harness.py ADDED Viewed

@@ -0,0 +1,200 @@
+"""
+Test harness for ThoughtFlow evaluations.
+Provides structured test cases and evaluation harnesses for
+systematic agent testing.
+"""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from typing import TYPE_CHECKING, Any, Callable
+if TYPE_CHECKING:
+    from thoughtflow.agent import Agent
+    from thoughtflow.message import MessageList
+@dataclass
+class TestCase:
+    """A single test case for agent evaluation.
+    Attributes:
+        name: Human-readable name for the test.
+        messages: Input messages for the test.
+        params: Optional call parameters.
+        expected: Expected response. ``None`` accepts any response, a string
+            requires an exact match, and a callable is used as a validator.
+        tags: Tags for filtering/grouping tests.
+        metadata: Additional test metadata.
+    """
+    # The class name matches pytest's default ``Test*`` collection pattern.
+    # Mark it as "not a test" so importing it into a test module does not
+    # trigger a collection warning. Unannotated, so it is not a dataclass field.
+    __test__ = False
+    name: str
+    messages: MessageList
+    params: dict[str, Any] | None = None
+    expected: str | Callable[[str], bool] | None = None
+    tags: list[str] = field(default_factory=list)
+    metadata: dict[str, Any] = field(default_factory=dict)
+    def validate(self, response: str) -> bool:
+        """Validate a response against expectations.
+        Args:
+            response: The agent's response.
+        Returns:
+            True if valid, False otherwise. Always a real ``bool``, even when
+            a validator callable returns some other truthy/falsy value.
+        """
+        if self.expected is None:
+            return True
+        if callable(self.expected):
+            # Validators such as ``re.search`` wrappers return non-bool
+            # values; coerce so the annotated return type is honoured.
+            return bool(self.expected(response))
+        return response == self.expected
+@dataclass
+class TestResult:
+    """Result of running a test case.
+    Attributes:
+        test_case: The test case that was run.
+        passed: Whether the test passed.
+        response: The agent's response.
+        error: Error message if the test failed.
+        duration_ms: How long the test took.
+        metadata: Additional result metadata.
+    """
+    # The class name matches pytest's default ``Test*`` collection pattern.
+    # Mark it as "not a test" so importing it into a test module does not
+    # trigger a collection warning. Unannotated, so it is not a dataclass field.
+    __test__ = False
+    test_case: TestCase
+    passed: bool
+    response: str | None = None
+    error: str | None = None
+    duration_ms: int | None = None
+    metadata: dict[str, Any] = field(default_factory=dict)
+class Harness:
+    """Container for evaluation test cases.
+    The Harness provides a structured way to:
+    - Define test cases
+    - Run them against agents
+    - Collect and analyze results
+    Only collecting and tag-filtering test cases is functional so far;
+    ``run`` is a placeholder that raises ``NotImplementedError``.
+    Example:
+        >>> harness = Harness()
+        >>>
+        >>> # Add test cases
+        >>> harness.add(TestCase(
+        ...     name="greeting",
+        ...     messages=[{"role": "user", "content": "Hello!"}],
+        ...     expected=lambda r: "hello" in r.lower()
+        ... ))
+        >>>
+        >>> # Run all tests
+        >>> results = harness.run(agent)
+        >>>
+        >>> # Check results
+        >>> print(f"Passed: {results.passed_count}/{results.total_count}")
+    """
+    def __init__(self) -> None:
+        """Create a harness with no registered test cases."""
+        self.test_cases: list[TestCase] = []
+    def add(self, test_case: TestCase) -> None:
+        """Register a single test case.
+        Args:
+            test_case: The test case to append to ``test_cases``.
+        """
+        registered = self.test_cases
+        registered.append(test_case)
+    def add_many(self, test_cases: list[TestCase]) -> None:
+        """Register several test cases, preserving their order.
+        Args:
+            test_cases: Test cases to append to ``test_cases``.
+        """
+        registered = self.test_cases
+        registered.extend(test_cases)
+    def filter_by_tags(self, tags: list[str]) -> list[TestCase]:
+        """Select the registered test cases carrying at least one given tag.
+        Args:
+            tags: Tags to filter by.
+        Returns:
+            Test cases matching any of the specified tags, in registration
+            order. Empty when ``tags`` is empty.
+        """
+        selected = []
+        for candidate in self.test_cases:
+            for tag in tags:
+                if tag in candidate.tags:
+                    selected.append(candidate)
+                    # One matching tag is enough; move to the next case.
+                    break
+        return selected
+    def run(
+        self,
+        agent: Agent,
+        filter_tags: list[str] | None = None,
+    ) -> HarnessResults:
+        """Run the test cases against an agent (not implemented yet).
+        Args:
+            agent: The agent to test.
+            filter_tags: Optional tags to filter which tests to run.
+        Returns:
+            HarnessResults with all test results (once implemented).
+        Raises:
+            NotImplementedError: Always; this is a placeholder implementation.
+        """
+        # TODO: Implement test execution
+        message = (
+            "Harness.run() is not yet implemented. "
+            "This is a placeholder for the ThoughtFlow alpha release."
+        )
+        raise NotImplementedError(message)
+@dataclass
+class HarnessResults:
+    """Results from running a test harness, with derived statistics.
+    Attributes:
+        results: Individual test results.
+        metadata: Harness-level metadata.
+    """
+    results: list[TestResult] = field(default_factory=list)
+    metadata: dict[str, Any] = field(default_factory=dict)
+    @property
+    def total_count(self) -> int:
+        """Number of results held in ``results``."""
+        return len(self.results)
+    @property
+    def passed_count(self) -> int:
+        """Number of results whose ``passed`` flag is truthy."""
+        return len([outcome for outcome in self.results if outcome.passed])
+    @property
+    def failed_count(self) -> int:
+        """Number of results that did not pass (total minus passed)."""
+        return self.total_count - self.passed_count
+    @property
+    def pass_rate(self) -> float:
+        """Fraction of tests that passed, from 0.0 to 1.0 (not a percentage).
+        Returns 0.0 when there are no results, avoiding a division by zero.
+        """
+        total = self.total_count
+        if not total:
+            return 0.0
+        return self.passed_count / total
+    def summary(self) -> dict[str, Any]:
+        """Get a summary of the results.
+        Returns:
+            Dict with the keys ``total``, ``passed``, ``failed`` and
+            ``pass_rate``, in that order.
+        """
+        return dict(
+            total=self.total_count,
+            passed=self.passed_count,
+            failed=self.failed_count,
+            pass_rate=self.pass_rate,
+        )

thoughtflow/eval/replay.py ADDED Viewed

@@ -0,0 +1,137 @@
+"""
+Record/replay functionality for ThoughtFlow.
+Replay enables deterministic testing by recording agent runs and
+replaying them with mocked responses.
+"""
+from __future__ import annotations
+import json
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+if TYPE_CHECKING:
+    from thoughtflow.agent import Agent
+    from thoughtflow.trace.session import Session
+@dataclass
+class ReplayResult:
+    """Result of a replay run.
+    This is the declared return type of ``Replay.run``. Only ``success`` is
+    required; every other field has a default.
+    Attributes:
+        success: Whether the replay succeeded.
+        original_response: The recorded response.
+        replayed_response: The response from the replay.
+        differences: List of differences found.
+        metadata: Additional result metadata.
+    """
+    success: bool
+    original_response: str | None = None
+    replayed_response: str | None = None
+    # default_factory gives every instance its own list/dict (no sharing).
+    differences: list[str] = field(default_factory=list)
+    metadata: dict[str, Any] = field(default_factory=dict)
+class Replay:
+    """Replay recorded sessions for testing.
+    Replay allows you to:
+    - Record agent runs to files
+    - Replay them with mocked model responses
+    - Compare outputs for regression testing
+    - Test without hitting live APIs
+    Saving, loading and extracting the recorded inputs/outputs work today;
+    ``run`` is still a placeholder that raises ``NotImplementedError``.
+    Example:
+        >>> # Save a session for replay
+        >>> session = Session()
+        >>> response = agent.call(messages, session=session)
+        >>> Replay.save(session, "test_case.json")
+        >>>
+        >>> # Later: replay the session
+        >>> replay = Replay.load("test_case.json")
+        >>> result = replay.run(agent)
+        >>>
+        >>> assert result.success
+        >>> assert result.replayed_response == result.original_response
+    """
+    def __init__(self, session_data: dict[str, Any]) -> None:
+        """Initialize a Replay from session data.
+        Args:
+            session_data: Recorded session data. Events are read from its
+                ``"events"`` list; a missing key is treated as no events.
+        """
+        self.session_data = session_data
+        self._inputs = self._extract_inputs()
+        self._expected_outputs = self._extract_outputs()
+    def _extract_inputs(self) -> list[list[dict[str, Any]]]:
+        """Extract the input messages of every recorded call.
+        Returns:
+            One entry per ``call_start`` event, in recording order. Each
+            entry is that event's ``data["messages"]`` value (an empty list
+            when absent).
+        """
+        return [
+            event.get("data", {}).get("messages", [])
+            for event in self.session_data.get("events", [])
+            if event.get("event_type") == "call_start"
+        ]
+    def _extract_outputs(self) -> list[str]:
+        """Extract the expected response of every recorded call.
+        Returns:
+            One entry per ``call_end`` event, in recording order. Each entry
+            is that event's ``data["response"]`` value (an empty string when
+            absent).
+        """
+        return [
+            event.get("data", {}).get("response", "")
+            for event in self.session_data.get("events", [])
+            if event.get("event_type") == "call_end"
+        ]
+    def run(self, agent: Agent) -> ReplayResult:
+        """Run the replay against an agent (not implemented yet).
+        Args:
+            agent: The agent to test.
+        Returns:
+            ReplayResult with comparison data (once implemented).
+        Raises:
+            NotImplementedError: Always; this is a placeholder implementation.
+        """
+        # TODO: Implement replay with mocked adapter responses
+        raise NotImplementedError(
+            "Replay.run() is not yet implemented. "
+            "This is a placeholder for the ThoughtFlow alpha release."
+        )
+    @classmethod
+    def load(cls, path: str | Path) -> Replay:
+        """Load a replay from a JSON file.
+        Args:
+            path: Path to the replay file (UTF-8 encoded JSON).
+        Returns:
+            Replay instance.
+        """
+        # Decode explicitly as UTF-8 rather than relying on the platform's
+        # locale encoding, so files load identically on every system.
+        raw = Path(path).read_text(encoding="utf-8")
+        return cls(json.loads(raw))
+    @staticmethod
+    def save(session: Session, path: str | Path) -> None:
+        """Save a session for replay.
+        Args:
+            session: The session to save; its ``to_dict()`` result is
+                serialized as indented JSON.
+            path: Path to save to (written as UTF-8).
+        """
+        serialized = json.dumps(session.to_dict(), indent=2)
+        # Same explicit encoding as ``load`` so the round trip is symmetric.
+        Path(path).write_text(serialized, encoding="utf-8")

thoughtflow/memory/__init__.py ADDED Viewed

@@ -0,0 +1,27 @@
+"""
+Memory hooks for ThoughtFlow.
+Memory integration is handled as a service boundary, not a magical built-in.
+Memory is optional, pluggable, explicit at call-time, and recordable in traces.
+Example:
+    >>> from thoughtflow.memory import MemoryHook
+    >>>
+    >>> class VectorMemory(MemoryHook):
+    ...     def retrieve(self, query, k=5):
+    ...         # Retrieve relevant memories
+    ...         return memories
+    ...
+    ...     def store(self, content, metadata=None):
+    ...         # Store new memory
+    ...         pass
+"""
+from __future__ import annotations
+from thoughtflow.memory.base import MemoryHook, MemoryEvent
+__all__ = [
+    "MemoryHook",
+    "MemoryEvent",
+]

thoughtflow 0.0.1__py3-none-any.whl → 0.0.2__py3-none-any.whl

thoughtflow 0.0.1py3-none-any.whl → 0.0.2py3-none-any.whl