thoughtflow 0.0.1__py3-none-any.whl → 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,118 @@
1
+ """
2
+ OpenAI adapter for ThoughtFlow.
3
+
4
+ Provides integration with OpenAI's API (GPT-4, GPT-3.5, etc.)
5
+
6
+ Requires: pip install thoughtflow[openai]
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from typing import TYPE_CHECKING, Any
12
+
13
+ from thoughtflow.adapters.base import Adapter, AdapterConfig, AdapterResponse
14
+
15
+ if TYPE_CHECKING:
16
+ from thoughtflow.message import MessageList
17
+
18
+
19
class OpenAIAdapter(Adapter):
    """ThoughtFlow adapter backed by OpenAI's API.

    Targets GPT-4, GPT-3.5-turbo, and other OpenAI models.

    Example:
        >>> adapter = OpenAIAdapter(api_key="sk-...")
        >>> response = adapter.complete([
        ...     {"role": "user", "content": "Hello!"}
        ... ])
        >>> print(response.content)

    Attributes:
        config: Adapter configuration.
        client: OpenAI client instance, built on first access.
    """

    DEFAULT_MODEL = "gpt-4o"

    def __init__(
        self,
        api_key: str | None = None,
        config: AdapterConfig | None = None,
        **kwargs: Any,
    ) -> None:
        """Store the configuration; the OpenAI client is not created here.

        Args:
            api_key: OpenAI API key. Can also be set via OPENAI_API_KEY env var.
            config: Full adapter configuration. When supplied, ``api_key``
                and ``**kwargs`` are not used.
            **kwargs: Additional options forwarded to ``AdapterConfig`` when
                no ``config`` is supplied.
        """
        effective = (
            config
            if config is not None
            else AdapterConfig(api_key=api_key, **kwargs)
        )
        super().__init__(effective)
        # Filled in by the ``client`` property on first use.
        self._client = None

    @property
    def client(self) -> Any:
        """Return the OpenAI client, constructing it on first access.

        Returns:
            OpenAI client instance.

        Raises:
            ImportError: If openai package is not installed.
        """
        if self._client is not None:
            return self._client

        # Imported here so the adapter module loads without the optional
        # ``openai`` dependency installed.
        try:
            from openai import OpenAI
        except ImportError as exc:
            raise ImportError(
                "OpenAI package not installed. "
                "Install with: pip install thoughtflow[openai]"
            ) from exc

        options = {
            "api_key": self.config.api_key,
            "base_url": self.config.base_url,
            "timeout": self.config.timeout,
            "max_retries": self.config.max_retries,
        }
        self._client = OpenAI(**options)
        return self._client

    def complete(
        self,
        messages: MessageList,
        params: dict[str, Any] | None = None,
    ) -> AdapterResponse:
        """Generate a completion using OpenAI's API (placeholder).

        Args:
            messages: List of message dicts.
            params: Optional parameters (model, temperature, max_tokens, etc.)

        Returns:
            AdapterResponse with the generated content.

        Raises:
            NotImplementedError: Always; the API call is not implemented yet.
        """
        # TODO: Implement actual OpenAI API call
        reason = (
            "OpenAIAdapter.complete() is not yet implemented. "
            "This is a placeholder for the ThoughtFlow alpha release."
        )
        raise NotImplementedError(reason)

    def get_capabilities(self) -> dict[str, Any]:
        """Report which optional features this adapter advertises.

        Returns:
            Dict mapping each feature name to ``True``.
        """
        features = ("streaming", "tool_calling", "vision", "json_mode", "seed")
        return dict.fromkeys(features, True)
thoughtflow/agent.py ADDED
@@ -0,0 +1,147 @@
1
+ """
2
+ Core Agent contract for ThoughtFlow.
3
+
4
+ The Agent is the fundamental primitive - something that can be called
5
+ with messages and parameters. Everything else is composition.
6
+
7
+ Example:
8
+ >>> adapter = OpenAIAdapter(api_key="...")
9
+ >>> agent = Agent(adapter)
10
+ >>> response = agent.call([{"role": "user", "content": "Hello"}])
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable
16
+
17
+ if TYPE_CHECKING:
18
+ from thoughtflow.adapters.base import Adapter
19
+ from thoughtflow.message import MessageList
20
+ from thoughtflow.trace.session import Session
21
+
22
+
23
@runtime_checkable
class AgentProtocol(Protocol):
    """Structural type describing the Agent contract.

    Any object exposing `call(msg_list, params)` counts as an Agent;
    no inheritance from this class is required.
    """

    def call(
        self,
        msg_list: MessageList,
        params: dict[str, Any] | None = None,
    ) -> str:
        """Invoke the agent on a conversation.

        Args:
            msg_list: List of messages in the conversation.
            params: Optional parameters (temperature, max_tokens, etc.)

        Returns:
            The agent's response as a string.
        """
        ...
45
+
46
+
47
class Agent:
    """Base Agent implementation.

    Wraps a provider adapter behind a single `call` entry point. This is
    the core primitive of ThoughtFlow - explicit, composable, testable.

    Attributes:
        adapter: The provider adapter to use for completions.

    Example:
        >>> from thoughtflow import Agent
        >>> from thoughtflow.adapters import OpenAIAdapter
        >>>
        >>> agent = Agent(OpenAIAdapter(api_key="..."))
        >>> response = agent.call([
        ...     {"role": "system", "content": "You are helpful."},
        ...     {"role": "user", "content": "Hello!"}
        ... ])
    """

    def __init__(self, adapter: Adapter) -> None:
        """Bind the agent to an adapter.

        Args:
            adapter: The provider adapter for making LLM calls.
        """
        self.adapter = adapter

    def call(
        self,
        msg_list: MessageList,
        params: dict[str, Any] | None = None,
        session: Session | None = None,
    ) -> str:
        """Call the agent with a message list (placeholder).

        Args:
            msg_list: List of message dicts with 'role' and 'content' keys.
            params: Optional parameters (temperature, max_tokens, seed, etc.)
            session: Optional Session for tracing the call.

        Returns:
            The model's response as a string.

        Raises:
            NotImplementedError: Always; the adapter call is not implemented yet.
        """
        # TODO: Implement actual adapter call
        # TODO: Add session tracing
        reason = (
            "Agent.call() is not yet implemented. "
            "This is a placeholder for the ThoughtFlow alpha release."
        )
        raise NotImplementedError(reason)
100
+
101
+
102
class TracedAgent:
    """Agent wrapper intended to trace every call.

    Pairs an Agent with a Session object so that inputs, outputs, timing,
    and metadata can be recorded for debugging, evaluation, and replay.

    Example:
        >>> from thoughtflow.trace import Session
        >>> session = Session()
        >>> traced = TracedAgent(agent, session)
        >>> response = traced.call(messages)
        >>> print(session.events)  # See all recorded events
    """

    def __init__(self, agent: Agent, session: Session) -> None:
        """Remember the wrapped agent and the session to record into.

        Args:
            agent: The underlying agent to wrap.
            session: The session to record traces to.
        """
        self.agent = agent
        self.session = session

    def call(
        self,
        msg_list: MessageList,
        params: dict[str, Any] | None = None,
    ) -> str:
        """Call the agent and trace the execution (placeholder).

        Args:
            msg_list: List of messages.
            params: Optional parameters.

        Returns:
            The agent's response.

        Raises:
            NotImplementedError: Always; the tracing wrapper is not implemented yet.
        """
        # TODO: Implement tracing wrapper
        reason = (
            "TracedAgent.call() is not yet implemented. "
            "This is a placeholder for the ThoughtFlow alpha release."
        )
        raise NotImplementedError(reason)
@@ -0,0 +1,34 @@
1
+ """
2
+ Evaluation utilities for ThoughtFlow.
3
+
4
+ Deterministic evaluation is a first-class constraint in ThoughtFlow.
5
+ This module provides utilities for:
6
+ - Record/replay workflows
7
+ - Golden tests (expected response shape/constraints)
8
+ - Prompt/version pinning
9
+ - Stable metrics extraction from traces
10
+
11
+ Example:
12
+ >>> from thoughtflow.eval import Replay, Harness
13
+ >>>
14
+ >>> # Record a session (session is a thoughtflow.trace Session)
15
+ >>> response = agent.call(messages, session=session)
16
+ >>> Replay.save(session, "golden.json")
17
+ >>>
18
+ >>> # Replay and compare
19
+ >>> replay = Replay.load("golden.json")
20
+ >>> result = replay.run(agent)
21
+ >>> assert result.success
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ from thoughtflow.eval.replay import Replay
27
+ from thoughtflow.eval.harness import Harness, TestCase, TestResult
28
+
29
+ __all__ = [
30
+ "Replay",
31
+ "Harness",
32
+ "TestCase",
33
+ "TestResult",
34
+ ]
@@ -0,0 +1,200 @@
1
+ """
2
+ Test harness for ThoughtFlow evaluations.
3
+
4
+ Provides structured test cases and evaluation harnesses for
5
+ systematic agent testing.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from dataclasses import dataclass, field
11
+ from typing import TYPE_CHECKING, Any, Callable
12
+
13
+ if TYPE_CHECKING:
14
+ from thoughtflow.agent import Agent
15
+ from thoughtflow.message import MessageList
16
+
17
+
18
@dataclass
class TestCase:
    """A single test case for agent evaluation.

    Attributes:
        name: Human-readable name for the test.
        messages: Input messages for the test.
        params: Optional call parameters.
        expected: Expected response (exact match or callable validator).
        tags: Tags for filtering/grouping tests.
        metadata: Additional test metadata.
    """

    # The ``Test`` prefix makes pytest try to collect this dataclass as a
    # test class wherever it is imported; opt out explicitly. This is a plain
    # class attribute (no annotation), so it is not a dataclass field.
    __test__ = False

    name: str
    messages: MessageList
    params: dict[str, Any] | None = None
    expected: str | Callable[[str], bool] | None = None
    tags: list[str] = field(default_factory=list)
    metadata: dict[str, Any] = field(default_factory=dict)

    def validate(self, response: str) -> bool:
        """Validate a response against expectations.

        Args:
            response: The agent's response.

        Returns:
            True when no expectation is set, when the callable validator
            returns a truthy value, or when the response equals the expected
            string; False otherwise.
        """
        if self.expected is None:
            return True
        if callable(self.expected):
            # Coerce so validators returning truthy/falsy non-bool values
            # (e.g. a regex match or None) still honour the ``bool`` contract.
            return bool(self.expected(response))
        return response == self.expected
52
+
53
+
54
@dataclass
class TestResult:
    """Result of running a test case.

    Attributes:
        test_case: The test case that was run.
        passed: Whether the test passed.
        response: The agent's response.
        error: Error message if the test failed.
        duration_ms: How long the test took.
        metadata: Additional result metadata.
    """

    # The ``Test`` prefix makes pytest try to collect this dataclass as a
    # test class wherever it is imported; opt out explicitly. This is a plain
    # class attribute (no annotation), so it is not a dataclass field.
    __test__ = False

    test_case: TestCase
    passed: bool
    response: str | None = None
    error: str | None = None
    duration_ms: int | None = None
    metadata: dict[str, Any] = field(default_factory=dict)
73
+
74
+
75
class Harness:
    """Container for an evaluation suite.

    A Harness lets you:
    - Define test cases
    - Run them against agents
    - Collect and analyze results

    Example:
        >>> harness = Harness()
        >>>
        >>> # Add test cases
        >>> harness.add(TestCase(
        ...     name="greeting",
        ...     messages=[{"role": "user", "content": "Hello!"}],
        ...     expected=lambda r: "hello" in r.lower()
        ... ))
        >>>
        >>> # Run all tests
        >>> results = harness.run(agent)
        >>>
        >>> # Check results
        >>> print(f"Passed: {results.passed_count}/{results.total_count}")
    """

    def __init__(self) -> None:
        """Start with no registered test cases."""
        self.test_cases: list[TestCase] = []

    def add(self, test_case: TestCase) -> None:
        """Register one test case.

        Args:
            test_case: The test case to add.
        """
        self.test_cases.append(test_case)

    def add_many(self, test_cases: list[TestCase]) -> None:
        """Register several test cases, keeping their order.

        Args:
            test_cases: List of test cases to add.
        """
        self.test_cases.extend(test_cases)

    def filter_by_tags(self, tags: list[str]) -> list[TestCase]:
        """Select the registered test cases carrying at least one given tag.

        Args:
            tags: Tags to filter by.

        Returns:
            Test cases matching any of the specified tags, in registration
            order.
        """
        matched: list[TestCase] = []
        for case in self.test_cases:
            for tag in tags:
                if tag in case.tags:
                    matched.append(case)
                    break  # one matching tag is enough
        return matched

    def run(
        self,
        agent: Agent,
        filter_tags: list[str] | None = None,
    ) -> HarnessResults:
        """Run all test cases against an agent (placeholder).

        Args:
            agent: The agent to test.
            filter_tags: Optional tags to filter which tests to run.

        Returns:
            HarnessResults with all test results.

        Raises:
            NotImplementedError: Always; test execution is not implemented yet.
        """
        # TODO: Implement test execution
        reason = (
            "Harness.run() is not yet implemented. "
            "This is a placeholder for the ThoughtFlow alpha release."
        )
        raise NotImplementedError(reason)
153
+
154
+
155
@dataclass
class HarnessResults:
    """Results from running a test harness.

    Attributes:
        results: Individual test results.
        metadata: Harness-level metadata.
    """

    results: list[TestResult] = field(default_factory=list)
    metadata: dict[str, Any] = field(default_factory=dict)

    @property
    def total_count(self) -> int:
        """Total number of tests run."""
        return len(self.results)

    @property
    def passed_count(self) -> int:
        """Number of tests that passed."""
        return sum(1 for r in self.results if r.passed)

    @property
    def failed_count(self) -> int:
        """Number of tests that did not pass."""
        return self.total_count - self.passed_count

    @property
    def pass_rate(self) -> float:
        """Fraction of tests that passed, from 0.0 to 1.0 (not a percentage).

        Returns 0.0 when no tests were run, avoiding a division by zero.
        """
        if self.total_count == 0:
            return 0.0
        return self.passed_count / self.total_count

    def summary(self) -> dict[str, Any]:
        """Get a summary of the results.

        Returns:
            Dict with the keys ``total``, ``passed``, ``failed`` and
            ``pass_rate`` (a 0.0-1.0 fraction).
        """
        return {
            "total": self.total_count,
            "passed": self.passed_count,
            "failed": self.failed_count,
            "pass_rate": self.pass_rate,
        }
@@ -0,0 +1,137 @@
1
+ """
2
+ Record/replay functionality for ThoughtFlow.
3
+
4
+ Replay enables deterministic testing by recording agent runs and
5
+ replaying them with mocked responses.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ from dataclasses import dataclass, field
12
+ from pathlib import Path
13
+ from typing import TYPE_CHECKING, Any
14
+
15
+ if TYPE_CHECKING:
16
+ from thoughtflow.agent import Agent
17
+ from thoughtflow.trace.session import Session
18
+
19
+
20
@dataclass
class ReplayResult:
    """Outcome of replaying a recorded session.

    Attributes:
        success: Whether the replay succeeded.
        original_response: The recorded response.
        replayed_response: The response from the replay.
        differences: List of differences found.
        metadata: Additional result metadata.
    """

    success: bool
    original_response: str | None = None
    replayed_response: str | None = None
    # Factories give every instance its own list / dict.
    differences: list[str] = field(default_factory=list)
    metadata: dict[str, Any] = field(default_factory=dict)
37
+
38
+
39
class Replay:
    """Replay recorded sessions for testing.

    Replay allows you to:
    - Record agent runs to files
    - Replay them with mocked model responses
    - Compare outputs for regression testing
    - Test without hitting live APIs

    Example:
        >>> # Save a session for replay
        >>> session = Session()
        >>> response = agent.call(messages, session=session)
        >>> Replay.save(session, "test_case.json")
        >>>
        >>> # Later: replay the session
        >>> replay = Replay.load("test_case.json")
        >>> result = replay.run(agent)
        >>>
        >>> assert result.success
        >>> assert result.replayed_response == result.original_response
    """

    def __init__(self, session_data: dict[str, Any]) -> None:
        """Initialize a Replay from session data.

        Args:
            session_data: Recorded session data. Events are read from its
                ``"events"`` list; each event is a dict with an
                ``"event_type"`` and an optional ``"data"`` payload.
        """
        self.session_data = session_data
        self._inputs = self._extract_inputs()
        self._expected_outputs = self._extract_outputs()

    def _payloads(self, event_type: str) -> list[dict[str, Any]]:
        """Return the ``data`` payload of each recorded event of a given type.

        A missing or null ``events`` list yields no payloads, and a missing
        or null ``data`` entry is treated as an empty payload.

        Args:
            event_type: Value of ``"event_type"`` to select.

        Returns:
            Payload dicts in recorded order.
        """
        events = self.session_data.get("events") or []
        return [
            event.get("data") or {}
            for event in events
            if event.get("event_type") == event_type
        ]

    def _extract_inputs(self) -> list[list[dict[str, Any]]]:
        """Extract input messages from session data.

        Returns:
            One message list per ``call_start`` event (empty list when the
            event carries no ``messages``).
        """
        return [
            payload.get("messages", [])
            for payload in self._payloads("call_start")
        ]

    def _extract_outputs(self) -> list[str]:
        """Extract expected outputs from session data.

        Returns:
            One response per ``call_end`` event (empty string when the event
            carries no ``response``).
        """
        return [
            payload.get("response", "")
            for payload in self._payloads("call_end")
        ]

    def run(self, agent: Agent) -> ReplayResult:
        """Run the replay against an agent (placeholder).

        Args:
            agent: The agent to test.

        Returns:
            ReplayResult with comparison data.

        Raises:
            NotImplementedError: Always; replay is not implemented yet.
        """
        # TODO: Implement replay with mocked adapter responses
        raise NotImplementedError(
            "Replay.run() is not yet implemented. "
            "This is a placeholder for the ThoughtFlow alpha release."
        )

    @classmethod
    def load(cls, path: str | Path) -> Replay:
        """Load a replay from a JSON file.

        Args:
            path: Path to the replay file.

        Returns:
            Replay instance.
        """
        # Decode as UTF-8 explicitly rather than with the platform default
        # encoding, so recordings are portable between machines.
        text = Path(path).read_text(encoding="utf-8")
        return cls(json.loads(text))

    @staticmethod
    def save(session: Session, path: str | Path) -> None:
        """Save a session for replay.

        Args:
            session: The session to save; its ``to_dict()`` result is
                serialized as indented JSON.
            path: Path to save to.
        """
        payload = json.dumps(session.to_dict(), indent=2)
        # Explicit UTF-8 keeps the file independent of the platform default.
        Path(path).write_text(payload, encoding="utf-8")
@@ -0,0 +1,27 @@
1
+ """
2
+ Memory hooks for ThoughtFlow.
3
+
4
+ Memory integration is handled as a service boundary, not a magical built-in.
5
+ Memory is optional, pluggable, explicit at call-time, and recordable in traces.
6
+
7
+ Example:
8
+ >>> from thoughtflow.memory import MemoryHook
9
+ >>>
10
+ >>> class VectorMemory(MemoryHook):
11
+ ... def retrieve(self, query, k=5):
12
+ ... # Retrieve relevant memories
13
+ ... return memories
14
+ ...
15
+ ... def store(self, content, metadata=None):
16
+ ... # Store new memory
17
+ ... pass
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ from thoughtflow.memory.base import MemoryHook, MemoryEvent
23
+
24
+ __all__ = [
25
+ "MemoryHook",
26
+ "MemoryEvent",
27
+ ]