thoughtflow 0.0.1__py3-none-any.whl → 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
thoughtflow/action.py ADDED
@@ -0,0 +1,357 @@
1
+ """
2
+ ACTION class for ThoughtFlow.
3
+
4
+ The ACTION class encapsulates an external or internal operation that can be invoked
5
+ within a Thoughtflow agent workflow.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import json
11
+
12
+ from thoughtflow._util import event_stamp
13
+
14
+
15
class ACTION:
    """
    A named, reusable operation that can be invoked within a Thoughtflow agent workflow.

    An ACTION wraps a single callable (a tool call, API request, or plain function).
    It is defined once and can be executed many times with different parameters.
    Each execution merges the default config with call-specific keyword arguments,
    runs the callable, stores the outcome on the memory object via ``set_var``,
    writes a JSON-formatted log line via ``add_log``, and appends a timing record
    to ``execution_history``.

    Attributes:
        name (str): Identifier for this action, used for logging and storing results.
        id (str): Unique identifier for this action instance (event_stamp).
        fn (callable): The function to execute when this action is called.
        config (dict): Default configuration parameters that will be passed to the function.
        result_key (str): Key where results are stored in memory (defaults to "{name}_result").
        description (str): Human-readable description of what this action does.
        last_result (Any): The most recent successful result from executing this action.
        last_error (Exception): The error from the most recent execution, or None if it succeeded.
        execution_count (int): Number of times this action has been executed.
        execution_history (list): One dict per execution with stamp, memory_id,
            duration_ms, success and error.

    Methods:
        __call__(memory, **kwargs): Execute the action; returns the memory object.
        get_last_result(): Return the most recent result.
        was_successful(): Return True if the last execution succeeded.
        reset_stats(): Reset count, last_result, last_error and execution_history.
        copy(): Return a copy with a new ID and reset statistics.
        to_dict(): Return a serializable dictionary representation.
        from_dict(data, fn_registry): Rebuild an ACTION from a dictionary representation.

    Example Usage:
        # Define a web search action
        def search_web(memory, query, max_results=3):
            # Implementation of web search
            results = web_api.search(query, limit=max_results)
            return {"status": "success", "hits": results}

        search_action = ACTION(
            name="web_search",
            fn=search_web,
            config={"max_results": 5},
            description="Searches the web for information"
        )

        # Execute the action
        memory = MEMORY()
        memory = search_action(memory, query="thoughtflow framework")

        # Access results
        result = memory.get_var("web_search_result")

        # Check execution history
        print(search_action.execution_history[-1]['duration_ms'])  # Execution time
        print(search_action.execution_history[-1]['success'])      # True/False

    Design Principles:
        1. Explicit and inspectable operations with consistent logging
        2. Predictable result storage via memory.set_var
        3. Error handling that doesn't interrupt workflow execution
        4. Composability with other Thoughtflow components (MEMORY, THOUGHT)
        5. Serialization support for reproducibility
        6. Full execution history with timing for debugging and optimization
    """

    def __init__(self, name, fn, config=None, result_key=None, description=None):
        """
        Initialize an ACTION with a name, function, and optional configuration.

        Args:
            name (str): Identifier for this action, used for logging and result storage.
            fn (callable): The function to execute when this action is called.
            config (dict, optional): Default configuration parameters passed to the function.
            result_key (str, optional): Key where results are stored in memory (defaults to "{name}_result").
            description (str, optional): Human-readable description of what this action does.
        """
        self.name = name
        self.id = event_stamp()  # Unique identifier for this action instance
        self.fn = fn
        self.config = config or {}
        self.result_key = result_key or "{}_result".format(name)
        self.description = description or "Action: {}".format(name)
        self.last_result = None
        self.last_error = None
        self.execution_count = 0
        self.execution_history = []  # Full execution tracking with timing

    @staticmethod
    def _memory_method(memory, method_name):
        """Return memory.<method_name> if it exists and is callable, else None."""
        candidate = getattr(memory, method_name, None)
        return candidate if callable(candidate) else None

    def _fn_name(self):
        """Return a printable name for self.fn, even for callables lacking __name__."""
        # functools.partial objects and callable instances have no __name__;
        # fall back to the type name instead of raising AttributeError.
        return getattr(self.fn, "__name__", None) or type(self.fn).__name__

    def _record_execution(self, memory, duration_ms, success, error):
        """Append one timing/outcome record to execution_history."""
        self.execution_history.append({
            'stamp': event_stamp(),
            'memory_id': getattr(memory, 'id', None),
            'duration_ms': duration_ms,
            'success': success,
            'error': error
        })

    def __call__(self, memory, **kwargs):
        """
        Execute the action function with the memory object and any override parameters.

        Args:
            memory (MEMORY): The memory object to update with results.
            **kwargs: Parameters that override the default config for this execution.

        Returns:
            MEMORY: The same memory object that was passed in. On success the result
            is stored with memory.set_var(result_key, ...); on failure an error
            event dict is stored under the same key.

        Note:
            The function receives (memory, **merged_kwargs) where merged_kwargs combines
            self.config with any call-specific kwargs.

            Exceptions during execution are logged rather than raised, allowing the
            workflow to continue and decide how to handle failures. On failure,
            last_result keeps the value from the previous successful execution.
        """
        import time as time_module

        # perf_counter is monotonic, so the measured duration cannot go negative
        # or jump if the system clock is adjusted while the action runs.
        start_time = time_module.perf_counter()

        # Merge default config with call-specific kwargs (call-specific wins)
        merged_kwargs = {**self.config, **kwargs}
        self.execution_count += 1

        try:
            # Execute the function
            result = self.fn(memory, **merged_kwargs)
            self.last_result = result
            self.last_error = None

            # Calculate execution duration
            duration_ms = (time_module.perf_counter() - start_time) * 1000

            # Store result in memory using set_var, when the memory object offers it
            set_var = self._memory_method(memory, "set_var")
            if set_var is not None:
                set_var(self.result_key, result, desc="Result of action: {}".format(self.name))

            # Build execution event for logging (JSON format)
            execution_event = {
                'action_name': self.name,
                'action_id': self.id,
                'status': 'success',
                'duration_ms': round(duration_ms, 2),
                'result_key': self.result_key
            }

            # Log successful execution as a single message carrying the JSON event
            add_log = self._memory_method(memory, "add_log")
            if add_log is not None:
                add_log("Action execution complete: " + json.dumps(execution_event))

            self._record_execution(memory, duration_ms, success=True, error=None)

        except Exception as e:
            # Deliberately broad: the contract is that a failing action is
            # recorded and reported through memory instead of raising.
            self.last_error = e

            # Calculate execution duration
            duration_ms = (time_module.perf_counter() - start_time) * 1000

            # Build error event for logging
            error_event = {
                'action_name': self.name,
                'action_id': self.id,
                'status': 'error',
                'error': str(e),
                'duration_ms': round(duration_ms, 2),
                'result_key': self.result_key
            }

            # Log failed execution (single message with JSON)
            add_log = self._memory_method(memory, "add_log")
            if add_log is not None:
                add_log("Action execution failed: " + json.dumps(error_event))

            # Store error info in memory using set_var
            set_var = self._memory_method(memory, "set_var")
            if set_var is not None:
                set_var(self.result_key, error_event, desc="Error in action: {}".format(self.name))

            self._record_execution(memory, duration_ms, success=False, error=str(e))

        return memory

    def get_last_result(self):
        """
        Returns the most recent result from executing this action.

        Returns:
            Any: The last result or None if the action hasn't been executed.
        """
        return self.last_result

    def was_successful(self):
        """
        Returns True if the last execution was successful, False otherwise.

        The newest execution_history entry is consulted first, because last_error
        is not serialized: an action rebuilt with from_dict has last_error None
        even when its restored history ends in a failure.

        Returns:
            bool: True if the last execution completed without errors, False otherwise.
        """
        if self.execution_history:
            latest = self.execution_history[-1]
            if isinstance(latest, dict) and 'success' in latest:
                return bool(latest['success'])
        return self.last_error is None and self.execution_count > 0

    def reset_stats(self):
        """
        Resets execution statistics (count, last_result, last_error, execution_history).

        Returns:
            ACTION: Self for method chaining.
        """
        self.execution_count = 0
        self.last_result = None
        self.last_error = None
        self.execution_history = []
        return self

    def copy(self):
        """
        Return a copy of this ACTION with a new ID.

        The function reference is shared (same callable), but config is
        shallow-copied. Execution statistics are reset in the copy.

        Returns:
            ACTION: A new ACTION instance with copied attributes and new ID.
        """
        # A fresh ID is assigned by __init__, so nothing else needs to be set.
        return ACTION(
            name=self.name,
            fn=self.fn,  # Same function reference
            config=dict(self.config) if self.config else None,
            result_key=self.result_key,
            description=self.description
        )

    def to_dict(self):
        """
        Returns a serializable dictionary representation of this action.

        Note: The function itself cannot be serialized, so it's represented by name.
        When deserializing, a function registry must be provided.

        Returns:
            dict: Serializable representation of this action. The config dict and
            the history list are shallow copies, so mutating the returned
            containers does not change this action.
        """
        return {
            "name": self.name,
            "id": self.id,
            "fn_name": self._fn_name(),
            "config": dict(self.config),
            "result_key": self.result_key,
            "description": self.description,
            "execution_count": self.execution_count,
            "execution_history": list(self.execution_history)
        }

    @classmethod
    def from_dict(cls, data, fn_registry):
        """
        Reconstruct an ACTION from a dictionary representation.

        Args:
            data (dict): Dictionary representation of an ACTION.
            fn_registry (dict): Dictionary mapping function names to function objects.

        Returns:
            ACTION: Reconstructed ACTION object.

        Raises:
            KeyError: If the function name is not found in the registry.
        """
        if data["fn_name"] not in fn_registry:
            raise KeyError("Function '{}' not found in registry".format(data['fn_name']))

        action = cls(
            name=data["name"],
            fn=fn_registry[data["fn_name"]],
            # Copy so the new action does not share its config with `data`
            config=dict(data["config"] or {}),
            result_key=data["result_key"],
            description=data["description"]
        )
        # Restore ID if provided, otherwise keep the new one from __init__
        if data.get("id"):
            action.id = data["id"]
        action.execution_count = data.get("execution_count", 0)
        # Copy the list so later executions do not append into `data`
        action.execution_history = list(data.get("execution_history") or [])
        return action

    def __str__(self):
        """
        Returns a string representation of this action.

        Returns:
            str: String representation.
        """
        return "ACTION({}, desc='{}', executions={})".format(self.name, self.description, self.execution_count)

    def __repr__(self):
        """
        Returns a detailed string representation of this action.

        Returns:
            str: Detailed string representation.
        """
        return ("ACTION(name='{}', fn={}, "
                "config={}, result_key='{}', "
                "description='{}', execution_count={})".format(
                    self.name, self._fn_name(), self.config,
                    self.result_key, self.description, self.execution_count))
349
+
350
+
351
+ ### ACTION CLASS TESTS
352
+
353
# Placeholder: a module-level string reserved for ACTION test notes. It holds
# only a header comment line and blank lines; nothing in the visible code reads it.
ActionClassTests = """
# --- ACTION Class Tests ---


"""
thoughtflow/agent.py ADDED
@@ -0,0 +1,66 @@
1
+ """
2
+ DEPRECATED: Use THOUGHT class instead.
3
+
4
+ The Agent class has been replaced by the THOUGHT class which provides
5
+ a more powerful and flexible interface for LLM interactions.
6
+
7
+ Example migration:
8
+ # Old (deprecated):
9
+ agent = Agent(adapter)
10
+ response = agent.call(messages)
11
+
12
+ # New:
13
+ from thoughtflow import THOUGHT, MEMORY, LLM
14
+
15
+ llm = LLM("openai:gpt-4o", key="your-api-key")
16
+ thought = THOUGHT(name="my_thought", llm=llm, prompt="...")
17
+ memory = MEMORY()
18
+ memory = thought(memory)
19
+ result = memory.get_var("my_thought_result")
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+ import warnings
25
+
26
+
27
class Agent:
    """
    DEPRECATED stub kept only so old imports fail with a clear message.

    Instantiating Agent emits a DeprecationWarning and then raises
    NotImplementedError; use the THOUGHT class instead.
    """

    def __init__(self, *args, **kwargs):
        # Both messages share the same opening sentence.
        headline = "Agent is deprecated. Use THOUGHT instead. "
        # stacklevel=2 attributes the warning to the code that called Agent(...).
        warnings.warn(
            headline + "See the migration guide in the module docstring.",
            category=DeprecationWarning,
            stacklevel=2,
        )
        raise NotImplementedError(
            headline + "Example: thought = THOUGHT(name='my_thought', llm=llm, prompt='...')"
        )
46
+
47
+
48
class TracedAgent:
    """
    DEPRECATED stub kept only so old imports fail with a clear message.

    Instantiating TracedAgent emits a DeprecationWarning and then raises
    NotImplementedError; THOUGHT provides execution history tracking instead.
    """

    def __init__(self, *args, **kwargs):
        # Both messages share the same opening sentence.
        headline = "TracedAgent is deprecated. Use THOUGHT instead. "
        # stacklevel=2 attributes the warning to the code that called TracedAgent(...).
        warnings.warn(
            headline + "THOUGHT provides built-in execution history tracking.",
            category=DeprecationWarning,
            stacklevel=2,
        )
        raise NotImplementedError(
            headline + "THOUGHT provides built-in execution history tracking via execution_history."
        )
@@ -0,0 +1,34 @@
1
+ """
2
+ Evaluation utilities for ThoughtFlow.
3
+
4
+ Deterministic evaluation is a first-class constraint in ThoughtFlow.
5
+ This module provides utilities for:
6
+ - Record/replay workflows
7
+ - Golden tests (expected response shape/constraints)
8
+ - Prompt/version pinning
9
+ - Stable metrics extraction from traces
10
+
11
+ Example:
12
+ >>> from thoughtflow.eval import Replay, Harness
13
+ >>>
14
+ >>> # Record a session
15
+ >>> session = agent.call(messages, record=True)
16
+ >>> session.save("golden.json")
17
+ >>>
18
+ >>> # Replay and compare
19
+ >>> replay = Replay.load("golden.json")
20
+ >>> results = replay.run(agent)
21
+ >>> assert results.matches_expected()
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ from thoughtflow.eval.replay import Replay
27
+ from thoughtflow.eval.harness import Harness, TestCase, TestResult
28
+
29
+ __all__ = [
30
+ "Replay",
31
+ "Harness",
32
+ "TestCase",
33
+ "TestResult",
34
+ ]
@@ -0,0 +1,200 @@
1
+ """
2
+ Test harness for ThoughtFlow evaluations.
3
+
4
+ Provides structured test cases and evaluation harnesses for
5
+ systematic agent testing.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from dataclasses import dataclass, field
11
+ from typing import TYPE_CHECKING, Any, Callable
12
+
13
+ if TYPE_CHECKING:
14
+ from thoughtflow.agent import Agent
15
+ from thoughtflow.message import MessageList
16
+
17
+
18
@dataclass
class TestCase:
    """A single test case for agent evaluation.

    Attributes:
        name: Human-readable name for the test.
        messages: Input messages for the test.
        params: Optional call parameters.
        expected: Expected response (exact match or callable validator).
        tags: Tags for filtering/grouping tests.
        metadata: Additional test metadata.
    """

    # The class name starts with "Test", so pytest would otherwise try to
    # collect it as a test class. Unannotated, so it is not a dataclass field.
    __test__ = False

    name: str
    messages: MessageList
    params: dict[str, Any] | None = None
    expected: str | Callable[[str], bool] | None = None
    tags: list[str] = field(default_factory=list)
    metadata: dict[str, Any] = field(default_factory=dict)

    def validate(self, response: str) -> bool:
        """Validate a response against expectations.

        Args:
            response: The agent's response.

        Returns:
            True if valid, False otherwise. With no expectation set, every
            response is valid. A callable expectation is invoked with the
            response and its result is coerced to bool; any other expectation
            is compared for equality.
        """
        if self.expected is None:
            return True
        if callable(self.expected):
            # Validators may return truthy/falsy non-bools (e.g. a match object
            # or None); coerce so the declared bool return type holds.
            return bool(self.expected(response))
        return response == self.expected
52
+
53
+
54
@dataclass
class TestResult:
    """Result of running a test case.

    Attributes:
        test_case: The test case that was run.
        passed: Whether the test passed.
        response: The agent's response.
        error: Error message if the test failed.
        duration_ms: How long the test took.
        metadata: Additional result metadata.
    """

    # The class name starts with "Test", so pytest would otherwise try to
    # collect it as a test class. Unannotated, so it is not a dataclass field.
    __test__ = False

    test_case: TestCase
    passed: bool
    response: str | None = None
    error: str | None = None
    duration_ms: int | None = None
    metadata: dict[str, Any] = field(default_factory=dict)
73
+
74
+
75
class Harness:
    """Collects test cases and (eventually) runs them against an agent.

    The harness currently supports registering test cases and selecting
    them by tag; executing them via run() is not implemented yet.

    Example:
        >>> harness = Harness()
        >>>
        >>> # Add test cases
        >>> harness.add(TestCase(
        ...     name="greeting",
        ...     messages=[{"role": "user", "content": "Hello!"}],
        ...     expected=lambda r: "hello" in r.lower()
        ... ))
        >>>
        >>> # Run all tests
        >>> results = harness.run(agent)
        >>>
        >>> # Check results
        >>> print(f"Passed: {results.passed_count}/{results.total_count}")
    """

    def __init__(self) -> None:
        """Start with no registered test cases."""
        self.test_cases: list[TestCase] = []

    def add(self, test_case: TestCase) -> None:
        """Register a single test case.

        Args:
            test_case: The test case to register.
        """
        self.test_cases.append(test_case)

    def add_many(self, test_cases: list[TestCase]) -> None:
        """Register several test cases, keeping their order.

        Args:
            test_cases: Test cases to register.
        """
        self.test_cases += test_cases

    def filter_by_tags(self, tags: list[str]) -> list[TestCase]:
        """Select the registered test cases carrying at least one given tag.

        Args:
            tags: Tags to look for.

        Returns:
            Matching test cases in registration order; empty when `tags` is empty.
        """
        selected = []
        for candidate in self.test_cases:
            for wanted in tags:
                if wanted in candidate.tags:
                    selected.append(candidate)
                    break  # one matching tag is enough; avoid duplicates
        return selected

    def run(
        self,
        agent: Agent,
        filter_tags: list[str] | None = None,
    ) -> HarnessResults:
        """Run the registered test cases against an agent (not implemented).

        Args:
            agent: The agent to test.
            filter_tags: Optional tags to filter which tests to run.

        Returns:
            HarnessResults with all test results, once implemented.

        Raises:
            NotImplementedError: Always; this is a placeholder implementation.
        """
        # TODO: Implement test execution
        message = (
            "Harness.run() is not yet implemented. "
            "This is a placeholder for the ThoughtFlow alpha release."
        )
        raise NotImplementedError(message)
153
+
154
+
155
@dataclass
class HarnessResults:
    """Aggregated outcome of a harness run.

    Attributes:
        results: Individual test results.
        metadata: Harness-level metadata.
    """

    results: list[TestResult] = field(default_factory=list)
    metadata: dict[str, Any] = field(default_factory=dict)

    @property
    def total_count(self) -> int:
        """How many results were recorded."""
        return len(self.results)

    @property
    def passed_count(self) -> int:
        """How many recorded results have a truthy `passed` flag."""
        passing = [outcome for outcome in self.results if outcome.passed]
        return len(passing)

    @property
    def failed_count(self) -> int:
        """How many recorded results did not pass."""
        return self.total_count - self.passed_count

    @property
    def pass_rate(self) -> float:
        """Fraction of results that passed, from 0.0 to 1.0 (0.0 when empty)."""
        total = self.total_count
        return self.passed_count / total if total else 0.0

    def summary(self) -> dict[str, Any]:
        """Bundle the headline statistics into one dict.

        Returns:
            Dict with the keys "total", "passed", "failed" and "pass_rate".
        """
        stats: dict[str, Any] = {}
        stats["total"] = self.total_count
        stats["passed"] = self.passed_count
        stats["failed"] = self.failed_count
        stats["pass_rate"] = self.pass_rate
        return stats