synkt 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synkt-0.1.0/PKG-INFO +13 -0
- synkt-0.1.0/README.md +104 -0
- synkt-0.1.0/pyproject.toml +28 -0
- synkt-0.1.0/setup.cfg +4 -0
- synkt-0.1.0/synkt/__init__.py +28 -0
- synkt-0.1.0/synkt/assertions/__init__.py +15 -0
- synkt-0.1.0/synkt/assertions/coordination.py +87 -0
- synkt-0.1.0/synkt/assertions/system.py +43 -0
- synkt-0.1.0/synkt/assertions/tools.py +55 -0
- synkt-0.1.0/synkt/interceptors/__init__.py +6 -0
- synkt-0.1.0/synkt/interceptors/base.py +13 -0
- synkt-0.1.0/synkt/interceptors/crewai.py +16 -0
- synkt-0.1.0/synkt/interceptors/langgraph.py +70 -0
- synkt-0.1.0/synkt/mocking/__init__.py +6 -0
- synkt-0.1.0/synkt/mocking/mock_agent.py +30 -0
- synkt-0.1.0/synkt/pytest_plugin.py +24 -0
- synkt-0.1.0/synkt/trace/__init__.py +17 -0
- synkt-0.1.0/synkt/trace/models.py +67 -0
- synkt-0.1.0/synkt/trace/pretty.py +87 -0
- synkt-0.1.0/synkt/trace/storage.py +28 -0
- synkt-0.1.0/synkt.egg-info/PKG-INFO +13 -0
- synkt-0.1.0/synkt.egg-info/SOURCES.txt +27 -0
- synkt-0.1.0/synkt.egg-info/dependency_links.txt +1 -0
- synkt-0.1.0/synkt.egg-info/requires.txt +9 -0
- synkt-0.1.0/synkt.egg-info/top_level.txt +1 -0
- synkt-0.1.0/tests/test_assertions.py +123 -0
- synkt-0.1.0/tests/test_langgraph_interceptor.py +52 -0
- synkt-0.1.0/tests/test_trace_models.py +35 -0
- synkt-0.1.0/tests/test_trace_pretty.py +47 -0
synkt-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: synkt
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Testing framework for multi-agent LLM systems
|
|
5
|
+
Requires-Python: >=3.10
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
Requires-Dist: pydantic>=2.0
|
|
8
|
+
Provides-Extra: dev
|
|
9
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
10
|
+
Requires-Dist: ruff>=0.6; extra == "dev"
|
|
11
|
+
Requires-Dist: black>=24.0; extra == "dev"
|
|
12
|
+
Provides-Extra: langgraph
|
|
13
|
+
Requires-Dist: langgraph>=0.2; extra == "langgraph"
|
synkt-0.1.0/README.md
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
# synkt
|
|
2
|
+
|
|
3
|
+
A small testing framework for multi-agent systems.
|
|
4
|
+
|
|
5
|
+
```python
|
|
6
|
+
from synkt import assert_handoff, assert_no_loop
|
|
7
|
+
from synkt.interceptors.langgraph import LangGraphInterceptor
|
|
8
|
+
|
|
9
|
+
from examples.customer_service.system import build_customer_service_graph
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def test_refund_flow():
|
|
13
|
+
graph = build_customer_service_graph()
|
|
14
|
+
test_graph = LangGraphInterceptor(graph)
|
|
15
|
+
|
|
16
|
+
result = test_graph.invoke({"input": "refund for order #12345"})
|
|
17
|
+
|
|
18
|
+
assert_handoff("triage", "refunds")
|
|
19
|
+
assert_no_loop(max_iterations=5)
|
|
20
|
+
assert "12345" in result["resolution"]
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
## Why synkt?
|
|
24
|
+
|
|
25
|
+
Most eval tools check final output quality. That is useful, but multi-agent bugs usually happen in the middle:
|
|
26
|
+
|
|
27
|
+
- Agent A hands off to the wrong agent
|
|
28
|
+
- A tool gets called with the wrong payload
|
|
29
|
+
- A flow starts looping and burns tokens
|
|
30
|
+
- Parallel steps stop being parallel after a refactor
|
|
31
|
+
|
|
32
|
+
`synkt` is for testing those coordination paths directly.
|
|
33
|
+
|
|
34
|
+
## Installation
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
pip install synkt
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
For local development in this repo:
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
pip install -e .
|
|
44
|
+
pip install -e ".[dev,langgraph]"
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## Quick Start
|
|
48
|
+
|
|
49
|
+
1. Build your agent graph/system as normal.
|
|
50
|
+
2. Wrap it with an interceptor (for now: LangGraph).
|
|
51
|
+
3. Run it in a test.
|
|
52
|
+
4. Assert on handoffs, loops, tools, and cost.
|
|
53
|
+
|
|
54
|
+
```python
|
|
55
|
+
from synkt import assert_handoff, assert_no_loop
|
|
56
|
+
from synkt.interceptors.langgraph import LangGraphInterceptor
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def test_my_flow():
|
|
60
|
+
graph = build_graph()
|
|
61
|
+
tested = LangGraphInterceptor(graph)
|
|
62
|
+
|
|
63
|
+
tested.invoke({"input": "help me with my refund"})
|
|
64
|
+
|
|
65
|
+
assert_handoff("triage", "refunds")
|
|
66
|
+
assert_no_loop(max_iterations=5)
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
## Examples
|
|
70
|
+
|
|
71
|
+
See real working examples:
|
|
72
|
+
- [Customer Service](examples/customer_service/) - Sequential agent flow
|
|
73
|
+
- [Research Crew](examples/research_crew/) - Parallel agent execution
|
|
74
|
+
|
|
75
|
+
## Documentation
|
|
76
|
+
|
|
77
|
+
- Design doc: [docs/DESIGN.md](docs/DESIGN.md)
|
|
78
|
+
|
|
79
|
+
## Features
|
|
80
|
+
|
|
81
|
+
- Test agent handoffs
|
|
82
|
+
- Prevent infinite loops
|
|
83
|
+
- Validate tool calls
|
|
84
|
+
- Mock agents for isolation
|
|
85
|
+
- Works with LangGraph today, with CrewAI and AutoGen adapters planned
|
|
86
|
+
|
|
87
|
+
## Contributing
|
|
88
|
+
|
|
89
|
+
PRs are welcome. A good starting point:
|
|
90
|
+
|
|
91
|
+
1. Add or update tests first.
|
|
92
|
+
2. Keep error messages specific and useful.
|
|
93
|
+
3. Keep APIs typed and easy to read.
|
|
94
|
+
|
|
95
|
+
Run locally before opening a PR:
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
pytest -q
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
## License
|
|
102
|
+
|
|
103
|
+
MIT
|
|
104
|
+
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "synkt"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Testing framework for multi-agent LLM systems"
|
|
9
|
+
readme = "docs/DESIGN.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
dependencies = [
|
|
12
|
+
"pydantic>=2.0",
|
|
13
|
+
]
|
|
14
|
+
|
|
15
|
+
[project.optional-dependencies]
|
|
16
|
+
dev = [
|
|
17
|
+
"pytest>=8.0",
|
|
18
|
+
"ruff>=0.6",
|
|
19
|
+
"black>=24.0",
|
|
20
|
+
]
|
|
21
|
+
langgraph = [
|
|
22
|
+
"langgraph>=0.2",
|
|
23
|
+
]
|
|
24
|
+
|
|
25
|
+
[tool.pytest.ini_options]
|
|
26
|
+
pythonpath = ["."]
|
|
27
|
+
testpaths = ["tests", "examples"]
|
|
28
|
+
|
synkt-0.1.0/setup.cfg
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""synkt - Testing framework for multi-agent LLM systems."""
|
|
2
|
+
|
|
3
|
+
__version__ = "0.1.0"
|
|
4
|
+
|
|
5
|
+
from synkt.assertions.coordination import assert_handoff, assert_parallel_execution
|
|
6
|
+
from synkt.assertions.system import assert_cost_under, assert_no_loop
|
|
7
|
+
from synkt.assertions.tools import assert_no_tool_called, assert_tool_called
|
|
8
|
+
from synkt.interceptors.langgraph import LangGraphInterceptor
|
|
9
|
+
from synkt.trace.models import AgentMessage, AgentTrace, ToolCall
|
|
10
|
+
from synkt.trace.pretty import format_trace, print_trace
|
|
11
|
+
from synkt.trace.storage import get_current_trace
|
|
12
|
+
|
|
13
|
+
__all__ = [
|
|
14
|
+
"assert_handoff",
|
|
15
|
+
"assert_parallel_execution",
|
|
16
|
+
"assert_tool_called",
|
|
17
|
+
"assert_no_tool_called",
|
|
18
|
+
"assert_no_loop",
|
|
19
|
+
"assert_cost_under",
|
|
20
|
+
"LangGraphInterceptor",
|
|
21
|
+
"AgentTrace",
|
|
22
|
+
"AgentMessage",
|
|
23
|
+
"ToolCall",
|
|
24
|
+
"get_current_trace",
|
|
25
|
+
"format_trace",
|
|
26
|
+
"print_trace",
|
|
27
|
+
]
|
|
28
|
+
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""Assertion helpers for agent coordination tests."""
|
|
2
|
+
|
|
3
|
+
from synkt.assertions.coordination import assert_handoff, assert_parallel_execution
|
|
4
|
+
from synkt.assertions.system import assert_cost_under, assert_no_loop
|
|
5
|
+
from synkt.assertions.tools import assert_no_tool_called, assert_tool_called
|
|
6
|
+
|
|
7
|
+
__all__ = [
|
|
8
|
+
"assert_handoff",
|
|
9
|
+
"assert_parallel_execution",
|
|
10
|
+
"assert_tool_called",
|
|
11
|
+
"assert_no_tool_called",
|
|
12
|
+
"assert_no_loop",
|
|
13
|
+
"assert_cost_under",
|
|
14
|
+
]
|
|
15
|
+
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
from typing import Optional, Type
|
|
5
|
+
|
|
6
|
+
from pydantic import BaseModel
|
|
7
|
+
|
|
8
|
+
from synkt.trace.storage import get_current_trace
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def assert_handoff(
    from_agent: Optional[str] = None,
    to_agent: Optional[str] = None,
    message_schema: Optional[Type[BaseModel]] = None,
    **kwargs: str,
) -> None:
    """
    Assert that one agent handed off to another.

    Args:
        from_agent: Name of the agent that initiated handoff
        to_agent: Name of the agent that received handoff
        message_schema: Optional Pydantic model to validate message content.
            Only the first matching handoff in the trace is validated.
        **kwargs: Legacy aliases ``from_node`` / ``to_node``. When supplied
            they take precedence over ``from_agent`` / ``to_agent``.

    Raises:
        AssertionError: If handoff didn't occur or schema doesn't match
        TypeError: If an unknown keyword is passed, or if either endpoint
            of the handoff is missing

    Examples:
        >>> assert_handoff("triage", "refunds")
        >>> assert_handoff(from_node="triage", to_node="refunds")
    """
    # Backward compatibility with docs/examples using from_node/to_node.
    # The positional parameters default to None so the aliases can be used
    # on their own instead of failing with "missing required argument".
    from_agent = kwargs.pop("from_node", from_agent)
    to_agent = kwargs.pop("to_node", to_agent)

    # Anything left over is a typo (e.g. ``mesage_schema=``); ignoring it
    # would silently weaken the assertion.
    if kwargs:
        unexpected = ", ".join(sorted(kwargs))
        raise TypeError(f"assert_handoff() got unexpected keyword argument(s): {unexpected}")

    if from_agent is None or to_agent is None:
        raise TypeError(
            "assert_handoff() requires both endpoints: pass from_agent/to_agent "
            "(or the legacy from_node/to_node aliases)"
        )

    trace = get_current_trace()
    handoffs = [
        msg
        for msg in trace.messages
        if msg.from_agent == from_agent and msg.to_agent == to_agent
    ]

    if not handoffs:
        raise AssertionError(
            f"No handoff from '{from_agent}' to '{to_agent}'. "
            f"Found {len(trace.messages)} total messages: "
            f"{[(m.from_agent, m.to_agent) for m in trace.messages]}"
        )

    if message_schema:
        handoff = handoffs[0]
        try:
            message_schema(**handoff.content)
        except Exception as exc:  # pragma: no cover - exact pydantic error varies
            raise AssertionError(
                f"Handoff message doesn't match schema {message_schema.__name__}: {exc}"
            ) from exc
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def assert_parallel_execution(agents: list[str], max_time_delta_ms: float = 50.0) -> None:
    """
    Assert that multiple agents executed in near-parallel based on timestamps.

    A message is relevant when its sender or its receiver is one of
    ``agents``. Every listed agent must appear in at least one relevant
    message, and the span between the earliest and latest relevant message
    must not exceed ``max_time_delta_ms``.

    Args:
        agents: Agent names expected to run in parallel
        max_time_delta_ms: Maximum allowed span between earliest and latest event

    Raises:
        AssertionError: If insufficient events are present or timing is too far apart
        ValueError: If ``agents`` is empty
    """
    # An empty list used to fail inside max() with an opaque
    # "arg is an empty sequence"; keep the exception type, say why.
    if not agents:
        raise ValueError("assert_parallel_execution() requires at least one agent name")

    trace = get_current_trace()
    wanted = set(agents)
    seen: set[str] = set()
    timestamps: list[datetime] = []

    for message in trace.messages:
        involved = wanted.intersection((message.from_agent, message.to_agent))
        if involved:
            seen.update(involved)
            timestamps.append(message.timestamp)

    # Counting messages alone lets one chatty agent mask another agent that
    # never ran, so also require each agent to be present individually.
    missing = [name for name in agents if name not in seen]
    if missing or len(timestamps) < len(agents):
        detail = f"; no events for {missing}" if missing else ""
        raise AssertionError(
            f"Expected events for {len(agents)} agents, "
            f"found {len(timestamps)} relevant messages{detail}"
        )

    span_ms = (max(timestamps) - min(timestamps)).total_seconds() * 1000
    if span_ms > max_time_delta_ms:
        raise AssertionError(
            f"Expected parallel execution within {max_time_delta_ms}ms, observed {span_ms:.2f}ms"
        )
|
|
87
|
+
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections import Counter
|
|
4
|
+
|
|
5
|
+
from synkt.trace.storage import get_current_trace
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def assert_no_loop(max_iterations: int = 10) -> None:
    """
    Fail when any agent shows up in the current trace too often.

    Each message contributes one appearance for its sender and one for its
    receiver, so a message an agent sends to itself counts twice.

    Args:
        max_iterations: Highest number of appearances tolerated per agent

    Raises:
        AssertionError: If some agent appears more than ``max_iterations`` times
    """
    appearances = Counter(
        name
        for msg in get_current_trace().messages
        for name in (msg.from_agent, msg.to_agent)
    )

    # Report the first offender in order of first appearance in the trace.
    offender = next(
        ((name, seen) for name, seen in appearances.items() if seen > max_iterations),
        None,
    )
    if offender is not None:
        name, seen = offender
        raise AssertionError(
            f"Agent '{name}' appears {seen} times (max: {max_iterations}). "
            "Possible infinite loop detected."
        )
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def assert_cost_under(threshold: float) -> None:
    """
    Assert that total cost is under threshold.

    A cost exactly equal to ``threshold`` passes; only a strictly greater
    cost fails.

    Args:
        threshold: Maximum allowed cost in dollars

    Raises:
        AssertionError: If the current trace's ``total_cost`` exceeds ``threshold``
    """
    trace = get_current_trace()

    if trace.total_cost > threshold:
        # Four decimals (the precision format_trace uses) so that sub-cent
        # costs do not render as "$0.00 exceeds threshold $0.00".
        raise AssertionError(
            f"Test cost ${trace.total_cost:.4f} exceeds threshold ${threshold:.4f}"
        )
|
|
43
|
+
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any, Optional
|
|
4
|
+
|
|
5
|
+
from synkt.trace.storage import get_current_trace
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def assert_tool_called(
    tool_name: str,
    args: Optional[dict[str, Any]] = None,
    times: int = 1,
    by_agent: Optional[str] = None,
) -> None:
    """
    Assert that a tool was called.

    Args:
        tool_name: Name of the tool
        args: Optional dict of expected arguments. Every matching call must
            carry each listed key with an equal value; extra arguments on
            the call are ignored.
        times: Expected number of calls (default 1)
        by_agent: Optional agent name filter

    Raises:
        AssertionError: If the number of matching calls differs from
            ``times``, or a matching call lacks or mismatches an expected
            argument
    """
    trace = get_current_trace()
    calls = [tc for tc in trace.tool_calls if tc.tool_name == tool_name]

    if by_agent:
        calls = [tc for tc in calls if tc.agent == by_agent]

    if len(calls) != times:
        scope = f" (by agent '{by_agent}')" if by_agent else ""
        raise AssertionError(
            f"Expected {times} calls to '{tool_name}', got {len(calls)}{scope}"
        )

    if args:
        # Sentinel so an absent argument is not mistaken for an explicit None
        # (which previously made ``args={"x": None}`` pass when "x" was missing).
        absent = object()
        for call in calls:
            for key, expected_value in args.items():
                actual_value = call.args.get(key, absent)
                if actual_value is absent:
                    raise AssertionError(
                        f"Tool '{tool_name}' called without argument '{key}', "
                        f"expected {expected_value}"
                    )
                if actual_value != expected_value:
                    raise AssertionError(
                        f"Tool '{tool_name}' called with {key}={actual_value}, "
                        f"expected {expected_value}"
                    )
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def assert_no_tool_called(tool_name: str, by_agent: Optional[str] = None) -> None:
    """
    Fail if the current trace contains any call to ``tool_name``.

    Args:
        tool_name: Tool that must not have been invoked
        by_agent: When given (and non-empty), only calls made by this agent count

    Raises:
        AssertionError: If at least one matching call was recorded
    """
    offending = [
        call
        for call in get_current_trace().tool_calls
        if call.tool_name == tool_name and (not by_agent or call.agent == by_agent)
    ]

    if offending:
        raise AssertionError(
            f"Expected no calls to '{tool_name}', but found {len(offending)} calls"
        )
|
|
55
|
+
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class BaseInterceptor(ABC):
    """Base interface for framework interceptors.

    Subclasses wrap a framework-specific multi-agent system and must
    implement ``invoke``; the class cannot be instantiated until they do.
    """

    @abstractmethod
    def invoke(self, *args: Any, **kwargs: Any) -> Any:
        """Invoke the wrapped multi-agent system.

        Arguments and the return value are defined by the concrete
        interceptor. The base implementation always raises
        ``NotImplementedError``.
        """
        raise NotImplementedError
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from synkt.interceptors.base import BaseInterceptor
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class CrewAIInterceptor(BaseInterceptor):
    """Placeholder interceptor for future CrewAI integration.

    It can be constructed, but ``invoke`` is not implemented yet.
    """

    def __init__(self, crew: Any):
        # Stored as-is; nothing in this class reads or wraps it yet.
        self.crew = crew

    def invoke(self, *args: Any, **kwargs: Any) -> Any:
        """Always raise ``NotImplementedError``; all arguments are ignored."""
        raise NotImplementedError("CrewAIInterceptor is not implemented in this MVP")
|
|
16
|
+
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Callable
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from synkt.interceptors.base import BaseInterceptor
|
|
7
|
+
from synkt.trace.storage import get_current_trace
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class LangGraphInterceptor(BaseInterceptor):
    """
    Wrap a LangGraph-style graph so that every node execution is recorded
    as a handoff in the current trace.

    The wrapped object is expected to expose:
    - graph.nodes: mapping of node_name -> callable (or a node spec with a
      ``runnable`` attribute)
    - graph.compile().invoke(...): invocation API

    Nodes are replaced in place when the interceptor is constructed, so the
    graph object passed in is mutated.
    """

    def __init__(self, graph: Any):
        self.graph = graph
        # Unwrapped node callables/runnables, keyed by node name.
        self._original_nodes: dict[str, Callable[..., Any]] = {}
        # Name used as the sender of the next recorded handoff.
        self._previous_node_name = "start"
        self._wrap_nodes()

    def _wrap_nodes(self) -> None:
        """Swap each node's runnable/function for a tracing wrapper."""
        for name, node in self.graph.nodes.items():
            if not hasattr(node, "runnable"):
                # Fallback for simpler graph doubles used in local tests:
                # the mapping value is the callable itself.
                self._original_nodes[name] = node
                self.graph.nodes[name] = self._create_wrapper(name, node)
                continue

            # LangGraph StateGraph stores a StateNodeSpec with a `.runnable` attribute.
            inner = node.runnable
            self._original_nodes[name] = inner
            node.runnable = self._create_wrapper(name, inner)

    def _create_wrapper(self, node_name: str, original_func: Callable[..., Any]) -> Callable[..., Any]:
        """Build a one-argument function that records a handoff, then runs the node."""

        def wrapper(state: Any) -> Any:
            # The handoff is recorded before the node body runs.
            get_current_trace().add_message(
                from_agent=self._previous_node_name,
                to_agent=node_name,
                content={"state": state},
            )

            if callable(original_func):
                run = original_func
            elif hasattr(original_func, "invoke"):
                run = original_func.invoke
            else:
                raise TypeError(
                    f"Unsupported node runnable type for '{node_name}': {type(original_func)!r}"
                )
            outcome = run(state)

            # Only updated after the node returns, so a node that raises
            # does not become the sender of the next handoff.
            self._previous_node_name = node_name
            return outcome

        return wrapper

    def invoke(self, *args: Any, **kwargs: Any) -> Any:
        """Reset the handoff origin to "start", compile the graph and invoke it."""
        self._previous_node_name = "start"
        compiled = self.graph.compile()
        return compiled.invoke(*args, **kwargs)
|
|
70
|
+
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Callable
|
|
4
|
+
from functools import wraps
|
|
5
|
+
from typing import Any, TypeVar
|
|
6
|
+
|
|
7
|
+
F = TypeVar("F", bound=Callable[..., Any])
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def mock_agent(response: Any = None, side_effect: Exception | None = None) -> Callable[[F], F]:
|
|
11
|
+
"""
|
|
12
|
+
Decorator that replaces an agent function with deterministic behavior.
|
|
13
|
+
|
|
14
|
+
Args:
|
|
15
|
+
response: Static return value to use instead of running the wrapped function
|
|
16
|
+
side_effect: Optional exception to raise when the function is called
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
def decorator(func: F) -> F:
|
|
20
|
+
@wraps(func)
|
|
21
|
+
def wrapper(*args: Any, **kwargs: Any) -> Any:
|
|
22
|
+
if side_effect is not None:
|
|
23
|
+
raise side_effect
|
|
24
|
+
if response is not None:
|
|
25
|
+
return response
|
|
26
|
+
return func(*args, **kwargs)
|
|
27
|
+
|
|
28
|
+
return wrapper # type: ignore[return-value]
|
|
29
|
+
|
|
30
|
+
return decorator
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from synkt.trace.models import AgentTrace
|
|
6
|
+
from synkt.trace.storage import clear_current_trace, set_current_trace
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@pytest.fixture(autouse=True)
def synkt_trace() -> AgentTrace:
    """Automatically create and clean up trace for each test.

    A fresh ``AgentTrace`` is installed as the current trace, yielded to the
    test, and the current-trace slot is cleared on teardown.
    """
    # NOTE(review): this is a generator function (it yields), so type checkers
    # would expect an Iterator/Generator return annotation — confirm and adjust.
    trace = AgentTrace()
    set_current_trace(trace)
    yield trace
    # Runs at fixture teardown.
    clear_current_trace()
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def pytest_configure(config: pytest.Config) -> None:
    """Register synkt markers.

    Adds the ``synkt`` marker description to the ``markers`` ini value of
    the given pytest config.
    """
    config.addinivalue_line(
        "markers",
        "synkt: mark test as an agent coordination test",
    )
|
|
24
|
+
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""Trace models and storage utilities."""
|
|
2
|
+
|
|
3
|
+
from synkt.trace.models import AgentMessage, AgentTrace, ToolCall
|
|
4
|
+
from synkt.trace.pretty import format_trace, print_trace
|
|
5
|
+
from synkt.trace.storage import clear_current_trace, get_current_trace, set_current_trace
|
|
6
|
+
|
|
7
|
+
__all__ = [
|
|
8
|
+
"AgentMessage",
|
|
9
|
+
"AgentTrace",
|
|
10
|
+
"ToolCall",
|
|
11
|
+
"format_trace",
|
|
12
|
+
"print_trace",
|
|
13
|
+
"get_current_trace",
|
|
14
|
+
"set_current_trace",
|
|
15
|
+
"clear_current_trace",
|
|
16
|
+
]
|
|
17
|
+
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
from typing import Any, Optional
|
|
5
|
+
|
|
6
|
+
from pydantic import BaseModel, Field
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class AgentMessage(BaseModel):
    """A message sent from one agent to another."""

    # Name of the sending agent.
    from_agent: str
    # Name of the receiving agent.
    to_agent: str
    # Message payload, keyed by string.
    content: dict[str, Any]
    # When the message was recorded.
    timestamp: datetime
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class ToolCall(BaseModel):
    """A tool called by an agent."""

    # Name of the agent that made the call.
    agent: str
    # Name of the tool that was called.
    tool_name: str
    # Arguments passed to the tool, keyed by string.
    args: dict[str, Any]
    # Value the tool produced; None when no result was recorded.
    result: Optional[Any] = None
    # When the call was recorded.
    timestamp: datetime
    # Duration of the call in milliseconds.
    duration_ms: float
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class AgentTrace(BaseModel):
    """Record of one multi-agent run: messages, tool calls and totals."""

    # Agent-to-agent messages, in the order they were added.
    messages: list[AgentMessage] = Field(default_factory=list)
    # Tool invocations, in the order they were added.
    tool_calls: list[ToolCall] = Field(default_factory=list)
    # Running totals; nothing in this class updates them.
    total_cost: float = 0.0
    duration_ms: float = 0.0
    # Free-form extra data attached to the trace.
    metadata: dict[str, Any] = Field(default_factory=dict)

    def add_message(self, from_agent: str, to_agent: str, content: dict[str, Any]) -> None:
        """Append a message stamped with the current local time."""
        entry = AgentMessage(
            from_agent=from_agent,
            to_agent=to_agent,
            content=content,
            timestamp=datetime.now(),
        )
        self.messages.append(entry)

    def add_tool_call(
        self,
        agent: str,
        tool_name: str,
        args: dict[str, Any],
        result: Any = None,
        duration_ms: float = 0.0,
    ) -> None:
        """Append a tool call stamped with the current local time."""
        record = ToolCall(
            agent=agent,
            tool_name=tool_name,
            args=args,
            result=result,
            timestamp=datetime.now(),
            duration_ms=duration_ms,
        )
        self.tool_calls.append(record)
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from synkt.trace.models import AgentTrace
|
|
6
|
+
from synkt.trace.storage import get_current_trace
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _shorten(value: Any, max_len: int = 120) -> str:
    """Return ``repr(value)`` cut down to at most ``max_len`` characters.

    A repr that already fits is returned unchanged. Otherwise it is cut and
    suffixed with ``"..."`` so the result is exactly ``max_len`` long. When
    ``max_len`` leaves no room for any text (3 or less), only as much of the
    ``"..."`` marker as fits is returned; a negative ``max_len`` yields ``""``.
    """
    text = repr(value)
    if len(text) <= max_len:
        return text

    marker = "..."
    if max_len <= len(marker):
        # A negative slice bound here would keep most of the text and return
        # something longer than max_len, so handle tiny limits explicitly.
        return marker[: max(max_len, 0)]
    return text[: max_len - len(marker)] + marker
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def format_trace(
    trace: AgentTrace | None = None,
    *,
    include_content: bool = False,
    include_tools: bool = True,
    include_summary: bool = True,
) -> str:
    """Render an AgentTrace as a multi-line, human-readable text timeline.

    Messages are listed first in stored order, followed by tool calls in
    stored order; numbering continues across both groups.

    Args:
        trace: Explicit trace. If omitted, uses the active trace from context.
        include_content: Append a shortened repr of each message payload.
        include_tools: List tool calls after the messages.
        include_summary: Append message/tool-call counts, duration and cost.

    Returns:
        The timeline as one newline-joined string.
    """
    active = trace or get_current_trace()
    messages = active.messages
    # Tool calls that will actually be listed (none when tools are hidden).
    shown_tools = active.tool_calls if include_tools else []

    def stamp(moment: Any) -> str:
        # HH:MM:SS.mmm — microseconds trimmed to milliseconds.
        return moment.strftime("%H:%M:%S.%f")[:-3]

    out: list[str] = ["Agent Trace Timeline", "--------------------"]

    if not messages and not shown_tools:
        out.append("(no events captured)")
    else:
        for position, msg in enumerate(messages, start=1):
            entry = f"{position:02d}. [{stamp(msg.timestamp)}] {msg.from_agent} -> {msg.to_agent}"
            if include_content:
                entry += f" | content={_shorten(msg.content)}"
            out.append(entry)

        for position, tool in enumerate(shown_tools, start=len(messages) + 1):
            outcome = "no-result" if tool.result is None else "ok"
            out.append(
                f"{position:02d}. [{stamp(tool.timestamp)}] {tool.agent} => tool:{tool.tool_name} "
                f"args={_shorten(tool.args, max_len=80)} "
                f"duration={tool.duration_ms:.2f}ms {outcome}"
            )

    if include_summary:
        # The summary counts all tool calls, even when they are not listed.
        out.extend(
            [
                "",
                "Summary",
                "-------",
                f"messages: {len(active.messages)}",
                f"tool_calls: {len(active.tool_calls)}",
                f"duration_ms: {active.duration_ms:.2f}",
                f"total_cost: ${active.total_cost:.4f}",
            ]
        )

    return "\n".join(out)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def print_trace(
    trace: AgentTrace | None = None,
    *,
    include_content: bool = False,
    include_tools: bool = True,
    include_summary: bool = True,
) -> None:
    """Write the ``format_trace`` rendering of a trace to stdout.

    All arguments are forwarded unchanged to ``format_trace``.
    """
    rendered = format_trace(
        trace,
        include_content=include_content,
        include_tools=include_tools,
        include_summary=include_summary,
    )
    print(rendered)
|
|
87
|
+
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from contextvars import ContextVar
|
|
4
|
+
from typing import Optional
|
|
5
|
+
|
|
6
|
+
from synkt.trace.models import AgentTrace
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
_current_trace: ContextVar[Optional[AgentTrace]] = ContextVar("_current_trace", default=None)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def get_current_trace() -> AgentTrace:
    """Return the trace stored in the current context.

    Raises:
        RuntimeError: If no trace has been set (or it has been cleared).
    """
    active = _current_trace.get()
    if active is not None:
        return active
    raise RuntimeError("No active trace. Are you inside a test?")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def set_current_trace(trace: AgentTrace) -> None:
    """Set the trace for the current test.

    Stores ``trace`` in the module's context variable, replacing any trace
    that was set before.
    """
    _current_trace.set(trace)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def clear_current_trace() -> None:
    """Clear the trace (called after test).

    Resets the context variable to ``None``, so a later
    ``get_current_trace()`` raises until a new trace is set.
    """
    _current_trace.set(None)
|
|
28
|
+
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: synkt
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Testing framework for multi-agent LLM systems
|
|
5
|
+
Requires-Python: >=3.10
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
Requires-Dist: pydantic>=2.0
|
|
8
|
+
Provides-Extra: dev
|
|
9
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
10
|
+
Requires-Dist: ruff>=0.6; extra == "dev"
|
|
11
|
+
Requires-Dist: black>=24.0; extra == "dev"
|
|
12
|
+
Provides-Extra: langgraph
|
|
13
|
+
Requires-Dist: langgraph>=0.2; extra == "langgraph"
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
synkt/__init__.py
|
|
4
|
+
synkt/pytest_plugin.py
|
|
5
|
+
synkt.egg-info/PKG-INFO
|
|
6
|
+
synkt.egg-info/SOURCES.txt
|
|
7
|
+
synkt.egg-info/dependency_links.txt
|
|
8
|
+
synkt.egg-info/requires.txt
|
|
9
|
+
synkt.egg-info/top_level.txt
|
|
10
|
+
synkt/assertions/__init__.py
|
|
11
|
+
synkt/assertions/coordination.py
|
|
12
|
+
synkt/assertions/system.py
|
|
13
|
+
synkt/assertions/tools.py
|
|
14
|
+
synkt/interceptors/__init__.py
|
|
15
|
+
synkt/interceptors/base.py
|
|
16
|
+
synkt/interceptors/crewai.py
|
|
17
|
+
synkt/interceptors/langgraph.py
|
|
18
|
+
synkt/mocking/__init__.py
|
|
19
|
+
synkt/mocking/mock_agent.py
|
|
20
|
+
synkt/trace/__init__.py
|
|
21
|
+
synkt/trace/models.py
|
|
22
|
+
synkt/trace/pretty.py
|
|
23
|
+
synkt/trace/storage.py
|
|
24
|
+
tests/test_assertions.py
|
|
25
|
+
tests/test_langgraph_interceptor.py
|
|
26
|
+
tests/test_trace_models.py
|
|
27
|
+
tests/test_trace_pretty.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
synkt
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
from pydantic import BaseModel
|
|
5
|
+
|
|
6
|
+
from synkt.assertions.coordination import assert_handoff, assert_parallel_execution
|
|
7
|
+
from synkt.assertions.system import assert_cost_under, assert_no_loop
|
|
8
|
+
from synkt.assertions.tools import assert_no_tool_called, assert_tool_called
|
|
9
|
+
from synkt.trace.models import AgentTrace
|
|
10
|
+
from synkt.trace.storage import set_current_trace
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class RefundRequest(BaseModel):
    """Schema used by the handoff schema-validation tests."""

    # Required field; a payload without it fails validation.
    order_id: str
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def test_assert_handoff_success() -> None:
    """A recorded triage -> refunds message satisfies assert_handoff."""
    recorded = AgentTrace()
    set_current_trace(recorded)
    recorded.add_message("triage", "refunds", {"order_id": "12345"})

    assert_handoff("triage", "refunds")
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def test_assert_handoff_missing() -> None:
    """With an empty trace, assert_handoff reports that no handoff happened."""
    set_current_trace(AgentTrace())

    with pytest.raises(AssertionError, match="No handoff"):
        assert_handoff("triage", "refunds")
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def test_assert_handoff_schema_validation() -> None:
    """A payload carrying order_id validates against RefundRequest."""
    recorded = AgentTrace()
    set_current_trace(recorded)
    recorded.add_message("triage", "refunds", {"order_id": "12345"})

    assert_handoff("triage", "refunds", message_schema=RefundRequest)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def test_assert_handoff_schema_validation_failure() -> None:
    """A payload without order_id is rejected with a schema-mismatch error."""
    recorded = AgentTrace()
    set_current_trace(recorded)
    recorded.add_message("triage", "refunds", {"wrong": "field"})

    with pytest.raises(AssertionError, match="doesn't match schema"):
        assert_handoff("triage", "refunds", message_schema=RefundRequest)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def test_assert_tool_called_success() -> None:
    """One recorded call with matching arguments satisfies assert_tool_called."""
    recorded = AgentTrace()
    set_current_trace(recorded)
    recorded.add_tool_call("refunds", "process_refund", {"order_id": "12345"})

    assert_tool_called("process_refund", args={"order_id": "12345"})
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def test_assert_tool_called_wrong_count() -> None:
    """With no recorded calls, the default expectation of one call fails."""
    set_current_trace(AgentTrace())

    with pytest.raises(AssertionError, match="Expected 1 calls"):
        assert_tool_called("process_refund")
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def test_assert_no_tool_called_success() -> None:
    """An empty current trace satisfies ``assert_no_tool_called``."""
    empty_trace = AgentTrace()
    set_current_trace(empty_trace)

    # Expected not to raise: nothing was recorded for "send_email".
    assert_no_tool_called("send_email")
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def test_assert_no_tool_called_failure() -> None:
    """A recorded ``send_email`` call makes ``assert_no_tool_called`` fail."""
    recorded = AgentTrace()
    recorded.add_tool_call("notifications", "send_email", {"to": "a@example.com"})
    set_current_trace(recorded)

    expected_failure = pytest.raises(AssertionError, match="Expected no calls")
    with expected_failure:
        assert_no_tool_called("send_email")
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def test_assert_no_loop_success() -> None:
    """Two distinct hops pass ``assert_no_loop`` with ``max_iterations=3``."""
    recorded = AgentTrace()
    for sender, receiver in (("start", "triage"), ("triage", "refunds")):
        # A fresh empty payload per message, as in a straight-line run.
        recorded.add_message(sender, receiver, {})
    set_current_trace(recorded)

    assert_no_loop(max_iterations=3)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def test_assert_no_loop_failure() -> None:
    """Six triage -> triage messages must trip ``assert_no_loop``."""
    recorded = AgentTrace()
    repeats = 6
    for _attempt in range(repeats):
        # Each self-message gets its own fresh empty payload.
        recorded.add_message("triage", "triage", {})
    set_current_trace(recorded)

    expected_failure = pytest.raises(AssertionError, match="Possible infinite loop")
    with expected_failure:
        assert_no_loop(max_iterations=10)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def test_assert_cost_under_success() -> None:
    """A trace with ``total_cost=0.25`` passes a 1.00 threshold."""
    cheap_trace = AgentTrace(total_cost=0.25)
    set_current_trace(cheap_trace)

    # Expected not to raise.
    assert_cost_under(1.00)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def test_assert_cost_under_failure() -> None:
    """A trace with ``total_cost=1.50`` fails a 1.00 threshold."""
    costly_trace = AgentTrace(total_cost=1.50)
    set_current_trace(costly_trace)

    expected_failure = pytest.raises(AssertionError, match="exceeds threshold")
    with expected_failure:
        assert_cost_under(1.00)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def test_assert_parallel_execution_success() -> None:
    """Messages to two agents recorded back to back pass the parallel check."""
    targets = ["agent_a", "agent_b"]
    recorded = AgentTrace()
    for target in targets:
        # Both messages originate from "router", each with a fresh empty payload.
        recorded.add_message("router", target, {})
    set_current_trace(recorded)

    assert_parallel_execution(["agent_a", "agent_b"], max_time_delta_ms=1000)
|
|
123
|
+
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from synkt.interceptors.langgraph import LangGraphInterceptor
|
|
6
|
+
from synkt.trace.storage import get_current_trace
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class FakeCompiledGraph:
    """Test double for a compiled graph: runs two named nodes in a fixed order."""

    def __init__(self, graph: "FakeGraph"):
        # Keep a reference to the owning graph; nodes are looked up at invoke time.
        self.graph = graph

    def invoke(self, state: dict[str, Any]) -> dict[str, Any]:
        """Pass ``state`` through the "triage" node, then the "refunds" node.

        Each node callable is fetched from ``self.graph.nodes`` immediately
        before it is called, and the result of one node is fed to the next.
        """
        after_triage = self.graph.nodes["triage"](state)
        after_refunds = self.graph.nodes["refunds"](after_triage)
        return after_refunds
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class FakeGraph:
    """Minimal graph double exposing a ``nodes`` mapping and ``compile()``."""

    def __init__(self) -> None:
        # Node name -> bound method; insertion order is triage, then refunds.
        self.nodes = dict(triage=self._triage, refunds=self._refunds)

    def _triage(self, state: dict[str, Any]) -> dict[str, Any]:
        """Write a fixed order id into ``state`` in place and return it."""
        state.update(order_id="12345")
        return state

    def _refunds(self, state: dict[str, Any]) -> dict[str, Any]:
        """Add a ``resolution`` message built from ``state['order_id']``."""
        order = state["order_id"]
        state["resolution"] = f"Refund processed for order {order}"
        return state

    def compile(self) -> "FakeCompiledGraph":
        """Wrap this graph in a ``FakeCompiledGraph``."""
        return FakeCompiledGraph(self)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def test_langgraph_interceptor_records_transitions() -> None:
    """Invoking via the interceptor records start->triage and triage->refunds."""
    intercepted = LangGraphInterceptor(FakeGraph())

    outcome = intercepted.invoke({"input": "refund please"})
    assert "resolution" in outcome

    recorded = get_current_trace().messages
    assert len(recorded) == 2

    first_hop, second_hop = recorded[0], recorded[1]
    assert (first_hop.from_agent, first_hop.to_agent) == ("start", "triage")
    assert (second_hop.from_agent, second_hop.to_agent) == ("triage", "refunds")
|
|
52
|
+
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from synkt.trace.models import AgentTrace
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_add_message_records_message() -> None:
    """``add_message`` stores one message with sender, receiver and content."""
    trace = AgentTrace()
    trace.add_message("triage", "refunds", {"order_id": "123"})

    stored = trace.messages
    assert len(stored) == 1

    message = stored[0]
    assert message.from_agent == "triage"
    assert message.to_agent == "refunds"
    assert message.content["order_id"] == "123"
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def test_add_tool_call_records_call() -> None:
    """``add_tool_call`` stores one call carrying every keyword it was given."""
    trace = AgentTrace()
    call_kwargs = {
        "agent": "refunds",
        "tool_name": "process_refund",
        "args": {"order_id": "123"},
        "result": {"ok": True},
        "duration_ms": 12.5,
    }
    trace.add_tool_call(**call_kwargs)

    assert len(trace.tool_calls) == 1

    recorded = trace.tool_calls[0]
    assert recorded.agent == "refunds"
    assert recorded.tool_name == "process_refund"
    assert recorded.args["order_id"] == "123"
    assert recorded.result == {"ok": True}
    assert recorded.duration_ms == 12.5
|
|
35
|
+
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from synkt.trace.models import AgentTrace
|
|
4
|
+
from synkt.trace.pretty import format_trace, print_trace
|
|
5
|
+
from synkt.trace.storage import set_current_trace
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def test_format_trace_includes_messages_and_summary() -> None:
    """``format_trace`` output contains the header, events and summary lines."""
    trace = AgentTrace(total_cost=0.1234, duration_ms=42.0)
    trace.add_message("planner", "research", {"topic": "pricing"})
    trace.add_tool_call(
        agent="research",
        tool_name="web_search",
        args={"query": "pricing strategy"},
        result={"hits": 3},
        duration_ms=12.5,
    )

    rendered = format_trace(trace, include_content=True)

    # Checked in the same order as the assertions they replace.
    expected_fragments = (
        "Agent Trace Timeline",
        "planner -> research",
        "tool:web_search",
        "messages: 1",
        "tool_calls: 1",
        "total_cost: $0.1234",
    )
    for fragment in expected_fragments:
        assert fragment in rendered
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def test_format_trace_uses_current_trace_when_not_passed() -> None:
    """Calling ``format_trace()`` with no argument renders the current trace."""
    active = AgentTrace()
    active.add_message("start", "triage", {"input": "refund"})
    set_current_trace(active)

    rendered = format_trace()

    assert "start -> triage" in rendered
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def test_print_trace_writes_timeline(capsys) -> None:
    """``print_trace`` writes the message line to standard output."""
    trace = AgentTrace()
    trace.add_message("a", "b", {})

    print_trace(trace)

    stdout_text = capsys.readouterr().out
    assert "a -> b" in stdout_text
|
|
47
|
+
|