synkt 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
synkt-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,13 @@
1
+ Metadata-Version: 2.4
2
+ Name: synkt
3
+ Version: 0.1.0
4
+ Summary: Testing framework for multi-agent LLM systems
5
+ Requires-Python: >=3.10
6
+ Description-Content-Type: text/markdown
7
+ Requires-Dist: pydantic>=2.0
8
+ Provides-Extra: dev
9
+ Requires-Dist: pytest>=8.0; extra == "dev"
10
+ Requires-Dist: ruff>=0.6; extra == "dev"
11
+ Requires-Dist: black>=24.0; extra == "dev"
12
+ Provides-Extra: langgraph
13
+ Requires-Dist: langgraph>=0.2; extra == "langgraph"
synkt-0.1.0/README.md ADDED
@@ -0,0 +1,104 @@
1
+ # synkt
2
+
3
+ A small testing framework for multi-agent systems.
4
+
5
+ ```python
6
+ from synkt import assert_handoff, assert_no_loop
7
+ from synkt.interceptors.langgraph import LangGraphInterceptor
8
+
9
+ from examples.customer_service.system import build_customer_service_graph
10
+
11
+
12
+ def test_refund_flow():
13
+ graph = build_customer_service_graph()
14
+ test_graph = LangGraphInterceptor(graph)
15
+
16
+ result = test_graph.invoke({"input": "refund for order #12345"})
17
+
18
+ assert_handoff("triage", "refunds")
19
+ assert_no_loop(max_iterations=5)
20
+ assert "12345" in result["resolution"]
21
+ ```
22
+
23
+ ## Why synkt?
24
+
25
+ Most eval tools check final output quality. That is useful, but multi-agent bugs usually happen in the middle:
26
+
27
+ - Agent A hands off to the wrong agent
28
+ - A tool gets called with the wrong payload
29
+ - A flow starts looping and burns tokens
30
+ - Parallel steps stop being parallel after a refactor
31
+
32
+ `synkt` is for testing those coordination paths directly.
33
+
34
+ ## Installation
35
+
36
+ ```bash
37
+ pip install synkt
38
+ ```
39
+
40
+ For local development in this repo:
41
+
42
+ ```bash
43
+ pip install -e .
44
+ pip install -e ".[dev,langgraph]"
45
+ ```
46
+
47
+ ## Quick Start
48
+
49
+ 1. Build your agent graph/system as normal.
50
+ 2. Wrap it with an interceptor (for now: LangGraph).
51
+ 3. Run it in a test.
52
+ 4. Assert on handoffs, loops, tools, and cost.
53
+
54
+ ```python
55
+ from synkt import assert_handoff, assert_no_loop
56
+ from synkt.interceptors.langgraph import LangGraphInterceptor
57
+
58
+
59
+ def test_my_flow():
60
+ graph = build_graph()
61
+ tested = LangGraphInterceptor(graph)
62
+
63
+ tested.invoke({"input": "help me with my refund"})
64
+
65
+ assert_handoff("triage", "refunds")
66
+ assert_no_loop(max_iterations=5)
67
+ ```
68
+
69
+ ## Examples
70
+
71
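+ See the working examples in the source repository (the `examples/` directory is not included in the published source distribution):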
72
+ - [Customer Service](examples/customer_service/) - Sequential agent flow
73
+ - [Research Crew](examples/research_crew/) - Parallel agent execution
74
+
75
+ ## Documentation
76
+
77
+ - Design doc: [docs/DESIGN.md](docs/DESIGN.md)
78
+
79
+ ## Features
80
+
81
+ - Test agent handoffs
82
+ - Prevent infinite loops
83
+ - Validate tool calls
84
+ - Mock agents for isolation
85
+ - Works with LangGraph today, with CrewAI and AutoGen adapters planned
86
+
87
+ ## Contributing
88
+
89
+ PRs are welcome. A good starting point:
90
+
91
+ 1. Add or update tests first.
92
+ 2. Keep error messages specific and useful.
93
+ 3. Keep APIs typed and easy to read.
94
+
95
+ Run locally before opening a PR:
96
+
97
+ ```bash
98
+ pytest -q
99
+ ```
100
+
101
+ ## License
102
+
103
+ MIT
104
+
@@ -0,0 +1,28 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "synkt"
7
+ version = "0.1.0"
8
+ description = "Testing framework for multi-agent LLM systems"
9
+ readme = "docs/DESIGN.md"
10
+ requires-python = ">=3.10"
11
+ dependencies = [
12
+ "pydantic>=2.0",
13
+ ]
14
+
15
+ [project.optional-dependencies]
16
+ dev = [
17
+ "pytest>=8.0",
18
+ "ruff>=0.6",
19
+ "black>=24.0",
20
+ ]
21
+ langgraph = [
22
+ "langgraph>=0.2",
23
+ ]
24
+
25
+ [tool.pytest.ini_options]
26
+ pythonpath = ["."]
27
+ testpaths = ["tests", "examples"]
28
+
synkt-0.1.0/setup.cfg ADDED
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,28 @@
1
+ """synkt - Testing framework for multi-agent LLM systems."""
2
+
3
+ __version__ = "0.1.0"
4
+
5
+ from synkt.assertions.coordination import assert_handoff, assert_parallel_execution
6
+ from synkt.assertions.system import assert_cost_under, assert_no_loop
7
+ from synkt.assertions.tools import assert_no_tool_called, assert_tool_called
8
+ from synkt.interceptors.langgraph import LangGraphInterceptor
9
+ from synkt.trace.models import AgentMessage, AgentTrace, ToolCall
10
+ from synkt.trace.pretty import format_trace, print_trace
11
+ from synkt.trace.storage import get_current_trace
12
+
13
+ __all__ = [
14
+ "assert_handoff",
15
+ "assert_parallel_execution",
16
+ "assert_tool_called",
17
+ "assert_no_tool_called",
18
+ "assert_no_loop",
19
+ "assert_cost_under",
20
+ "LangGraphInterceptor",
21
+ "AgentTrace",
22
+ "AgentMessage",
23
+ "ToolCall",
24
+ "get_current_trace",
25
+ "format_trace",
26
+ "print_trace",
27
+ ]
28
+
@@ -0,0 +1,15 @@
1
+ """Assertion helpers for agent coordination tests."""
2
+
3
+ from synkt.assertions.coordination import assert_handoff, assert_parallel_execution
4
+ from synkt.assertions.system import assert_cost_under, assert_no_loop
5
+ from synkt.assertions.tools import assert_no_tool_called, assert_tool_called
6
+
7
+ __all__ = [
8
+ "assert_handoff",
9
+ "assert_parallel_execution",
10
+ "assert_tool_called",
11
+ "assert_no_tool_called",
12
+ "assert_no_loop",
13
+ "assert_cost_under",
14
+ ]
15
+
@@ -0,0 +1,87 @@
1
+ from __future__ import annotations
2
+
3
+ from datetime import datetime
4
+ from typing import Optional, Type
5
+
6
+ from pydantic import BaseModel
7
+
8
+ from synkt.trace.storage import get_current_trace
9
+
10
+
11
def assert_handoff(
    from_agent: Optional[str] = None,
    to_agent: Optional[str] = None,
    message_schema: Optional[Type[BaseModel]] = None,
    **kwargs: str,
) -> None:
    """
    Assert that one agent handed off to another.

    Args:
        from_agent: Name of the agent that initiated handoff
        to_agent: Name of the agent that received handoff
        message_schema: Optional Pydantic model to validate message content
        **kwargs: Legacy aliases ``from_node`` / ``to_node``; when given they
            take precedence over ``from_agent`` / ``to_agent``

    Raises:
        AssertionError: If handoff didn't occur or schema doesn't match
        TypeError: If an unknown keyword is passed or an endpoint is missing

    Examples:
        >>> assert_handoff("triage", "refunds")
        >>> assert_handoff(from_node="triage", to_node="refunds")
    """
    # Reject anything that is not a known alias: a misspelt keyword (for
    # example ``schema=`` instead of ``message_schema=``) must not silently
    # turn into a weaker assertion.
    unexpected = sorted(set(kwargs) - {"from_node", "to_node"})
    if unexpected:
        raise TypeError(f"assert_handoff() got unexpected keyword arguments: {unexpected}")

    # Backward compatibility with docs/examples using from_node/to_node.
    from_agent = kwargs.get("from_node", from_agent)
    to_agent = kwargs.get("to_node", to_agent)
    if from_agent is None or to_agent is None:
        raise TypeError(
            "assert_handoff() requires both a source agent (from_agent/from_node) "
            "and a target agent (to_agent/to_node)"
        )

    trace = get_current_trace()
    handoffs = [
        msg
        for msg in trace.messages
        if msg.from_agent == from_agent and msg.to_agent == to_agent
    ]

    if not handoffs:
        raise AssertionError(
            f"No handoff from '{from_agent}' to '{to_agent}'. "
            f"Found {len(trace.messages)} total messages: "
            f"{[(m.from_agent, m.to_agent) for m in trace.messages]}"
        )

    if message_schema is None:
        return

    # Only the first matching handoff is validated against the schema.
    handoff = handoffs[0]
    try:
        message_schema(**handoff.content)
    except Exception as exc:  # pragma: no cover - exact pydantic error varies
        raise AssertionError(
            f"Handoff message doesn't match schema {message_schema.__name__}: {exc}"
        ) from exc
57
+
58
+
59
def assert_parallel_execution(agents: list[str], max_time_delta_ms: float = 50.0) -> None:
    """
    Assert that multiple agents executed in near-parallel based on timestamps.

    A message is relevant when its sender or its receiver is one of ``agents``.
    Every listed agent must appear in at least one relevant message, and the
    span between the earliest and the latest relevant message must not exceed
    ``max_time_delta_ms``.

    Args:
        agents: Agent names expected to run in parallel
        max_time_delta_ms: Maximum allowed span between earliest and latest event

    Raises:
        ValueError: If ``agents`` is empty
        AssertionError: If insufficient events are present, an agent has no
            event at all, or timing is too far apart
    """
    if not agents:
        # Previously this surfaced as an opaque "max() arg is an empty sequence".
        raise ValueError("assert_parallel_execution() requires at least one agent name")

    trace = get_current_trace()
    expected = set(agents)
    seen: set[str] = set()
    timestamps: list[datetime] = []

    for message in trace.messages:
        involved = expected & {message.from_agent, message.to_agent}
        if involved:
            seen |= involved
            timestamps.append(message.timestamp)

    # A raw message count is not enough: two events for one agent must not
    # stand in for an agent that never ran.
    missing = [name for name in agents if name not in seen]
    if missing or len(timestamps) < len(agents):
        detail = f"; no events recorded for: {missing}" if missing else ""
        raise AssertionError(
            f"Expected events for {len(agents)} agents, found {len(timestamps)} relevant messages"
            f"{detail}"
        )

    span_ms = (max(timestamps) - min(timestamps)).total_seconds() * 1000
    if span_ms > max_time_delta_ms:
        raise AssertionError(
            f"Expected parallel execution within {max_time_delta_ms}ms, observed {span_ms:.2f}ms"
        )
87
+
@@ -0,0 +1,43 @@
1
+ from __future__ import annotations
2
+
3
+ from collections import Counter
4
+
5
+ from synkt.trace.storage import get_current_trace
6
+
7
+
8
def assert_no_loop(max_iterations: int = 10) -> None:
    """
    Fail when any agent shows up in the active trace too often.

    Each message contributes one appearance for its sender and one for its
    receiver, so a self-addressed message counts twice for that agent.

    Args:
        max_iterations: Max times any single agent can appear in trace

    Raises:
        AssertionError: If an agent appears more than ``max_iterations`` times
    """
    appearances = Counter()
    for message in get_current_trace().messages:
        appearances.update((message.from_agent, message.to_agent))

    offender = next(
        ((name, seen) for name, seen in appearances.items() if seen > max_iterations),
        None,
    )
    if offender is None:
        return

    agent, count = offender
    raise AssertionError(
        f"Agent '{agent}' appears {count} times (max: {max_iterations}). "
        "Possible infinite loop detected."
    )
28
+
29
+
30
def assert_cost_under(threshold: float) -> None:
    """
    Assert that total cost does not exceed threshold.

    A cost exactly equal to ``threshold`` passes.

    Args:
        threshold: Maximum allowed cost in dollars

    Raises:
        AssertionError: If the active trace's ``total_cost`` is above ``threshold``
    """
    trace = get_current_trace()

    if trace.total_cost > threshold:
        # Four decimals (the precision format_trace uses for total_cost) keep
        # sub-cent overruns visible; two decimals could print
        # "$0.00 exceeds threshold $0.00".
        raise AssertionError(
            f"Test cost ${trace.total_cost:.4f} exceeds threshold ${threshold:.4f}"
        )
43
+
@@ -0,0 +1,55 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any, Optional
4
+
5
+ from synkt.trace.storage import get_current_trace
6
+
7
+
8
def assert_tool_called(
    tool_name: str,
    args: Optional[dict[str, Any]] = None,
    times: int = 1,
    by_agent: Optional[str] = None,
) -> None:
    """
    Assert that a tool was called.

    Args:
        tool_name: Name of the tool
        args: Optional dict of expected arguments; every matching call must
            carry each listed key with an equal value (extra arguments on the
            call are ignored)
        times: Expected number of calls (default 1)
        by_agent: Optional agent name filter

    Raises:
        AssertionError: If the call count differs from ``times``, an expected
            argument is absent, or an argument value differs
    """
    trace = get_current_trace()
    calls = [tc for tc in trace.tool_calls if tc.tool_name == tool_name]

    if by_agent:
        calls = [tc for tc in calls if tc.agent == by_agent]

    if len(calls) != times:
        raise AssertionError(f"Expected {times} calls to '{tool_name}', got {len(calls)}")

    if not args:
        return

    # Sentinel so that "argument absent" is not confused with an explicit
    # None value (dict.get would report both as None).
    missing = object()
    for call in calls:
        for key, expected_value in args.items():
            actual_value = call.args.get(key, missing)
            if actual_value is missing:
                raise AssertionError(
                    f"Tool '{tool_name}' called without argument '{key}', "
                    f"expected {expected_value}"
                )
            if actual_value != expected_value:
                raise AssertionError(
                    f"Tool '{tool_name}' called with {key}={actual_value}, "
                    f"expected {expected_value}"
                )
41
+
42
+
43
def assert_no_tool_called(tool_name: str, by_agent: Optional[str] = None) -> None:
    """
    Assert that a tool was never called.

    Args:
        tool_name: Name of the tool that must not appear in the trace
        by_agent: Optional agent name; when given, only that agent's calls count

    Raises:
        AssertionError: If at least one matching call was recorded
    """
    matching = []
    for recorded in get_current_trace().tool_calls:
        if recorded.tool_name != tool_name:
            continue
        if by_agent and recorded.agent != by_agent:
            continue
        matching.append(recorded)

    if not matching:
        return

    raise AssertionError(
        f"Expected no calls to '{tool_name}', but found {len(matching)} calls"
    )
55
+
@@ -0,0 +1,6 @@
1
+ """Framework interceptors for trace collection."""
2
+
3
+ from synkt.interceptors.langgraph import LangGraphInterceptor
4
+
5
+ __all__ = ["LangGraphInterceptor"]
6
+
@@ -0,0 +1,13 @@
1
+ from __future__ import annotations
2
+
3
+ from abc import ABC, abstractmethod
4
+ from typing import Any
5
+
6
+
7
class BaseInterceptor(ABC):
    """Abstract contract shared by every framework interceptor.

    Concrete interceptors must override :meth:`invoke`; the class cannot be
    instantiated until they do.
    """

    @abstractmethod
    def invoke(self, *args: Any, **kwargs: Any) -> Any:
        """Run the wrapped multi-agent system and return whatever it returns."""
        raise NotImplementedError
@@ -0,0 +1,16 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any
4
+
5
+ from synkt.interceptors.base import BaseInterceptor
6
+
7
+
8
class CrewAIInterceptor(BaseInterceptor):
    """Stub interceptor reserving the API for a future CrewAI integration.

    The crew object is stored but never used; calling :meth:`invoke` always
    raises ``NotImplementedError``.
    """

    def __init__(self, crew: Any):
        # Kept only so the constructor signature is already in place.
        self.crew = crew

    def invoke(self, *args: Any, **kwargs: Any) -> Any:
        """Always raise: CrewAI support is not part of this release."""
        message = "CrewAIInterceptor is not implemented in this MVP"
        raise NotImplementedError(message)
16
+
@@ -0,0 +1,70 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Callable
4
+ from typing import Any
5
+
6
+ from synkt.interceptors.base import BaseInterceptor
7
+ from synkt.trace.storage import get_current_trace
8
+
9
+
10
class LangGraphInterceptor(BaseInterceptor):
    """
    Wrapper for LangGraph StateGraph that captures node transitions.

    The wrapped object is expected to expose:
    - graph.nodes: mapping of node_name -> callable
    - graph.compile().invoke(...): invocation API

    Construction mutates ``graph`` in place: every node (or its ``.runnable``)
    is replaced by a recording wrapper. Wrapping a graph that was already
    wrapped by another interceptor re-wraps the *original* node, so each
    transition is recorded once rather than once per interceptor.
    """

    # Attribute set on every wrapper so that a later interceptor can recover
    # the undecorated node instead of stacking wrappers.
    _ORIGINAL_ATTR = "__synkt_original__"

    def __init__(self, graph: Any):
        self.graph = graph
        self._original_nodes: dict[str, Callable[..., Any]] = {}
        # Label used as the sender of the first recorded transition.
        self._previous_node_name = "start"
        self._wrap_nodes()

    def _unwrap(self, candidate: Any) -> Any:
        """Return the node hidden behind a synkt wrapper, or ``candidate`` itself."""
        return getattr(candidate, self._ORIGINAL_ATTR, candidate)

    def _wrap_nodes(self) -> None:
        """Wrap each node runnable/function to capture transitions."""
        # Iterate over a snapshot: the fallback branch assigns into graph.nodes.
        for node_name, node_obj in list(self.graph.nodes.items()):
            # LangGraph StateGraph stores a StateNodeSpec with a `.runnable` attribute.
            if hasattr(node_obj, "runnable"):
                original = self._unwrap(node_obj.runnable)
                self._original_nodes[node_name] = original
                node_obj.runnable = self._create_wrapper(node_name, original)
            else:
                # Fallback for simpler graph doubles used in local tests.
                original = self._unwrap(node_obj)
                self._original_nodes[node_name] = original
                self.graph.nodes[node_name] = self._create_wrapper(node_name, original)

    def _create_wrapper(self, node_name: str, original_func: Callable[..., Any]) -> Callable[..., Any]:
        """Create a wrapper that logs node handoffs to trace."""

        def wrapper(state: Any) -> Any:
            trace = get_current_trace()
            prev_node = self._previous_node_name

            # Record the transition before running the node.
            trace.add_message(
                from_agent=prev_node,
                to_agent=node_name,
                content={"state": state},
            )

            if callable(original_func):
                result = original_func(state)
            elif hasattr(original_func, "invoke"):
                result = original_func.invoke(state)
            else:
                raise TypeError(
                    f"Unsupported node runnable type for '{node_name}': {type(original_func)!r}"
                )

            # Only a node that returned normally becomes the next sender.
            self._previous_node_name = node_name

            return result

        setattr(wrapper, self._ORIGINAL_ATTR, original_func)
        return wrapper

    def invoke(self, *args: Any, **kwargs: Any) -> Any:
        """Invoke the graph (same API as StateGraph.invoke)."""
        # Each invocation starts a fresh chain from the "start" pseudo-node.
        self._previous_node_name = "start"
        return self.graph.compile().invoke(*args, **kwargs)
70
+
@@ -0,0 +1,6 @@
1
+ """Mocking utilities for agent stubs."""
2
+
3
+ from synkt.mocking.mock_agent import mock_agent
4
+
5
+ __all__ = ["mock_agent"]
6
+
@@ -0,0 +1,30 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Callable
4
+ from functools import wraps
5
+ from typing import Any, TypeVar
6
+
7
+ F = TypeVar("F", bound=Callable[..., Any])
8
+
9
+
10
+ def mock_agent(response: Any = None, side_effect: Exception | None = None) -> Callable[[F], F]:
11
+ """
12
+ Decorator that replaces an agent function with deterministic behavior.
13
+
14
+ Args:
15
+ response: Static return value to use instead of running the wrapped function
16
+ side_effect: Optional exception to raise when the function is called
17
+ """
18
+
19
+ def decorator(func: F) -> F:
20
+ @wraps(func)
21
+ def wrapper(*args: Any, **kwargs: Any) -> Any:
22
+ if side_effect is not None:
23
+ raise side_effect
24
+ if response is not None:
25
+ return response
26
+ return func(*args, **kwargs)
27
+
28
+ return wrapper # type: ignore[return-value]
29
+
30
+ return decorator
@@ -0,0 +1,24 @@
1
+ from __future__ import annotations
2
+
3
+ import pytest
4
+
5
+ from synkt.trace.models import AgentTrace
6
+ from synkt.trace.storage import clear_current_trace, set_current_trace
7
+
8
+
9
@pytest.fixture(autouse=True)
def synkt_trace() -> AgentTrace:
    """Install a fresh AgentTrace as the current trace for each test.

    Yields the new trace to the test, then clears the current trace once the
    test has finished.
    """
    # NOTE(review): this is a generator fixture, so the return annotation
    # describes the yielded value rather than the function's actual return type.
    trace = AgentTrace()
    set_current_trace(trace)
    yield trace
    # Teardown: runs after the test body.
    clear_current_trace()
16
+
17
+
18
def pytest_configure(config: pytest.Config) -> None:
    """Declare the ``synkt`` marker by appending it to the ``markers`` ini setting."""
    marker_spec = "synkt: mark test as an agent coordination test"
    config.addinivalue_line("markers", marker_spec)
24
+
@@ -0,0 +1,17 @@
1
+ """Trace models and storage utilities."""
2
+
3
+ from synkt.trace.models import AgentMessage, AgentTrace, ToolCall
4
+ from synkt.trace.pretty import format_trace, print_trace
5
+ from synkt.trace.storage import clear_current_trace, get_current_trace, set_current_trace
6
+
7
+ __all__ = [
8
+ "AgentMessage",
9
+ "AgentTrace",
10
+ "ToolCall",
11
+ "format_trace",
12
+ "print_trace",
13
+ "get_current_trace",
14
+ "set_current_trace",
15
+ "clear_current_trace",
16
+ ]
17
+
@@ -0,0 +1,67 @@
1
+ from __future__ import annotations
2
+
3
+ from datetime import datetime
4
+ from typing import Any, Optional
5
+
6
+ from pydantic import BaseModel, Field
7
+
8
+
9
class AgentMessage(BaseModel):
    """A message sent from one agent to another."""

    # Name of the sending agent.
    from_agent: str
    # Name of the receiving agent.
    to_agent: str
    # Payload carried by the message.
    content: dict[str, Any]
    # When the message was recorded (AgentTrace.add_message uses datetime.now()).
    timestamp: datetime
16
+
17
+
18
class ToolCall(BaseModel):
    """A tool called by an agent."""

    # Name of the agent that made the call.
    agent: str
    # Name of the tool that was called.
    tool_name: str
    # Arguments the tool was called with.
    args: dict[str, Any]
    # Value produced by the tool; None when no result was recorded.
    result: Optional[Any] = None
    # When the call was recorded (AgentTrace.add_tool_call uses datetime.now()).
    timestamp: datetime
    # Duration of the call, in milliseconds per the field name.
    duration_ms: float
27
+
28
+
29
class AgentTrace(BaseModel):
    """Record of one multi-agent run: messages, tool calls and run totals."""

    messages: list[AgentMessage] = Field(default_factory=list)
    tool_calls: list[ToolCall] = Field(default_factory=list)
    total_cost: float = 0.0
    duration_ms: float = 0.0
    metadata: dict[str, Any] = Field(default_factory=dict)

    def add_message(self, from_agent: str, to_agent: str, content: dict[str, Any]) -> None:
        """Append a message stamped with the current local time."""
        entry = AgentMessage(
            from_agent=from_agent,
            to_agent=to_agent,
            content=content,
            timestamp=datetime.now(),
        )
        self.messages.append(entry)

    def add_tool_call(
        self,
        agent: str,
        tool_name: str,
        args: dict[str, Any],
        result: Any = None,
        duration_ms: float = 0.0,
    ) -> None:
        """Append a tool call stamped with the current local time."""
        entry = ToolCall(
            agent=agent,
            tool_name=tool_name,
            args=args,
            result=result,
            timestamp=datetime.now(),
            duration_ms=duration_ms,
        )
        self.tool_calls.append(entry)
@@ -0,0 +1,87 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any
4
+
5
+ from synkt.trace.models import AgentTrace
6
+ from synkt.trace.storage import get_current_trace
7
+
8
+
9
def _shorten(value: Any, max_len: int = 120) -> str:
    """Return ``repr(value)`` limited to ``max_len`` characters.

    Text that fits is returned unchanged. Longer text is cut and suffixed with
    ``"..."`` so the result is exactly ``max_len`` characters long. When
    ``max_len`` is below 3 there is no room for the ellipsis, so the text is
    hard-cut instead (a non-positive ``max_len`` yields an empty string).
    """
    text = repr(value)
    if len(text) <= max_len:
        return text
    if max_len < 3:
        # A negative slice bound would otherwise keep most of the text and
        # produce output longer than max_len.
        return text[: max(max_len, 0)]
    return text[: max_len - 3] + "..."
14
+
15
+
16
def format_trace(
    trace: AgentTrace | None = None,
    *,
    include_content: bool = False,
    include_tools: bool = True,
    include_summary: bool = True,
) -> str:
    """Return a human-readable timeline for an AgentTrace.

    Messages and (optionally) tool calls are merged into one list ordered by
    timestamp; entries with equal timestamps keep messages before tool calls.
    If the timestamps cannot be compared, all messages are listed first,
    followed by all tool calls.

    Args:
        trace: Explicit trace. If omitted, uses the active trace from context.
        include_content: Include message payload snippets.
        include_tools: Include tool-call timeline entries.
        include_summary: Include final summary counts and cost.
    """
    active_trace = get_current_trace() if trace is None else trace
    lines: list[str] = ["Agent Trace Timeline", "--------------------"]

    events = [("message", msg.timestamp, msg) for msg in active_trace.messages]
    if include_tools:
        events += [("tool", tool.timestamp, tool) for tool in active_trace.tool_calls]

    try:
        # sorted() is stable, so ties keep the message-then-tool build order.
        events = sorted(events, key=lambda event: event[1])
    except TypeError:
        # e.g. a mix of naive and timezone-aware timestamps; keep build order.
        pass

    if not events:
        lines.append("(no events captured)")

    for idx, (kind, stamp, item) in enumerate(events, start=1):
        # Trim microseconds to milliseconds.
        ts = stamp.strftime("%H:%M:%S.%f")[:-3]
        if kind == "message":
            entry = f"{idx:02d}. [{ts}] {item.from_agent} -> {item.to_agent}"
            if include_content:
                entry += f" | content={_shorten(item.content)}"
        else:
            result_label = "ok" if item.result is not None else "no-result"
            entry = (
                f"{idx:02d}. [{ts}] {item.agent} => tool:{item.tool_name} "
                f"args={_shorten(item.args, max_len=80)} "
                f"duration={item.duration_ms:.2f}ms {result_label}"
            )
        lines.append(entry)

    if include_summary:
        lines.append("")
        lines.append("Summary")
        lines.append("-------")
        lines.append(f"messages: {len(active_trace.messages)}")
        lines.append(f"tool_calls: {len(active_trace.tool_calls)}")
        lines.append(f"duration_ms: {active_trace.duration_ms:.2f}")
        lines.append(f"total_cost: ${active_trace.total_cost:.4f}")

    return "\n".join(lines)
69
+
70
+
71
def print_trace(
    trace: AgentTrace | None = None,
    *,
    include_content: bool = False,
    include_tools: bool = True,
    include_summary: bool = True,
) -> None:
    """Write the timeline produced by ``format_trace`` to stdout.

    All arguments are forwarded unchanged to ``format_trace``.
    """
    rendered = format_trace(
        trace,
        include_content=include_content,
        include_tools=include_tools,
        include_summary=include_summary,
    )
    print(rendered)
87
+
@@ -0,0 +1,28 @@
1
+ from __future__ import annotations
2
+
3
+ from contextvars import ContextVar
4
+ from typing import Optional
5
+
6
+ from synkt.trace.models import AgentTrace
7
+
8
+
9
+ _current_trace: ContextVar[Optional[AgentTrace]] = ContextVar("_current_trace", default=None)
10
+
11
+
12
def get_current_trace() -> AgentTrace:
    """Return the trace stored in the current context.

    Raises:
        RuntimeError: If no trace has been set (or it has been cleared).
    """
    active = _current_trace.get()
    if active is not None:
        return active
    raise RuntimeError("No active trace. Are you inside a test?")
18
+
19
+
20
def set_current_trace(trace: AgentTrace) -> None:
    """Store ``trace`` in the context variable read by ``get_current_trace``."""
    _current_trace.set(trace)
23
+
24
+
25
def clear_current_trace() -> None:
    """Reset the context variable to ``None`` so no trace is active (called after test)."""
    _current_trace.set(None)
28
+
@@ -0,0 +1,13 @@
1
+ Metadata-Version: 2.4
2
+ Name: synkt
3
+ Version: 0.1.0
4
+ Summary: Testing framework for multi-agent LLM systems
5
+ Requires-Python: >=3.10
6
+ Description-Content-Type: text/markdown
7
+ Requires-Dist: pydantic>=2.0
8
+ Provides-Extra: dev
9
+ Requires-Dist: pytest>=8.0; extra == "dev"
10
+ Requires-Dist: ruff>=0.6; extra == "dev"
11
+ Requires-Dist: black>=24.0; extra == "dev"
12
+ Provides-Extra: langgraph
13
+ Requires-Dist: langgraph>=0.2; extra == "langgraph"
@@ -0,0 +1,27 @@
1
+ README.md
2
+ pyproject.toml
3
+ synkt/__init__.py
4
+ synkt/pytest_plugin.py
5
+ synkt.egg-info/PKG-INFO
6
+ synkt.egg-info/SOURCES.txt
7
+ synkt.egg-info/dependency_links.txt
8
+ synkt.egg-info/requires.txt
9
+ synkt.egg-info/top_level.txt
10
+ synkt/assertions/__init__.py
11
+ synkt/assertions/coordination.py
12
+ synkt/assertions/system.py
13
+ synkt/assertions/tools.py
14
+ synkt/interceptors/__init__.py
15
+ synkt/interceptors/base.py
16
+ synkt/interceptors/crewai.py
17
+ synkt/interceptors/langgraph.py
18
+ synkt/mocking/__init__.py
19
+ synkt/mocking/mock_agent.py
20
+ synkt/trace/__init__.py
21
+ synkt/trace/models.py
22
+ synkt/trace/pretty.py
23
+ synkt/trace/storage.py
24
+ tests/test_assertions.py
25
+ tests/test_langgraph_interceptor.py
26
+ tests/test_trace_models.py
27
+ tests/test_trace_pretty.py
@@ -0,0 +1,9 @@
1
+ pydantic>=2.0
2
+
3
+ [dev]
4
+ pytest>=8.0
5
+ ruff>=0.6
6
+ black>=24.0
7
+
8
+ [langgraph]
9
+ langgraph>=0.2
@@ -0,0 +1 @@
1
+ synkt
@@ -0,0 +1,123 @@
1
+ from __future__ import annotations
2
+
3
+ import pytest
4
+ from pydantic import BaseModel
5
+
6
+ from synkt.assertions.coordination import assert_handoff, assert_parallel_execution
7
+ from synkt.assertions.system import assert_cost_under, assert_no_loop
8
+ from synkt.assertions.tools import assert_no_tool_called, assert_tool_called
9
+ from synkt.trace.models import AgentTrace
10
+ from synkt.trace.storage import set_current_trace
11
+
12
+
13
class RefundRequest(BaseModel):
    """Schema used by the handoff tests: a payload carrying an ``order_id``."""

    order_id: str
15
+
16
+
17
def test_assert_handoff_success() -> None:
    """A recorded triage -> refunds message satisfies assert_handoff."""
    recorded = AgentTrace()
    recorded.add_message("triage", "refunds", {"order_id": "12345"})
    set_current_trace(recorded)

    assert_handoff("triage", "refunds")
23
+
24
+
25
def test_assert_handoff_missing() -> None:
    """An empty trace makes assert_handoff fail with a "No handoff" message."""
    set_current_trace(AgentTrace())

    with pytest.raises(AssertionError, match="No handoff"):
        assert_handoff("triage", "refunds")
31
+
32
+
33
def test_assert_handoff_schema_validation() -> None:
    """A payload matching RefundRequest passes schema validation."""
    recorded = AgentTrace()
    recorded.add_message("triage", "refunds", {"order_id": "12345"})
    set_current_trace(recorded)

    assert_handoff("triage", "refunds", message_schema=RefundRequest)
39
+
40
+
41
def test_assert_handoff_schema_validation_failure() -> None:
    """A payload without ``order_id`` is reported as a schema mismatch."""
    recorded = AgentTrace()
    recorded.add_message("triage", "refunds", {"wrong": "field"})
    set_current_trace(recorded)

    with pytest.raises(AssertionError, match="doesn't match schema"):
        assert_handoff("triage", "refunds", message_schema=RefundRequest)
48
+
49
+
50
def test_assert_tool_called_success() -> None:
    """One recorded call with matching args satisfies assert_tool_called."""
    recorded = AgentTrace()
    recorded.add_tool_call("refunds", "process_refund", {"order_id": "12345"})
    set_current_trace(recorded)

    assert_tool_called("process_refund", args={"order_id": "12345"})
56
+
57
+
58
def test_assert_tool_called_wrong_count() -> None:
    """With no recorded calls, the default expectation of one call fails."""
    set_current_trace(AgentTrace())

    with pytest.raises(AssertionError, match="Expected 1 calls"):
        assert_tool_called("process_refund")
64
+
65
+
66
def test_assert_no_tool_called_success() -> None:
    """An empty trace satisfies assert_no_tool_called."""
    set_current_trace(AgentTrace())

    assert_no_tool_called("send_email")
71
+
72
+
73
def test_assert_no_tool_called_failure() -> None:
    """A recorded send_email call makes assert_no_tool_called fail."""
    recorded = AgentTrace()
    recorded.add_tool_call("notifications", "send_email", {"to": "a@example.com"})
    set_current_trace(recorded)

    with pytest.raises(AssertionError, match="Expected no calls"):
        assert_no_tool_called("send_email")
80
+
81
+
82
def test_assert_no_loop_success() -> None:
    """A short start -> triage -> refunds chain stays under the limit of 3."""
    recorded = AgentTrace()
    for sender, receiver in (("start", "triage"), ("triage", "refunds")):
        recorded.add_message(sender, receiver, {})
    set_current_trace(recorded)

    assert_no_loop(max_iterations=3)
89
+
90
+
91
def test_assert_no_loop_failure() -> None:
    """Six self-addressed triage messages exceed a limit of 10.

    Each message counts once for its sender and once for its receiver, so the
    six messages amount to twelve appearances of ``triage``.
    """
    recorded = AgentTrace()
    repetitions = 6
    while repetitions:
        recorded.add_message("triage", "triage", {})
        repetitions -= 1
    set_current_trace(recorded)

    with pytest.raises(AssertionError, match="Possible infinite loop"):
        assert_no_loop(max_iterations=10)
99
+
100
+
101
def test_assert_cost_under_success() -> None:
    """A total cost of 0.25 is accepted against a threshold of 1.00."""
    set_current_trace(AgentTrace(total_cost=0.25))

    assert_cost_under(1.00)
106
+
107
+
108
def test_assert_cost_under_failure() -> None:
    """A total cost of 1.50 is rejected against a threshold of 1.00."""
    set_current_trace(AgentTrace(total_cost=1.50))

    with pytest.raises(AssertionError, match="exceeds threshold"):
        assert_cost_under(1.00)
114
+
115
+
116
def test_assert_parallel_execution_success() -> None:
    """Two messages recorded back-to-back fall within a one-second window."""
    recorded = AgentTrace()
    for receiver in ("agent_a", "agent_b"):
        recorded.add_message("router", receiver, {})
    set_current_trace(recorded)

    assert_parallel_execution(["agent_a", "agent_b"], max_time_delta_ms=1000)
123
+
@@ -0,0 +1,52 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any
4
+
5
+ from synkt.interceptors.langgraph import LangGraphInterceptor
6
+ from synkt.trace.storage import get_current_trace
7
+
8
+
9
class FakeCompiledGraph:
    """Test double for a compiled graph: runs ``triage`` then ``refunds``."""

    def __init__(self, graph: "FakeGraph"):
        self.graph = graph

    def invoke(self, state: dict[str, Any]) -> dict[str, Any]:
        """Feed ``state`` through both nodes in order and return the final state."""
        pipeline = ("triage", "refunds")
        for step in pipeline:
            # Look the node up at call time so replaced entries are honoured.
            handler = self.graph.nodes[step]
            state = handler(state)
        return state
18
+
19
+
20
class FakeGraph:
    """Minimal graph double exposing ``nodes`` and ``compile()``."""

    def __init__(self) -> None:
        node_table = {}
        node_table["triage"] = self._triage
        node_table["refunds"] = self._refunds
        self.nodes = node_table

    def _triage(self, state: dict[str, Any]) -> dict[str, Any]:
        """Add a fixed order id to the state (mutates and returns it)."""
        state["order_id"] = "12345"
        return state

    def _refunds(self, state: dict[str, Any]) -> dict[str, Any]:
        """Add a resolution text built from the order id (mutates and returns it)."""
        state["resolution"] = f"Refund processed for order {state['order_id']}"
        return state

    def compile(self) -> FakeCompiledGraph:
        """Return a FakeCompiledGraph bound to this graph."""
        compiled = FakeCompiledGraph(self)
        return compiled
37
+
38
+
39
def test_langgraph_interceptor_records_transitions() -> None:
    """The interceptor records start -> triage and triage -> refunds.

    The test installs its own empty trace instead of relying on an autouse
    fixture being registered, so it neither fails with "No active trace" nor
    picks up messages left behind by a previously executed test.
    """
    # Local imports keep this test self-contained within the module.
    from synkt.trace.models import AgentTrace
    from synkt.trace.storage import set_current_trace

    set_current_trace(AgentTrace())

    graph = FakeGraph()
    intercepted = LangGraphInterceptor(graph)

    result = intercepted.invoke({"input": "refund please"})

    assert "resolution" in result
    trace = get_current_trace()
    assert len(trace.messages) == 2
    assert trace.messages[0].from_agent == "start"
    assert trace.messages[0].to_agent == "triage"
    assert trace.messages[1].from_agent == "triage"
    assert trace.messages[1].to_agent == "refunds"
52
+
@@ -0,0 +1,35 @@
1
+ from __future__ import annotations
2
+
3
+ from synkt.trace.models import AgentTrace
4
+
5
+
6
def test_add_message_records_message() -> None:
    """add_message stores exactly one message with the given endpoints and payload."""
    recorded = AgentTrace()

    recorded.add_message("triage", "refunds", {"order_id": "123"})

    assert len(recorded.messages) == 1
    (message,) = recorded.messages
    assert message.from_agent == "triage"
    assert message.to_agent == "refunds"
    assert message.content["order_id"] == "123"
15
+
16
+
17
def test_add_tool_call_records_call() -> None:
    """add_tool_call stores exactly one call carrying every supplied field."""
    recorded = AgentTrace()
    call_fields = {
        "agent": "refunds",
        "tool_name": "process_refund",
        "args": {"order_id": "123"},
        "result": {"ok": True},
        "duration_ms": 12.5,
    }

    recorded.add_tool_call(**call_fields)

    assert len(recorded.tool_calls) == 1
    call = recorded.tool_calls[0]
    assert call.agent == "refunds"
    assert call.tool_name == "process_refund"
    assert call.args["order_id"] == "123"
    assert call.result == {"ok": True}
    assert call.duration_ms == 12.5
35
+
@@ -0,0 +1,47 @@
1
+ from __future__ import annotations
2
+
3
+ from synkt.trace.models import AgentTrace
4
+ from synkt.trace.pretty import format_trace, print_trace
5
+ from synkt.trace.storage import set_current_trace
6
+
7
+
8
def test_format_trace_includes_messages_and_summary() -> None:
    """The rendered text contains the header, both events and the summary figures."""
    recorded = AgentTrace(total_cost=0.1234, duration_ms=42.0)
    recorded.add_message("planner", "research", {"topic": "pricing"})
    recorded.add_tool_call(
        agent="research",
        tool_name="web_search",
        args={"query": "pricing strategy"},
        result={"hits": 3},
        duration_ms=12.5,
    )

    rendered = format_trace(recorded, include_content=True)

    expected_fragments = (
        "Agent Trace Timeline",
        "planner -> research",
        "tool:web_search",
        "messages: 1",
        "tool_calls: 1",
        "total_cost: $0.1234",
    )
    for fragment in expected_fragments:
        assert fragment in rendered
27
+
28
+
29
def test_format_trace_uses_current_trace_when_not_passed() -> None:
    """Without an explicit trace, format_trace renders the current one."""
    recorded = AgentTrace()
    recorded.add_message("start", "triage", {"input": "refund"})
    set_current_trace(recorded)

    rendered = format_trace()

    assert "start -> triage" in rendered
37
+
38
+
39
def test_print_trace_writes_timeline(capsys) -> None:
    """print_trace writes the rendered timeline to stdout."""
    recorded = AgentTrace()
    recorded.add_message("a", "b", {})

    print_trace(recorded)

    stdout_text = capsys.readouterr().out
    assert "a -> b" in stdout_text
47
+