tracerazor-langgraph 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tracerazor_langgraph-0.1.0/PKG-INFO +94 -0
- tracerazor_langgraph-0.1.0/README.md +71 -0
- tracerazor_langgraph-0.1.0/pyproject.toml +36 -0
- tracerazor_langgraph-0.1.0/setup.cfg +4 -0
- tracerazor_langgraph-0.1.0/setup.py +18 -0
- tracerazor_langgraph-0.1.0/tracerazor_langgraph/__init__.py +26 -0
- tracerazor_langgraph-0.1.0/tracerazor_langgraph/callback.py +177 -0
- tracerazor_langgraph-0.1.0/tracerazor_langgraph/client.py +178 -0
- tracerazor_langgraph-0.1.0/tracerazor_langgraph/trace_builder.py +170 -0
- tracerazor_langgraph-0.1.0/tracerazor_langgraph.egg-info/PKG-INFO +94 -0
- tracerazor_langgraph-0.1.0/tracerazor_langgraph.egg-info/SOURCES.txt +12 -0
- tracerazor_langgraph-0.1.0/tracerazor_langgraph.egg-info/dependency_links.txt +1 -0
- tracerazor_langgraph-0.1.0/tracerazor_langgraph.egg-info/requires.txt +4 -0
- tracerazor_langgraph-0.1.0/tracerazor_langgraph.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tracerazor-langgraph
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: TraceRazor LangGraph/LangChain callback — token efficiency auditing
|
|
5
|
+
Author: Zulfaqar Hafez
|
|
6
|
+
License-Expression: Apache-2.0
|
|
7
|
+
Project-URL: Homepage, https://github.com/ZulfaqarHafez/tracerazor
|
|
8
|
+
Project-URL: Repository, https://github.com/ZulfaqarHafez/tracerazor
|
|
9
|
+
Keywords: ai,agents,langgraph,langchain,token-efficiency,tracing
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
17
|
+
Requires-Python: >=3.10
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
Requires-Dist: langchain-core>=0.2
|
|
20
|
+
Provides-Extra: langgraph
|
|
21
|
+
Requires-Dist: langgraph>=0.2; extra == "langgraph"
|
|
22
|
+
Dynamic: requires-python
|
|
23
|
+
|
|
24
|
+
# tracerazor-langgraph
|
|
25
|
+
|
|
26
|
+
LangGraph/LangChain callback adapter for [TraceRazor](../../README.md).
|
|
27
|
+
|
|
28
|
+
Automatically captures every LLM call and tool call from your LangGraph graph with zero manual instrumentation.
|
|
29
|
+
|
|
30
|
+
## Install
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
pip install tracerazor-langgraph
|
|
34
|
+
pip install tracerazor-langgraph[langgraph] # includes langgraph
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
Requires the `tracerazor` binary:
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
cargo build --release
|
|
41
|
+
export TRACERAZOR_BIN=/path/to/TraceRazor/target/release/tracerazor
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Usage
|
|
45
|
+
|
|
46
|
+
```python
|
|
47
|
+
from tracerazor_langgraph import TraceRazorCallback
|
|
48
|
+
from langgraph.prebuilt import create_react_agent
|
|
49
|
+
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage
|
|
50
|
+
|
|
51
|
+
callback = TraceRazorCallback(
|
|
52
|
+
agent_name="support-agent",
|
|
53
|
+
threshold=70,
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
model = ChatOpenAI(model="gpt-4o-mini")
|
|
57
|
+
agent = create_react_agent(model, tools=[...])
|
|
58
|
+
|
|
59
|
+
result = agent.invoke(
|
|
60
|
+
{"messages": [HumanMessage(content="I want a refund")]},
|
|
61
|
+
config={"callbacks": [callback]},
|
|
62
|
+
)
|
|
63
|
+
|
|
64
|
+
# After the agent finishes:
|
|
65
|
+
report = callback.analyse()
|
|
66
|
+
print(report.markdown())
|
|
67
|
+
|
|
68
|
+
# CI/CD gate — raises AssertionError if TAS < threshold:
|
|
69
|
+
callback.assert_passes()
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
## API
|
|
73
|
+
|
|
74
|
+
### `TraceRazorCallback(agent_name, framework, threshold, task_value_score, tracerazor_bin)`
|
|
75
|
+
|
|
76
|
+
| param | default | description |
|
|
77
|
+
|---|---|---|
|
|
78
|
+
| `agent_name` | `"langgraph-agent"` | shown in all reports |
|
|
79
|
+
| `framework` | `"langgraph"` | framework label |
|
|
80
|
+
| `threshold` | `70.0` | minimum TAS for `assert_passes()` |
|
|
81
|
+
| `task_value_score` | `1.0` | answer quality (0–1) |
|
|
82
|
+
| `tracerazor_bin` | auto | path to binary; falls back to `TRACERAZOR_BIN` env var |
|
|
83
|
+
|
|
84
|
+
### `callback.analyse() → TraceRazorReport`
|
|
85
|
+
|
|
86
|
+
Finalise and submit the trace. Returns the report.
|
|
87
|
+
|
|
88
|
+
### `callback.assert_passes()`
|
|
89
|
+
|
|
90
|
+
Raise `AssertionError` if TAS < threshold.
|
|
91
|
+
|
|
92
|
+
### `callback.set_task_value_score(score: float)`
|
|
93
|
+
|
|
94
|
+
Update quality score before calling `analyse()`.
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# tracerazor-langgraph
|
|
2
|
+
|
|
3
|
+
LangGraph/LangChain callback adapter for [TraceRazor](../../README.md).
|
|
4
|
+
|
|
5
|
+
Automatically captures every LLM call and tool call from your LangGraph graph with zero manual instrumentation.
|
|
6
|
+
|
|
7
|
+
## Install
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install tracerazor-langgraph
|
|
11
|
+
pip install tracerazor-langgraph[langgraph] # includes langgraph
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
Requires the `tracerazor` binary:
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
cargo build --release
|
|
18
|
+
export TRACERAZOR_BIN=/path/to/TraceRazor/target/release/tracerazor
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
## Usage
|
|
22
|
+
|
|
23
|
+
```python
|
|
24
|
+
from tracerazor_langgraph import TraceRazorCallback
|
|
25
|
+
from langgraph.prebuilt import create_react_agent
|
|
26
|
+
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage
|
|
27
|
+
|
|
28
|
+
callback = TraceRazorCallback(
|
|
29
|
+
agent_name="support-agent",
|
|
30
|
+
threshold=70,
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
model = ChatOpenAI(model="gpt-4o-mini")
|
|
34
|
+
agent = create_react_agent(model, tools=[...])
|
|
35
|
+
|
|
36
|
+
result = agent.invoke(
|
|
37
|
+
{"messages": [HumanMessage(content="I want a refund")]},
|
|
38
|
+
config={"callbacks": [callback]},
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
# After the agent finishes:
|
|
42
|
+
report = callback.analyse()
|
|
43
|
+
print(report.markdown())
|
|
44
|
+
|
|
45
|
+
# CI/CD gate — raises AssertionError if TAS < threshold:
|
|
46
|
+
callback.assert_passes()
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## API
|
|
50
|
+
|
|
51
|
+
### `TraceRazorCallback(agent_name, framework, threshold, task_value_score, tracerazor_bin)`
|
|
52
|
+
|
|
53
|
+
| param | default | description |
|
|
54
|
+
|---|---|---|
|
|
55
|
+
| `agent_name` | `"langgraph-agent"` | shown in all reports |
|
|
56
|
+
| `framework` | `"langgraph"` | framework label |
|
|
57
|
+
| `threshold` | `70.0` | minimum TAS for `assert_passes()` |
|
|
58
|
+
| `task_value_score` | `1.0` | answer quality (0–1) |
|
|
59
|
+
| `tracerazor_bin` | auto | path to binary; falls back to `TRACERAZOR_BIN` env var |
|
|
60
|
+
|
|
61
|
+
### `callback.analyse() → TraceRazorReport`
|
|
62
|
+
|
|
63
|
+
Finalise and submit the trace. Returns the report.
|
|
64
|
+
|
|
65
|
+
### `callback.assert_passes()`
|
|
66
|
+
|
|
67
|
+
Raise `AssertionError` if TAS < threshold.
|
|
68
|
+
|
|
69
|
+
### `callback.set_task_value_score(score: float)`
|
|
70
|
+
|
|
71
|
+
Update quality score before calling `analyse()`.
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "tracerazor-langgraph"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "TraceRazor LangGraph/LangChain callback — token efficiency auditing"
|
|
9
|
+
authors = [{ name = "Zulfaqar Hafez" }]
|
|
10
|
+
license = "Apache-2.0"
|
|
11
|
+
readme = "README.md"
|
|
12
|
+
requires-python = ">=3.10"
|
|
13
|
+
keywords = ["ai", "agents", "langgraph", "langchain", "token-efficiency", "tracing"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 4 - Beta",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"Programming Language :: Python :: 3",
|
|
18
|
+
"Programming Language :: Python :: 3.10",
|
|
19
|
+
"Programming Language :: Python :: 3.11",
|
|
20
|
+
"Programming Language :: Python :: 3.12",
|
|
21
|
+
"Topic :: Software Development :: Libraries",
|
|
22
|
+
]
|
|
23
|
+
dependencies = [
|
|
24
|
+
"langchain-core>=0.2",
|
|
25
|
+
]
|
|
26
|
+
|
|
27
|
+
[project.urls]
|
|
28
|
+
Homepage = "https://github.com/ZulfaqarHafez/tracerazor"
|
|
29
|
+
Repository = "https://github.com/ZulfaqarHafez/tracerazor"
|
|
30
|
+
|
|
31
|
+
[project.optional-dependencies]
|
|
32
|
+
langgraph = ["langgraph>=0.2"]
|
|
33
|
+
|
|
34
|
+
[tool.setuptools.packages.find]
|
|
35
|
+
where = ["."]
|
|
36
|
+
include = ["tracerazor_langgraph*"]
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
from setuptools import setup, find_packages

# Legacy setuptools entry point. pyproject.toml is the primary metadata
# source; this file exists for tooling that still invokes setup.py directly.
# NOTE(review): metadata here drifts from pyproject.toml — shorter
# description and an additional "openai" extra. Confirm which file is
# authoritative before the next release.
setup(
    name="tracerazor-langgraph",
    version="0.1.0",
    description="TraceRazor LangGraph callback adapter",
    author="Zulfaqar Hafez",
    license="Apache-2.0",
    packages=find_packages(),
    python_requires=">=3.10",
    install_requires=[
        "langchain-core>=0.2",
    ],
    extras_require={
        "langgraph": ["langgraph>=0.2"],
        # NOTE(review): this extra is not mirrored in pyproject.toml.
        "openai": ["openai>=1.0"],
    },
)
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""
|
|
2
|
+
TraceRazor LangGraph integration.
|
|
3
|
+
|
|
4
|
+
Provides a BaseCallbackHandler that captures LangGraph/LangChain execution events
|
|
5
|
+
and streams them into the TraceRazor auditor for real-time and post-hoc analysis.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
from tracerazor_langgraph import TraceRazorCallback
|
|
9
|
+
|
|
10
|
+
callback = TraceRazorCallback(agent_name="my-agent", threshold=75)
|
|
11
|
+
|
|
12
|
+
result = graph.invoke(inputs, config={"callbacks": [callback]})
|
|
13
|
+
|
|
14
|
+
# Get the efficiency report after execution
|
|
15
|
+
report = callback.analyse()
|
|
16
|
+
print(report.markdown())
|
|
17
|
+
|
|
18
|
+
# Or check inline during CI/CD
|
|
19
|
+
callback.assert_passes() # raises AssertionError if TAS < threshold
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from .callback import TraceRazorCallback
|
|
23
|
+
from .client import TraceRazorClient, TraceRazorReport
|
|
24
|
+
|
|
25
|
+
__all__ = ["TraceRazorCallback", "TraceRazorClient", "TraceRazorReport"]
|
|
26
|
+
__version__ = "0.1.0"
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
"""
|
|
2
|
+
TraceRazor LangGraph callback handler.
|
|
3
|
+
|
|
4
|
+
Captures LangGraph/LangChain execution events and serialises them into
|
|
5
|
+
TraceRazor's raw JSON trace format for analysis.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import time
|
|
12
|
+
import uuid
|
|
13
|
+
from typing import Any, Dict, List, Optional, Sequence, Union
|
|
14
|
+
|
|
15
|
+
from langchain_core.callbacks import BaseCallbackHandler
|
|
16
|
+
from langchain_core.outputs import LLMResult
|
|
17
|
+
|
|
18
|
+
from .client import TraceRazorClient, TraceRazorReport
|
|
19
|
+
from .trace_builder import TraceBuilder
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class TraceRazorCallback(BaseCallbackHandler):
    """
    LangGraph/LangChain callback that builds a TraceRazor trace from
    execution events and submits it for analysis when the run completes.

    Args:
        agent_name: Name of the agent (appears in the report header).
        framework: Framework name (default: "langgraph").
        threshold: Minimum TAS score for assertion checks (default: 70).
        tracerazor_bin: Path to the tracerazor CLI binary. Auto-detected
            (TRACERAZOR_BIN env var, PATH, dev repo layout) if None.
        task_value_score: Quality of the final answer (0.0-1.0). Set this
            after the run if you have ground-truth validation.
    """

    def __init__(
        self,
        agent_name: str = "langgraph-agent",
        framework: str = "langgraph",
        threshold: float = 70.0,
        tracerazor_bin: Optional[str] = None,
        task_value_score: float = 1.0,
    ):
        super().__init__()
        self._builder = TraceBuilder(
            agent_name=agent_name,
            framework=framework,
            task_value_score=task_value_score,
        )
        self._threshold = threshold
        # Resolves the binary eagerly; raises FileNotFoundError here if the
        # tracerazor CLI cannot be located.
        self._client = TraceRazorClient(bin_path=tracerazor_bin)
        self._report: Optional[TraceRazorReport] = None

    # ── LLM events ──────────────────────────────────────────────────────────

    def on_llm_start(
        self,
        serialized: Dict[str, Any],
        prompts: List[str],
        **kwargs: Any,
    ) -> None:
        """Called when an LLM starts generating."""
        # Only the first prompt is recorded; batched prompts share one step.
        context = prompts[0] if prompts else ""
        self._builder.start_reasoning_step(input_context=context)

    def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
        """Called when an LLM finishes generating."""
        output_text = ""
        total_tokens = 0

        if response.generations:
            first = response.generations[0]
            if first:
                output_text = getattr(first[0], "text", "") or ""

        # Extract token usage from LLMResult metadata. Providers disagree on
        # the key name ("usage" vs "token_usage"), and either key may be
        # present with a None value — the `or` chain guards both cases
        # (the previous .get(..., .get(...)) form raised AttributeError on
        # an explicit None).
        if response.llm_output:
            usage = (
                response.llm_output.get("usage")
                or response.llm_output.get("token_usage")
                or {}
            )
            total_tokens = (
                usage.get("total_tokens")
                or usage.get("prompt_tokens", 0) + usage.get("completion_tokens", 0)
            )

        self._builder.end_reasoning_step(output=output_text, tokens=total_tokens)

    def on_llm_error(self, error: BaseException, **kwargs: Any) -> None:
        """Called when the LLM raises; aborts the in-flight reasoning step."""
        self._builder.abort_current_step(error=str(error))

    # ── Tool events ──────────────────────────────────────────────────────────

    def on_tool_start(
        self,
        serialized: Dict[str, Any],
        input_str: str,
        **kwargs: Any,
    ) -> None:
        """Called when a tool starts executing."""
        tool_name = serialized.get("name", "unknown_tool")
        # Tool input may be a JSON object or a bare string; wrap non-JSON
        # input so tool_params is always a dict.
        try:
            params = json.loads(input_str) if input_str.strip().startswith("{") else {"input": input_str}
        except json.JSONDecodeError:
            params = {"input": input_str}
        self._builder.start_tool_step(tool_name=tool_name, tool_params=params)

    def on_tool_end(self, output: Any, **kwargs: Any) -> None:
        """Called when a tool finishes successfully."""
        # LangChain may deliver non-string tool output (e.g. ToolMessage);
        # coerce to str while keeping None → "" so "no output" stays falsy.
        text = output if isinstance(output, str) else ("" if output is None else str(output))
        self._builder.end_tool_step(output=text, success=True)

    def on_tool_error(self, error: BaseException, **kwargs: Any) -> None:
        """Called when a tool raises an error."""
        self._builder.end_tool_step(
            output="",
            success=False,
            error=str(error),
        )

    # ── Chain events (LangGraph node transitions) ────────────────────────────

    def on_chain_start(
        self,
        serialized: Dict[str, Any],
        inputs: Dict[str, Any],
        **kwargs: Any,
    ) -> None:
        """LangGraph node start — we use this for agent handoffs."""
        node_name = serialized.get("name", "")
        # Top-level chain start = beginning of the run; skip framework
        # wrapper chains so only real graph nodes count as transitions.
        if node_name and node_name not in ("RunnableSequence", "AgentExecutor", "CompiledGraph"):
            self._builder.note_agent_transition(node_name)

    def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> None:
        # Node boundaries carry no extra trace information; intentionally a no-op.
        pass

    # ── Analysis ─────────────────────────────────────────────────────────────

    def analyse(self) -> TraceRazorReport:
        """
        Finalise the trace and submit it to the TraceRazor CLI for analysis.
        Returns a TraceRazorReport with the TAS score and full report.

        Call this after your LangGraph invocation completes.
        """
        trace_dict = self._builder.build()
        self._report = self._client.analyse(
            trace=trace_dict,
            threshold=self._threshold,
        )
        return self._report

    def assert_passes(self) -> None:
        """
        Analyse the trace (if not already done) and raise AssertionError
        if TAS is below threshold. Useful for CI/CD and test assertions.
        """
        if self._report is None:
            self.analyse()
        assert self._report is not None
        if not self._report.passes:
            raise AssertionError(
                f"TraceRazor: TAS {self._report.tas_score:.1f} is below "
                f"threshold {self._threshold}.\n\n{self._report.summary()}"
            )

    @property
    def report(self) -> Optional[TraceRazorReport]:
        """The most recent analysis report, or None if not yet analysed."""
        return self._report

    def set_task_value_score(self, score: float) -> None:
        """
        Update the task value score (0.0-1.0) based on ground-truth validation.
        Must be called before analyse() for it to take effect.
        """
        # Clamp into [0.0, 1.0] so out-of-range callers cannot skew scoring.
        self._builder.task_value_score = max(0.0, min(1.0, score))
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
"""
|
|
2
|
+
TraceRazor CLI client.
|
|
3
|
+
|
|
4
|
+
Serialises a trace dict to a temp JSON file and invokes the tracerazor
|
|
5
|
+
CLI binary, then parses the resulting JSON report.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import os
|
|
12
|
+
import shutil
|
|
13
|
+
import subprocess
|
|
14
|
+
import tempfile
|
|
15
|
+
from dataclasses import dataclass, field
|
|
16
|
+
from typing import Any, Dict, List, Optional
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class TraceRazorReport:
    """Parsed output from a tracerazor audit run."""

    trace_id: str
    agent_name: str
    framework: str
    total_steps: int
    total_tokens: int
    tas_score: float
    grade: str
    vae_score: float
    passes: bool
    metrics: Dict[str, Any] = field(default_factory=dict)
    diff: List[Dict] = field(default_factory=list)
    savings: Dict[str, Any] = field(default_factory=dict)
    raw: Dict[str, Any] = field(default_factory=dict)

    def markdown(self) -> str:
        """Re-generate a concise markdown summary from the parsed report."""
        divider = "-" * 54
        out: List[str] = []
        out.append("TRACERAZOR REPORT")
        out.append(divider)
        out.append(f"Trace: {self.trace_id}")
        out.append(f"Agent: {self.agent_name}")
        out.append(f"Steps: {self.total_steps} Tokens: {self.total_tokens}")
        out.append(divider)
        out.append(f"TRACERAZOR SCORE: {self.tas_score:.1f} / 100 [{self.grade.upper()}]")
        out.append(f"VAE SCORE: {self.vae_score:.2f}")
        out.append(divider)
        # The savings section only appears when the auditor produced one.
        if self.savings:
            saved = self.savings.get('tokens_saved', 0)
            pct = self.savings.get('reduction_pct', 0)
            monthly = self.savings.get('monthly_savings_usd', 0)
            out.append("SAVINGS ESTIMATE")
            out.append(f" Tokens saved: {saved} ({pct:.1f}% reduction)")
            out.append(f" At 50K/month: ${monthly:.2f}/month")
        return "\n".join(out)

    def summary(self) -> str:
        """One-line digest: TAS, grade, VAE and token reduction."""
        pct = self.savings.get('reduction_pct', 0)
        return (
            f"TAS {self.tas_score:.1f}/100 [{self.grade}] | "
            f"VAE {self.vae_score:.2f} | "
            f"Saved {pct:.0f}% tokens"
        )
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class TraceRazorClient:
    """
    Thin wrapper around the tracerazor CLI binary.

    Writes the trace to a temp file, invokes the binary, parses JSON output.
    """

    def __init__(self, bin_path: Optional[str] = None):
        # Resolve the binary eagerly so a missing install fails here
        # (FileNotFoundError) rather than mid-run inside analyse().
        self._bin = bin_path or self._find_binary()

    def analyse(
        self,
        trace: Dict[str, Any],
        threshold: float = 70.0,
    ) -> TraceRazorReport:
        """
        Write the trace to a temp file and run tracerazor audit on it.

        Returns a TraceRazorReport with the full parsed result.
        Raises RuntimeError if the binary fails unexpectedly.
        """
        # delete=False: the subprocess must re-open the file by path after
        # this context manager closes it (required for Windows file locking);
        # cleanup happens in the finally block below.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".json", delete=False, encoding="utf-8"
        ) as f:
            json.dump(trace, f, indent=2)
            tmp_path = f.name

        try:
            cmd = [
                self._bin,
                "audit",
                tmp_path,
                "--format", "json",
                "--threshold", str(threshold),
            ]

            # List-form argv (shell=False default) avoids shell injection;
            # timeout bounds a hung binary to 60 seconds.
            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=60,
            )

            # Exit code 1 = below threshold (still valid output). Other codes = error.
            if result.returncode not in (0, 1):
                raise RuntimeError(
                    f"tracerazor exited with code {result.returncode}:\n{result.stderr}"
                )

            # NOTE(review): assumes the CLI always emits JSON on stdout for
            # exit codes 0/1 — a malformed/empty stdout raises JSONDecodeError.
            report_json = json.loads(result.stdout)
            return self._parse_report(report_json, threshold)

        finally:
            # Best-effort temp-file cleanup; never mask the real error.
            try:
                os.unlink(tmp_path)
            except OSError:
                pass

    @staticmethod
    def _parse_report(data: Dict[str, Any], threshold: float) -> TraceRazorReport:
        """Map the CLI's JSON report dict onto a TraceRazorReport."""
        # "score" is the nested scoring sub-object; stored whole as `metrics`.
        score = data.get("score", {})
        return TraceRazorReport(
            trace_id=data.get("trace_id", ""),
            agent_name=data.get("agent_name", ""),
            framework=data.get("framework", ""),
            total_steps=data.get("total_steps", 0),
            total_tokens=data.get("total_tokens", 0),
            tas_score=score.get("score", 0.0),
            grade=score.get("grade", "Unknown"),
            vae_score=score.get("vae", 0.0),
            # pass/fail recomputed locally so it always matches `threshold`.
            passes=score.get("score", 0.0) >= threshold,
            metrics=score,
            diff=data.get("diff", []),
            savings=data.get("savings", {}),
            raw=data,
        )

    @staticmethod
    def _find_binary() -> str:
        """
        Locate the tracerazor binary.

        Search order:
          1. TRACERAZOR_BIN environment variable
          2. PATH (system-wide install)
          3. Relative paths from this file (dev repo layout)

        Raises:
            FileNotFoundError: if no candidate exists on disk.
        """
        env_path = os.environ.get("TRACERAZOR_BIN")
        if env_path and os.path.isfile(env_path):
            return env_path

        # .exe fallback covers Windows PATH entries without PATHEXT handling.
        path_bin = shutil.which("tracerazor") or shutil.which("tracerazor.exe")
        if path_bin:
            return path_bin

        # Dev layout: integrations/langgraph/ → ../../target/release/
        here = os.path.dirname(os.path.abspath(__file__))
        for rel in [
            "../../../../target/release/tracerazor.exe",
            "../../../../target/release/tracerazor",
            "../../../../target/debug/tracerazor.exe",
            "../../../../target/debug/tracerazor",
        ]:
            candidate = os.path.normpath(os.path.join(here, rel))
            if os.path.isfile(candidate):
                return candidate

        raise FileNotFoundError(
            "tracerazor binary not found. Set TRACERAZOR_BIN environment variable "
            "or add 'tracerazor' to PATH.\n"
            "Build with: cargo build --release"
        )
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Builds a TraceRazor-format trace dict from LangGraph callback events.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import time
|
|
8
|
+
import uuid
|
|
9
|
+
from typing import Any, Dict, List, Optional
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class _PendingStep:
    """Mutable step being built from start/end event pairs."""

    def __init__(self, step_type: str, step_id: int, start_time: float):
        # Identity of the step, fixed at start-event time.
        self.step_id: int = step_id
        self.step_type: str = step_type
        self.start_time: float = start_time
        # Accumulated payload; filled in by the matching end event.
        self.content: str = ""
        self.tokens: int = 0
        # Tool-call metadata — stays None for pure reasoning steps.
        self.tool_name: Optional[str] = None
        self.tool_params: Optional[Dict] = None
        self.tool_success: Optional[bool] = None
        self.tool_error: Optional[str] = None
        # Context captured from the surrounding run, when available.
        self.input_context: Optional[str] = None
        self.output: Optional[str] = None
        self.agent_id: Optional[str] = None
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class TraceBuilder:
    """
    Stateful builder that converts LangGraph callback events into a
    TraceRazor raw JSON trace.

    Events arrive as start/end pairs; at most one step is "pending" at a
    time. NOTE(review): starting a new step while another is pending
    silently overwrites (drops) the unfinished one — confirm that nested
    LLM/tool events cannot occur in practice.
    """

    def __init__(
        self,
        agent_name: str,
        framework: str,
        task_value_score: float = 1.0,
    ):
        # Report identity / metadata.
        self.agent_name = agent_name
        self.framework = framework
        # Mutable on purpose: the callback clamps and rewrites this before build().
        self.task_value_score = task_value_score
        self._trace_id: str = str(uuid.uuid4())
        # Committed steps, in arrival order.
        self._steps: List[Dict] = []
        # 1-based id assigned to the next step.
        self._step_counter: int = 1
        # The in-flight step between a start and its matching end event.
        self._pending: Optional[_PendingStep] = None
        # Last LangGraph node seen; stamped onto steps as agent_id.
        self._current_agent: Optional[str] = None

    # ── Step lifecycle ───────────────────────────────────────────────────────

    def start_reasoning_step(self, input_context: str = "") -> None:
        """Open a reasoning (LLM) step; closed by end_reasoning_step()."""
        self._pending = _PendingStep("reasoning", self._step_counter, time.time())
        # Empty context is normalised to None so it is omitted from the step dict.
        self._pending.input_context = input_context or None
        self._pending.agent_id = self._current_agent

    def end_reasoning_step(self, output: str = "", tokens: int = 0) -> None:
        """Close the pending reasoning step and commit it to the trace."""
        # Ignore mismatched or spurious end events (e.g. end without start).
        if self._pending is None or self._pending.step_type != "reasoning":
            return
        p = self._pending
        p.output = output or None
        # Prefer real usage metadata; fall back to a character-count estimate.
        p.tokens = tokens or self._estimate_tokens(p.input_context or "", output)
        p.content = self._extract_content(p.input_context or "", output)
        self._commit(p)
        self._pending = None

    def start_tool_step(self, tool_name: str, tool_params: Dict) -> None:
        """Open a tool-call step; closed by end_tool_step()."""
        self._pending = _PendingStep("tool_call", self._step_counter, time.time())
        self._pending.tool_name = tool_name
        self._pending.tool_params = tool_params
        self._pending.content = f"Calling {tool_name}"
        self._pending.agent_id = self._current_agent

    def end_tool_step(
        self, output: str, success: bool, error: Optional[str] = None
    ) -> None:
        """Close the pending tool step (success or failure) and commit it."""
        # Ignore mismatched or spurious end events.
        if self._pending is None or self._pending.step_type != "tool_call":
            return
        p = self._pending
        p.tool_success = success
        p.tool_error = error
        p.output = output or None
        # Tool calls carry no provider usage metadata, so always estimate.
        p.tokens = self._estimate_tokens(str(p.tool_params or ""), output)
        self._commit(p)
        self._pending = None

    def abort_current_step(self, error: str) -> None:
        """Called when an LLM or tool raises an error mid-step."""
        if self._pending is None:
            return
        p = self._pending
        if p.step_type == "tool_call":
            p.tool_success = False
            p.tool_error = error
            # 50 is a nominal token charge for a step that never completed.
            p.tokens = p.tokens or 50
        else:
            p.content = f"[ERROR] {error}"
            p.tokens = p.tokens or 50
        self._commit(p)
        self._pending = None

    def note_agent_transition(self, agent_id: str) -> None:
        """Record a LangGraph node transition (agent handoff)."""
        self._current_agent = agent_id

    # ── Build ────────────────────────────────────────────────────────────────

    def build(self) -> Dict:
        """Return the complete trace as a dict ready for JSON serialisation."""
        # Commit any pending step so an interrupted run still produces a
        # complete trace (100 tokens is a nominal placeholder charge).
        if self._pending is not None:
            p = self._pending
            p.tokens = p.tokens or 100
            p.content = p.content or "incomplete step"
            self._commit(p)
            self._pending = None

        return {
            "trace_id": self._trace_id,
            "agent_name": self.agent_name,
            "framework": self.framework,
            "task_value_score": self.task_value_score,
            "steps": self._steps,
        }

    # ── Helpers ──────────────────────────────────────────────────────────────

    def _commit(self, p: _PendingStep) -> None:
        """Serialise a finished step to a dict, append it, bump the counter."""
        step: Dict[str, Any] = {
            "id": p.step_id,
            "type": p.step_type,
            "content": p.content or "",
            # The trace format requires a positive token count.
            "tokens": max(p.tokens, 1),
        }
        # Optional fields are omitted entirely when unset/empty, keeping the
        # emitted JSON minimal.
        if p.tool_name is not None:
            step["tool_name"] = p.tool_name
        if p.tool_params is not None:
            step["tool_params"] = p.tool_params
        if p.tool_success is not None:
            step["tool_success"] = p.tool_success
        if p.tool_error is not None:
            step["tool_error"] = p.tool_error
        if p.input_context:
            step["input_context"] = p.input_context
        if p.output:
            step["output"] = p.output
        if p.agent_id:
            step["agent_id"] = p.agent_id
        self._steps.append(step)
        self._step_counter += 1

    @staticmethod
    def _estimate_tokens(input_text: str, output_text: str) -> int:
        """
        Rough token estimate: ~4 characters per token.
        Actual counts come from LLM usage metadata when available.
        """
        total_chars = len(input_text) + len(output_text)
        # Floor of 10 prevents trivially small steps from scoring as free.
        return max(int(total_chars / 4), 10)

    @staticmethod
    def _extract_content(input_context: str, output: str) -> str:
        """Build a concise content string from input/output."""
        if output:
            return output[:200]
        if input_context:
            # Use last 200 chars of input as the content summary.
            return input_context[-200:]
        return ""
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tracerazor-langgraph
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: TraceRazor LangGraph/LangChain callback — token efficiency auditing
|
|
5
|
+
Author: Zulfaqar Hafez
|
|
6
|
+
License-Expression: Apache-2.0
|
|
7
|
+
Project-URL: Homepage, https://github.com/ZulfaqarHafez/tracerazor
|
|
8
|
+
Project-URL: Repository, https://github.com/ZulfaqarHafez/tracerazor
|
|
9
|
+
Keywords: ai,agents,langgraph,langchain,token-efficiency,tracing
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
17
|
+
Requires-Python: >=3.10
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
Requires-Dist: langchain-core>=0.2
|
|
20
|
+
Provides-Extra: langgraph
|
|
21
|
+
Requires-Dist: langgraph>=0.2; extra == "langgraph"
|
|
22
|
+
Dynamic: requires-python
|
|
23
|
+
|
|
24
|
+
# tracerazor-langgraph
|
|
25
|
+
|
|
26
|
+
LangGraph/LangChain callback adapter for [TraceRazor](../../README.md).
|
|
27
|
+
|
|
28
|
+
Automatically captures every LLM call and tool call from your LangGraph graph with zero manual instrumentation.
|
|
29
|
+
|
|
30
|
+
## Install
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
pip install tracerazor-langgraph
|
|
34
|
+
pip install tracerazor-langgraph[langgraph] # includes langgraph
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
Requires the `tracerazor` binary:
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
cargo build --release
|
|
41
|
+
export TRACERAZOR_BIN=/path/to/TraceRazor/target/release/tracerazor
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Usage
|
|
45
|
+
|
|
46
|
+
```python
|
|
47
|
+
from tracerazor_langgraph import TraceRazorCallback
|
|
48
|
+
from langgraph.prebuilt import create_react_agent
|
|
49
|
+
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage
|
|
50
|
+
|
|
51
|
+
callback = TraceRazorCallback(
|
|
52
|
+
agent_name="support-agent",
|
|
53
|
+
threshold=70,
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
model = ChatOpenAI(model="gpt-4o-mini")
|
|
57
|
+
agent = create_react_agent(model, tools=[...])
|
|
58
|
+
|
|
59
|
+
result = agent.invoke(
|
|
60
|
+
{"messages": [HumanMessage(content="I want a refund")]},
|
|
61
|
+
config={"callbacks": [callback]},
|
|
62
|
+
)
|
|
63
|
+
|
|
64
|
+
# After the agent finishes:
|
|
65
|
+
report = callback.analyse()
|
|
66
|
+
print(report.markdown())
|
|
67
|
+
|
|
68
|
+
# CI/CD gate — raises AssertionError if TAS < threshold:
|
|
69
|
+
callback.assert_passes()
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
## API
|
|
73
|
+
|
|
74
|
+
### `TraceRazorCallback(agent_name, framework, threshold, task_value_score, tracerazor_bin)`
|
|
75
|
+
|
|
76
|
+
| param | default | description |
|
|
77
|
+
|---|---|---|
|
|
78
|
+
| `agent_name` | `"langgraph-agent"` | shown in all reports |
|
|
79
|
+
| `framework` | `"langgraph"` | framework label |
|
|
80
|
+
| `threshold` | `70.0` | minimum TAS for `assert_passes()` |
|
|
81
|
+
| `task_value_score` | `1.0` | answer quality (0–1) |
|
|
82
|
+
| `tracerazor_bin` | auto | path to binary; falls back to `TRACERAZOR_BIN` env var |
|
|
83
|
+
|
|
84
|
+
### `callback.analyse() → TraceRazorReport`
|
|
85
|
+
|
|
86
|
+
Finalise and submit the trace. Returns the report.
|
|
87
|
+
|
|
88
|
+
### `callback.assert_passes()`
|
|
89
|
+
|
|
90
|
+
Raise `AssertionError` if TAS < threshold.
|
|
91
|
+
|
|
92
|
+
### `callback.set_task_value_score(score: float)`
|
|
93
|
+
|
|
94
|
+
Update quality score before calling `analyse()`.
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
setup.py
|
|
4
|
+
tracerazor_langgraph/__init__.py
|
|
5
|
+
tracerazor_langgraph/callback.py
|
|
6
|
+
tracerazor_langgraph/client.py
|
|
7
|
+
tracerazor_langgraph/trace_builder.py
|
|
8
|
+
tracerazor_langgraph.egg-info/PKG-INFO
|
|
9
|
+
tracerazor_langgraph.egg-info/SOURCES.txt
|
|
10
|
+
tracerazor_langgraph.egg-info/dependency_links.txt
|
|
11
|
+
tracerazor_langgraph.egg-info/requires.txt
|
|
12
|
+
tracerazor_langgraph.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
tracerazor_langgraph
|