tracerazor 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tracerazor-0.1.0/PKG-INFO +155 -0
- tracerazor-0.1.0/README.md +129 -0
- tracerazor-0.1.0/pyproject.toml +37 -0
- tracerazor-0.1.0/setup.cfg +4 -0
- tracerazor-0.1.0/tracerazor.egg-info/PKG-INFO +155 -0
- tracerazor-0.1.0/tracerazor.egg-info/SOURCES.txt +11 -0
- tracerazor-0.1.0/tracerazor.egg-info/dependency_links.txt +1 -0
- tracerazor-0.1.0/tracerazor.egg-info/requires.txt +9 -0
- tracerazor-0.1.0/tracerazor.egg-info/top_level.txt +1 -0
- tracerazor-0.1.0/tracerazor_sdk/__init__.py +37 -0
- tracerazor-0.1.0/tracerazor_sdk/client.py +264 -0
- tracerazor-0.1.0/tracerazor_sdk/trace.py +68 -0
- tracerazor-0.1.0/tracerazor_sdk/tracer.py +180 -0
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tracerazor
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: TraceRazor Python SDK — token efficiency auditing for AI agents
|
|
5
|
+
Author: Zulfaqar Hafez
|
|
6
|
+
License-Expression: Apache-2.0
|
|
7
|
+
Project-URL: Homepage, https://github.com/ZulfaqarHafez/tracerazor
|
|
8
|
+
Project-URL: Repository, https://github.com/ZulfaqarHafez/tracerazor
|
|
9
|
+
Project-URL: Issues, https://github.com/ZulfaqarHafez/tracerazor/issues
|
|
10
|
+
Keywords: ai,agents,llm,token-efficiency,observability,tracing
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
18
|
+
Requires-Python: >=3.10
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
Provides-Extra: http
|
|
21
|
+
Requires-Dist: requests>=2.28; extra == "http"
|
|
22
|
+
Provides-Extra: openai
|
|
23
|
+
Requires-Dist: openai>=1.0; extra == "openai"
|
|
24
|
+
Provides-Extra: anthropic
|
|
25
|
+
Requires-Dist: anthropic>=0.20; extra == "anthropic"
|
|
26
|
+
|
|
27
|
+
# tracerazor
|
|
28
|
+
|
|
29
|
+
Python SDK for [TraceRazor](../../README.md) — token efficiency auditing for AI agents.
|
|
30
|
+
|
|
31
|
+
Works with any Python agent: OpenAI, Anthropic, LangGraph, CrewAI, AutoGen, or raw code.
|
|
32
|
+
|
|
33
|
+
## Install
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
pip install tracerazor
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
Requires the `tracerazor` binary to be built and accessible. Either:
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
# Option A: build from source (one-time)
|
|
43
|
+
cargo build --release
|
|
44
|
+
export TRACERAZOR_BIN=/path/to/TraceRazor/target/release/tracerazor
|
|
45
|
+
|
|
46
|
+
# Option B: use HTTP mode against a running server (no binary on client)
|
|
47
|
+
# docker compose up (in the TraceRazor repo)
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## Quickstart
|
|
51
|
+
|
|
52
|
+
```python
|
|
53
|
+
from tracerazor_sdk import Tracer
|
|
54
|
+
|
|
55
|
+
tracer = Tracer(agent_name="my-agent", framework="openai")
|
|
56
|
+
|
|
57
|
+
# After each LLM call, record the reasoning step:
|
|
58
|
+
tracer.reasoning(
|
|
59
|
+
content=llm_response.text,
|
|
60
|
+
tokens=llm_response.usage.total_tokens,
|
|
61
|
+
input_context=prompt, # optional, improves CCE detection
|
|
62
|
+
)
|
|
63
|
+
|
|
64
|
+
# After each tool call:
|
|
65
|
+
tracer.tool(
|
|
66
|
+
name="get_order_details",
|
|
67
|
+
params={"order_id": "ORD-9182"},
|
|
68
|
+
output=tool_result,
|
|
69
|
+
success=True,
|
|
70
|
+
tokens=120,
|
|
71
|
+
)
|
|
72
|
+
|
|
73
|
+
# After the agent finishes:
|
|
74
|
+
report = tracer.analyse()
|
|
75
|
+
print(report.summary())
|
|
76
|
+
# → TAS 74.3/100 [Good] | 6 steps, 3200 tokens | Saved 1100 tokens (34%)
|
|
77
|
+
|
|
78
|
+
print(report.markdown()) # full formatted report
|
|
79
|
+
report.assert_passes() # raises AssertionError if TAS < threshold (CI use)
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
## HTTP mode
|
|
83
|
+
|
|
84
|
+
If you'd rather not put the binary on every machine, run the server once and POST from anywhere:
|
|
85
|
+
|
|
86
|
+
```python
|
|
87
|
+
from tracerazor_sdk import Tracer
|
|
88
|
+
|
|
89
|
+
tracer = Tracer(
|
|
90
|
+
agent_name="my-agent",
|
|
91
|
+
server="http://localhost:8080", # tracerazor-server URL
|
|
92
|
+
)
|
|
93
|
+
# Record steps the same way, then:
|
|
94
|
+
report = tracer.analyse()
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
Install with HTTP support:
|
|
98
|
+
|
|
99
|
+
```bash
|
|
100
|
+
pip install tracerazor[http]
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
## Context manager
|
|
104
|
+
|
|
105
|
+
```python
|
|
106
|
+
with Tracer(agent_name="my-agent") as t:
|
|
107
|
+
t.reasoning("...", tokens=500)
|
|
108
|
+
t.tool("search", params={}, output="...", success=True, tokens=100)
|
|
109
|
+
|
|
110
|
+
report = t.analyse()
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
## API
|
|
114
|
+
|
|
115
|
+
### `Tracer(agent_name, framework, threshold, task_value_score, bin_path, server)`
|
|
116
|
+
|
|
117
|
+
| param | default | description |
|
|
118
|
+
|---|---|---|
|
|
119
|
+
| `agent_name` | required | shown in reports and used for baseline tracking |
|
|
120
|
+
| `framework` | `"custom"` | any string: `"openai"`, `"anthropic"`, `"crewai"`, etc. |
|
|
121
|
+
| `threshold` | `70.0` | minimum TAS for `assert_passes()` |
|
|
122
|
+
| `task_value_score` | `1.0` | answer quality (0–1), update with `set_task_value()` |
|
|
123
|
+
| `bin_path` | auto | path to `tracerazor` binary; falls back to `TRACERAZOR_BIN` env var |
|
|
124
|
+
| `server` | `None` | if set, use HTTP mode |
|
|
125
|
+
|
|
126
|
+
### `tracer.reasoning(content, tokens, input_context, output)`
|
|
127
|
+
|
|
128
|
+
Record one LLM reasoning step. `input_context` is the full prompt — include it for accurate CCE bloat detection.
|
|
129
|
+
|
|
130
|
+
### `tracer.tool(name, params, output, success, error, tokens, input_context)`
|
|
131
|
+
|
|
132
|
+
Record one tool call. `success=False` triggers misfire detection (TCA) and auto-fix generation.
|
|
133
|
+
|
|
134
|
+
### `tracer.set_task_value(score: float)`
|
|
135
|
+
|
|
136
|
+
Update the task quality score after validating the agent's answer. Call before `analyse()`.
|
|
137
|
+
|
|
138
|
+
### `tracer.analyse() → TraceRazorReport`
|
|
139
|
+
|
|
140
|
+
Submit the trace and return the report.
|
|
141
|
+
|
|
142
|
+
### `TraceRazorReport`
|
|
143
|
+
|
|
144
|
+
| attribute | type | description |
|
|
145
|
+
|---|---|---|
|
|
146
|
+
| `tas_score` | `float` | 0–100 composite score |
|
|
147
|
+
| `grade` | `str` | `Excellent`, `Good`, `Fair`, `Poor` |
|
|
148
|
+
| `passes` | `bool` | `tas_score >= threshold` |
|
|
149
|
+
| `savings` | `dict` | `tokens_saved`, `reduction_pct`, `monthly_savings_usd` |
|
|
150
|
+
| `fixes` | `list` | auto-generated fix patches |
|
|
151
|
+
| `anomalies` | `list` | z-score alerts vs. agent baseline (after 5+ runs) |
|
|
152
|
+
| `metrics` | `dict` | raw per-metric scores (SRR, LDI, TCA, RDA, ISR, TUR, CCE, DBO) |
|
|
153
|
+
| `.summary()` | method | one-line string |
|
|
154
|
+
| `.markdown()` | method | full formatted report |
|
|
155
|
+
| `.assert_passes()` | method | raises `AssertionError` if TAS < threshold |
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
# tracerazor
|
|
2
|
+
|
|
3
|
+
Python SDK for [TraceRazor](../../README.md) — token efficiency auditing for AI agents.
|
|
4
|
+
|
|
5
|
+
Works with any Python agent: OpenAI, Anthropic, LangGraph, CrewAI, AutoGen, or raw code.
|
|
6
|
+
|
|
7
|
+
## Install
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install tracerazor
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
Requires the `tracerazor` binary to be built and accessible. Either:
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
# Option A: build from source (one-time)
|
|
17
|
+
cargo build --release
|
|
18
|
+
export TRACERAZOR_BIN=/path/to/TraceRazor/target/release/tracerazor
|
|
19
|
+
|
|
20
|
+
# Option B: use HTTP mode against a running server (no binary on client)
|
|
21
|
+
# docker compose up (in the TraceRazor repo)
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## Quickstart
|
|
25
|
+
|
|
26
|
+
```python
|
|
27
|
+
from tracerazor_sdk import Tracer
|
|
28
|
+
|
|
29
|
+
tracer = Tracer(agent_name="my-agent", framework="openai")
|
|
30
|
+
|
|
31
|
+
# After each LLM call, record the reasoning step:
|
|
32
|
+
tracer.reasoning(
|
|
33
|
+
content=llm_response.text,
|
|
34
|
+
tokens=llm_response.usage.total_tokens,
|
|
35
|
+
input_context=prompt, # optional, improves CCE detection
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
# After each tool call:
|
|
39
|
+
tracer.tool(
|
|
40
|
+
name="get_order_details",
|
|
41
|
+
params={"order_id": "ORD-9182"},
|
|
42
|
+
output=tool_result,
|
|
43
|
+
success=True,
|
|
44
|
+
tokens=120,
|
|
45
|
+
)
|
|
46
|
+
|
|
47
|
+
# After the agent finishes:
|
|
48
|
+
report = tracer.analyse()
|
|
49
|
+
print(report.summary())
|
|
50
|
+
# → TAS 74.3/100 [Good] | 6 steps, 3200 tokens | Saved 1100 tokens (34%)
|
|
51
|
+
|
|
52
|
+
print(report.markdown()) # full formatted report
|
|
53
|
+
report.assert_passes() # raises AssertionError if TAS < threshold (CI use)
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## HTTP mode
|
|
57
|
+
|
|
58
|
+
If you'd rather not put the binary on every machine, run the server once and POST from anywhere:
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
from tracerazor_sdk import Tracer
|
|
62
|
+
|
|
63
|
+
tracer = Tracer(
|
|
64
|
+
agent_name="my-agent",
|
|
65
|
+
server="http://localhost:8080", # tracerazor-server URL
|
|
66
|
+
)
|
|
67
|
+
# Record steps the same way, then:
|
|
68
|
+
report = tracer.analyse()
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
Install with HTTP support:
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
pip install tracerazor[http]
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## Context manager
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
with Tracer(agent_name="my-agent") as t:
|
|
81
|
+
t.reasoning("...", tokens=500)
|
|
82
|
+
t.tool("search", params={}, output="...", success=True, tokens=100)
|
|
83
|
+
|
|
84
|
+
report = t.analyse()
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
## API
|
|
88
|
+
|
|
89
|
+
### `Tracer(agent_name, framework, threshold, task_value_score, bin_path, server)`
|
|
90
|
+
|
|
91
|
+
| param | default | description |
|
|
92
|
+
|---|---|---|
|
|
93
|
+
| `agent_name` | required | shown in reports and used for baseline tracking |
|
|
94
|
+
| `framework` | `"custom"` | any string: `"openai"`, `"anthropic"`, `"crewai"`, etc. |
|
|
95
|
+
| `threshold` | `70.0` | minimum TAS for `assert_passes()` |
|
|
96
|
+
| `task_value_score` | `1.0` | answer quality (0–1), update with `set_task_value()` |
|
|
97
|
+
| `bin_path` | auto | path to `tracerazor` binary; falls back to `TRACERAZOR_BIN` env var |
|
|
98
|
+
| `server` | `None` | if set, use HTTP mode |
|
|
99
|
+
|
|
100
|
+
### `tracer.reasoning(content, tokens, input_context, output)`
|
|
101
|
+
|
|
102
|
+
Record one LLM reasoning step. `input_context` is the full prompt — include it for accurate CCE bloat detection.
|
|
103
|
+
|
|
104
|
+
### `tracer.tool(name, params, output, success, error, tokens, input_context)`
|
|
105
|
+
|
|
106
|
+
Record one tool call. `success=False` triggers misfire detection (TCA) and auto-fix generation.
|
|
107
|
+
|
|
108
|
+
### `tracer.set_task_value(score: float)`
|
|
109
|
+
|
|
110
|
+
Update the task quality score after validating the agent's answer. Call before `analyse()`.
|
|
111
|
+
|
|
112
|
+
### `tracer.analyse() → TraceRazorReport`
|
|
113
|
+
|
|
114
|
+
Submit the trace and return the report.
|
|
115
|
+
|
|
116
|
+
### `TraceRazorReport`
|
|
117
|
+
|
|
118
|
+
| attribute | type | description |
|
|
119
|
+
|---|---|---|
|
|
120
|
+
| `tas_score` | `float` | 0–100 composite score |
|
|
121
|
+
| `grade` | `str` | `Excellent`, `Good`, `Fair`, `Poor` |
|
|
122
|
+
| `passes` | `bool` | `tas_score >= threshold` |
|
|
123
|
+
| `savings` | `dict` | `tokens_saved`, `reduction_pct`, `monthly_savings_usd` |
|
|
124
|
+
| `fixes` | `list` | auto-generated fix patches |
|
|
125
|
+
| `anomalies` | `list` | z-score alerts vs. agent baseline (after 5+ runs) |
|
|
126
|
+
| `metrics` | `dict` | raw per-metric scores (SRR, LDI, TCA, RDA, ISR, TUR, CCE, DBO) |
|
|
127
|
+
| `.summary()` | method | one-line string |
|
|
128
|
+
| `.markdown()` | method | full formatted report |
|
|
129
|
+
| `.assert_passes()` | method | raises `AssertionError` if TAS < threshold |
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "tracerazor"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "TraceRazor Python SDK — token efficiency auditing for AI agents"
|
|
9
|
+
authors = [{ name = "Zulfaqar Hafez" }]
|
|
10
|
+
license = "Apache-2.0"
|
|
11
|
+
readme = "README.md"
|
|
12
|
+
requires-python = ">=3.10"
|
|
13
|
+
keywords = ["ai", "agents", "llm", "token-efficiency", "observability", "tracing"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 4 - Beta",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"Programming Language :: Python :: 3",
|
|
18
|
+
"Programming Language :: Python :: 3.10",
|
|
19
|
+
"Programming Language :: Python :: 3.11",
|
|
20
|
+
"Programming Language :: Python :: 3.12",
|
|
21
|
+
"Topic :: Software Development :: Libraries",
|
|
22
|
+
]
|
|
23
|
+
dependencies = []
|
|
24
|
+
|
|
25
|
+
[project.urls]
|
|
26
|
+
Homepage = "https://github.com/ZulfaqarHafez/tracerazor"
|
|
27
|
+
Repository = "https://github.com/ZulfaqarHafez/tracerazor"
|
|
28
|
+
Issues = "https://github.com/ZulfaqarHafez/tracerazor/issues"
|
|
29
|
+
|
|
30
|
+
[project.optional-dependencies]
|
|
31
|
+
http = ["requests>=2.28"]
|
|
32
|
+
openai = ["openai>=1.0"]
|
|
33
|
+
anthropic = ["anthropic>=0.20"]
|
|
34
|
+
|
|
35
|
+
[tool.setuptools.packages.find]
|
|
36
|
+
where = ["."]
|
|
37
|
+
include = ["tracerazor_sdk*"]
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tracerazor
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: TraceRazor Python SDK — token efficiency auditing for AI agents
|
|
5
|
+
Author: Zulfaqar Hafez
|
|
6
|
+
License-Expression: Apache-2.0
|
|
7
|
+
Project-URL: Homepage, https://github.com/ZulfaqarHafez/tracerazor
|
|
8
|
+
Project-URL: Repository, https://github.com/ZulfaqarHafez/tracerazor
|
|
9
|
+
Project-URL: Issues, https://github.com/ZulfaqarHafez/tracerazor/issues
|
|
10
|
+
Keywords: ai,agents,llm,token-efficiency,observability,tracing
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
18
|
+
Requires-Python: >=3.10
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
Provides-Extra: http
|
|
21
|
+
Requires-Dist: requests>=2.28; extra == "http"
|
|
22
|
+
Provides-Extra: openai
|
|
23
|
+
Requires-Dist: openai>=1.0; extra == "openai"
|
|
24
|
+
Provides-Extra: anthropic
|
|
25
|
+
Requires-Dist: anthropic>=0.20; extra == "anthropic"
|
|
26
|
+
|
|
27
|
+
# tracerazor
|
|
28
|
+
|
|
29
|
+
Python SDK for [TraceRazor](../../README.md) — token efficiency auditing for AI agents.
|
|
30
|
+
|
|
31
|
+
Works with any Python agent: OpenAI, Anthropic, LangGraph, CrewAI, AutoGen, or raw code.
|
|
32
|
+
|
|
33
|
+
## Install
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
pip install tracerazor
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
Requires the `tracerazor` binary to be built and accessible. Either:
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
# Option A: build from source (one-time)
|
|
43
|
+
cargo build --release
|
|
44
|
+
export TRACERAZOR_BIN=/path/to/TraceRazor/target/release/tracerazor
|
|
45
|
+
|
|
46
|
+
# Option B: use HTTP mode against a running server (no binary on client)
|
|
47
|
+
# docker compose up (in the TraceRazor repo)
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## Quickstart
|
|
51
|
+
|
|
52
|
+
```python
|
|
53
|
+
from tracerazor_sdk import Tracer
|
|
54
|
+
|
|
55
|
+
tracer = Tracer(agent_name="my-agent", framework="openai")
|
|
56
|
+
|
|
57
|
+
# After each LLM call, record the reasoning step:
|
|
58
|
+
tracer.reasoning(
|
|
59
|
+
content=llm_response.text,
|
|
60
|
+
tokens=llm_response.usage.total_tokens,
|
|
61
|
+
input_context=prompt, # optional, improves CCE detection
|
|
62
|
+
)
|
|
63
|
+
|
|
64
|
+
# After each tool call:
|
|
65
|
+
tracer.tool(
|
|
66
|
+
name="get_order_details",
|
|
67
|
+
params={"order_id": "ORD-9182"},
|
|
68
|
+
output=tool_result,
|
|
69
|
+
success=True,
|
|
70
|
+
tokens=120,
|
|
71
|
+
)
|
|
72
|
+
|
|
73
|
+
# After the agent finishes:
|
|
74
|
+
report = tracer.analyse()
|
|
75
|
+
print(report.summary())
|
|
76
|
+
# → TAS 74.3/100 [Good] | 6 steps, 3200 tokens | Saved 1100 tokens (34%)
|
|
77
|
+
|
|
78
|
+
print(report.markdown()) # full formatted report
|
|
79
|
+
report.assert_passes() # raises AssertionError if TAS < threshold (CI use)
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
## HTTP mode
|
|
83
|
+
|
|
84
|
+
If you'd rather not put the binary on every machine, run the server once and POST from anywhere:
|
|
85
|
+
|
|
86
|
+
```python
|
|
87
|
+
from tracerazor_sdk import Tracer
|
|
88
|
+
|
|
89
|
+
tracer = Tracer(
|
|
90
|
+
agent_name="my-agent",
|
|
91
|
+
server="http://localhost:8080", # tracerazor-server URL
|
|
92
|
+
)
|
|
93
|
+
# Record steps the same way, then:
|
|
94
|
+
report = tracer.analyse()
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
Install with HTTP support:
|
|
98
|
+
|
|
99
|
+
```bash
|
|
100
|
+
pip install tracerazor[http]
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
## Context manager
|
|
104
|
+
|
|
105
|
+
```python
|
|
106
|
+
with Tracer(agent_name="my-agent") as t:
|
|
107
|
+
t.reasoning("...", tokens=500)
|
|
108
|
+
t.tool("search", params={}, output="...", success=True, tokens=100)
|
|
109
|
+
|
|
110
|
+
report = t.analyse()
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
## API
|
|
114
|
+
|
|
115
|
+
### `Tracer(agent_name, framework, threshold, task_value_score, bin_path, server)`
|
|
116
|
+
|
|
117
|
+
| param | default | description |
|
|
118
|
+
|---|---|---|
|
|
119
|
+
| `agent_name` | required | shown in reports and used for baseline tracking |
|
|
120
|
+
| `framework` | `"custom"` | any string: `"openai"`, `"anthropic"`, `"crewai"`, etc. |
|
|
121
|
+
| `threshold` | `70.0` | minimum TAS for `assert_passes()` |
|
|
122
|
+
| `task_value_score` | `1.0` | answer quality (0–1), update with `set_task_value()` |
|
|
123
|
+
| `bin_path` | auto | path to `tracerazor` binary; falls back to `TRACERAZOR_BIN` env var |
|
|
124
|
+
| `server` | `None` | if set, use HTTP mode |
|
|
125
|
+
|
|
126
|
+
### `tracer.reasoning(content, tokens, input_context, output)`
|
|
127
|
+
|
|
128
|
+
Record one LLM reasoning step. `input_context` is the full prompt — include it for accurate CCE bloat detection.
|
|
129
|
+
|
|
130
|
+
### `tracer.tool(name, params, output, success, error, tokens, input_context)`
|
|
131
|
+
|
|
132
|
+
Record one tool call. `success=False` triggers misfire detection (TCA) and auto-fix generation.
|
|
133
|
+
|
|
134
|
+
### `tracer.set_task_value(score: float)`
|
|
135
|
+
|
|
136
|
+
Update the task quality score after validating the agent's answer. Call before `analyse()`.
|
|
137
|
+
|
|
138
|
+
### `tracer.analyse() → TraceRazorReport`
|
|
139
|
+
|
|
140
|
+
Submit the trace and return the report.
|
|
141
|
+
|
|
142
|
+
### `TraceRazorReport`
|
|
143
|
+
|
|
144
|
+
| attribute | type | description |
|
|
145
|
+
|---|---|---|
|
|
146
|
+
| `tas_score` | `float` | 0–100 composite score |
|
|
147
|
+
| `grade` | `str` | `Excellent`, `Good`, `Fair`, `Poor` |
|
|
148
|
+
| `passes` | `bool` | `tas_score >= threshold` |
|
|
149
|
+
| `savings` | `dict` | `tokens_saved`, `reduction_pct`, `monthly_savings_usd` |
|
|
150
|
+
| `fixes` | `list` | auto-generated fix patches |
|
|
151
|
+
| `anomalies` | `list` | z-score alerts vs. agent baseline (after 5+ runs) |
|
|
152
|
+
| `metrics` | `dict` | raw per-metric scores (SRR, LDI, TCA, RDA, ISR, TUR, CCE, DBO) |
|
|
153
|
+
| `.summary()` | method | one-line string |
|
|
154
|
+
| `.markdown()` | method | full formatted report |
|
|
155
|
+
| `.assert_passes()` | method | raises `AssertionError` if TAS < threshold |
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
tracerazor.egg-info/PKG-INFO
|
|
4
|
+
tracerazor.egg-info/SOURCES.txt
|
|
5
|
+
tracerazor.egg-info/dependency_links.txt
|
|
6
|
+
tracerazor.egg-info/requires.txt
|
|
7
|
+
tracerazor.egg-info/top_level.txt
|
|
8
|
+
tracerazor_sdk/__init__.py
|
|
9
|
+
tracerazor_sdk/client.py
|
|
10
|
+
tracerazor_sdk/trace.py
|
|
11
|
+
tracerazor_sdk/tracer.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
tracerazor_sdk
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""
|
|
2
|
+
TraceRazor Python SDK — framework-agnostic token efficiency auditing.
|
|
3
|
+
|
|
4
|
+
Works with any Python agent: OpenAI, Anthropic, CrewAI, AutoGen, or raw code.
|
|
5
|
+
|
|
6
|
+
Two modes:
|
|
7
|
+
- CLI mode (default): calls the local tracerazor binary — no server needed.
|
|
8
|
+
- HTTP mode: POSTs to a running tracerazor-server — no binary needed.
|
|
9
|
+
|
|
10
|
+
Quickstart (CLI mode):
|
|
11
|
+
from tracerazor_sdk import Tracer
|
|
12
|
+
|
|
13
|
+
with Tracer(agent_name="my-agent") as t:
|
|
14
|
+
response = llm.invoke(prompt)
|
|
15
|
+
t.reasoning(response.text, tokens=response.usage.total_tokens)
|
|
16
|
+
|
|
17
|
+
result = my_tool(arg)
|
|
18
|
+
t.tool("my_tool", params={"arg": arg}, output=result, success=True, tokens=120)
|
|
19
|
+
|
|
20
|
+
report = t.analyse()
|
|
21
|
+
print(report.summary())
|
|
22
|
+
|
|
23
|
+
Quickstart (HTTP mode):
|
|
24
|
+
from tracerazor_sdk import Tracer
|
|
25
|
+
|
|
26
|
+
with Tracer(agent_name="my-agent", server="http://localhost:8080") as t:
|
|
27
|
+
...
|
|
28
|
+
|
|
29
|
+
report = t.analyse()
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
from .tracer import Tracer
|
|
33
|
+
from .client import TraceRazorClient, TraceRazorReport
|
|
34
|
+
from .trace import TraceStep
|
|
35
|
+
|
|
36
|
+
__all__ = ["Tracer", "TraceRazorClient", "TraceRazorReport", "TraceStep"]
|
|
37
|
+
__version__ = "0.1.0"
|
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
"""
|
|
2
|
+
TraceRazor client — CLI subprocess mode and HTTP mode.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
import os
|
|
9
|
+
import shutil
|
|
10
|
+
import subprocess
|
|
11
|
+
import tempfile
|
|
12
|
+
from dataclasses import dataclass, field
|
|
13
|
+
from typing import Any, Dict, List, Optional
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
|
|
17
|
+
class TraceRazorReport:
|
|
18
|
+
"""Parsed result of a tracerazor audit."""
|
|
19
|
+
|
|
20
|
+
trace_id: str
|
|
21
|
+
agent_name: str
|
|
22
|
+
framework: str
|
|
23
|
+
total_steps: int
|
|
24
|
+
total_tokens: int
|
|
25
|
+
tas_score: float
|
|
26
|
+
grade: str
|
|
27
|
+
passes: bool
|
|
28
|
+
threshold: float
|
|
29
|
+
metrics: Dict[str, Any] = field(default_factory=dict)
|
|
30
|
+
savings: Dict[str, Any] = field(default_factory=dict)
|
|
31
|
+
fixes: List[Dict] = field(default_factory=list)
|
|
32
|
+
anomalies: List[Dict] = field(default_factory=list)
|
|
33
|
+
raw: Dict[str, Any] = field(default_factory=dict)
|
|
34
|
+
|
|
35
|
+
def summary(self) -> str:
|
|
36
|
+
"""One-line summary."""
|
|
37
|
+
saved = self.savings.get("tokens_saved", 0)
|
|
38
|
+
pct = self.savings.get("reduction_pct", 0.0)
|
|
39
|
+
return (
|
|
40
|
+
f"TAS {self.tas_score:.1f}/100 [{self.grade}] | "
|
|
41
|
+
f"{self.total_steps} steps, {self.total_tokens} tokens | "
|
|
42
|
+
f"Saved {saved} tokens ({pct:.0f}%)"
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
def markdown(self) -> str:
|
|
46
|
+
"""Full markdown report (same as CLI output)."""
|
|
47
|
+
return self.raw.get("report_markdown") or self._build_markdown()
|
|
48
|
+
|
|
49
|
+
def _build_markdown(self) -> str:
|
|
50
|
+
sep = "-" * 54
|
|
51
|
+
s = self.metrics
|
|
52
|
+
lines = [
|
|
53
|
+
"TRACERAZOR REPORT",
|
|
54
|
+
sep,
|
|
55
|
+
f"Trace: {self.trace_id}",
|
|
56
|
+
f"Agent: {self.agent_name}",
|
|
57
|
+
f"Steps: {self.total_steps} Tokens: {self.total_tokens}",
|
|
58
|
+
sep,
|
|
59
|
+
f"TRACERAZOR SCORE: {self.tas_score:.0f} / 100 [{self.grade.upper()}]",
|
|
60
|
+
sep,
|
|
61
|
+
]
|
|
62
|
+
for code in ("srr", "ldi", "tca", "rda", "isr", "tur", "cce", "dbo"):
|
|
63
|
+
m = s.get(code, {})
|
|
64
|
+
if m:
|
|
65
|
+
status = "PASS" if m.get("pass") else "FAIL"
|
|
66
|
+
lines.append(f"{code.upper():<6} {m.get('score', 0):.3f} {status}")
|
|
67
|
+
if self.savings:
|
|
68
|
+
lines += [
|
|
69
|
+
sep,
|
|
70
|
+
"SAVINGS ESTIMATE",
|
|
71
|
+
f" Tokens saved: {self.savings.get('tokens_saved', 0)} "
|
|
72
|
+
f"({self.savings.get('reduction_pct', 0):.1f}% reduction)",
|
|
73
|
+
f" Cost saved: ${self.savings.get('cost_saved_per_run_usd', 0):.4f}/run",
|
|
74
|
+
f" At 50K/month: ${self.savings.get('monthly_savings_usd', 0):.2f}/month",
|
|
75
|
+
]
|
|
76
|
+
if self.fixes:
|
|
77
|
+
lines += [sep, "AUTO-GENERATED FIXES"]
|
|
78
|
+
for i, fix in enumerate(self.fixes, 1):
|
|
79
|
+
lines.append(
|
|
80
|
+
f" Fix {i}: [{fix.get('fix_type')}] → {fix.get('target')}\n"
|
|
81
|
+
f" Patch: {fix.get('patch', '')[:120]}\n"
|
|
82
|
+
f" Est. savings: {fix.get('estimated_token_savings', 0)} tokens/run"
|
|
83
|
+
)
|
|
84
|
+
if self.anomalies:
|
|
85
|
+
lines += [sep, "ANOMALY ALERTS"]
|
|
86
|
+
for a in self.anomalies:
|
|
87
|
+
direction = "REGRESSION" if a.get("z_score", 0) < 0 else "IMPROVEMENT"
|
|
88
|
+
lines.append(
|
|
89
|
+
f" [{direction}] {a.get('metric')}: {a.get('value'):.1f} "
|
|
90
|
+
f"(z={a.get('z_score'):.1f})"
|
|
91
|
+
)
|
|
92
|
+
lines.append(sep)
|
|
93
|
+
return "\n".join(lines)
|
|
94
|
+
|
|
95
|
+
def assert_passes(self) -> None:
|
|
96
|
+
"""Raise AssertionError if TAS is below threshold. Use in CI/CD."""
|
|
97
|
+
if not self.passes:
|
|
98
|
+
raise AssertionError(
|
|
99
|
+
f"TraceRazor: TAS {self.tas_score:.1f} is below "
|
|
100
|
+
f"threshold {self.threshold}.\n\n{self.summary()}"
|
|
101
|
+
)
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
class TraceRazorClient:
|
|
105
|
+
"""
|
|
106
|
+
Submit a trace for analysis.
|
|
107
|
+
|
|
108
|
+
CLI mode (default): calls the local tracerazor binary.
|
|
109
|
+
- No server needed.
|
|
110
|
+
- Requires the binary to be built or installed.
|
|
111
|
+
|
|
112
|
+
HTTP mode: POSTs to a running tracerazor-server.
|
|
113
|
+
- No binary needed on the machine running your agent.
|
|
114
|
+
- Start the server once: ./tracerazor-server
|
|
115
|
+
|
|
116
|
+
Args:
|
|
117
|
+
bin_path: Path to the tracerazor binary. Auto-detected if None.
|
|
118
|
+
Ignored when `server` is set.
|
|
119
|
+
server: Base URL of a running tracerazor-server, e.g.
|
|
120
|
+
"http://localhost:8080". When set, HTTP mode is used.
|
|
121
|
+
threshold: Minimum TAS score for assert_passes() (default 70).
|
|
122
|
+
"""
|
|
123
|
+
|
|
124
|
+
def __init__(
|
|
125
|
+
self,
|
|
126
|
+
bin_path: Optional[str] = None,
|
|
127
|
+
server: Optional[str] = None,
|
|
128
|
+
threshold: float = 70.0,
|
|
129
|
+
):
|
|
130
|
+
self._server = server.rstrip("/") if server else None
|
|
131
|
+
self._bin = None if self._server else (bin_path or self._find_binary())
|
|
132
|
+
self._threshold = threshold
|
|
133
|
+
|
|
134
|
+
def analyse(self, trace: Dict[str, Any]) -> TraceRazorReport:
|
|
135
|
+
"""Submit the trace and return a TraceRazorReport."""
|
|
136
|
+
if self._server:
|
|
137
|
+
return self._analyse_http(trace)
|
|
138
|
+
return self._analyse_cli(trace)
|
|
139
|
+
|
|
140
|
+
# ── CLI mode ──────────────────────────────────────────────────────────────
|
|
141
|
+
|
|
142
|
+
def _analyse_cli(self, trace: Dict[str, Any]) -> TraceRazorReport:
|
|
143
|
+
with tempfile.NamedTemporaryFile(
|
|
144
|
+
mode="w", suffix=".json", delete=False, encoding="utf-8"
|
|
145
|
+
) as f:
|
|
146
|
+
json.dump(trace, f, indent=2)
|
|
147
|
+
tmp_path = f.name
|
|
148
|
+
|
|
149
|
+
try:
|
|
150
|
+
result = subprocess.run(
|
|
151
|
+
[
|
|
152
|
+
self._bin,
|
|
153
|
+
"audit",
|
|
154
|
+
tmp_path,
|
|
155
|
+
"--format", "json",
|
|
156
|
+
"--threshold", str(self._threshold),
|
|
157
|
+
],
|
|
158
|
+
capture_output=True,
|
|
159
|
+
text=True,
|
|
160
|
+
timeout=60,
|
|
161
|
+
)
|
|
162
|
+
# Exit code 1 = below threshold (still valid JSON output).
|
|
163
|
+
if result.returncode not in (0, 1):
|
|
164
|
+
raise RuntimeError(
|
|
165
|
+
f"tracerazor exited with code {result.returncode}:\n{result.stderr}"
|
|
166
|
+
)
|
|
167
|
+
data = json.loads(result.stdout)
|
|
168
|
+
return self._parse_cli_report(data)
|
|
169
|
+
finally:
|
|
170
|
+
try:
|
|
171
|
+
os.unlink(tmp_path)
|
|
172
|
+
except OSError:
|
|
173
|
+
pass
|
|
174
|
+
|
|
175
|
+
def _parse_cli_report(self, data: Dict[str, Any]) -> TraceRazorReport:
|
|
176
|
+
score = data.get("score", {})
|
|
177
|
+
tas = score.get("score", 0.0)
|
|
178
|
+
return TraceRazorReport(
|
|
179
|
+
trace_id=data.get("trace_id", ""),
|
|
180
|
+
agent_name=data.get("agent_name", ""),
|
|
181
|
+
framework=data.get("framework", ""),
|
|
182
|
+
total_steps=data.get("total_steps", 0),
|
|
183
|
+
total_tokens=data.get("total_tokens", 0),
|
|
184
|
+
tas_score=tas,
|
|
185
|
+
grade=str(score.get("grade", "Unknown")),
|
|
186
|
+
passes=tas >= self._threshold,
|
|
187
|
+
threshold=self._threshold,
|
|
188
|
+
metrics=score,
|
|
189
|
+
savings=data.get("savings", {}),
|
|
190
|
+
fixes=data.get("fixes", []),
|
|
191
|
+
anomalies=data.get("anomalies", []),
|
|
192
|
+
raw=data,
|
|
193
|
+
)
|
|
194
|
+
|
|
195
|
+
# ── HTTP mode ─────────────────────────────────────────────────────────────
|
|
196
|
+
|
|
197
|
+
def _analyse_http(self, trace: Dict[str, Any]) -> TraceRazorReport:
|
|
198
|
+
try:
|
|
199
|
+
import requests
|
|
200
|
+
except ImportError:
|
|
201
|
+
raise ImportError(
|
|
202
|
+
"HTTP mode requires the 'requests' library.\n"
|
|
203
|
+
"Install with: pip install tracerazor[http]"
|
|
204
|
+
)
|
|
205
|
+
|
|
206
|
+
resp = requests.post(
|
|
207
|
+
f"{self._server}/api/audit",
|
|
208
|
+
json={"trace": trace},
|
|
209
|
+
timeout=60,
|
|
210
|
+
)
|
|
211
|
+
resp.raise_for_status()
|
|
212
|
+
data = resp.json()
|
|
213
|
+
return self._parse_http_report(data)
|
|
214
|
+
|
|
215
|
+
def _parse_http_report(self, data: Dict[str, Any]) -> TraceRazorReport:
|
|
216
|
+
tas = data.get("tas_score", 0.0)
|
|
217
|
+
return TraceRazorReport(
|
|
218
|
+
trace_id=data.get("trace_id", ""),
|
|
219
|
+
agent_name=data.get("agent_name", ""),
|
|
220
|
+
framework=data.get("framework", ""),
|
|
221
|
+
total_steps=0, # not in HTTP response top-level
|
|
222
|
+
total_tokens=0,
|
|
223
|
+
tas_score=tas,
|
|
224
|
+
grade=data.get("grade", "Unknown"),
|
|
225
|
+
passes=tas >= self._threshold,
|
|
226
|
+
threshold=self._threshold,
|
|
227
|
+
metrics={},
|
|
228
|
+
savings={},
|
|
229
|
+
fixes=[],
|
|
230
|
+
anomalies=data.get("anomalies", []),
|
|
231
|
+
raw=data,
|
|
232
|
+
)
|
|
233
|
+
|
|
234
|
+
# ── Binary discovery ──────────────────────────────────────────────────────
|
|
235
|
+
|
|
236
|
+
@staticmethod
|
|
237
|
+
def _find_binary() -> str:
|
|
238
|
+
env_path = os.environ.get("TRACERAZOR_BIN")
|
|
239
|
+
if env_path and os.path.isfile(env_path):
|
|
240
|
+
return env_path
|
|
241
|
+
|
|
242
|
+
found = shutil.which("tracerazor") or shutil.which("tracerazor.exe")
|
|
243
|
+
if found:
|
|
244
|
+
return found
|
|
245
|
+
|
|
246
|
+
# Dev layout: integrations/tracerazor/tracerazor_sdk/ → ../../../target/release/
|
|
247
|
+
here = os.path.dirname(os.path.abspath(__file__))
|
|
248
|
+
for rel in [
|
|
249
|
+
"../../../../target/release/tracerazor.exe",
|
|
250
|
+
"../../../../target/release/tracerazor",
|
|
251
|
+
"../../../../target/debug/tracerazor.exe",
|
|
252
|
+
"../../../../target/debug/tracerazor",
|
|
253
|
+
]:
|
|
254
|
+
candidate = os.path.normpath(os.path.join(here, rel))
|
|
255
|
+
if os.path.isfile(candidate):
|
|
256
|
+
return candidate
|
|
257
|
+
|
|
258
|
+
raise FileNotFoundError(
|
|
259
|
+
"tracerazor binary not found.\n"
|
|
260
|
+
"Options:\n"
|
|
261
|
+
" 1. Set TRACERAZOR_BIN=/path/to/tracerazor\n"
|
|
262
|
+
" 2. Build from source: cargo build --release\n"
|
|
263
|
+
" 3. Use HTTP mode: Tracer(server='http://localhost:8080')"
|
|
264
|
+
)
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Lightweight trace data structures. No external dependencies.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import uuid
|
|
8
|
+
from dataclasses import dataclass, field
|
|
9
|
+
from typing import Any, Dict, List, Optional
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class TraceStep:
    """One step in an agent trace: an LLM reasoning step or a tool call."""

    id: int
    type: str  # "reasoning" or "tool_call"
    content: str
    tokens: int
    tool_name: Optional[str] = None
    tool_params: Optional[Dict[str, Any]] = None
    tool_success: Optional[bool] = None
    tool_error: Optional[str] = None
    input_context: Optional[str] = None
    output: Optional[str] = None
    agent_id: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain dict, omitting optional fields left unset."""
        serialized: Dict[str, Any] = {
            "id": self.id,
            "type": self.type,
            "content": self.content,
            "tokens": self.tokens,
        }
        # Optional attributes share their dict key with the attribute name,
        # so a single loop preserves the original key insertion order.
        for attr in (
            "tool_name",
            "tool_params",
            "tool_success",
            "tool_error",
            "input_context",
            "output",
            "agent_id",
        ):
            value = getattr(self, attr)
            if value is not None:
                serialized[attr] = value
        return serialized
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@dataclass
class Trace:
    """A complete agent trace: identity metadata plus an ordered step list."""

    agent_name: str
    framework: str
    task_value_score: float = 1.0  # outcome quality: 0.0 wrong .. 1.0 correct
    trace_id: str = field(default_factory=lambda: str(uuid.uuid4()))
    steps: List[TraceStep] = field(default_factory=list)

    def add_step(self, step: TraceStep) -> None:
        """Append one recorded step to the trace."""
        self.steps.append(step)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the trace, including every step, to a plain dict."""
        serialized_steps = [step.to_dict() for step in self.steps]
        return {
            "trace_id": self.trace_id,
            "agent_name": self.agent_name,
            "framework": self.framework,
            "task_value_score": self.task_value_score,
            "steps": serialized_steps,
        }
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Tracer — the main entry point for manual instrumentation.
|
|
3
|
+
|
|
4
|
+
Use as a context manager or call step methods directly.
|
|
5
|
+
|
|
6
|
+
Example:
|
|
7
|
+
from tracerazor_sdk import Tracer
|
|
8
|
+
|
|
9
|
+
with Tracer(agent_name="my-agent") as t:
|
|
10
|
+
# After each LLM call:
|
|
11
|
+
t.reasoning("model output text", tokens=820, input_context="full prompt")
|
|
12
|
+
|
|
13
|
+
# After each tool call:
|
|
14
|
+
t.tool("search_web", params={"q": "..."}, output="results", success=True, tokens=200)
|
|
15
|
+
|
|
16
|
+
report = t.analyse()
|
|
17
|
+
print(report.summary())
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
from typing import Any, Dict, Optional
|
|
23
|
+
|
|
24
|
+
from .client import TraceRazorClient, TraceRazorReport
|
|
25
|
+
from .trace import Trace, TraceStep
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class Tracer:
    """
    Manual instrumentation entry point. Collects reasoning steps and tool
    calls into a Trace, then hands the trace to a TraceRazorClient for
    analysis.

    Args:
        agent_name: Name of the agent (appears in all reports).
        framework: Framework identifier, e.g. "openai", "anthropic",
                   "crewai", "autogen", "custom".
        threshold: Minimum TAS score for assert_passes() (default 70).
        task_value_score: Quality of the final answer (0.0–1.0). Update via
                          set_task_value() after ground-truth validation.
        bin_path: Path to tracerazor binary (CLI mode). Auto-detected.
        server: URL of tracerazor-server (HTTP mode). When set,
                bin_path is ignored.
    """

    def __init__(
        self,
        agent_name: str,
        framework: str = "custom",
        threshold: float = 70.0,
        task_value_score: float = 1.0,
        bin_path: Optional[str] = None,
        server: Optional[str] = None,
    ):
        self._trace = Trace(
            agent_name=agent_name,
            framework=framework,
            task_value_score=task_value_score,
        )
        self._client = TraceRazorClient(bin_path=bin_path, server=server, threshold=threshold)
        self._report: Optional[TraceRazorReport] = None

    # ── Context manager ───────────────────────────────────────────────────────

    def __enter__(self) -> "Tracer":
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        # Deliberately no auto-analyse here: on an exception the trace may be
        # incomplete, so the caller decides when to call analyse().
        pass

    # ── Step recording ────────────────────────────────────────────────────────

    def reasoning(
        self,
        content: str,
        tokens: int,
        input_context: Optional[str] = None,
        output: Optional[str] = None,
    ) -> None:
        """
        Record a single LLM reasoning step.

        Args:
            content: The model's output text (or a summary of it). Stored
                truncated to 500 characters.
            tokens: Total token count for this LLM call (floored at 1).
            input_context: The full prompt sent to the LLM (optional but
                improves CCE and SRR accuracy).
            output: The model's raw output (optional).
        """
        next_id = len(self._trace.steps) + 1
        self._trace.add_step(
            TraceStep(
                id=next_id,
                type="reasoning",
                content=content[:500],
                tokens=max(tokens, 1),
                input_context=input_context,
                output=output,
            )
        )

    def tool(
        self,
        name: str,
        params: Optional[Dict[str, Any]] = None,
        output: Optional[str] = None,
        success: bool = True,
        error: Optional[str] = None,
        tokens: int = 0,
        input_context: Optional[str] = None,
    ) -> None:
        """
        Record a single tool call step.

        Args:
            name: Tool name (e.g. "search_web", "get_order_details").
            params: Dict of parameters passed to the tool.
            output: String output returned by the tool.
            success: False if the tool raised an error or returned a failure.
                Setting this accurately enables TCA misfire detection.
            error: Error message if success=False (dropped when success=True).
            tokens: Token count. If unknown, omit and a rough estimate is used.
            input_context: The LLM input that triggered this tool call.
        """
        token_count = tokens if tokens else self._estimate_tool_tokens(params, output)
        self._trace.add_step(
            TraceStep(
                id=len(self._trace.steps) + 1,
                type="tool_call",
                content=f"Calling {name}",
                tokens=max(token_count, 1),
                tool_name=name,
                tool_params=params,
                tool_success=success,
                tool_error=None if success else error,
                output=output,
                input_context=input_context,
            )
        )

    def set_task_value(self, score: float) -> None:
        """
        Set the task value score (0.0–1.0) based on outcome quality.
        Call this after you have validated the agent's answer.
        1.0 = correct answer, 0.0 = wrong answer.
        """
        clamped = max(0.0, min(1.0, score))
        self._trace.task_value_score = clamped

    # ── Analysis ──────────────────────────────────────────────────────────────

    def analyse(self) -> TraceRazorReport:
        """
        Run analysis on the collected trace and return the resulting report.
        Call this once the agent has finished.
        """
        report = self._client.analyse(self._trace.to_dict())
        self._report = report
        return report

    def assert_passes(self) -> None:
        """Analyse (if not done) and raise AssertionError if TAS < threshold."""
        if self._report is None:
            self.analyse()
        assert self._report is not None  # narrowing: analyse() always sets it
        self._report.assert_passes()

    @property
    def report(self) -> Optional[TraceRazorReport]:
        """The most recent report, or None if analyse() hasn't been called."""
        return self._report

    # ── Helpers ───────────────────────────────────────────────────────────────

    @staticmethod
    def _estimate_tool_tokens(
        params: Optional[Dict], output: Optional[str]
    ) -> int:
        """Rough token estimate: ~4 characters per token, floor of 10."""
        total_chars = (len(str(params)) if params else 0) + (len(output) if output else 0)
        return max(int(total_chars / 4), 10)
|