tokenjam 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tokenjam/__init__.py +1 -0
- tokenjam/api/__init__.py +0 -0
- tokenjam/api/app.py +104 -0
- tokenjam/api/deps.py +18 -0
- tokenjam/api/middleware.py +28 -0
- tokenjam/api/routes/__init__.py +0 -0
- tokenjam/api/routes/agents.py +33 -0
- tokenjam/api/routes/alerts.py +77 -0
- tokenjam/api/routes/budget.py +96 -0
- tokenjam/api/routes/cost.py +43 -0
- tokenjam/api/routes/drift.py +63 -0
- tokenjam/api/routes/logs.py +511 -0
- tokenjam/api/routes/metrics.py +81 -0
- tokenjam/api/routes/otlp.py +63 -0
- tokenjam/api/routes/spans.py +202 -0
- tokenjam/api/routes/status.py +84 -0
- tokenjam/api/routes/tools.py +22 -0
- tokenjam/api/routes/traces.py +92 -0
- tokenjam/cli/__init__.py +0 -0
- tokenjam/cli/cmd_alerts.py +94 -0
- tokenjam/cli/cmd_budget.py +119 -0
- tokenjam/cli/cmd_cost.py +90 -0
- tokenjam/cli/cmd_demo.py +82 -0
- tokenjam/cli/cmd_doctor.py +173 -0
- tokenjam/cli/cmd_drift.py +238 -0
- tokenjam/cli/cmd_export.py +200 -0
- tokenjam/cli/cmd_mcp.py +78 -0
- tokenjam/cli/cmd_onboard.py +779 -0
- tokenjam/cli/cmd_serve.py +85 -0
- tokenjam/cli/cmd_status.py +153 -0
- tokenjam/cli/cmd_stop.py +87 -0
- tokenjam/cli/cmd_tools.py +45 -0
- tokenjam/cli/cmd_traces.py +161 -0
- tokenjam/cli/cmd_uninstall.py +159 -0
- tokenjam/cli/main.py +110 -0
- tokenjam/core/__init__.py +0 -0
- tokenjam/core/alerts.py +619 -0
- tokenjam/core/api_backend.py +235 -0
- tokenjam/core/config.py +360 -0
- tokenjam/core/cost.py +102 -0
- tokenjam/core/db.py +718 -0
- tokenjam/core/drift.py +256 -0
- tokenjam/core/ingest.py +265 -0
- tokenjam/core/models.py +225 -0
- tokenjam/core/pricing.py +54 -0
- tokenjam/core/retention.py +21 -0
- tokenjam/core/schema_validator.py +156 -0
- tokenjam/demo/__init__.py +0 -0
- tokenjam/demo/env.py +96 -0
- tokenjam/mcp/__init__.py +0 -0
- tokenjam/mcp/server.py +1067 -0
- tokenjam/otel/__init__.py +0 -0
- tokenjam/otel/exporters.py +26 -0
- tokenjam/otel/provider.py +207 -0
- tokenjam/otel/semconv.py +144 -0
- tokenjam/pricing/models.toml +70 -0
- tokenjam/py.typed +0 -0
- tokenjam/sdk/__init__.py +21 -0
- tokenjam/sdk/agent.py +206 -0
- tokenjam/sdk/bootstrap.py +120 -0
- tokenjam/sdk/http_exporter.py +109 -0
- tokenjam/sdk/integrations/__init__.py +0 -0
- tokenjam/sdk/integrations/anthropic.py +200 -0
- tokenjam/sdk/integrations/autogen.py +97 -0
- tokenjam/sdk/integrations/base.py +27 -0
- tokenjam/sdk/integrations/bedrock.py +103 -0
- tokenjam/sdk/integrations/crewai.py +96 -0
- tokenjam/sdk/integrations/gemini.py +131 -0
- tokenjam/sdk/integrations/langchain.py +156 -0
- tokenjam/sdk/integrations/langgraph.py +101 -0
- tokenjam/sdk/integrations/litellm.py +323 -0
- tokenjam/sdk/integrations/llamaindex.py +52 -0
- tokenjam/sdk/integrations/nemoclaw.py +139 -0
- tokenjam/sdk/integrations/openai.py +159 -0
- tokenjam/sdk/integrations/openai_agents_sdk.py +47 -0
- tokenjam/sdk/transport.py +98 -0
- tokenjam/ui/index.html +1213 -0
- tokenjam/utils/__init__.py +0 -0
- tokenjam/utils/formatting.py +43 -0
- tokenjam/utils/ids.py +15 -0
- tokenjam/utils/time_parse.py +54 -0
- tokenjam-0.2.0.dist-info/METADATA +622 -0
- tokenjam-0.2.0.dist-info/RECORD +86 -0
- tokenjam-0.2.0.dist-info/WHEEL +4 -0
- tokenjam-0.2.0.dist-info/entry_points.txt +2 -0
- tokenjam-0.2.0.dist-info/licenses/LICENSE +21 -0
tokenjam/cli/cmd_cost.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
import click
|
|
2
|
+
import json
|
|
3
|
+
from tokenjam.core.models import CostFilters
|
|
4
|
+
from tokenjam.utils.formatting import console, make_table, format_cost, format_tokens
|
|
5
|
+
from tokenjam.utils.time_parse import parse_since
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@click.command("cost")
@click.option("--agent", default=None, help="Filter to specific agent_id")
@click.option("--since", default="7d", help="Time window (e.g. 1h, 7d, 2026-03-01)")
@click.option(
    "--group-by",
    "group_by",
    type=click.Choice(["agent", "model", "day", "tool"]),
    default="day",
)
@click.option("--json", "output_json", is_flag=True)
@click.pass_context
def cmd_cost(
    ctx: click.Context,
    agent: str | None,
    since: str,
    group_by: str,
    output_json: bool,
) -> None:
    """Show cost breakdown by agent, model, day, or tool."""
    db = ctx.obj["db"]

    # Re-raise an unparseable --since value as click.BadParameter on that option.
    try:
        window_start = parse_since(since)
    except ValueError as exc:
        raise click.BadParameter(str(exc), param_hint="'--since'") from exc

    rows = db.get_cost_summary(
        CostFilters(agent_id=agent, since=window_start, group_by=group_by)
    )
    total = sum(row.cost_usd for row in rows)

    # JSON mode: emit the raw rows plus the grand total and stop.
    if output_json:
        payload = {"rows": [vars(row) for row in rows], "total_cost_usd": total}
        # default=str stringifies any value json cannot encode natively.
        click.echo(json.dumps(payload, default=str))
        return

    if not rows:
        console.print("[dim]No cost data found for the given filters.[/dim]")
        return

    # One entry per --group-by choice: column headers plus a per-row cell builder.
    layouts = {
        "day": (
            ("DATE", "AGENT", "MODEL", "TOKENS IN", "TOKENS OUT", "COST"),
            lambda row: (
                row.group,
                row.agent_id or "-",
                row.model or "-",
                format_tokens(row.input_tokens),
                format_tokens(row.output_tokens),
                format_cost(row.cost_usd),
            ),
        ),
        "agent": (
            ("AGENT", "MODEL", "TOKENS IN", "TOKENS OUT", "COST"),
            lambda row: (
                row.group,
                row.model or "-",
                format_tokens(row.input_tokens),
                format_tokens(row.output_tokens),
                format_cost(row.cost_usd),
            ),
        ),
        "model": (
            ("MODEL", "TOKENS IN", "TOKENS OUT", "COST"),
            lambda row: (
                row.group,
                format_tokens(row.input_tokens),
                format_tokens(row.output_tokens),
                format_cost(row.cost_usd),
            ),
        ),
        "tool": (
            ("TOOL", "COST"),
            lambda row: (row.group, format_cost(row.cost_usd)),
        ),
    }
    headers, build_cells = layouts[group_by]

    table = make_table(*headers)
    for row in rows:
        table.add_row(*build_cells(row))

    # Closing row: blanks up to the last two columns, then the label and total.
    padding = [""] * (len(headers) - 2)
    table.add_row(
        *padding,
        "[bold]TOTAL[/bold]",
        f"[bold]{format_cost(total)}[/bold]",
    )

    console.print(table)
|
tokenjam/cli/cmd_demo.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
"""tj demo — Agent Incident Library CLI command."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import importlib.util
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from types import ModuleType
|
|
7
|
+
|
|
8
|
+
import click
|
|
9
|
+
|
|
10
|
+
from tokenjam.utils.formatting import console
|
|
11
|
+
|
|
12
|
+
# incidents/ sits beside the tokenjam package: this file -> tokenjam/cli/ -> tokenjam/ -> repo or site-packages root
|
|
13
|
+
_INCIDENTS_DIR = Path(__file__).parent.parent.parent / "incidents"
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _discover_scenarios() -> dict[str, ModuleType]:
    """
    Scan incidents/*/scenario.py for modules exposing a `run` callable.

    Each scenario file is loaded under the module name
    ``incidents.<slug>.scenario``, where ``<slug>`` is the name of the
    directory that contains it. Files are visited in sorted path order.

    The module is registered in ``sys.modules`` before it is executed, as in
    the importlib "importing a source file directly" recipe, so code that
    resolves a module through ``sys.modules`` while the scenario is being
    executed can find it. If execution raises, the registration is rolled
    back and the exception propagates unchanged.

    Returns a dict mapping scenario slug to loaded module. The dict is empty
    when the incidents directory does not exist.
    """
    import sys

    scenarios: dict[str, ModuleType] = {}
    if not _INCIDENTS_DIR.exists():
        return scenarios
    for scenario_file in sorted(_INCIDENTS_DIR.glob("*/scenario.py")):
        slug = scenario_file.parent.name
        module_name = f"incidents.{slug}.scenario"
        spec = importlib.util.spec_from_file_location(module_name, scenario_file)
        if spec is None or spec.loader is None:
            continue
        mod = importlib.util.module_from_spec(spec)
        # Register first: exec_module() alone does not add to sys.modules.
        sys.modules[module_name] = mod
        try:
            spec.loader.exec_module(mod)  # type: ignore[union-attr]
        except BaseException:
            # Do not leave a half-initialised module behind.
            sys.modules.pop(module_name, None)
            raise
        # Only modules that expose a callable `run` count as scenarios.
        if callable(getattr(mod, "run", None)):
            scenarios[slug] = mod
    return scenarios
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@click.command("demo")
@click.argument("scenario", required=False, default=None)
@click.option("--json", "output_json", is_flag=True, help="Output JSON instead of Rich panels")
@click.pass_context
def cmd_demo(ctx: click.Context, scenario: str | None, output_json: bool) -> None:
    """Run a reproducible AI agent incident scenario.

    \b
    tj demo List available scenarios
    tj demo retry-loop Run a specific scenario
    tj demo retry-loop --json Machine-readable output
    """
    available = _discover_scenarios()

    # No scenario named: show the catalogue instead of running anything.
    if scenario is None:
        _list_scenarios(available)
        return

    chosen = available.get(scenario)
    if chosen is None:
        message = f"Unknown scenario '{scenario}'. Run `tj demo` to see available scenarios."
        click.echo(message, err=True)
        raise SystemExit(1)

    # NOTE(review): output_json is accepted but never forwarded to run(), so
    # --json has no effect from this function -- confirm how scenarios are
    # expected to receive it.
    chosen.run()
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _list_scenarios(scenarios: dict[str, ModuleType]) -> None:
    """Print a heading, a table of scenario slugs and descriptions, and a usage hint.

    The description column shows each module's ``DESCRIPTION`` attribute, or
    an empty string when the module does not define one.
    """
    from rich import box
    from rich.table import Table

    heading = (
        "[bold]OCW Agent Incident Library[/bold]\n"
        "Reproducible AI agent failures — no API keys, no config needed.\n"
    )
    console.print()
    console.print(heading)

    listing = Table(box=box.SIMPLE, show_header=True, header_style="bold")
    listing.add_column("Scenario", style="cyan", no_wrap=True)
    listing.add_column("Description")
    for name in scenarios:
        listing.add_row(name, getattr(scenarios[name], "DESCRIPTION", ""))
    console.print(listing)

    usage_hint = "[dim]Usage:[/dim] tj demo <scenario> [dim]|[/dim] tj demo <scenario> --json"
    console.print(usage_hint)
    console.print()
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
|
|
5
|
+
import click
|
|
6
|
+
import duckdb
|
|
7
|
+
|
|
8
|
+
from tokenjam.core.config import find_config_file, load_config
|
|
9
|
+
from tokenjam.utils.formatting import console
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@click.command("doctor")
@click.option("--json", "output_json", is_flag=True)
@click.pass_context
def cmd_doctor(ctx: click.Context, output_json: bool) -> None:
    """Run health checks on tj configuration and environment."""
    config = ctx.obj["config"]

    # The checks run in this order; the two webhook checks may each
    # contribute several entries (or none, for the allowlist).
    checks: list[dict] = [
        _check_config(),                               # 1. config file found and valid
        _check_db(config),                             # 2. DuckDB file can be opened
        _check_ingest_secret(config),                  # 3. ingest secret set
        _check_prometheus(config),                     # 4. Prometheus configured
        _check_schema_vs_capture(config),              # 5. schema validation vs capture
        _check_drift_inactive(config, ctx.obj["db"]),  # 6. drift enabled but inactive
        *_check_webhook_security(config),              # 7. webhook URL security
        *_check_webhook_allowlist(config),             # 8. webhook domain allowlist
    ]

    if not output_json:
        for entry in checks:
            _print_check(entry)
    else:
        click.echo(json.dumps(checks, default=str))

    # Exit status: 2 if any check is an error, else 1 if any is a warning, else 0.
    levels = {entry["level"] for entry in checks}
    if "error" in levels:
        exit_code = 2
    elif "warning" in levels:
        exit_code = 1
    else:
        exit_code = 0
    ctx.exit(exit_code)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _check_config() -> dict:
    """Locate the config file and try to load it.

    Returns a check dict with ``name``, ``level`` and ``message`` keys:
    ``error`` when no file is found or when locating/loading raises,
    ``ok`` otherwise.
    """
    label = "Config file"
    try:
        path = find_config_file()
        if path is not None:
            load_config(str(path))
    except Exception as e:
        # Any failure while locating or loading is reported, never raised.
        return {"name": label, "level": "error",
                "message": f"Config parse error: {e}"}

    if path is None:
        return {"name": label, "level": "error",
                "message": "No config file found. Run `tj onboard` to create one."}
    return {"name": label, "level": "ok",
            "message": f"Found and valid: {path}"}
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _check_db(config: object) -> dict:
    """Try to open and immediately close the DuckDB file at ``config.storage.path``.

    The path has ``~`` expanded before connecting. Returns an ``ok`` check
    dict when the connection succeeds and an ``error`` one carrying the
    exception text when anything in the attempt raises.
    """
    label = "DuckDB writable"
    try:
        from pathlib import Path

        db_path = Path(config.storage.path).expanduser()
        duckdb.connect(str(db_path)).close()
    except Exception as e:
        return {"name": label, "level": "error",
                "message": f"Cannot open database: {e}"}
    return {"name": label, "level": "ok",
            "message": f"Database accessible: {db_path}"}
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _check_ingest_secret(config: object) -> dict:
    """Report whether ``config.security.ingest_secret`` holds a truthy value.

    Returns an ``ok`` check dict when it does and a ``warning`` one otherwise.
    """
    if not config.security.ingest_secret:
        return {
            "name": "Ingest secret",
            "level": "warning",
            "message": "No ingest secret set. API ingest endpoint is unprotected.",
        }
    return {
        "name": "Ingest secret",
        "level": "ok",
        "message": "Ingest secret is configured.",
    }
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def _check_prometheus(config: object) -> dict:
    """Report whether Prometheus export is enabled in ``config.export.prometheus``.

    Returns an ``ok`` check dict naming the configured port when enabled,
    and an ``info`` one when disabled.
    """
    prometheus = config.export.prometheus
    if not prometheus.enabled:
        return {"name": "Prometheus", "level": "info",
                "message": "Prometheus export disabled."}
    return {"name": "Prometheus", "level": "ok",
            "message": f"Enabled on port {prometheus.port}"}
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _check_schema_vs_capture(config: object) -> dict:
    """Warn when an agent declares ``output_schema`` but tool outputs are not captured.

    Returns a ``warning`` check dict when at least one entry of
    ``config.agents`` has a truthy ``output_schema`` while
    ``config.capture.tool_outputs`` is falsy; otherwise an ``ok`` one.
    """
    consistent = {"name": "Schema vs capture", "level": "ok",
                  "message": "Schema and capture settings are consistent."}

    for agent_cfg in config.agents.values():
        if agent_cfg.output_schema:
            break
    else:
        # No agent declares a schema, so the capture setting is irrelevant.
        return consistent

    if config.capture.tool_outputs:
        return consistent
    return {"name": "Schema vs capture", "level": "warning",
            "message": "Agent has output_schema but capture.tool_outputs is false. "
                       "Schema validation will have no data to validate."}
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _check_drift_inactive(config: object, db: object) -> dict:
    """Warn about the first drift-enabled agent that lacks enough baseline sessions.

    For each entry of ``config.agents`` with ``drift.enabled`` set, the
    completed-session count from ``db.get_completed_session_count`` is
    compared with ``drift.baseline_sessions``. The first agent that falls
    short yields a ``warning`` check dict; if none does, an ``ok`` one is
    returned.
    """
    for agent_id, agent_cfg in config.agents.items():
        drift_cfg = agent_cfg.drift
        if not drift_cfg.enabled:
            continue
        completed = db.get_completed_session_count(agent_id)
        if completed >= drift_cfg.baseline_sessions:
            continue
        return {
            "name": "Drift detection",
            "level": "warning",
            "message": f"Agent '{agent_id}' has drift enabled but only "
                       f"{completed}/{drift_cfg.baseline_sessions} baseline sessions.",
        }
    return {
        "name": "Drift detection",
        "level": "ok",
        "message": "Drift detection status is consistent.",
    }
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def _check_webhook_security(config: object) -> list[dict]:
    """Flag alert channels whose webhook URL is neither HTTPS nor local.

    For every channel in ``config.alerts.channels`` the URL is taken from
    ``url``, falling back to ``webhook_url``; channels with neither are
    skipped. The ``https://`` prefix is matched case-insensitively, because
    URI schemes are case-insensitive, so ``HTTPS://host/...`` is not
    reported as insecure.

    Returns one ``warning`` check dict per offending URL, or a single ``ok``
    check dict when nothing was flagged.
    """
    results = []
    for ch in config.alerts.channels:
        url = ch.url or ch.webhook_url
        if not url:
            continue
        # Compare on a lower-cased copy; the reported URL keeps its spelling.
        if url.lower().startswith("https://"):
            continue
        if _is_local_url(url):
            continue
        results.append({
            "name": "Webhook security",
            "level": "warning",
            "message": f"Non-HTTPS, non-local webhook URL: {url}",
        })
    if not results:
        results.append({"name": "Webhook security", "level": "ok",
                        "message": "All webhook URLs are secure or local."})
    return results
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def _check_webhook_allowlist(config: object) -> list[dict]:
    """Report alert channels whose webhook host is missing from the allowlist.

    Returns an empty list when ``config.security.webhook_allowed_domains``
    is empty. Otherwise returns one ``error`` check dict per channel URL
    (``url``, falling back to ``webhook_url``) whose parsed hostname is not
    a member of the allowlist. URLs without a hostname are ignored.
    """
    allowed = config.security.webhook_allowed_domains
    if not allowed:
        return []

    from urllib.parse import urlparse

    candidate_urls = (ch.url or ch.webhook_url for ch in config.alerts.channels)
    hosts = (urlparse(url).hostname for url in candidate_urls if url)
    # NOTE(review): urlparse() lower-cases the hostname while allowlist
    # entries are compared verbatim -- confirm the allowlist is stored
    # lower-case.
    return [
        {
            "name": "Webhook allowlist",
            "level": "error",
            "message": f"Webhook domain '{host}' not in allowed list.",
        }
        for host in hosts
        if host and host not in allowed
    ]
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def _is_local_url(url: str) -> bool:
    """Return True when *url* points at this machine.

    A URL counts as local when its hostname is ``localhost``, ``127.0.0.1``,
    ``::1`` or ``0.0.0.0``, or is any other IP literal in a loopback range
    (for example ``127.0.0.2``).

    Returns False for URLs without a hostname and for URLs that
    ``urlparse`` rejects with ``ValueError`` (such as an unterminated IPv6
    literal), so one malformed webhook URL cannot abort the caller.
    """
    from ipaddress import ip_address
    from urllib.parse import urlparse

    try:
        hostname = urlparse(url).hostname
    except ValueError:
        return False
    if not hostname:
        return False
    if hostname in ("localhost", "127.0.0.1", "::1", "0.0.0.0"):
        return True
    try:
        # Covers the rest of the loopback range.
        return ip_address(hostname).is_loopback
    except ValueError:
        # Not an IP literal (an ordinary DNS name): not local.
        return False
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def _print_check(check: dict) -> None:
    """Print one check dict as an "<icon> <name>: <message>" console line.

    The icon is chosen from the check's ``level``; an unrecognised level is
    shown as ``?``.
    """
    markers = {
        "ok": "[green]\u2713[/green]",
        "warning": "[yellow]\u26a0[/yellow]",
        "error": "[red]\u2717[/red]",
        "info": "[blue]i[/blue]",
    }
    severity = check["level"]
    marker = markers[severity] if severity in markers else "?"
    console.print(f" {marker} {check['name']}: {check['message']}")
|
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
"""tj drift — show behavioral drift baselines and Z-scores."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import json as json_mod
|
|
5
|
+
|
|
6
|
+
import click
|
|
7
|
+
from rich.table import Table
|
|
8
|
+
|
|
9
|
+
from tokenjam.core.drift import evaluate_drift
|
|
10
|
+
from tokenjam.utils.formatting import console
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@click.command("drift")
@click.option("--agent", default=None, help="Filter to specific agent_id")
@click.option("--json", "output_json", is_flag=True, help="JSON output")
@click.pass_context
def cmd_drift(ctx: click.Context, agent: str | None, output_json: bool) -> None:
    """Show drift baselines and Z-scores for recent sessions."""
    db = ctx.obj["db"]
    config = ctx.obj["config"]

    # An explicit --agent wins; otherwise fall back to an "agent" entry in ctx.obj.
    selected = agent or ctx.obj.get("agent")

    # Work out which agents to evaluate.
    if selected:
        agent_ids = [selected]
    elif not hasattr(db, "conn"):
        # No raw connection on this backend: nothing to enumerate here.
        agent_ids = []
    else:
        cursor = db.conn.execute(
            "SELECT DISTINCT agent_id FROM drift_baselines ORDER BY agent_id"
        )
        agent_ids = [record[0] for record in cursor.fetchall()]

    if not agent_ids:
        if not output_json:
            # NOTE(review): the "10" below is hard-coded text and is not read
            # from the per-agent drift configuration.
            console.print(
                "[dim]No drift baselines found. "
                "Need at least 10 completed sessions to build a baseline.[/dim]"
            )
        else:
            click.echo(json_mod.dumps({"agents": [], "drifted": False}))
        ctx.exit(0)
        return

    all_results = []
    any_drifted = False

    for aid in agent_ids:
        # Agents without a baseline or without a completed session are skipped.
        # NOTE(review): if every agent is skipped, the non-JSON mode prints
        # nothing at all before exiting 0.
        baseline = db.get_baseline(aid)
        if baseline is None:
            continue
        recent = db.get_completed_sessions(aid, limit=1)
        if not recent:
            continue
        latest = recent[0]

        # Per-agent thresholds, with fixed defaults for unconfigured agents.
        agent_cfg = config.agents.get(aid)
        if agent_cfg:
            threshold = agent_cfg.drift.token_threshold
            seq_threshold = agent_cfg.drift.tool_sequence_diff
        else:
            threshold, seq_threshold = 2.0, 0.4

        result = evaluate_drift(
            session=latest,
            baseline=baseline,
            config_threshold=threshold,
            sequence_diff_threshold=seq_threshold,
            db=db,
        )
        any_drifted = any_drifted or bool(result.drifted)

        all_results.append({
            "agent_id": aid,
            "baseline_sessions": baseline.sessions_sampled,
            "drifted": result.drifted,
            "violations": [
                {
                    "dimension": violation.dimension,
                    "z_score": violation.z_score,
                    "expected": violation.expected,
                    "observed": violation.observed,
                    "detail": violation.detail,
                }
                for violation in result.violations
            ],
            "metrics": _build_metrics(baseline, latest, result, threshold),
        })

        if not output_json:
            _print_drift_table(aid, baseline, latest, result, threshold, seq_threshold)

    if output_json:
        summary = {"agents": all_results, "drifted": any_drifted}
        click.echo(json_mod.dumps(summary, default=str))

    # Exit status 1 when any evaluated agent drifted, 0 otherwise.
    exit_code = 1 if any_drifted else 0
    ctx.exit(exit_code)
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _build_metrics(baseline, session, result, threshold: float) -> list[dict]:
    """Return per-dimension metric dicts for JSON output.

    One dict is produced for each of input_tokens, output_tokens,
    session_duration (only when the session has a duration) and
    tool_call_count, provided the baseline carries both a mean and a stddev
    for that dimension. A tool_sequence entry is appended only when
    ``result.violations`` contains one; it has no mean, stddev or z-score.

    ``threshold`` is accepted but not used in this function.
    """
    from tokenjam.core.drift import z_score

    drifted_dims = {v.dimension for v in result.violations}

    # (dimension, baseline mean, baseline stddev, value from the session)
    candidates = [
        ("input_tokens", baseline.avg_input_tokens,
         baseline.stddev_input_tokens, session.input_tokens),
        ("output_tokens", baseline.avg_output_tokens,
         baseline.stddev_output_tokens, session.output_tokens),
    ]
    if session.duration_seconds is not None:
        candidates.append(
            ("session_duration", baseline.avg_session_duration_s,
             baseline.stddev_session_duration, session.duration_seconds)
        )
    candidates.append(
        ("tool_call_count", baseline.avg_tool_call_count,
         baseline.stddev_tool_call_count, session.tool_call_count)
    )

    metrics = []
    for dimension, mean, stddev, current in candidates:
        # Without both baseline statistics there is nothing to score against.
        if mean is None or stddev is None:
            continue
        score = z_score(float(current), float(mean), float(stddev))
        metrics.append({
            "dimension": dimension,
            "baseline_mean": mean,
            "baseline_stddev": stddev,
            "current_value": current,
            "z_score": score,
            "status": "DRIFT" if dimension in drifted_dims else "ok",
        })

    # tool_sequence is special (Jaccard, no z-score)
    sequence_violation = next(
        (v for v in result.violations if v.dimension == "tool_sequence"), None
    )
    if sequence_violation:
        metrics.append({
            "dimension": "tool_sequence",
            "baseline_mean": None,
            "baseline_stddev": None,
            "current_value": sequence_violation.observed,
            "z_score": None,
            "status": "DRIFT",
        })

    return metrics
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def _print_drift_table(aid, baseline, session, result, threshold: float, seq_threshold: float = 0.4) -> None:
    """Render a Rich table for a single agent's drift state.

    Prints a one-line header (agent id, baseline sample count, overall
    status), then a table with one row per dimension for which the baseline
    has both a mean and a stddev. A tool_sequence row is added when the
    result has such a violation, or, failing that, when the baseline has
    common tool sequences.
    """
    from tokenjam.core.drift import z_score

    drifted_dims = {v.dimension for v in result.violations}

    if result.drifted:
        overall = "[bold red]DRIFTED[/bold red]"
    else:
        overall = "[green]ok[/green]"

    console.print()
    console.print(
        f"[bold]Agent:[/bold] {aid} | "
        f"[bold]Baseline:[/bold] {baseline.sessions_sampled} sessions | "
        f"[bold]Status:[/bold] {overall}"
    )
    console.print()

    table = Table(show_header=True, header_style="bold")
    column_specs = (
        ("Dimension", {"style": "dim"}),
        ("Baseline", {}),
        ("Current", {}),
        ("Z-Score", {"justify": "right"}),
        ("Status", {}),
    )
    for heading, options in column_specs:
        table.add_column(heading, **options)

    def colored_z(score: float | None) -> str:
        # Green below 1.0, yellow up to and including the threshold, red beyond.
        if score is None:
            return "--"
        magnitude = abs(score)
        if magnitude < 1.0:
            tint = "green"
        elif magnitude <= threshold:
            tint = "yellow"
        else:
            tint = "red"
        return f"[{tint}]{score:.2f}[/{tint}]"

    def status_markup(dimension: str) -> str:
        if dimension not in drifted_dims:
            return "[green]ok[/green]"
        return "[bold red]DRIFT[/bold red]"

    def emit(dimension: str, mean, stddev, current, baseline_text: str, current_text: str) -> None:
        if mean is None or stddev is None:
            score = None
        else:
            score = z_score(float(current), float(mean), float(stddev))
        table.add_row(
            dimension, baseline_text, current_text,
            colored_z(score), status_markup(dimension),
        )

    # Each guard also protects the numeric format specs from a None statistic.
    in_mean, in_std = baseline.avg_input_tokens, baseline.stddev_input_tokens
    if in_mean is not None and in_std is not None:
        emit(
            "input_tokens", in_mean, in_std, session.input_tokens,
            f"{in_mean:,.0f} +/- {in_std:,.0f}",
            f"{session.input_tokens:,}",
        )

    out_mean, out_std = baseline.avg_output_tokens, baseline.stddev_output_tokens
    if out_mean is not None and out_std is not None:
        emit(
            "output_tokens", out_mean, out_std, session.output_tokens,
            f"{out_mean:,.0f} +/- {out_std:,.0f}",
            f"{session.output_tokens:,}",
        )

    if (
        session.duration_seconds is not None
        and baseline.avg_session_duration_s is not None
        and baseline.stddev_session_duration is not None
    ):
        emit(
            "session_duration",
            baseline.avg_session_duration_s,
            baseline.stddev_session_duration,
            session.duration_seconds,
            f"{baseline.avg_session_duration_s:.1f}s +/- {baseline.stddev_session_duration:.1f}s",
            f"{session.duration_seconds:.1f}s",
        )

    calls_mean, calls_std = baseline.avg_tool_call_count, baseline.stddev_tool_call_count
    if calls_mean is not None and calls_std is not None:
        emit(
            "tool_call_count", calls_mean, calls_std, session.tool_call_count,
            f"{calls_mean:.0f} +/- {calls_std:.0f}",
            str(session.tool_call_count),
        )

    # Tool sequence row (Jaccard, no z-score)
    sequence_violation = next(
        (v for v in result.violations if v.dimension == "tool_sequence"), None
    )
    if sequence_violation:
        table.add_row(
            "tool_sequence",
            sequence_violation.expected or "",
            sequence_violation.observed or "",
            "--",
            "[bold red]DRIFT[/bold red]",
        )
    elif baseline.common_tool_sequences:
        required_similarity = 1.0 - seq_threshold
        table.add_row(
            "tool_sequence",
            f"similarity >= {required_similarity:.2f}",
            "--",
            "--",
            "[green]ok[/green]",
        )

    console.print(table)
|