tokenjam 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. tokenjam/__init__.py +1 -0
  2. tokenjam/api/__init__.py +0 -0
  3. tokenjam/api/app.py +104 -0
  4. tokenjam/api/deps.py +18 -0
  5. tokenjam/api/middleware.py +28 -0
  6. tokenjam/api/routes/__init__.py +0 -0
  7. tokenjam/api/routes/agents.py +33 -0
  8. tokenjam/api/routes/alerts.py +77 -0
  9. tokenjam/api/routes/budget.py +96 -0
  10. tokenjam/api/routes/cost.py +43 -0
  11. tokenjam/api/routes/drift.py +63 -0
  12. tokenjam/api/routes/logs.py +511 -0
  13. tokenjam/api/routes/metrics.py +81 -0
  14. tokenjam/api/routes/otlp.py +63 -0
  15. tokenjam/api/routes/spans.py +202 -0
  16. tokenjam/api/routes/status.py +84 -0
  17. tokenjam/api/routes/tools.py +22 -0
  18. tokenjam/api/routes/traces.py +92 -0
  19. tokenjam/cli/__init__.py +0 -0
  20. tokenjam/cli/cmd_alerts.py +94 -0
  21. tokenjam/cli/cmd_budget.py +119 -0
  22. tokenjam/cli/cmd_cost.py +90 -0
  23. tokenjam/cli/cmd_demo.py +82 -0
  24. tokenjam/cli/cmd_doctor.py +173 -0
  25. tokenjam/cli/cmd_drift.py +238 -0
  26. tokenjam/cli/cmd_export.py +200 -0
  27. tokenjam/cli/cmd_mcp.py +78 -0
  28. tokenjam/cli/cmd_onboard.py +779 -0
  29. tokenjam/cli/cmd_serve.py +85 -0
  30. tokenjam/cli/cmd_status.py +153 -0
  31. tokenjam/cli/cmd_stop.py +87 -0
  32. tokenjam/cli/cmd_tools.py +45 -0
  33. tokenjam/cli/cmd_traces.py +161 -0
  34. tokenjam/cli/cmd_uninstall.py +159 -0
  35. tokenjam/cli/main.py +110 -0
  36. tokenjam/core/__init__.py +0 -0
  37. tokenjam/core/alerts.py +619 -0
  38. tokenjam/core/api_backend.py +235 -0
  39. tokenjam/core/config.py +360 -0
  40. tokenjam/core/cost.py +102 -0
  41. tokenjam/core/db.py +718 -0
  42. tokenjam/core/drift.py +256 -0
  43. tokenjam/core/ingest.py +265 -0
  44. tokenjam/core/models.py +225 -0
  45. tokenjam/core/pricing.py +54 -0
  46. tokenjam/core/retention.py +21 -0
  47. tokenjam/core/schema_validator.py +156 -0
  48. tokenjam/demo/__init__.py +0 -0
  49. tokenjam/demo/env.py +96 -0
  50. tokenjam/mcp/__init__.py +0 -0
  51. tokenjam/mcp/server.py +1067 -0
  52. tokenjam/otel/__init__.py +0 -0
  53. tokenjam/otel/exporters.py +26 -0
  54. tokenjam/otel/provider.py +207 -0
  55. tokenjam/otel/semconv.py +144 -0
  56. tokenjam/pricing/models.toml +70 -0
  57. tokenjam/py.typed +0 -0
  58. tokenjam/sdk/__init__.py +21 -0
  59. tokenjam/sdk/agent.py +206 -0
  60. tokenjam/sdk/bootstrap.py +120 -0
  61. tokenjam/sdk/http_exporter.py +109 -0
  62. tokenjam/sdk/integrations/__init__.py +0 -0
  63. tokenjam/sdk/integrations/anthropic.py +200 -0
  64. tokenjam/sdk/integrations/autogen.py +97 -0
  65. tokenjam/sdk/integrations/base.py +27 -0
  66. tokenjam/sdk/integrations/bedrock.py +103 -0
  67. tokenjam/sdk/integrations/crewai.py +96 -0
  68. tokenjam/sdk/integrations/gemini.py +131 -0
  69. tokenjam/sdk/integrations/langchain.py +156 -0
  70. tokenjam/sdk/integrations/langgraph.py +101 -0
  71. tokenjam/sdk/integrations/litellm.py +323 -0
  72. tokenjam/sdk/integrations/llamaindex.py +52 -0
  73. tokenjam/sdk/integrations/nemoclaw.py +139 -0
  74. tokenjam/sdk/integrations/openai.py +159 -0
  75. tokenjam/sdk/integrations/openai_agents_sdk.py +47 -0
  76. tokenjam/sdk/transport.py +98 -0
  77. tokenjam/ui/index.html +1213 -0
  78. tokenjam/utils/__init__.py +0 -0
  79. tokenjam/utils/formatting.py +43 -0
  80. tokenjam/utils/ids.py +15 -0
  81. tokenjam/utils/time_parse.py +54 -0
  82. tokenjam-0.2.0.dist-info/METADATA +622 -0
  83. tokenjam-0.2.0.dist-info/RECORD +86 -0
  84. tokenjam-0.2.0.dist-info/WHEEL +4 -0
  85. tokenjam-0.2.0.dist-info/entry_points.txt +2 -0
  86. tokenjam-0.2.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,90 @@
1
+ import click
2
+ import json
3
+ from tokenjam.core.models import CostFilters
4
+ from tokenjam.utils.formatting import console, make_table, format_cost, format_tokens
5
+ from tokenjam.utils.time_parse import parse_since
6
+
7
+
8
@click.command("cost")
@click.option("--agent", default=None, help="Filter to specific agent_id")
@click.option("--since", default="7d", help="Time window (e.g. 1h, 7d, 2026-03-01)")
@click.option("--group-by", "group_by",
              type=click.Choice(["agent", "model", "day", "tool"]),
              default="day")
@click.option("--json", "output_json", is_flag=True)
@click.pass_context
def cmd_cost(ctx: click.Context, agent: str | None, since: str,
             group_by: str, output_json: bool) -> None:
    """Show cost breakdown by agent, model, day, or tool."""
    # The database handle is looked up first so a missing context entry
    # surfaces before any option parsing error.
    store = ctx.obj["db"]
    try:
        window_start = parse_since(since)
    except ValueError as exc:
        # Re-raise as a usage error attributed to --since.
        raise click.BadParameter(str(exc), param_hint="'--since'") from exc

    summary = store.get_cost_summary(
        CostFilters(agent_id=agent, since=window_start, group_by=group_by)
    )
    grand_total = sum(entry.cost_usd for entry in summary)

    if output_json:
        payload = {
            "rows": [vars(entry) for entry in summary],
            "total_cost_usd": grand_total,
        }
        # default=str stringifies any value json cannot encode natively.
        click.echo(json.dumps(payload, default=str))
        return

    if not summary:
        console.print("[dim]No cost data found for the given filters.[/dim]")
        return

    def _usage_cells(entry):
        # Trailing cells shared by every token-bearing layout.
        return (
            format_tokens(entry.input_tokens),
            format_tokens(entry.output_tokens),
            format_cost(entry.cost_usd),
        )

    # One entry per --group-by choice: column headers plus a row builder.
    layouts = {
        "day": (
            ("DATE", "AGENT", "MODEL", "TOKENS IN", "TOKENS OUT", "COST"),
            lambda e: (e.group, e.agent_id or "-", e.model or "-", *_usage_cells(e)),
        ),
        "agent": (
            ("AGENT", "MODEL", "TOKENS IN", "TOKENS OUT", "COST"),
            lambda e: (e.group, e.model or "-", *_usage_cells(e)),
        ),
        "model": (
            ("MODEL", "TOKENS IN", "TOKENS OUT", "COST"),
            lambda e: (e.group, *_usage_cells(e)),
        ),
        "tool": (
            ("TOOL", "COST"),
            lambda e: (e.group, format_cost(e.cost_usd)),
        ),
    }
    headers, cells_for = layouts[group_by]

    table = make_table(*headers)
    for entry in summary:
        table.add_row(*cells_for(entry))

    # The total occupies the last two columns; everything before it is blank.
    blanks = [""] * (len(headers) - 2)
    table.add_row(*blanks, "[bold]TOTAL[/bold]", f"[bold]{format_cost(grand_total)}[/bold]")

    console.print(table)
@@ -0,0 +1,82 @@
1
+ """tj demo — Agent Incident Library CLI command."""
2
+ from __future__ import annotations
3
+
4
+ import importlib.util
5
+ from pathlib import Path
6
+ from types import ModuleType
7
+
8
+ import click
9
+
10
+ from tokenjam.utils.formatting import console
11
+
12
+ # incidents/ lives two directory levels above this file's directory (tokenjam/cli/ -> tokenjam/ -> repo/site-packages root)
13
+ _INCIDENTS_DIR = Path(__file__).parent.parent.parent / "incidents"
14
+
15
+
16
def _discover_scenarios() -> dict[str, ModuleType]:
    """
    Load every incidents/<slug>/scenario.py that defines a callable `run`.

    Returns a mapping of slug (the scenario's directory name) to the loaded
    module, filled in sorted path order. The mapping is empty when the
    incidents directory does not exist.
    """
    if not _INCIDENTS_DIR.exists():
        return {}

    found: dict[str, ModuleType] = {}
    for path in sorted(_INCIDENTS_DIR.glob("*/scenario.py")):
        name = path.parent.name
        spec = importlib.util.spec_from_file_location(
            f"incidents.{name}.scenario", path
        )
        loader = None if spec is None else spec.loader
        if loader is None:
            # No usable import spec for this file; skip it.
            continue
        module = importlib.util.module_from_spec(spec)
        # Executes the scenario file; any exception it raises propagates.
        loader.exec_module(module)  # type: ignore[union-attr]
        if callable(getattr(module, "run", None)):
            found[name] = module
    return found
36
+
37
+
38
@click.command("demo")
@click.argument("scenario", required=False, default=None)
@click.option("--json", "output_json", is_flag=True, help="Output JSON instead of Rich panels")
@click.pass_context
def cmd_demo(ctx: click.Context, scenario: str | None, output_json: bool) -> None:
    """Run a reproducible AI agent incident scenario.

    \b
    tj demo List available scenarios
    tj demo retry-loop Run a specific scenario
    tj demo retry-loop --json Machine-readable output
    """
    available = _discover_scenarios()

    # With no scenario argument the command only lists what was discovered.
    if scenario is None:
        _list_scenarios(available)
        return

    chosen = available.get(scenario)
    if chosen is None:
        click.echo(
            f"Unknown scenario '{scenario}'. Run `tj demo` to see available scenarios.",
            err=True,
        )
        raise SystemExit(1)

    # NOTE(review): output_json is accepted but not forwarded to run();
    # confirm whether scenarios are expected to receive it.
    chosen.run()
64
+
65
+
66
def _list_scenarios(scenarios: dict[str, ModuleType]) -> None:
    """Print the scenario catalogue: a heading, a slug/description table, and a usage hint.

    The description column comes from each module's optional `DESCRIPTION`
    attribute and is blank when the attribute is missing.
    """
    from rich import box
    from rich.table import Table

    heading = (
        "[bold]OCW Agent Incident Library[/bold]\n"
        "Reproducible AI agent failures — no API keys, no config needed.\n"
    )
    console.print()
    console.print(heading)

    catalogue = Table(box=box.SIMPLE, show_header=True, header_style="bold")
    catalogue.add_column("Scenario", style="cyan", no_wrap=True)
    catalogue.add_column("Description")
    for slug, module in scenarios.items():
        description = getattr(module, "DESCRIPTION", "")
        catalogue.add_row(slug, description)
    console.print(catalogue)

    console.print("[dim]Usage:[/dim] tj demo <scenario> [dim]|[/dim] tj demo <scenario> --json")
    console.print()
@@ -0,0 +1,173 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+
5
+ import click
6
+ import duckdb
7
+
8
+ from tokenjam.core.config import find_config_file, load_config
9
+ from tokenjam.utils.formatting import console
10
+
11
+
12
@click.command("doctor")
@click.option("--json", "output_json", is_flag=True)
@click.pass_context
def cmd_doctor(ctx: click.Context, output_json: bool) -> None:
    """Run health checks on tj configuration and environment."""
    config = ctx.obj["config"]

    # Single-result checks, evaluated in the order they are reported.
    checks: list[dict] = [
        _check_config(),                                  # 1. config file found and valid
        _check_db(config),                                # 2. DuckDB file writable
        _check_ingest_secret(config),                     # 3. ingest secret set
        _check_prometheus(config),                        # 4. Prometheus configured
        _check_schema_vs_capture(config),                 # 5. schema validation vs capture
        _check_drift_inactive(config, ctx.obj["db"]),     # 6. drift configured but inactive
    ]
    # Webhook checks can yield several findings each.
    checks += _check_webhook_security(config)             # 7. webhook URL security
    checks += _check_webhook_allowlist(config)            # 8. webhook domain allowlist

    if output_json:
        click.echo(json.dumps(checks, default=str))
    else:
        for entry in checks:
            _print_check(entry)

    # Exit status: 2 if any error, otherwise 1 if any warning, otherwise 0.
    seen_levels = {entry["level"] for entry in checks}
    if "error" in seen_levels:
        ctx.exit(2)
    elif "warning" in seen_levels:
        ctx.exit(1)
    else:
        ctx.exit(0)
58
+
59
+
60
def _check_config() -> dict:
    """Report whether a config file can be located and loaded.

    Returns a check dict with keys `name`, `level` and `message`. The level
    is "error" when no file is found or when locating/loading raises, and
    "ok" otherwise.
    """
    label = "Config file"
    try:
        located = find_config_file()
        if located is None:
            level = "error"
            message = "No config file found. Run `tj onboard` to create one."
        else:
            # Loading is done only for validation; the result is discarded.
            load_config(str(located))
            level = "ok"
            message = f"Found and valid: {located}"
    except Exception as exc:
        # Any failure is reported as a finding, not raised.
        level = "error"
        message = f"Config parse error: {exc}"
    return {"name": label, "level": level, "message": message}
72
+
73
+
74
+ def _check_db(config: object) -> dict:
75
+ try:
76
+ from pathlib import Path
77
+ db_path = Path(config.storage.path).expanduser()
78
+ conn = duckdb.connect(str(db_path))
79
+ conn.close()
80
+ return {"name": "DuckDB writable", "level": "ok",
81
+ "message": f"Database accessible: {db_path}"}
82
+ except Exception as e:
83
+ return {"name": "DuckDB writable", "level": "error",
84
+ "message": f"Cannot open database: {e}"}
85
+
86
+
87
+ def _check_ingest_secret(config: object) -> dict:
88
+ if config.security.ingest_secret:
89
+ return {"name": "Ingest secret", "level": "ok",
90
+ "message": "Ingest secret is configured."}
91
+ return {"name": "Ingest secret", "level": "warning",
92
+ "message": "No ingest secret set. API ingest endpoint is unprotected."}
93
+
94
+
95
+ def _check_prometheus(config: object) -> dict:
96
+ if config.export.prometheus.enabled:
97
+ return {"name": "Prometheus", "level": "ok",
98
+ "message": f"Enabled on port {config.export.prometheus.port}"}
99
+ return {"name": "Prometheus", "level": "info",
100
+ "message": "Prometheus export disabled."}
101
+
102
+
103
+ def _check_schema_vs_capture(config: object) -> dict:
104
+ has_schema = any(
105
+ ac.output_schema for ac in config.agents.values()
106
+ )
107
+ if has_schema and not config.capture.tool_outputs:
108
+ return {"name": "Schema vs capture", "level": "warning",
109
+ "message": "Agent has output_schema but capture.tool_outputs is false. "
110
+ "Schema validation will have no data to validate."}
111
+ return {"name": "Schema vs capture", "level": "ok",
112
+ "message": "Schema and capture settings are consistent."}
113
+
114
+
115
+ def _check_drift_inactive(config: object, db: object) -> dict:
116
+ for agent_id, ac in config.agents.items():
117
+ if ac.drift.enabled:
118
+ count = db.get_completed_session_count(agent_id)
119
+ if count < ac.drift.baseline_sessions:
120
+ return {"name": "Drift detection", "level": "warning",
121
+ "message": f"Agent '{agent_id}' has drift enabled but only "
122
+ f"{count}/{ac.drift.baseline_sessions} baseline sessions."}
123
+ return {"name": "Drift detection", "level": "ok",
124
+ "message": "Drift detection status is consistent."}
125
+
126
+
127
+ def _check_webhook_security(config: object) -> list[dict]:
128
+ results = []
129
+ for ch in config.alerts.channels:
130
+ url = ch.url or ch.webhook_url
131
+ if url and not url.startswith("https://") and not _is_local_url(url):
132
+ results.append({
133
+ "name": "Webhook security",
134
+ "level": "warning",
135
+ "message": f"Non-HTTPS, non-local webhook URL: {url}",
136
+ })
137
+ if not results:
138
+ results.append({"name": "Webhook security", "level": "ok",
139
+ "message": "All webhook URLs are secure or local."})
140
+ return results
141
+
142
+
143
+ def _check_webhook_allowlist(config: object) -> list[dict]:
144
+ allowed = config.security.webhook_allowed_domains
145
+ if not allowed:
146
+ return []
147
+ results = []
148
+ for ch in config.alerts.channels:
149
+ url = ch.url or ch.webhook_url
150
+ if url:
151
+ from urllib.parse import urlparse
152
+ domain = urlparse(url).hostname
153
+ if domain and domain not in allowed:
154
+ results.append({
155
+ "name": "Webhook allowlist",
156
+ "level": "error",
157
+ "message": f"Webhook domain '{domain}' not in allowed list.",
158
+ })
159
+ return results
160
+
161
+
162
+ def _is_local_url(url: str) -> bool:
163
+ from urllib.parse import urlparse
164
+ hostname = urlparse(url).hostname
165
+ return hostname in ("localhost", "127.0.0.1", "::1", "0.0.0.0") if hostname else False
166
+
167
+
168
def _print_check(check: dict) -> None:
    """Print one check result as an icon, the check name, and its message.

    The icon is chosen from the check's `level`; an unrecognised level is
    shown as "?".
    """
    marker = {
        "ok": "[green]\u2713[/green]",
        "warning": "[yellow]\u26a0[/yellow]",
        "error": "[red]\u2717[/red]",
        "info": "[blue]i[/blue]",
    }.get(check["level"], "?")
    console.print(f" {marker} {check['name']}: {check['message']}")
@@ -0,0 +1,238 @@
1
+ """tj drift — show behavioral drift baselines and Z-scores."""
2
+ from __future__ import annotations
3
+
4
+ import json as json_mod
5
+
6
+ import click
7
+ from rich.table import Table
8
+
9
+ from tokenjam.core.drift import evaluate_drift
10
+ from tokenjam.utils.formatting import console
11
+
12
+
13
@click.command("drift")
@click.option("--agent", default=None, help="Filter to specific agent_id")
@click.option("--json", "output_json", is_flag=True, help="JSON output")
@click.pass_context
def cmd_drift(ctx: click.Context, agent: str | None, output_json: bool) -> None:
    """Show drift baselines and Z-scores for recent sessions."""
    db = ctx.obj["db"]
    config = ctx.obj["config"]
    # The --agent option wins; otherwise fall back to an "agent" entry in ctx.obj.
    wanted = agent or ctx.obj.get("agent")

    # Work out which agents to report on.
    if wanted:
        targets = [wanted]
    elif hasattr(db, "conn"):
        # Backends exposing a raw connection are asked for every agent
        # that has a stored baseline.
        query = "SELECT DISTINCT agent_id FROM drift_baselines ORDER BY agent_id"
        targets = [row[0] for row in db.conn.execute(query).fetchall()]
    else:
        targets = []

    if not targets:
        if output_json:
            click.echo(json_mod.dumps({"agents": [], "drifted": False}))
        else:
            console.print(
                "[dim]No drift baselines found. "
                "Need at least 10 completed sessions to build a baseline.[/dim]"
            )
        ctx.exit(0)
        return

    reports = []
    for aid in targets:
        baseline = db.get_baseline(aid)
        if baseline is None:
            continue

        # Only the most recent completed session is evaluated.
        recent = db.get_completed_sessions(aid, limit=1)
        if not recent:
            continue
        latest = recent[0]

        # Per-agent thresholds, with fixed fallbacks for unconfigured agents.
        agent_cfg = config.agents.get(aid)
        if agent_cfg:
            threshold = agent_cfg.drift.token_threshold
            seq_threshold = agent_cfg.drift.tool_sequence_diff
        else:
            threshold = 2.0
            seq_threshold = 0.4

        result = evaluate_drift(
            session=latest,
            baseline=baseline,
            config_threshold=threshold,
            sequence_diff_threshold=seq_threshold,
            db=db,
        )

        violations = [
            {
                "dimension": v.dimension,
                "z_score": v.z_score,
                "expected": v.expected,
                "observed": v.observed,
                "detail": v.detail,
            }
            for v in result.violations
        ]
        reports.append({
            "agent_id": aid,
            "baseline_sessions": baseline.sessions_sampled,
            "drifted": result.drifted,
            "violations": violations,
            "metrics": _build_metrics(baseline, latest, result, threshold),
        })

        if not output_json:
            _print_drift_table(aid, baseline, latest, result, threshold, seq_threshold)

    any_drifted = any(report["drifted"] for report in reports)

    if output_json:
        click.echo(json_mod.dumps(
            {"agents": reports, "drifted": any_drifted},
            default=str,
        ))

    # Exit status 1 signals that at least one agent drifted.
    ctx.exit(1 if any_drifted else 0)
101
+
102
+
103
def _build_metrics(baseline, session, result, threshold: float) -> list[dict]:
    """Return per-dimension metric dicts for JSON output.

    One entry is produced for each numeric dimension whose baseline mean and
    standard deviation are both present; `session_duration` is additionally
    skipped when the session has no duration. A `tool_sequence` entry is
    appended only when that dimension is among the violations. `threshold`
    is accepted but not used here.
    """
    from tokenjam.core.drift import z_score

    flagged = {v.dimension for v in result.violations}

    # (dimension, baseline mean, baseline stddev, current value), in output order.
    candidates = [
        ("input_tokens", baseline.avg_input_tokens, baseline.stddev_input_tokens,
         session.input_tokens),
        ("output_tokens", baseline.avg_output_tokens, baseline.stddev_output_tokens,
         session.output_tokens),
    ]
    if session.duration_seconds is not None:
        candidates.append(
            ("session_duration", baseline.avg_session_duration_s,
             baseline.stddev_session_duration, session.duration_seconds)
        )
    candidates.append(
        ("tool_call_count", baseline.avg_tool_call_count, baseline.stddev_tool_call_count,
         session.tool_call_count)
    )

    metrics = []
    for dimension, mean, stddev, current in candidates:
        if mean is None or stddev is None:
            continue
        metrics.append({
            "dimension": dimension,
            "baseline_mean": mean,
            "baseline_stddev": stddev,
            "current_value": current,
            "z_score": z_score(float(current), float(mean), float(stddev)),
            "status": "DRIFT" if dimension in flagged else "ok",
        })

    # tool_sequence is special (Jaccard, no z-score): it is listed only when violated.
    sequence_violation = next(
        (v for v in result.violations if v.dimension == "tool_sequence"), None
    )
    if sequence_violation:
        metrics.append({
            "dimension": "tool_sequence",
            "baseline_mean": None,
            "baseline_stddev": None,
            "current_value": sequence_violation.observed,
            "z_score": None,
            "status": "DRIFT",
        })

    return metrics
147
+
148
+
149
def _print_drift_table(aid, baseline, session, result, threshold: float, seq_threshold: float = 0.4) -> None:
    """Render a Rich table for a single agent's drift state.

    Prints a one-line summary (agent, baseline size, overall status), then a
    table with one row per dimension that has baseline statistics. A
    `tool_sequence` row is shown when that dimension is violated, or as "ok"
    when the baseline has common tool sequences recorded.
    """
    from tokenjam.core.drift import z_score

    drift_cell = "[bold red]DRIFT[/bold red]"
    ok_cell = "[green]ok[/green]"
    flagged = {v.dimension for v in result.violations}

    overall = "[bold red]DRIFTED[/bold red]" if result.drifted else ok_cell
    console.print()
    console.print(
        f"[bold]Agent:[/bold] {aid} | "
        f"[bold]Baseline:[/bold] {baseline.sessions_sampled} sessions | "
        f"[bold]Status:[/bold] {overall}"
    )
    console.print()

    table = Table(show_header=True, header_style="bold")
    table.add_column("Dimension", style="dim")
    table.add_column("Baseline")
    table.add_column("Current")
    table.add_column("Z-Score", justify="right")
    table.add_column("Status")

    def _render_z(value: float | None) -> str:
        # Green below 1.0, yellow up to the configured threshold, red beyond it.
        if value is None:
            return "--"
        magnitude = abs(value)
        if magnitude < 1.0:
            colour = "green"
        elif magnitude <= threshold:
            colour = "yellow"
        else:
            colour = "red"
        return f"[{colour}]{value:.2f}[/{colour}]"

    def _emit(dimension: str, mean, stddev, current, baseline_text: str, current_text: str) -> None:
        # The z-score is computed only when both baseline statistics exist.
        z = None
        if mean is not None and stddev is not None:
            z = z_score(float(current), float(mean), float(stddev))
        status = drift_cell if dimension in flagged else ok_cell
        table.add_row(dimension, baseline_text, current_text, _render_z(z), status)

    mean, spread = baseline.avg_input_tokens, baseline.stddev_input_tokens
    if mean is not None and spread is not None:
        _emit(
            "input_tokens", mean, spread, session.input_tokens,
            f"{mean:,.0f} +/- {spread:,.0f}",
            f"{session.input_tokens:,}",
        )

    mean, spread = baseline.avg_output_tokens, baseline.stddev_output_tokens
    if mean is not None and spread is not None:
        _emit(
            "output_tokens", mean, spread, session.output_tokens,
            f"{mean:,.0f} +/- {spread:,.0f}",
            f"{session.output_tokens:,}",
        )

    # Duration needs a value on the session as well as baseline statistics.
    if (
        session.duration_seconds is not None
        and baseline.avg_session_duration_s is not None
        and baseline.stddev_session_duration is not None
    ):
        _emit(
            "session_duration",
            baseline.avg_session_duration_s, baseline.stddev_session_duration,
            session.duration_seconds,
            f"{baseline.avg_session_duration_s:.1f}s +/- {baseline.stddev_session_duration:.1f}s",
            f"{session.duration_seconds:.1f}s",
        )

    mean, spread = baseline.avg_tool_call_count, baseline.stddev_tool_call_count
    if mean is not None and spread is not None:
        _emit(
            "tool_call_count", mean, spread, session.tool_call_count,
            f"{mean:.0f} +/- {spread:.0f}",
            str(session.tool_call_count),
        )

    # Tool sequence row (Jaccard, no z-score)
    sequence_violation = next(
        (v for v in result.violations if v.dimension == "tool_sequence"), None
    )
    if sequence_violation:
        table.add_row(
            "tool_sequence",
            sequence_violation.expected or "",
            sequence_violation.observed or "",
            "--",
            drift_cell,
        )
    elif baseline.common_tool_sequences:
        # The baseline column shows the minimum similarity implied by seq_threshold.
        min_similarity = 1.0 - seq_threshold
        table.add_row("tool_sequence", f"similarity >= {min_similarity:.2f}", "--", "--", ok_cell)

    console.print(table)