tokenjam 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tokenjam/__init__.py +1 -0
- tokenjam/api/__init__.py +0 -0
- tokenjam/api/app.py +104 -0
- tokenjam/api/deps.py +18 -0
- tokenjam/api/middleware.py +28 -0
- tokenjam/api/routes/__init__.py +0 -0
- tokenjam/api/routes/agents.py +33 -0
- tokenjam/api/routes/alerts.py +77 -0
- tokenjam/api/routes/budget.py +96 -0
- tokenjam/api/routes/cost.py +43 -0
- tokenjam/api/routes/drift.py +63 -0
- tokenjam/api/routes/logs.py +511 -0
- tokenjam/api/routes/metrics.py +81 -0
- tokenjam/api/routes/otlp.py +63 -0
- tokenjam/api/routes/spans.py +202 -0
- tokenjam/api/routes/status.py +84 -0
- tokenjam/api/routes/tools.py +22 -0
- tokenjam/api/routes/traces.py +92 -0
- tokenjam/cli/__init__.py +0 -0
- tokenjam/cli/cmd_alerts.py +94 -0
- tokenjam/cli/cmd_budget.py +119 -0
- tokenjam/cli/cmd_cost.py +90 -0
- tokenjam/cli/cmd_demo.py +82 -0
- tokenjam/cli/cmd_doctor.py +173 -0
- tokenjam/cli/cmd_drift.py +238 -0
- tokenjam/cli/cmd_export.py +200 -0
- tokenjam/cli/cmd_mcp.py +78 -0
- tokenjam/cli/cmd_onboard.py +779 -0
- tokenjam/cli/cmd_serve.py +85 -0
- tokenjam/cli/cmd_status.py +153 -0
- tokenjam/cli/cmd_stop.py +87 -0
- tokenjam/cli/cmd_tools.py +45 -0
- tokenjam/cli/cmd_traces.py +161 -0
- tokenjam/cli/cmd_uninstall.py +159 -0
- tokenjam/cli/main.py +110 -0
- tokenjam/core/__init__.py +0 -0
- tokenjam/core/alerts.py +619 -0
- tokenjam/core/api_backend.py +235 -0
- tokenjam/core/config.py +360 -0
- tokenjam/core/cost.py +102 -0
- tokenjam/core/db.py +718 -0
- tokenjam/core/drift.py +256 -0
- tokenjam/core/ingest.py +265 -0
- tokenjam/core/models.py +225 -0
- tokenjam/core/pricing.py +54 -0
- tokenjam/core/retention.py +21 -0
- tokenjam/core/schema_validator.py +156 -0
- tokenjam/demo/__init__.py +0 -0
- tokenjam/demo/env.py +96 -0
- tokenjam/mcp/__init__.py +0 -0
- tokenjam/mcp/server.py +1067 -0
- tokenjam/otel/__init__.py +0 -0
- tokenjam/otel/exporters.py +26 -0
- tokenjam/otel/provider.py +207 -0
- tokenjam/otel/semconv.py +144 -0
- tokenjam/pricing/models.toml +70 -0
- tokenjam/py.typed +0 -0
- tokenjam/sdk/__init__.py +21 -0
- tokenjam/sdk/agent.py +206 -0
- tokenjam/sdk/bootstrap.py +120 -0
- tokenjam/sdk/http_exporter.py +109 -0
- tokenjam/sdk/integrations/__init__.py +0 -0
- tokenjam/sdk/integrations/anthropic.py +200 -0
- tokenjam/sdk/integrations/autogen.py +97 -0
- tokenjam/sdk/integrations/base.py +27 -0
- tokenjam/sdk/integrations/bedrock.py +103 -0
- tokenjam/sdk/integrations/crewai.py +96 -0
- tokenjam/sdk/integrations/gemini.py +131 -0
- tokenjam/sdk/integrations/langchain.py +156 -0
- tokenjam/sdk/integrations/langgraph.py +101 -0
- tokenjam/sdk/integrations/litellm.py +323 -0
- tokenjam/sdk/integrations/llamaindex.py +52 -0
- tokenjam/sdk/integrations/nemoclaw.py +139 -0
- tokenjam/sdk/integrations/openai.py +159 -0
- tokenjam/sdk/integrations/openai_agents_sdk.py +47 -0
- tokenjam/sdk/transport.py +98 -0
- tokenjam/ui/index.html +1213 -0
- tokenjam/utils/__init__.py +0 -0
- tokenjam/utils/formatting.py +43 -0
- tokenjam/utils/ids.py +15 -0
- tokenjam/utils/time_parse.py +54 -0
- tokenjam-0.2.0.dist-info/METADATA +622 -0
- tokenjam-0.2.0.dist-info/RECORD +86 -0
- tokenjam-0.2.0.dist-info/WHEEL +4 -0
- tokenjam-0.2.0.dist-info/entry_points.txt +2 -0
- tokenjam-0.2.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,511 @@
|
|
|
1
|
+
"""Log-to-span converter for Claude Code OTLP log events."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import hashlib
|
|
5
|
+
import logging
|
|
6
|
+
from datetime import datetime, timedelta, timezone
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from tokenjam.core.ingest import IngestPipeline, SpanRejectedError
|
|
10
|
+
from tokenjam.core.models import NormalizedSpan, SpanKind, SpanStatus
|
|
11
|
+
from tokenjam.otel.semconv import ClaudeCodeEvents, CodexEvents, GenAIAttributes
|
|
12
|
+
from tokenjam.utils.ids import new_span_id
|
|
13
|
+
from tokenjam.api.routes.spans import _otlp_value, _safe_int
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _trace_id_from_session(session_id: str) -> str:
|
|
19
|
+
"""Deterministic 32-hex-char trace ID from session.id."""
|
|
20
|
+
return hashlib.md5(session_id.encode()).hexdigest()
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _span_id_from_prompt(prompt_id: str) -> str:
|
|
24
|
+
"""Deterministic 16-hex-char span ID from prompt.id.
|
|
25
|
+
Used as parent_span_id for tool/api spans within a turn,
|
|
26
|
+
and as span_id for the user_prompt span itself."""
|
|
27
|
+
return hashlib.md5(prompt_id.encode()).hexdigest()[:16]
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _parse_attrs(raw_attrs: list[dict]) -> dict[str, Any]:
|
|
31
|
+
"""Convert OTLP attribute list to a flat dict."""
|
|
32
|
+
attrs: dict[str, Any] = {}
|
|
33
|
+
for attr in raw_attrs:
|
|
34
|
+
key = attr.get("key", "")
|
|
35
|
+
value = _otlp_value(attr.get("value", {}))
|
|
36
|
+
if key and value is not None:
|
|
37
|
+
attrs[key] = value
|
|
38
|
+
return attrs
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _ts_to_datetime(timestamp_ns: int) -> datetime:
|
|
42
|
+
return datetime.fromtimestamp(timestamp_ns / 1e9, tz=timezone.utc)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _api_request_to_span(
    attrs: dict[str, Any],
    resource_attrs: dict[str, Any],
    timestamp_ns: int,
) -> NormalizedSpan:
    """Build an LLM-call span from a Claude Code API-request log event.

    Args:
        attrs: Flattened log-record attributes. ``ClaudeCodeEvents.SESSION_ID``
            and ``ClaudeCodeEvents.DURATION_MS`` are required (a missing key
            raises ``KeyError``); the remaining attributes are optional.
        resource_attrs: Flattened resource attributes; ``service.name`` is
            used as the agent ID, defaulting to ``"claude-code"``.
        timestamp_ns: Record timestamp in Unix nanoseconds, used as the span
            start time.

    Returns:
        A CLIENT-kind span with OK status whose end time is the start time
        plus the reported duration.
    """
    session_id = str(attrs[ClaudeCodeEvents.SESSION_ID])
    prompt_id = attrs.get(ClaudeCodeEvents.PROMPT_ID)
    duration_ms = float(attrs[ClaudeCodeEvents.DURATION_MS])
    started_at = _ts_to_datetime(timestamp_ns)
    finished_at = started_at + timedelta(milliseconds=duration_ms)

    # Optional attributes with no dedicated span field are passed through as-is.
    passthrough_keys = (
        ClaudeCodeEvents.SPEED,
        ClaudeCodeEvents.CACHE_CREATION_TOKENS,
        ClaudeCodeEvents.EVENT_SEQUENCE,
    )
    extra_attrs: dict[str, Any] = {
        key: attrs[key] for key in passthrough_keys if key in attrs
    }

    # The parent is the user-prompt span, whose ID is derived from the prompt ID.
    parent_span_id = None
    if prompt_id:
        parent_span_id = _span_id_from_prompt(prompt_id)

    model = None
    if "model" in attrs:
        model = str(attrs["model"])

    return NormalizedSpan(
        span_id=new_span_id(),
        trace_id=_trace_id_from_session(session_id),
        name=GenAIAttributes.SPAN_LLM_CALL,
        kind=SpanKind.CLIENT,
        status_code=SpanStatus.OK,
        start_time=started_at,
        end_time=finished_at,
        duration_ms=duration_ms,
        agent_id=resource_attrs.get("service.name", "claude-code"),
        session_id=session_id,
        conversation_id=prompt_id,
        parent_span_id=parent_span_id,
        provider="anthropic",
        model=model,
        input_tokens=_safe_int(attrs.get(ClaudeCodeEvents.INPUT_TOKENS)),
        output_tokens=_safe_int(attrs.get(ClaudeCodeEvents.OUTPUT_TOKENS)),
        cache_tokens=_safe_int(attrs.get(ClaudeCodeEvents.CACHE_READ_TOKENS, 0)),
        cost_usd=float(attrs[ClaudeCodeEvents.COST_USD]) if ClaudeCodeEvents.COST_USD in attrs else None,
        attributes=extra_attrs,
    )
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _tool_result_to_span(
    attrs: dict[str, Any],
    resource_attrs: dict[str, Any],
    timestamp_ns: int,
) -> NormalizedSpan:
    """Build a tool-call span from a Claude Code tool-result log event.

    Args:
        attrs: Flattened log-record attributes. ``ClaudeCodeEvents.SESSION_ID``,
            ``ClaudeCodeEvents.DURATION_MS`` and ``ClaudeCodeEvents.TOOL_NAME``
            are required (a missing key raises ``KeyError``).
        resource_attrs: Flattened resource attributes; ``service.name`` is
            used as the agent ID, defaulting to ``"claude-code"``.
        timestamp_ns: Record timestamp in Unix nanoseconds, used as the span
            start time.

    Returns:
        An INTERNAL-kind span. Its status is OK when the success attribute is
        ``True`` or the string ``"true"`` (case-insensitive); otherwise ERROR
        with the event's error attribute as the status message.
    """
    session_id = str(attrs[ClaudeCodeEvents.SESSION_ID])
    prompt_id = attrs.get(ClaudeCodeEvents.PROMPT_ID)
    duration_ms = float(attrs[ClaudeCodeEvents.DURATION_MS])
    started_at = _ts_to_datetime(timestamp_ns)
    finished_at = started_at + timedelta(milliseconds=duration_ms)

    # Claude Code sends success as a boolean or the string "true"
    raw_success = attrs.get(ClaudeCodeEvents.SUCCESS)
    succeeded = (
        raw_success
        if isinstance(raw_success, bool)
        else str(raw_success).lower() == "true"
    )

    if succeeded:
        status_code, status_message = SpanStatus.OK, None
    else:
        status_code, status_message = SpanStatus.ERROR, attrs.get(ClaudeCodeEvents.ERROR)

    # Optional attributes with no dedicated span field are passed through as-is.
    passthrough_keys = (
        ClaudeCodeEvents.TOOL_PARAMETERS,
        ClaudeCodeEvents.TOOL_INPUT,
        ClaudeCodeEvents.DECISION_TYPE,
        ClaudeCodeEvents.TOOL_RESULT_SIZE,
        ClaudeCodeEvents.EVENT_SEQUENCE,
    )
    extra_attrs: dict[str, Any] = {
        key: attrs[key] for key in passthrough_keys if key in attrs
    }

    # The parent is the user-prompt span, whose ID is derived from the prompt ID.
    parent_span_id = None
    if prompt_id:
        parent_span_id = _span_id_from_prompt(prompt_id)

    return NormalizedSpan(
        span_id=new_span_id(),
        trace_id=_trace_id_from_session(session_id),
        name=GenAIAttributes.SPAN_TOOL_CALL,
        kind=SpanKind.INTERNAL,
        status_code=status_code,
        status_message=status_message,
        start_time=started_at,
        end_time=finished_at,
        duration_ms=duration_ms,
        agent_id=resource_attrs.get("service.name", "claude-code"),
        session_id=session_id,
        conversation_id=prompt_id,
        parent_span_id=parent_span_id,
        tool_name=str(attrs[ClaudeCodeEvents.TOOL_NAME]),
        attributes=extra_attrs,
    )
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def _api_error_to_span(
    attrs: dict[str, Any],
    resource_attrs: dict[str, Any],
    timestamp_ns: int,
) -> NormalizedSpan:
    """Build a failed LLM-call span from a Claude Code API-error log event.

    Args:
        attrs: Flattened log-record attributes. ``ClaudeCodeEvents.SESSION_ID``,
            ``ClaudeCodeEvents.DURATION_MS`` and ``ClaudeCodeEvents.ERROR`` are
            required (a missing key raises ``KeyError``).
        resource_attrs: Flattened resource attributes; ``service.name`` is
            used as the agent ID, defaulting to ``"claude-code"``.
        timestamp_ns: Record timestamp in Unix nanoseconds, used as the span
            start time.

    Returns:
        A CLIENT-kind span with ERROR status and the error text as the
        status message.
    """
    session_id = str(attrs[ClaudeCodeEvents.SESSION_ID])
    prompt_id = attrs.get(ClaudeCodeEvents.PROMPT_ID)
    duration_ms = float(attrs[ClaudeCodeEvents.DURATION_MS])
    started_at = _ts_to_datetime(timestamp_ns)
    finished_at = started_at + timedelta(milliseconds=duration_ms)

    # Optional attributes with no dedicated span field are passed through as-is.
    passthrough_keys = (
        ClaudeCodeEvents.STATUS_CODE_HTTP,
        ClaudeCodeEvents.ATTEMPT,
        ClaudeCodeEvents.EVENT_SEQUENCE,
    )
    extra_attrs: dict[str, Any] = {
        key: attrs[key] for key in passthrough_keys if key in attrs
    }

    error_text = str(attrs[ClaudeCodeEvents.ERROR])

    # The parent is the user-prompt span, whose ID is derived from the prompt ID.
    parent_span_id = None
    if prompt_id:
        parent_span_id = _span_id_from_prompt(prompt_id)

    model = None
    if "model" in attrs:
        model = str(attrs["model"])

    return NormalizedSpan(
        span_id=new_span_id(),
        trace_id=_trace_id_from_session(session_id),
        name=GenAIAttributes.SPAN_LLM_CALL,
        kind=SpanKind.CLIENT,
        status_code=SpanStatus.ERROR,
        status_message=error_text,
        start_time=started_at,
        end_time=finished_at,
        duration_ms=duration_ms,
        agent_id=resource_attrs.get("service.name", "claude-code"),
        session_id=session_id,
        conversation_id=prompt_id,
        parent_span_id=parent_span_id,
        provider="anthropic",
        model=model,
        attributes=extra_attrs,
    )
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def _user_prompt_to_span(
    attrs: dict[str, Any],
    resource_attrs: dict[str, Any],
    timestamp_ns: int,
) -> NormalizedSpan:
    """Build the agent-invocation span for a Claude Code user-prompt event.

    The span ID is derived from the prompt ID when one is present, so that
    tool/API spans of the same turn can reference it as their parent; a
    fresh ID is generated otherwise. The event carries no duration, so the
    span starts and ends at the record timestamp.

    Args:
        attrs: Flattened log-record attributes. ``ClaudeCodeEvents.SESSION_ID``
            is required (a missing key raises ``KeyError``).
        resource_attrs: Flattened resource attributes; ``service.name`` is
            used as the agent ID, defaulting to ``"claude-code"``.
        timestamp_ns: Record timestamp in Unix nanoseconds.

    Returns:
        A SERVER-kind span with OK status.
    """
    session_id = str(attrs[ClaudeCodeEvents.SESSION_ID])
    prompt_id = attrs.get(ClaudeCodeEvents.PROMPT_ID)
    moment = _ts_to_datetime(timestamp_ns)

    # Optional attributes with no dedicated span field are passed through as-is.
    passthrough_keys = ("prompt_length", ClaudeCodeEvents.EVENT_SEQUENCE)
    extra_attrs: dict[str, Any] = {
        key: attrs[key] for key in passthrough_keys if key in attrs
    }

    if prompt_id:
        span_id = _span_id_from_prompt(prompt_id)
    else:
        span_id = new_span_id()

    return NormalizedSpan(
        span_id=span_id,
        trace_id=_trace_id_from_session(session_id),
        name=GenAIAttributes.SPAN_INVOKE_AGENT,
        kind=SpanKind.SERVER,
        status_code=SpanStatus.OK,
        start_time=moment,
        end_time=moment,
        agent_id=resource_attrs.get("service.name", "claude-code"),
        session_id=session_id,
        conversation_id=prompt_id,
        attributes=extra_attrs,
    )
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def _tool_decision_to_span(
    attrs: dict[str, Any],
    resource_attrs: dict[str, Any],
    timestamp_ns: int,
) -> NormalizedSpan:
    """Build a ``tool_decision`` span from a Claude Code tool-decision event.

    Args:
        attrs: Flattened log-record attributes. ``ClaudeCodeEvents.SESSION_ID``
            and ``ClaudeCodeEvents.TOOL_NAME`` are required (a missing key
            raises ``KeyError``).
        resource_attrs: Flattened resource attributes; ``service.name`` is
            used as the agent ID, defaulting to ``"claude-code"``.
        timestamp_ns: Record timestamp in Unix nanoseconds, used as the span
            start time.

    Returns:
        An INTERNAL-kind span with OK status. No end time, duration or
        parent span is set.
    """
    session_id = str(attrs[ClaudeCodeEvents.SESSION_ID])
    prompt_id = attrs.get(ClaudeCodeEvents.PROMPT_ID)
    started_at = _ts_to_datetime(timestamp_ns)

    # Optional attributes with no dedicated span field are passed through as-is.
    passthrough_keys = (
        ClaudeCodeEvents.DECISION,
        ClaudeCodeEvents.DECISION_SOURCE,
        ClaudeCodeEvents.EVENT_SEQUENCE,
    )
    extra_attrs: dict[str, Any] = {
        key: attrs[key] for key in passthrough_keys if key in attrs
    }

    return NormalizedSpan(
        span_id=new_span_id(),
        trace_id=_trace_id_from_session(session_id),
        name="tool_decision",
        kind=SpanKind.INTERNAL,
        status_code=SpanStatus.OK,
        start_time=started_at,
        agent_id=resource_attrs.get("service.name", "claude-code"),
        session_id=session_id,
        conversation_id=prompt_id,
        tool_name=str(attrs[ClaudeCodeEvents.TOOL_NAME]),
        attributes=extra_attrs,
    )
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
def _codex_api_request_to_span(
    attrs: dict[str, Any],
    resource_attrs: dict[str, Any],
    timestamp_ns: int,
) -> "NormalizedSpan | None":
    """Convert a Codex API-request event into an error span, if it failed.

    Only events carrying a non-empty ``CodexEvents.ERROR_MESSAGE`` produce a
    span; successful requests return ``None``. Token counts for successful
    calls arrive on the SSE completion event instead, which
    ``_codex_sse_event_to_span`` converts.

    Args:
        attrs: Flattened log-record attributes. The conversation ID defaults
            to ``"unknown"`` and the duration to ``0`` when absent.
        resource_attrs: Flattened resource attributes; ``service.name`` is
            used as the agent ID, defaulting to ``"codex-cli"``.
        timestamp_ns: Record timestamp in Unix nanoseconds, used as the span
            start time.

    Returns:
        A CLIENT-kind span with ERROR status, or ``None`` when the event has
        no error message.
    """
    error = attrs.get(CodexEvents.ERROR_MESSAGE)
    if not error:
        return None

    conversation_id = str(attrs.get(CodexEvents.CONVERSATION_ID, "unknown"))
    duration_ms = float(attrs.get(CodexEvents.DURATION_MS, 0))
    started_at = _ts_to_datetime(timestamp_ns)
    finished_at = started_at + timedelta(milliseconds=duration_ms)

    # Optional attributes with no dedicated span field are passed through as-is.
    passthrough_keys = (CodexEvents.HTTP_STATUS, CodexEvents.ATTEMPT)
    extra_attrs: dict[str, Any] = {
        key: attrs[key] for key in passthrough_keys if key in attrs
    }

    model = None
    if "model" in attrs:
        model = str(attrs["model"])

    return NormalizedSpan(
        span_id=new_span_id(),
        trace_id=_trace_id_from_session(conversation_id),
        name=GenAIAttributes.SPAN_LLM_CALL,
        kind=SpanKind.CLIENT,
        status_code=SpanStatus.ERROR,
        status_message=error,
        start_time=started_at,
        end_time=finished_at,
        duration_ms=duration_ms,
        agent_id=resource_attrs.get("service.name", "codex-cli"),
        session_id=conversation_id,
        conversation_id=conversation_id,
        provider="openai",
        model=model,
        attributes=extra_attrs,
    )
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
def _codex_sse_event_to_span(
    attrs: dict[str, Any],
    resource_attrs: dict[str, Any],
    timestamp_ns: int,
) -> "NormalizedSpan | None":
    """Convert a Codex SSE completion event into an LLM-call span.

    Only events whose ``CodexEvents.EVENT_KIND`` attribute equals
    ``"response.completed"`` are converted; every other kind returns
    ``None``. The token counts are read from the converted event.

    Args:
        attrs: Flattened log-record attributes. The conversation ID defaults
            to ``"unknown"`` and the duration to ``0`` when absent.
        resource_attrs: Flattened resource attributes; ``service.name`` is
            used as the agent ID, defaulting to ``"codex-cli"``.
        timestamp_ns: Record timestamp in Unix nanoseconds, used as the span
            start time.

    Returns:
        A CLIENT-kind span with OK status, or ``None`` for non-completion
        events.
    """
    is_completion = attrs.get(CodexEvents.EVENT_KIND) == "response.completed"
    if not is_completion:
        return None

    conversation_id = str(attrs.get(CodexEvents.CONVERSATION_ID, "unknown"))
    duration_ms = float(attrs.get(CodexEvents.DURATION_MS, 0))
    started_at = _ts_to_datetime(timestamp_ns)
    finished_at = started_at + timedelta(milliseconds=duration_ms)

    # Optional attributes with no dedicated span field are passed through as-is.
    passthrough_keys = (CodexEvents.REASONING_TOKEN_COUNT, CodexEvents.TOOL_TOKEN_COUNT)
    extra_attrs: dict[str, Any] = {
        key: attrs[key] for key in passthrough_keys if key in attrs
    }

    model = None
    if "model" in attrs:
        model = str(attrs["model"])

    return NormalizedSpan(
        span_id=new_span_id(),
        trace_id=_trace_id_from_session(conversation_id),
        name=GenAIAttributes.SPAN_LLM_CALL,
        kind=SpanKind.CLIENT,
        status_code=SpanStatus.OK,
        start_time=started_at,
        end_time=finished_at,
        duration_ms=duration_ms,
        agent_id=resource_attrs.get("service.name", "codex-cli"),
        session_id=conversation_id,
        conversation_id=conversation_id,
        provider="openai",
        model=model,
        input_tokens=_safe_int(attrs.get(CodexEvents.INPUT_TOKEN_COUNT)),
        output_tokens=_safe_int(attrs.get(CodexEvents.OUTPUT_TOKEN_COUNT)),
        cache_tokens=_safe_int(attrs.get(CodexEvents.CACHED_TOKEN_COUNT, 0)),
        attributes=extra_attrs,
    )
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
def _codex_user_prompt_to_span(
    attrs: dict[str, Any],
    resource_attrs: dict[str, Any],
    timestamp_ns: int,
) -> NormalizedSpan:
    """Build the agent-invocation span for a Codex user-prompt event.

    The event carries no duration, so the span starts and ends at the
    record timestamp.

    Args:
        attrs: Flattened log-record attributes. The conversation ID defaults
            to ``"unknown"`` when absent.
        resource_attrs: Flattened resource attributes; ``service.name`` is
            used as the agent ID, defaulting to ``"codex-cli"``.
        timestamp_ns: Record timestamp in Unix nanoseconds.

    Returns:
        A SERVER-kind span with OK status.
    """
    conversation_id = str(attrs.get(CodexEvents.CONVERSATION_ID, "unknown"))
    moment = _ts_to_datetime(timestamp_ns)

    # Optional attributes with no dedicated span field are passed through as-is.
    passthrough_keys = (CodexEvents.PROMPT_LENGTH, CodexEvents.PROMPT)
    extra_attrs: dict[str, Any] = {
        key: attrs[key] for key in passthrough_keys if key in attrs
    }

    return NormalizedSpan(
        span_id=new_span_id(),
        trace_id=_trace_id_from_session(conversation_id),
        name=GenAIAttributes.SPAN_INVOKE_AGENT,
        kind=SpanKind.SERVER,
        status_code=SpanStatus.OK,
        start_time=moment,
        end_time=moment,
        agent_id=resource_attrs.get("service.name", "codex-cli"),
        session_id=conversation_id,
        conversation_id=conversation_id,
        attributes=extra_attrs,
    )
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
def _codex_tool_decision_to_span(
    attrs: dict[str, Any],
    resource_attrs: dict[str, Any],
    timestamp_ns: int,
) -> NormalizedSpan:
    """Build a ``tool_decision`` span from a Codex tool-decision event.

    Args:
        attrs: Flattened log-record attributes. The conversation ID defaults
            to ``"unknown"`` and the tool name to ``""`` when absent.
        resource_attrs: Flattened resource attributes; ``service.name`` is
            used as the agent ID, defaulting to ``"codex-cli"``.
        timestamp_ns: Record timestamp in Unix nanoseconds, used as the span
            start time.

    Returns:
        An INTERNAL-kind span with OK status. No end time or duration is set.
    """
    conversation_id = str(attrs.get(CodexEvents.CONVERSATION_ID, "unknown"))
    started_at = _ts_to_datetime(timestamp_ns)

    # Optional attributes with no dedicated span field are passed through as-is.
    passthrough_keys = (
        CodexEvents.DECISION,
        CodexEvents.DECISION_SOURCE,
        CodexEvents.CALL_ID,
    )
    extra_attrs: dict[str, Any] = {
        key: attrs[key] for key in passthrough_keys if key in attrs
    }

    return NormalizedSpan(
        span_id=new_span_id(),
        trace_id=_trace_id_from_session(conversation_id),
        name="tool_decision",
        kind=SpanKind.INTERNAL,
        status_code=SpanStatus.OK,
        start_time=started_at,
        agent_id=resource_attrs.get("service.name", "codex-cli"),
        session_id=conversation_id,
        conversation_id=conversation_id,
        tool_name=str(attrs.get(CodexEvents.TOOL_NAME, "")),
        attributes=extra_attrs,
    )
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
def _codex_tool_result_to_span(
    attrs: dict[str, Any],
    resource_attrs: dict[str, Any],
    timestamp_ns: int,
) -> NormalizedSpan:
    """Build a tool-call span from a Codex tool-result event.

    Args:
        attrs: Flattened log-record attributes. The conversation ID defaults
            to ``"unknown"``, the duration to ``0`` and the tool name to
            ``""`` when absent.
        resource_attrs: Flattened resource attributes; ``service.name`` is
            used as the agent ID, defaulting to ``"codex-cli"``.
        timestamp_ns: Record timestamp in Unix nanoseconds, used as the span
            start time.

    Returns:
        An INTERNAL-kind span. Its status is OK when the success attribute is
        ``True`` or the string ``"true"`` (case-insensitive); otherwise ERROR
        with ``CodexEvents.ERROR_MESSAGE`` as the status message.
    """
    conversation_id = str(attrs.get(CodexEvents.CONVERSATION_ID, "unknown"))
    duration_ms = float(attrs.get(CodexEvents.DURATION_MS, 0))
    started_at = _ts_to_datetime(timestamp_ns)
    finished_at = started_at + timedelta(milliseconds=duration_ms)

    # The success flag may be a real boolean or a string such as "true".
    raw_success = attrs.get(CodexEvents.SUCCESS)
    succeeded = (
        raw_success
        if isinstance(raw_success, bool)
        else str(raw_success).lower() == "true"
    )

    if succeeded:
        status_code, status_message = SpanStatus.OK, None
    else:
        status_code, status_message = SpanStatus.ERROR, attrs.get(CodexEvents.ERROR_MESSAGE)

    # Optional attributes with no dedicated span field are passed through as-is.
    passthrough_keys = (CodexEvents.ARGUMENTS, CodexEvents.CALL_ID)
    extra_attrs: dict[str, Any] = {
        key: attrs[key] for key in passthrough_keys if key in attrs
    }

    return NormalizedSpan(
        span_id=new_span_id(),
        trace_id=_trace_id_from_session(conversation_id),
        name=GenAIAttributes.SPAN_TOOL_CALL,
        kind=SpanKind.INTERNAL,
        status_code=status_code,
        status_message=status_message,
        start_time=started_at,
        end_time=finished_at,
        duration_ms=duration_ms,
        agent_id=resource_attrs.get("service.name", "codex-cli"),
        session_id=conversation_id,
        conversation_id=conversation_id,
        tool_name=str(attrs.get(CodexEvents.TOOL_NAME, "")),
        attributes=extra_attrs,
    )
|
|
425
|
+
|
|
426
|
+
|
|
427
|
+
# Dispatch table: log event name -> converter that turns the record into a
# span. Every converter takes (attrs, resource_attrs, timestamp_ns); the Codex
# API-request and SSE converters may return None to signal "nothing to ingest".
# parse_log_records() looks event names up here and skips unlisted names.
_CONVERTERS = {
    # Claude Code events
    ClaudeCodeEvents.API_REQUEST: _api_request_to_span,
    ClaudeCodeEvents.TOOL_RESULT: _tool_result_to_span,
    ClaudeCodeEvents.API_ERROR: _api_error_to_span,
    ClaudeCodeEvents.USER_PROMPT: _user_prompt_to_span,
    ClaudeCodeEvents.TOOL_DECISION: _tool_decision_to_span,
    # Codex CLI events
    CodexEvents.API_REQUEST: _codex_api_request_to_span,
    CodexEvents.SSE_EVENT: _codex_sse_event_to_span,
    CodexEvents.USER_PROMPT: _codex_user_prompt_to_span,
    CodexEvents.TOOL_DECISION: _codex_tool_decision_to_span,
    CodexEvents.TOOL_RESULT: _codex_tool_result_to_span,
}
|
|
440
|
+
|
|
441
|
+
|
|
442
|
+
def _record_timestamp_ns(record: dict, attrs: dict[str, Any]) -> int:
    """Return a log record's timestamp in Unix nanoseconds (0 if unavailable).

    ``timeUnixNano`` is used when it is non-zero. Codex CLI sets it to 0 and
    puts the real timestamp in ``attrs[CodexEvents.EVENT_TIMESTAMP]`` as an
    ISO-8601 string, so that attribute is the fallback. A trailing ``Z`` is
    read as UTC, an explicit offset is honoured, and a naive value is assumed
    to be UTC. An unparseable fallback value yields 0.

    Raises:
        ValueError, TypeError: if ``timeUnixNano`` is present but cannot be
            converted to an integer.
    """
    timestamp_ns = int(record.get("timeUnixNano") or 0)
    if timestamp_ns != 0:
        return timestamp_ns

    ts_str = attrs.get(CodexEvents.EVENT_TIMESTAMP)
    if not isinstance(ts_str, str) or not ts_str:
        return 0

    # datetime.fromisoformat() only accepts a "Z" suffix on Python 3.11+.
    iso_text = ts_str[:-1] + "+00:00" if ts_str.endswith("Z") else ts_str
    try:
        parsed = datetime.fromisoformat(iso_text)
    except ValueError:
        return 0
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=timezone.utc)

    # Integer arithmetic avoids the precision loss of float seconds * 1e9.
    epoch = datetime(1970, 1, 1, tzinfo=timezone.utc)
    return (parsed - epoch) // timedelta(microseconds=1) * 1000


def parse_log_records(
    body: dict,
    pipeline: IngestPipeline,
) -> tuple[int, list[dict[str, str]]]:
    """
    Walk resourceLogs -> scopeLogs -> logRecords.
    Dispatch each record by event name to the appropriate converter.
    Call pipeline.process() for each resulting NormalizedSpan.
    Returns (ingested_count, rejections_list).

    Same error-tolerance as spans.py: individual failures are logged and
    collected in rejections, never propagated. Batch continues processing.
    This covers malformed record fields (e.g. a non-numeric timeUnixNano) as
    well as converter and pipeline errors. Containers that are missing or
    JSON null are treated as empty. Records with no event name, or with an
    event name that has no converter, are skipped without a rejection.
    """
    ingested = 0
    rejections: list[dict[str, str]] = []

    for resource_log in body.get("resourceLogs") or []:
        # Extract resource-level attributes (e.g. service.name)
        resource = resource_log.get("resource") or {}
        resource_attrs = _parse_attrs(resource.get("attributes") or [])

        for scope_log in resource_log.get("scopeLogs") or []:
            for record in scope_log.get("logRecords") or []:
                # Refined below as more of the record is successfully parsed.
                record_id = "unknown"
                try:
                    body_val = record.get("body", {})
                    event_name = _otlp_value(body_val) if isinstance(body_val, dict) else body_val

                    # Parse attributes here — needed both for the Codex
                    # event.name fallback and for converters that follow.
                    attrs = _parse_attrs(record.get("attributes") or [])

                    # Codex CLI puts the event name in attrs["event.name"]
                    # rather than the log record body; fall back to that when
                    # the body is missing or empty.
                    if not isinstance(event_name, str) or not event_name:
                        event_name = attrs.get("event.name")
                    if not isinstance(event_name, str):
                        continue

                    converter = _CONVERTERS.get(event_name)
                    if converter is None:
                        # Unknown event — skip silently
                        continue

                    record_id = event_name
                    timestamp_ns = _record_timestamp_ns(record, attrs)
                    record_id = f"{event_name}:{timestamp_ns}"

                    span = converter(attrs, resource_attrs, timestamp_ns)
                    if span is None:
                        # Converter decided this event carries nothing to ingest.
                        continue
                    pipeline.process(span)
                    ingested += 1
                except SpanRejectedError as exc:
                    rejections.append({"record_id": record_id, "reason": str(exc)})
                except Exception as exc:
                    # Deliberate catch-all: one bad record must not abort the batch.
                    logger.warning("Failed to process log record %s: %s", record_id, exc)
                    rejections.append({"record_id": record_id, "reason": str(exc)})

    return ingested, rejections
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"""GET /metrics — Prometheus text format metrics from DB aggregation."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from fastapi import APIRouter, Depends, Request
|
|
5
|
+
from fastapi.responses import PlainTextResponse
|
|
6
|
+
|
|
7
|
+
from tokenjam.api.deps import require_api_key
|
|
8
|
+
from tokenjam.core.models import AlertFilters, CostFilters
|
|
9
|
+
|
|
10
|
+
router = APIRouter(dependencies=[Depends(require_api_key)])
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@router.get("/metrics")
async def prometheus_metrics(request: Request) -> PlainTextResponse:
    """
    Render Prometheus text-format metrics from fresh DB queries.

    Nothing is cached between requests: every call re-queries the database
    held on ``request.app.state.db``. The output has cost and token usage per
    agent, tool-call counts per agent and tool, alert counts per
    agent/type/severity, and the duration of each agent's latest completed
    session.
    """
    db = request.app.state.db
    lines: list[str] = []

    # -- Cost per agent --
    _add_header(lines, "ocw_cost_usd_total", "gauge", "Running cost total per agent")
    cost_rows = db.get_cost_summary(CostFilters(group_by="agent"))
    for cost_row in cost_rows:
        agent = cost_row.agent_id or "unknown"
        lines.append(f'ocw_cost_usd_total{{agent_id="{_escape(agent)}"}} {cost_row.cost_usd}')

    # -- Tokens per agent and type (same rows as the cost section) --
    _add_header(lines, "ocw_tokens_total", "counter", "Token usage by type")
    for token_row in cost_rows:
        agent = token_row.agent_id or "unknown"
        lines.extend(
            (
                f'ocw_tokens_total{{agent_id="{_escape(agent)}",type="input"}} {token_row.input_tokens}',
                f'ocw_tokens_total{{agent_id="{_escape(agent)}",type="output"}} {token_row.output_tokens}',
            )
        )

    # -- Tool calls per agent --
    tool_rows = db.get_tool_calls(None, None, None)
    _add_header(lines, "ocw_tool_calls_total", "counter", "Total tool calls per agent and tool")
    for tool_row in tool_rows:
        agent = tool_row.get("agent_id") or "unknown"
        tool = tool_row.get("tool_name") or "unknown"
        count = tool_row.get("call_count", 0)
        lines.append(f'ocw_tool_calls_total{{agent_id="{_escape(agent)}",tool_name="{_escape(tool)}"}} {count}')

    # -- Alerts per agent, type, severity --
    _add_header(lines, "ocw_alerts_total", "counter", "Total alerts fired")
    alerts = db.get_alerts(AlertFilters(limit=10000))
    # Tally alerts by label combination, keeping first-seen order.
    alert_counts: dict[tuple[str, str, str], int] = {}
    for alert in alerts:
        labels = (alert.agent_id or "unknown", alert.type.value, alert.severity.value)
        if labels in alert_counts:
            alert_counts[labels] += 1
        else:
            alert_counts[labels] = 1
    for (agent, atype, sev), count in alert_counts.items():
        lines.append(
            f'ocw_alerts_total{{agent_id="{_escape(agent)}",'
            f'type="{_escape(atype)}",severity="{_escape(sev)}"}} {count}'
        )

    # -- Session duration (latest completed per agent) --
    _add_header(lines, "ocw_session_duration_seconds", "gauge", "Duration of last completed session")
    # Only agents that appear in the cost rows are reported.
    agent_ids = {row.agent_id for row in cost_rows if row.agent_id}
    for agent_id in sorted(agent_ids):
        sessions = db.get_completed_sessions(agent_id, limit=1)
        if not sessions or sessions[0].duration_seconds is None:
            continue
        lines.append(
            f'ocw_session_duration_seconds{{agent_id="{_escape(agent_id)}"}} '
            f'{sessions[0].duration_seconds:.1f}'
        )

    # The header lines guarantee a non-empty list, so this adds exactly the
    # trailing newline.
    payload = "\n".join(lines) + "\n"
    return PlainTextResponse(payload, media_type="text/plain; version=0.0.4")
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _add_header(lines: list[str], name: str, mtype: str, help_text: str) -> None:
|
|
75
|
+
lines.append(f"# HELP {name} {help_text}")
|
|
76
|
+
lines.append(f"# TYPE {name} {mtype}")
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _escape(value: str) -> str:
|
|
80
|
+
"""Escape label values for Prometheus text format."""
|
|
81
|
+
return value.replace("\\", "\\\\").replace('"', '\\"').replace("\n", "\\n")
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"""Standard OTLP/HTTP route aliases.
|
|
2
|
+
|
|
3
|
+
POST /v1/traces — forwards to the same OTLP JSON ingest logic as /api/v1/spans.
|
|
4
|
+
POST /v1/metrics — stub (200 OK, silently discards).
|
|
5
|
+
POST /v1/logs — primary ingest path for Claude Code telemetry; converts OTLP log
|
|
6
|
+
events to NormalizedSpan objects via parse_log_records() in logs.py.
|
|
7
|
+
|
|
8
|
+
These exist so that OTel exporters configured with a bare endpoint
|
|
9
|
+
(e.g. ``http://127.0.0.1:7391``) work out of the box — OpenClaw's
|
|
10
|
+
diagnostics-otel plugin uses this convention.
|
|
11
|
+
"""
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import logging
|
|
15
|
+
|
|
16
|
+
from fastapi import APIRouter, Request
|
|
17
|
+
from fastapi.responses import JSONResponse
|
|
18
|
+
|
|
19
|
+
from tokenjam.api.routes.spans import ingest_spans
|
|
20
|
+
|
|
21
|
+
logger = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
router = APIRouter()
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@router.post("/v1/traces")
async def otlp_traces(request: Request) -> JSONResponse:
    """Accept OTLP JSON traces by delegating to the ``ingest_spans`` handler."""
    response = await ingest_spans(request)
    return response
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@router.post("/v1/metrics")
async def otlp_metrics(request: Request) -> JSONResponse:
    """Stub endpoint: acknowledge OTLP metrics with 200 OK and discard them.

    The request body is never read; the endpoint exists so metric exporters
    pointed at this server do not log warnings.
    """
    acknowledgement = {"status": "ok"}
    return JSONResponse(content=acknowledgement, status_code=200)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@router.post("/v1/logs")
async def otlp_logs(request: Request) -> JSONResponse:
    """Accept OTLP JSON logs — primary ingest path for Claude Code telemetry.

    Responds 400 when the body is not valid JSON. Otherwise responds 200 with
    an ``ingested`` / ``rejected`` / ``rejections`` summary. A payload that
    is not a dict containing ``resourceLogs`` gets an all-zero summary.
    """
    # Imported at call time rather than at module level.
    from tokenjam.api.routes.logs import parse_log_records

    try:
        body = await request.json()
    except Exception:
        return JSONResponse(status_code=400, content={"error": "Invalid JSON body"})

    if isinstance(body, dict) and "resourceLogs" in body:
        ingested, rejections = parse_log_records(body, request.app.state.pipeline)
    else:
        # Non-log OTLP signals (resourceSpans, resourceMetrics) routed here
        # when an SDK uses this endpoint as its base — silently ignore.
        ingested, rejections = 0, []

    summary = {
        "ingested": ingested,
        "rejected": len(rejections),
        "rejections": rejections,
    }
    return JSONResponse(status_code=200, content=summary)
|