trodo-python 2.6.0__tar.gz → 2.8.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. {trodo_python-2.6.0 → trodo_python-2.8.0}/PKG-INFO +64 -1
  2. {trodo_python-2.6.0 → trodo_python-2.8.0}/README.md +63 -0
  3. {trodo_python-2.6.0 → trodo_python-2.8.0}/pyproject.toml +1 -1
  4. trodo_python-2.8.0/tests/test_llm_usage_cost.py +115 -0
  5. {trodo_python-2.6.0 → trodo_python-2.8.0}/trodo/__init__.py +1 -1
  6. {trodo_python-2.6.0 → trodo_python-2.8.0}/trodo/otel/helpers.py +102 -23
  7. {trodo_python-2.6.0 → trodo_python-2.8.0}/trodo/otel/processor.py +8 -0
  8. {trodo_python-2.6.0 → trodo_python-2.8.0}/trodo/otel/wrap_agent.py +35 -0
  9. {trodo_python-2.6.0 → trodo_python-2.8.0}/trodo/session/server_session.py +21 -32
  10. {trodo_python-2.6.0 → trodo_python-2.8.0}/trodo_python.egg-info/PKG-INFO +64 -1
  11. {trodo_python-2.6.0 → trodo_python-2.8.0}/trodo_python.egg-info/SOURCES.txt +1 -0
  12. {trodo_python-2.6.0 → trodo_python-2.8.0}/setup.cfg +0 -0
  13. {trodo_python-2.6.0 → trodo_python-2.8.0}/tests/test_anon_distinct_id.py +0 -0
  14. {trodo_python-2.6.0 → trodo_python-2.8.0}/tests/test_auto_instrument_fixes.py +0 -0
  15. {trodo_python-2.6.0 → trodo_python-2.8.0}/tests/test_cross_process_session.py +0 -0
  16. {trodo_python-2.6.0 → trodo_python-2.8.0}/tests/test_end_run.py +0 -0
  17. {trodo_python-2.6.0 → trodo_python-2.8.0}/tests/test_processor_methods.py +0 -0
  18. {trodo_python-2.6.0 → trodo_python-2.8.0}/tests/test_register_otel.py +0 -0
  19. {trodo_python-2.6.0 → trodo_python-2.8.0}/tests/test_start_run.py +0 -0
  20. {trodo_python-2.6.0 → trodo_python-2.8.0}/tests/test_wrap_agent_unchanged.py +0 -0
  21. {trodo_python-2.6.0 → trodo_python-2.8.0}/trodo/api/__init__.py +0 -0
  22. {trodo_python-2.6.0 → trodo_python-2.8.0}/trodo/api/async_client.py +0 -0
  23. {trodo_python-2.6.0 → trodo_python-2.8.0}/trodo/api/endpoints.py +0 -0
  24. {trodo_python-2.6.0 → trodo_python-2.8.0}/trodo/api/http_client.py +0 -0
  25. {trodo_python-2.6.0 → trodo_python-2.8.0}/trodo/auto/__init__.py +0 -0
  26. {trodo_python-2.6.0 → trodo_python-2.8.0}/trodo/auto/auto_event_manager.py +0 -0
  27. {trodo_python-2.6.0 → trodo_python-2.8.0}/trodo/client.py +0 -0
  28. {trodo_python-2.6.0 → trodo_python-2.8.0}/trodo/managers/__init__.py +0 -0
  29. {trodo_python-2.6.0 → trodo_python-2.8.0}/trodo/managers/group_manager.py +0 -0
  30. {trodo_python-2.6.0 → trodo_python-2.8.0}/trodo/managers/people_manager.py +0 -0
  31. {trodo_python-2.6.0 → trodo_python-2.8.0}/trodo/otel/__init__.py +0 -0
  32. {trodo_python-2.6.0 → trodo_python-2.8.0}/trodo/otel/auto_instrument.py +0 -0
  33. {trodo_python-2.6.0 → trodo_python-2.8.0}/trodo/otel/context.py +0 -0
  34. {trodo_python-2.6.0 → trodo_python-2.8.0}/trodo/otel/register.py +0 -0
  35. {trodo_python-2.6.0 → trodo_python-2.8.0}/trodo/otel/transport.py +0 -0
  36. {trodo_python-2.6.0 → trodo_python-2.8.0}/trodo/queue/__init__.py +0 -0
  37. {trodo_python-2.6.0 → trodo_python-2.8.0}/trodo/queue/batch_flusher.py +0 -0
  38. {trodo_python-2.6.0 → trodo_python-2.8.0}/trodo/queue/event_queue.py +0 -0
  39. {trodo_python-2.6.0 → trodo_python-2.8.0}/trodo/session/__init__.py +0 -0
  40. {trodo_python-2.6.0 → trodo_python-2.8.0}/trodo/session/session_manager.py +0 -0
  41. {trodo_python-2.6.0 → trodo_python-2.8.0}/trodo/types.py +0 -0
  42. {trodo_python-2.6.0 → trodo_python-2.8.0}/trodo/user_context.py +0 -0
  43. {trodo_python-2.6.0 → trodo_python-2.8.0}/trodo_python.egg-info/dependency_links.txt +0 -0
  44. {trodo_python-2.6.0 → trodo_python-2.8.0}/trodo_python.egg-info/requires.txt +0 -0
  45. {trodo_python-2.6.0 → trodo_python-2.8.0}/trodo_python.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: trodo-python
3
- Version: 2.6.0
3
+ Version: 2.8.0
4
4
  Summary: Trodo Analytics SDK for Python — server-side event tracking
5
5
  License: ISC
6
6
  Keywords: analytics,tracking,trodo,server-side
@@ -274,6 +274,69 @@ with tracer.start_as_current_span('custom') as sp:
274
274
  sp.set_attribute('gen_ai.system', 'my-llm')
275
275
  ```
276
276
 
277
+ ### Cost & token reporting (v2.8.0+)
278
+
279
+ Trodo computes per-span cost from whatever you report. **You don't have to send
280
+ cost** — send tokens and Trodo prices them using the team's **Model Price** config
281
+ (Configuration → Model Price), falling back to built-in defaults. Resolution per
282
+ span, highest priority first:
283
+
284
+ 1. **Explicit `cost`** (a final USD number) — used as-is, never recomputed.
285
+ 2. **`cost_details`** (per-category USD breakdown) — authoritative.
286
+ 3. **Tokens** (`usage_details` map, or `input_tokens`/`output_tokens`) — priced by
287
+ the team's configured model price → global default → left unset if unknown.
288
+
289
+ All token categories live in an open **`usage_details`** map. `input`/`output` are
290
+ the defaults; add `cache_read`, `cache_write`, `reasoning`, `audio`, `image`, or any
291
+ custom key. Raw provider field names are fine — the backend normalises them
292
+ (`prompt_tokens`→`input`, `cache_read_input_tokens`→`cache_read`, …). Custom keys
293
+ must match the category name you price in the UI.
294
+
295
+ ```python
296
+ # (a) Tokens only — Trodo prices it from the model name. The llm() helper
297
+ # auto-forwards the FULL provider usage object, so cache/reasoning tokens
298
+ # are captured with zero config.
299
+ ask = trodo.llm('answer', call_anthropic,  # returns the wrapped callable — call ask(...) to run it
300
+ model='claude-sonnet-4', provider='anthropic')
301
+
302
+ # (b) Raw usage object via track_llm_call — same auto-normalisation.
303
+ trodo.track_llm_call(
304
+ model='gpt-4o', provider='openai',
305
+ usage=resp['usage'], # {prompt_tokens, completion_tokens, prompt_tokens_details:{cached_tokens}}
306
+ prompt=body, completion=resp,
307
+ )
308
+
309
+ # (c) Explicit usage map + cache shorthands.
310
+ trodo.track_llm_call(
311
+ model='claude-sonnet-4', provider='anthropic',
312
+ usage_details={'input': 1000, 'output': 500},
313
+ cache_read_tokens=200, cache_write_tokens=80, # → cache_read / cache_write
314
+ )
315
+
316
+ # (d) Pass cost straight through (skip server-side pricing).
317
+ trodo.track_llm_call(model='gpt-4o', provider='openai', cost=0.0123)
318
+
319
+ # (e) Per-category cost breakdown (authoritative).
320
+ trodo.track_llm_call(
321
+ model='gpt-4o', provider='openai',
322
+ cost_details={'input': 0.0003, 'output': 0.0005, 'cache_read': 0.00001},
323
+ )
324
+ ```
325
+
326
+ Inside a `wrap_agent` / `span` block, set the same fields on the handle:
327
+
328
+ ```python
329
+ s.set_llm(
330
+ model='gpt-4o', provider='openai',
331
+ usage_details={'input': 1000, 'output': 500},
332
+ cache_read_tokens=200,
333
+ # or: cost=0.0123 / cost_details={'input': ..., 'output': ...}
334
+ )
335
+ ```
336
+
337
+ Override auto-extraction with `extract_usage` (scalar in/out) or `extract_usage_map`
338
+ (open map) on `trodo.llm(name, fn, ...)`.
339
+
277
340
  ### Cross-service runs
278
341
 
279
342
  When one service calls another, the downstream service **joins** the
@@ -243,6 +243,69 @@ with tracer.start_as_current_span('custom') as sp:
243
243
  sp.set_attribute('gen_ai.system', 'my-llm')
244
244
  ```
245
245
 
246
+ ### Cost & token reporting (v2.8.0+)
247
+
248
+ Trodo computes per-span cost from whatever you report. **You don't have to send
249
+ cost** — send tokens and Trodo prices them using the team's **Model Price** config
250
+ (Configuration → Model Price), falling back to built-in defaults. Resolution per
251
+ span, highest priority first:
252
+
253
+ 1. **Explicit `cost`** (a final USD number) — used as-is, never recomputed.
254
+ 2. **`cost_details`** (per-category USD breakdown) — authoritative.
255
+ 3. **Tokens** (`usage_details` map, or `input_tokens`/`output_tokens`) — priced by
256
+ the team's configured model price → global default → left unset if unknown.
257
+
258
+ All token categories live in an open **`usage_details`** map. `input`/`output` are
259
+ the defaults; add `cache_read`, `cache_write`, `reasoning`, `audio`, `image`, or any
260
+ custom key. Raw provider field names are fine — the backend normalises them
261
+ (`prompt_tokens`→`input`, `cache_read_input_tokens`→`cache_read`, …). Custom keys
262
+ must match the category name you price in the UI.
263
+
264
+ ```python
265
+ # (a) Tokens only — Trodo prices it from the model name. The llm() helper
266
+ # auto-forwards the FULL provider usage object, so cache/reasoning tokens
267
+ # are captured with zero config.
268
+ ask = trodo.llm('answer', call_anthropic,  # returns the wrapped callable — call ask(...) to run it
269
+ model='claude-sonnet-4', provider='anthropic')
270
+
271
+ # (b) Raw usage object via track_llm_call — same auto-normalisation.
272
+ trodo.track_llm_call(
273
+ model='gpt-4o', provider='openai',
274
+ usage=resp['usage'], # {prompt_tokens, completion_tokens, prompt_tokens_details:{cached_tokens}}
275
+ prompt=body, completion=resp,
276
+ )
277
+
278
+ # (c) Explicit usage map + cache shorthands.
279
+ trodo.track_llm_call(
280
+ model='claude-sonnet-4', provider='anthropic',
281
+ usage_details={'input': 1000, 'output': 500},
282
+ cache_read_tokens=200, cache_write_tokens=80, # → cache_read / cache_write
283
+ )
284
+
285
+ # (d) Pass cost straight through (skip server-side pricing).
286
+ trodo.track_llm_call(model='gpt-4o', provider='openai', cost=0.0123)
287
+
288
+ # (e) Per-category cost breakdown (authoritative).
289
+ trodo.track_llm_call(
290
+ model='gpt-4o', provider='openai',
291
+ cost_details={'input': 0.0003, 'output': 0.0005, 'cache_read': 0.00001},
292
+ )
293
+ ```
294
+
295
+ Inside a `wrap_agent` / `span` block, set the same fields on the handle:
296
+
297
+ ```python
298
+ s.set_llm(
299
+ model='gpt-4o', provider='openai',
300
+ usage_details={'input': 1000, 'output': 500},
301
+ cache_read_tokens=200,
302
+ # or: cost=0.0123 / cost_details={'input': ..., 'output': ...}
303
+ )
304
+ ```
305
+
306
+ Override auto-extraction with `extract_usage` (scalar in/out) or `extract_usage_map`
307
+ (open map) on `trodo.llm(name, fn, ...)`.
308
+
246
309
  ### Cross-service runs
247
310
 
248
311
  When one service calls another, the downstream service **joins** the
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "trodo-python"
7
- version = "2.6.0"
7
+ version = "2.8.0"
8
8
  description = "Trodo Analytics SDK for Python — server-side event tracking"
9
9
  readme = "README.md"
10
10
  license = { text = "ISC" }
@@ -0,0 +1,115 @@
1
+ """LLM usage + cost wire payloads (v2.8.0).
2
+
3
+ Verifies every way a caller can report cost/tokens reaches the backend in the
4
+ expected snake_case shape:
5
+ - explicit ``cost`` (highest priority)
6
+ - open ``usage_details`` map + ``cache_read_tokens``/``cache_write_tokens``
7
+ - raw provider ``usage`` object auto-extracted into usage_details
8
+ - ``cost_details`` per-category breakdown
9
+ - ``llm()`` helper auto-forwarding the full provider usage map (incl. cache)
10
+ """
11
+ from __future__ import annotations
12
+
13
+ from typing import Any, Dict, List
14
+
15
+ from trodo.otel.helpers import _default_usage_map, llm, track_llm_call
16
+ from trodo.otel.wrap_agent import wrap_agent
17
+
18
+
19
+ def _llm_spans(http) -> List[Dict[str, Any]]:
20
+ spans = http.run_ingest[0].get("spans", []) if http.run_ingest else []
21
+ return [s for s in spans if s.get("kind") == "llm"]
22
+
23
+
24
def test_default_usage_map_flattens_openai_details():
    """Leaves nested under OpenAI ``prompt_tokens_details`` are hoisted to the top level."""
    response = {
        "usage": {
            "prompt_tokens": 1000,
            "completion_tokens": 200,
            "prompt_tokens_details": {"cached_tokens": 300},
        }
    }
    expected = {"prompt_tokens": 1000, "completion_tokens": 200, "cached_tokens": 300}
    assert _default_usage_map(response) == expected
30
+
31
+
32
def test_default_usage_map_anthropic_cache_fields():
    """Anthropic-style cache counters pass through under their raw provider keys."""
    anthropic_usage = {
        "input_tokens": 50,
        "output_tokens": 25,
        "cache_read_input_tokens": 10,
        "cache_creation_input_tokens": 5,
    }
    extracted = _default_usage_map({"usage": anthropic_usage})
    assert extracted == {
        "input_tokens": 50, "output_tokens": 25,
        "cache_read_input_tokens": 10, "cache_creation_input_tokens": 5,
    }
41
+
42
+
43
def test_default_usage_map_bare_usage_object():
    """A usage dict passed directly (no enclosing response) is accepted as-is."""
    bare_usage = {"prompt_tokens": 12, "completion_tokens": 4}
    extracted = _default_usage_map(bare_usage)
    assert extracted == {"prompt_tokens": 12, "completion_tokens": 4}
46
+
47
+
48
def test_track_llm_call_explicit_cost_wins(processor, http):
    """An explicit ``cost`` is sent on the span alongside the scalar token counts."""
    call_kwargs = dict(
        model="gpt-4o", provider="openai",
        input_tokens=100, output_tokens=50, cost=0.42,
    )
    with wrap_agent(processor=processor, team_site_id="site-x", agent_name="chat"):
        track_llm_call(**call_kwargs)
    recorded = _llm_spans(http)[0]
    assert recorded["cost"] == 0.42
    assert recorded["input_tokens"] == 100
55
+
56
+
57
def test_track_llm_call_usage_details_and_cache_shorthands(processor, http):
    """Cache shorthands are merged into ``usage_details`` as ``cache_read`` / ``cache_write``."""
    explicit_usage = {"input": 1000, "output": 500}
    with wrap_agent(processor=processor, team_site_id="site-x", agent_name="chat"):
        track_llm_call(
            model="claude-sonnet-4",
            provider="anthropic",
            usage_details=explicit_usage,
            cache_read_tokens=200,
            cache_write_tokens=80,
        )
    recorded = _llm_spans(http)[0]
    assert recorded["usage_details"] == {
        "input": 1000, "output": 500, "cache_read": 200, "cache_write": 80,
    }
66
+
67
+
68
def test_track_llm_call_raw_usage_auto_extract(processor, http):
    """A raw provider ``usage`` object is auto-extracted into ``usage_details``."""
    provider_usage = {
        "prompt_tokens": 800,
        "completion_tokens": 400,
        "prompt_tokens_details": {"cached_tokens": 100},
    }
    with wrap_agent(processor=processor, team_site_id="site-x", agent_name="chat"):
        track_llm_call(model="gpt-4o", provider="openai", usage=provider_usage)
    recorded = _llm_spans(http)[0]
    assert recorded["usage_details"] == {
        "prompt_tokens": 800, "completion_tokens": 400, "cached_tokens": 100,
    }
79
+
80
+
81
def test_track_llm_call_cost_details(processor, http):
    """A per-category ``cost_details`` breakdown reaches the span unchanged."""
    breakdown = {"input": 0.0003, "output": 0.0005}
    with wrap_agent(processor=processor, team_site_id="site-x", agent_name="chat"):
        track_llm_call(
            model="gpt-4o",
            provider="openai",
            input_tokens=100,
            output_tokens=50,
            cost_details=breakdown,
        )
    recorded = _llm_spans(http)[0]
    assert recorded["cost_details"] == {"input": 0.0003, "output": 0.0005}
87
+
88
+
89
def test_llm_helper_auto_forwards_usage_map(processor, http):
    """``llm()`` forwards the provider's whole usage map (cache fields included) by default."""
    fake_usage = {
        "input_tokens": 1000, "output_tokens": 200, "cache_read_input_tokens": 300,
    }

    def call_model(*_a, **_k):
        # Stand-in for a provider call returning an Anthropic-shaped response.
        return {"text": "hi", "usage": fake_usage}

    with wrap_agent(processor=processor, team_site_id="site-x", agent_name="chat"):
        traced_call = llm("answer", call_model, model="claude-sonnet-4", provider="anthropic")
        traced_call()
    recorded = _llm_spans(http)[0]
    assert recorded["usage_details"] == {
        "input_tokens": 1000, "output_tokens": 200, "cache_read_input_tokens": 300,
    }
    assert recorded["model"] == "claude-sonnet-4"
102
+
103
+
104
def test_llm_helper_custom_scalar_extractor_backcompat(processor, http):
    """A caller-supplied ``extract_usage`` yields scalar tokens only — no ``usage_details``."""

    def call_model(*_a, **_k):
        # Response shape the default extractor would not recognise.
        return {"weird": {"in": 7, "out": 3}}

    def scalar_extractor(r):
        counts = r["weird"]
        return (counts["in"], counts["out"])

    with wrap_agent(processor=processor, team_site_id="site-x", agent_name="chat"):
        traced_call = llm(
            "answer", call_model, model="x", provider="y",
            extract_usage=scalar_extractor,
        )
        traced_call()
    recorded = _llm_spans(http)[0]
    assert recorded["input_tokens"] == 7
    assert recorded["output_tokens"] == 3
    assert "usage_details" not in recorded
@@ -40,7 +40,7 @@ Downstream microservice (join the caller's run instead of making a new one):
40
40
 
41
41
  from __future__ import annotations
42
42
 
43
- __version__ = "2.4.0"
43
+ __version__ = "2.8.0"
44
44
 
45
45
  from typing import Any, Callable, Dict, List, Optional, Union
46
46
 
@@ -232,6 +232,52 @@ def _default_usage_extractor(result: Any) -> Tuple[Optional[int], Optional[int]]
232
232
  return (None, None)
233
233
 
234
234
 
235
+ def _coerce_num(v: Any) -> Optional[float]:
236
+ try:
237
+ n = float(v)
238
+ except (TypeError, ValueError):
239
+ return None
240
+ return n
241
+
242
+
243
+ def _default_usage_map(result: Any) -> Optional[Dict[str, float]]:
244
+ """Forward the FULL provider usage object (incl. cache/reasoning) as an open
245
+ map. The backend normalises raw keys (``prompt_tokens`` -> input,
246
+ ``cache_read_input_tokens`` / ``cached_tokens`` -> cache_read,
247
+ ``cache_creation_input_tokens`` -> cache_write, ``reasoning_tokens`` ->
248
+ reasoning, ...), so passing whatever the provider returned is enough.
249
+
250
+ Flattens OpenAI ``*_tokens_details`` so cached/reasoning leaves survive.
251
+ Accepts either the bare usage object or a full response carrying ``usage`` /
252
+ ``usageMetadata``.
253
+ """
254
+ if result is None:
255
+ return None
256
+ raw: Any = None
257
+ if isinstance(result, dict):
258
+ raw = result.get("usage") or result.get("usageMetadata")
259
+ # Bare usage object passed directly (has numeric token leaves).
260
+ if raw is None and any(_coerce_num(v) is not None or isinstance(v, dict) for v in result.values()):
261
+ raw = result
262
+ else:
263
+ raw = getattr(result, "usage", None) or getattr(result, "usageMetadata", None)
264
+ if not isinstance(raw, dict):
265
+ return None
266
+ out: Dict[str, float] = {}
267
+ for k, v in raw.items():
268
+ if isinstance(v, dict):
269
+ # OpenAI prompt_tokens_details / completion_tokens_details — flatten.
270
+ for dk, dv in v.items():
271
+ n = _coerce_num(dv)
272
+ if n is not None:
273
+ out[dk] = n
274
+ continue
275
+ n = _coerce_num(v)
276
+ if n is not None:
277
+ out[k] = n
278
+ return out or None
279
+
280
+
235
281
  def llm(
236
282
  name: Any = None,
237
283
  fn: Optional[Callable[..., Any]] = None,
@@ -240,13 +286,16 @@ def llm(
240
286
  provider: Optional[str] = None,
241
287
  temperature: Optional[float] = None,
242
288
  extract_usage: Optional[Callable[[Any], Tuple[Optional[int], Optional[int]]]] = None,
289
+ extract_usage_map: Optional[Callable[[Any], Optional[Dict[str, float]]]] = None,
243
290
  ) -> Any:
244
291
  """Wrap an LLM call as a ``kind='llm'`` span with auto token extraction.
245
292
 
246
- The helper records ``model``/``provider`` on entry; on return it inspects
247
- the response for the common usage shapes (OpenAI ``usage.prompt_tokens``,
248
- Anthropic ``usage.input_tokens``, Gemini ``usageMetadata.promptTokenCount``)
249
- and records tokens. Pass ``extract_usage=lambda r: (in, out)`` to override.
293
+ By default the helper forwards the FULL provider usage object (OpenAI
294
+ ``usage``, Anthropic ``usage`` incl. cache fields, Gemini ``usageMetadata``)
295
+ as an open map, so cache/reasoning tokens are captured and priced
296
+ automatically by the backend. Pass ``extract_usage=lambda r: (in, out)`` to
297
+ fall back to scalar-only extraction, or ``extract_usage_map=lambda r: {..}``
298
+ to build the map yourself.
250
299
 
251
300
  Usage::
252
301
 
@@ -257,7 +306,6 @@ def llm(
257
306
  @trodo.llm('plan', model='claude-haiku-4-5', provider='anthropic')
258
307
  def plan(messages): ...
259
308
  """
260
- extractor = extract_usage or _default_usage_extractor
261
309
 
262
310
  def _set_llm(s: SpanHandle) -> None:
263
311
  if model or provider or temperature is not None:
@@ -268,18 +316,25 @@ def llm(
268
316
  )
269
317
 
270
318
  def _on_result(s: SpanHandle, result: Any) -> None:
319
+ if extract_usage is not None:
320
+ # Caller opted into scalar-only extraction (back-compat).
321
+ try:
322
+ pt, ct = extract_usage(result)
323
+ except Exception:
324
+ pt, ct = (None, None)
325
+ if pt is not None or ct is not None:
326
+ s.set_llm(
327
+ model=model, provider=provider,
328
+ input_tokens=pt, output_tokens=ct, temperature=temperature,
329
+ )
330
+ return
331
+ # Default: forward the full provider usage map (incl. cache/reasoning).
271
332
  try:
272
- pt, ct = extractor(result)
333
+ usage_map = (extract_usage_map or _default_usage_map)(result)
273
334
  except Exception:
274
- pt, ct = (None, None)
275
- if pt is not None or ct is not None:
276
- s.set_llm(
277
- model=model,
278
- provider=provider,
279
- input_tokens=pt,
280
- output_tokens=ct,
281
- temperature=temperature,
282
- )
335
+ usage_map = None
336
+ if usage_map:
337
+ s.set_llm(model=model, provider=provider, temperature=temperature, usage_details=usage_map)
283
338
 
284
339
  return _dual_form("llm")(
285
340
  name, fn, kind="llm", extra_set=_set_llm, on_result=_on_result
@@ -387,6 +442,11 @@ def track_llm_call(
387
442
  provider: Optional[str] = None,
388
443
  input_tokens: Optional[int] = None,
389
444
  output_tokens: Optional[int] = None,
445
+ cache_read_tokens: Optional[int] = None,
446
+ cache_write_tokens: Optional[int] = None,
447
+ usage_details: Optional[Dict[str, float]] = None,
448
+ usage: Any = None,
449
+ cost_details: Optional[Dict[str, float]] = None,
390
450
  prompt: Any = None,
391
451
  completion: Any = None,
392
452
  temperature: Optional[float] = None,
@@ -397,23 +457,38 @@ def track_llm_call(
397
457
  """Record a one-shot LLM span for a raw-HTTP caller.
398
458
 
399
459
  Opens and immediately closes a ``span(kind='llm')`` populated with the
400
- model + token counts + prompt/completion. No-op outside an active run
401
- context.
460
+ model + tokens + prompt/completion. No-op outside an active run context.
461
+
462
+ Cost can be reported three ways (in priority order):
463
+ 1. ``cost`` — a final USD figure (overrides all server-side derivation).
464
+ 2. ``cost_details`` — a per-category USD breakdown (authoritative).
465
+ 3. tokens only — the backend prices them against the team's model prices.
466
+
467
+ Tokens can be passed as scalars (``input_tokens``/``output_tokens``),
468
+ cache shorthands (``cache_read_tokens``/``cache_write_tokens``), an open
469
+ ``usage_details`` map, or a raw provider ``usage`` object to auto-extract
470
+ from (e.g. ``resp['usage']`` or ``resp['usageMetadata']``).
402
471
 
403
472
  Usage:
404
473
  resp = httpx.post(url, json=body).json()
405
474
  trodo.track_llm_call(
406
- model='gemini-2.5-flash',
407
- provider='google',
408
- input_tokens=resp['usageMetadata']['promptTokenCount'],
409
- output_tokens=resp['usageMetadata']['candidatesTokenCount'],
410
- prompt=body,
411
- completion=resp,
475
+ model='claude-sonnet-4', provider='anthropic',
476
+ usage=resp['usage'], # cache fields captured automatically
477
+ prompt=body, completion=resp,
412
478
  )
413
479
  """
414
480
  if get_active_context() is None:
415
481
  return
416
482
  span_name = name or (f"llm.{provider}.{model}" if model else "llm")
483
+ # Merge an explicit usage_details map with anything auto-extracted from a
484
+ # raw `usage` object the caller passed through.
485
+ merged_usage: Dict[str, float] = {}
486
+ if usage is not None:
487
+ from_usage = _default_usage_map(usage)
488
+ if from_usage:
489
+ merged_usage.update(from_usage)
490
+ if usage_details:
491
+ merged_usage.update(usage_details)
417
492
  with span_ctx(span_name, kind="llm", input=prompt, attributes=metadata) as s:
418
493
  s.set_llm(
419
494
  model=model,
@@ -421,6 +496,10 @@ def track_llm_call(
421
496
  input_tokens=input_tokens,
422
497
  output_tokens=output_tokens,
423
498
  cost=cost,
499
+ usage_details=merged_usage or None,
500
+ cost_details=cost_details,
501
+ cache_read_tokens=cache_read_tokens,
502
+ cache_write_tokens=cache_write_tokens,
424
503
  temperature=temperature,
425
504
  )
426
505
  if completion is not None:
@@ -59,6 +59,14 @@ class TrodoSpan:
59
59
  input_tokens: Optional[int] = None
60
60
  output_tokens: Optional[int] = None
61
61
  cost: Optional[float] = None
62
+ # Open token-usage map forwarded to the backend, which normalises raw
63
+ # provider field names to canonical categories (input, output, cache_read,
64
+ # cache_write, reasoning, + custom keys) and prices each against the team's
65
+ # configured model prices.
66
+ usage_details: Optional[Dict[str, float]] = None
67
+ # Per-category cost breakdown in USD (authoritative when set — ingested cost
68
+ # always wins over server-side derivation).
69
+ cost_details: Optional[Dict[str, float]] = None
62
70
  temperature: Optional[float] = None
63
71
  tool_name: Optional[str] = None
64
72
  attributes: Optional[Dict[str, Any]] = None
@@ -197,6 +197,11 @@ class SpanHandle:
197
197
  self.input_tokens: Optional[int] = None
198
198
  self.output_tokens: Optional[int] = None
199
199
  self.cost: Optional[float] = None
200
+ # Open token-usage map (canonical or raw provider keys — the backend
201
+ # normalises). Lets callers report cache/reasoning/custom categories.
202
+ self.usage_details: Optional[Dict[str, float]] = None
203
+ # Optional per-category cost breakdown in USD (authoritative when set).
204
+ self.cost_details: Optional[Dict[str, float]] = None
200
205
  self.temperature: Optional[float] = None
201
206
  self.tool_name: Optional[str] = None
202
207
 
@@ -217,6 +222,10 @@ class SpanHandle:
217
222
  input_tokens: Optional[int] = None,
218
223
  output_tokens: Optional[int] = None,
219
224
  cost: Optional[float] = None,
225
+ usage_details: Optional[Dict[str, float]] = None,
226
+ cost_details: Optional[Dict[str, float]] = None,
227
+ cache_read_tokens: Optional[int] = None,
228
+ cache_write_tokens: Optional[int] = None,
220
229
  temperature: Optional[float] = None,
221
230
  ) -> None:
222
231
  if model is not None:
@@ -231,6 +240,28 @@ class SpanHandle:
231
240
  self.cost = float(cost)
232
241
  if temperature is not None:
233
242
  self.temperature = float(temperature)
243
+ # Merge any usage map + cache shorthands into one forwarded map.
244
+ if usage_details or cache_read_tokens is not None or cache_write_tokens is not None:
245
+ merged: Dict[str, float] = dict(self.usage_details or {})
246
+ if usage_details:
247
+ for k, v in usage_details.items():
248
+ try:
249
+ merged[k] = float(v)
250
+ except (TypeError, ValueError):
251
+ continue
252
+ if cache_read_tokens is not None:
253
+ merged["cache_read"] = float(cache_read_tokens)
254
+ if cache_write_tokens is not None:
255
+ merged["cache_write"] = float(cache_write_tokens)
256
+ self.usage_details = merged
257
+ if cost_details:
258
+ merged_c: Dict[str, float] = dict(self.cost_details or {})
259
+ for k, v in cost_details.items():
260
+ try:
261
+ merged_c[k] = float(v)
262
+ except (TypeError, ValueError):
263
+ continue
264
+ self.cost_details = merged_c
234
265
 
235
266
  def set_tool(self, tool_name: str) -> None:
236
267
  self.tool_name = tool_name
@@ -575,6 +606,8 @@ class join_run:
575
606
  input_tokens=self.handle.input_tokens,
576
607
  output_tokens=self.handle.output_tokens,
577
608
  cost=self.handle.cost,
609
+ usage_details=self.handle.usage_details,
610
+ cost_details=self.handle.cost_details,
578
611
  temperature=self.handle.temperature,
579
612
  tool_name=self.handle.tool_name,
580
613
  attributes=self.handle.attributes or None,
@@ -665,6 +698,8 @@ class span:
665
698
  input_tokens=self.handle.input_tokens,
666
699
  output_tokens=self.handle.output_tokens,
667
700
  cost=self.handle.cost,
701
+ usage_details=self.handle.usage_details,
702
+ cost_details=self.handle.cost_details,
668
703
  temperature=self.handle.temperature,
669
704
  tool_name=self.handle.tool_name,
670
705
  attributes=self.handle.attributes or None,
@@ -3,7 +3,6 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  import time
6
- import uuid
7
6
  from datetime import datetime, timezone
8
7
  from typing import Any, Dict, Optional
9
8
 
@@ -14,13 +13,24 @@ def now_iso() -> str:
14
13
  return datetime.now(timezone.utc).isoformat()
15
14
 
16
15
 
16
+ def server_session_id(distinct_id: str) -> str:
17
+ """Deterministic, "backend consistent" session id for a backend user.
18
+
19
+ Backend SDKs are stateless: the same distinct_id must resolve to the SAME
20
+ session across processes and restarts. Using ``server:{distinct_id}`` instead
21
+ of a per-process ``uuid4()`` produces exactly one session row per backend
22
+ user (no per-process bloat) and is idempotent server-side.
23
+ """
24
+ return f"server:{distinct_id}"
25
+
26
+
17
27
  def create_server_session(
18
28
  site_id: str,
19
29
  distinct_id: str,
20
30
  session_id: Optional[str] = None,
21
31
  ) -> ServerSession:
22
32
  return ServerSession(
23
- session_id=session_id or str(uuid.uuid4()),
33
+ session_id=session_id or server_session_id(distinct_id),
24
34
  site_id=site_id,
25
35
  distinct_id=distinct_id,
26
36
  start_time=now_iso(),
@@ -30,6 +40,15 @@ def create_server_session(
30
40
 
31
41
 
32
42
  def build_session_payload(session: ServerSession) -> Dict[str, Any]:
43
+ """Minimal server-session payload.
44
+
45
+ Backend SDKs cannot know browser-only signals (geo, device, browser, UTM,
46
+ referrer, wallet), so those fields are OMITTED rather than sent as ~30
47
+ explicit nulls — ingestion defaults missing fields to null. This saves
48
+ ingestion bandwidth and is more accurate. Only the markers ingestion keys
49
+ on are retained: ``is_server_session`` (drives identity-level browser-field
50
+ guards) and ``device_type='server'`` (server-origin fallback detector).
51
+ """
33
52
  return {
34
53
  "session_id": session.session_id,
35
54
  "site_id": session.site_id,
@@ -37,39 +56,9 @@ def build_session_payload(session: ServerSession) -> Dict[str, Any]:
37
56
  "distinct_id": session.distinct_id,
38
57
  "team_id": None,
39
58
  "start_time": session.start_time,
40
- "end_time": None,
41
59
  "last_activity": int(session.last_activity * 1000),
42
- "duration": 0,
43
- "pages_viewed": 0,
44
60
  "is_bounce": False,
45
- "previous_session_id": None,
46
- "time_since_last_session": None,
47
- "entry_page": None,
48
- "exit_page": None,
49
61
  "referrer": "server",
50
- "ip_address": None,
51
- "city": None,
52
- "region": None,
53
- "country": None,
54
- "browser_name": None,
55
- "browser_version": None,
56
62
  "device_type": "server",
57
- "os": None,
58
- "resolution": None,
59
- "user_agent": None,
60
- "language": None,
61
- "wallet_address": None,
62
- "wallet_type": None,
63
- "chain_name": None,
64
- "is_web3_user": False,
65
- "wallet_connected": False,
66
- "utm_source": None,
67
- "utm_medium": None,
68
- "utm_campaign": None,
69
- "utm_term": None,
70
- "utm_content": None,
71
- "utm_id": None,
72
- "visited_pages": [],
73
- "active_time_ms": 0,
74
63
  "is_server_session": True,
75
64
  }
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: trodo-python
3
- Version: 2.6.0
3
+ Version: 2.8.0
4
4
  Summary: Trodo Analytics SDK for Python — server-side event tracking
5
5
  License: ISC
6
6
  Keywords: analytics,tracking,trodo,server-side
@@ -274,6 +274,69 @@ with tracer.start_as_current_span('custom') as sp:
274
274
  sp.set_attribute('gen_ai.system', 'my-llm')
275
275
  ```
276
276
 
277
+ ### Cost & token reporting (v2.8.0+)
278
+
279
+ Trodo computes per-span cost from whatever you report. **You don't have to send
280
+ cost** — send tokens and Trodo prices them using the team's **Model Price** config
281
+ (Configuration → Model Price), falling back to built-in defaults. Resolution per
282
+ span, highest priority first:
283
+
284
+ 1. **Explicit `cost`** (a final USD number) — used as-is, never recomputed.
285
+ 2. **`cost_details`** (per-category USD breakdown) — authoritative.
286
+ 3. **Tokens** (`usage_details` map, or `input_tokens`/`output_tokens`) — priced by
287
+ the team's configured model price → global default → left unset if unknown.
288
+
289
+ All token categories live in an open **`usage_details`** map. `input`/`output` are
290
+ the defaults; add `cache_read`, `cache_write`, `reasoning`, `audio`, `image`, or any
291
+ custom key. Raw provider field names are fine — the backend normalises them
292
+ (`prompt_tokens`→`input`, `cache_read_input_tokens`→`cache_read`, …). Custom keys
293
+ must match the category name you price in the UI.
294
+
295
+ ```python
296
+ # (a) Tokens only — Trodo prices it from the model name. The llm() helper
297
+ # auto-forwards the FULL provider usage object, so cache/reasoning tokens
298
+ # are captured with zero config.
299
+ ask = trodo.llm('answer', call_anthropic,  # returns the wrapped callable — call ask(...) to run it
300
+ model='claude-sonnet-4', provider='anthropic')
301
+
302
+ # (b) Raw usage object via track_llm_call — same auto-normalisation.
303
+ trodo.track_llm_call(
304
+ model='gpt-4o', provider='openai',
305
+ usage=resp['usage'], # {prompt_tokens, completion_tokens, prompt_tokens_details:{cached_tokens}}
306
+ prompt=body, completion=resp,
307
+ )
308
+
309
+ # (c) Explicit usage map + cache shorthands.
310
+ trodo.track_llm_call(
311
+ model='claude-sonnet-4', provider='anthropic',
312
+ usage_details={'input': 1000, 'output': 500},
313
+ cache_read_tokens=200, cache_write_tokens=80, # → cache_read / cache_write
314
+ )
315
+
316
+ # (d) Pass cost straight through (skip server-side pricing).
317
+ trodo.track_llm_call(model='gpt-4o', provider='openai', cost=0.0123)
318
+
319
+ # (e) Per-category cost breakdown (authoritative).
320
+ trodo.track_llm_call(
321
+ model='gpt-4o', provider='openai',
322
+ cost_details={'input': 0.0003, 'output': 0.0005, 'cache_read': 0.00001},
323
+ )
324
+ ```
325
+
326
+ Inside a `wrap_agent` / `span` block, set the same fields on the handle:
327
+
328
+ ```python
329
+ s.set_llm(
330
+ model='gpt-4o', provider='openai',
331
+ usage_details={'input': 1000, 'output': 500},
332
+ cache_read_tokens=200,
333
+ # or: cost=0.0123 / cost_details={'input': ..., 'output': ...}
334
+ )
335
+ ```
336
+
337
+ Override auto-extraction with `extract_usage` (scalar in/out) or `extract_usage_map`
338
+ (open map) on `trodo.llm(name, fn, ...)`.
339
+
277
340
  ### Cross-service runs
278
341
 
279
342
  When one service calls another, the downstream service **joins** the
@@ -4,6 +4,7 @@ tests/test_anon_distinct_id.py
4
4
  tests/test_auto_instrument_fixes.py
5
5
  tests/test_cross_process_session.py
6
6
  tests/test_end_run.py
7
+ tests/test_llm_usage_cost.py
7
8
  tests/test_processor_methods.py
8
9
  tests/test_register_otel.py
9
10
  tests/test_start_run.py
File without changes