traceforge-llm 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
traceforge/__init__.py ADDED
@@ -0,0 +1,19 @@
1
+ """TraceForge — agent runtime tracing and replay."""
2
+ from traceforge.tracer import Tracer, RunContext, __version__
3
+ from traceforge.serialiser import exclude, TraceSerialiseError
4
+ from traceforge.span import Span, SpanType, LLMCallData, ToolCallData
5
+ from traceforge.trace import Trace, TraceManifest
6
+
7
+ __all__ = [
8
+ "Tracer",
9
+ "RunContext",
10
+ "exclude",
11
+ "TraceSerialiseError",
12
+ "Span",
13
+ "SpanType",
14
+ "LLMCallData",
15
+ "ToolCallData",
16
+ "Trace",
17
+ "TraceManifest",
18
+ "__version__",
19
+ ]
traceforge/cli.py ADDED
@@ -0,0 +1,139 @@
1
+ """TraceForge CLI: init, list, open, show."""
2
+ from pathlib import Path
3
+
4
+ import click
5
+
6
+
7
@click.group()
@click.version_option(package_name="traceforge-llm", prog_name="traceforge")
def cli():
    """TraceForge — agent runtime tracing and replay."""
    # Root command group. It performs no work of its own: the subcommands in
    # this module attach to it through ``@cli.command(...)``. The docstring
    # above is what click prints as the top-level ``--help`` text, so it is
    # kept short and user-facing.
    return None
11
+
12
+
13
@cli.command()
def init():
    """Scaffold traceforge.yaml, agent.py example, .gitignore entry."""
    # NOTE: click prints the docstring above as this command's --help text,
    # so implementation notes are kept in comments.
    #
    # Writes into the current working directory:
    #   * traceforge.yaml - default settings
    #   * agent.py        - a runnable example agent
    #   * .gitignore      - gains a ".traceforge/" entry when it lacks one
    #
    # Files that already exist are left untouched, so running `init` inside an
    # existing project can never clobber a user's own agent.py or config.
    config_yaml = (
        "auto_save: true\n"
        "store_dir: .traceforge/runs\n"
        "slim: false\n"
    )

    # The template must itself be valid Python, so every nesting level carries
    # a full four-space indent.
    example_agent = (
        '"""TraceForge example agent.\nRun: python agent.py\n"""\n'
        "import asyncio\n"
        "from anthropic import AsyncAnthropic\n"
        "from traceforge import Tracer\n"
        "from traceforge.integrations.anthropic import AnthropicInstrumentor\n"
        "\n"
        "tracer = Tracer()\n"
        "\n"
        "\n"
        "async def main():\n"
        "    async with tracer.run() as run:\n"
        "        client = AnthropicInstrumentor(run).instrument(AsyncAnthropic())\n"
        "        response = await client.messages.create(\n"
        '            model="claude-haiku-4-5-20251001",\n'
        "            max_tokens=256,\n"
        '            system="You are a helpful assistant.",\n'
        '            messages=[{"role": "user", "content": "What is 2 + 2?"}],\n'
        "        )\n"
        "        print(response.content[0].text)\n"
        "\n"
        "    trace = run.trace\n"
        "    trace.print_summary()\n"
        '    print(f"\\nReport saved: .traceforge/runs/<run-id>-<run-name>/report.html")\n'
        "\n"
        "\n"
        'if __name__ == "__main__":\n'
        "    asyncio.run(main())\n"
    )

    scaffold = (
        ("traceforge.yaml", config_yaml, "Created traceforge.yaml"),
        ("agent.py", example_agent, "Created agent.py (example)"),
    )
    for filename, content, created_msg in scaffold:
        try:
            # Mode "x" fails if the file exists, which makes the
            # "do not overwrite" check atomic instead of exists()-then-write.
            with Path(filename).open("x", encoding="utf-8") as fh:
                fh.write(content)
        except FileExistsError:
            click.echo(f"Skipped {filename} (already exists)")
        else:
            click.echo(created_msg)

    gitignore = Path(".gitignore")
    existing = gitignore.read_text() if gitignore.exists() else ""
    if ".traceforge/" in existing:
        # Report honestly instead of claiming an update that did not happen.
        click.echo(".gitignore already lists .traceforge/")
    else:
        with gitignore.open("a") as f:
            # Leading newline guards against a last line with no terminator.
            f.write("\n.traceforge/\n")
        click.echo("Updated .gitignore")

    click.echo("\nNext: python agent.py")
62
+
63
+
64
@cli.command(name="list")
def list_runs():
    """List all local traces."""
    # Imports happen inside the command body, so rich and the file store are
    # only loaded when `traceforge list` actually runs.
    from rich.console import Console
    from rich.table import Table
    from rich import box

    from traceforge.storage.file_store import list_traces

    out = Console()
    records = list_traces()
    if not records:
        out.print("[dim]No traces found. Run your agent first.[/dim]")
        return

    # (header, extra add_column options) in display order.
    column_specs = (
        ("Run name", {}),
        ("Run ID", {}),
        ("Started", {}),
        ("Duration", {}),
        ("Spans", {"justify": "right"}),
        ("Errors", {"justify": "right"}),
    )
    summary = Table(box=box.SIMPLE, show_header=True, header_style="bold")
    for header, options in column_specs:
        summary.add_column(header, **options)

    for record in records:
        name_cell = f"[cyan]{record['run_name']}[/cyan]"
        # Only the first 8 characters of the run id are displayed.
        id_cell = f"[dim]{record['run_id'][:8]}...[/dim]"
        # Keep the first 19 characters of the start timestamp.
        started_cell = str(record.get("started_at", ""))[:19]
        duration_cell = f"{record.get('duration_ms', '?')}ms"
        spans_cell = str(record.get("total_spans", "?"))
        if record.get("errors"):
            errors_cell = f"[red]{record.get('errors', 0)}[/red]"
        else:
            errors_cell = "0"
        summary.add_row(
            name_cell,
            id_cell,
            started_cell,
            duration_cell,
            spans_cell,
            errors_cell,
        )

    out.print(summary)
99
+
100
+
101
@cli.command(name="open")
@click.argument("run_id_or_name")
def open_cmd(run_id_or_name: str):
    """Open a trace HTML report in the browser."""
    # NOTE: click prints the docstring above as this command's --help text,
    # so implementation notes are kept in comments.
    #
    # `run_id_or_name` is matched as a substring of the run directory names
    # under STORE_DIR. Exits with status 1 when the store is missing, nothing
    # matches, or the matched run has no report.html.
    import webbrowser

    from traceforge.storage.file_store import STORE_DIR

    if not STORE_DIR.exists():
        click.echo(f"Trace store {STORE_DIR} does not exist")
        raise SystemExit(1)

    # iterdir() yields entries in arbitrary order; sorting makes the chosen
    # run deterministic when several directories match.
    matches = sorted(
        (
            d for d in STORE_DIR.iterdir()
            if d.is_dir() and run_id_or_name in d.name
        ),
        key=lambda d: d.name,
    )
    if not matches:
        click.echo(f"No trace found matching {run_id_or_name!r}")
        raise SystemExit(1)

    report = matches[0] / "report.html"
    if not report.exists():
        click.echo(f"Report HTML not found for {matches[0].name}")
        raise SystemExit(1)

    # as_uri() produces a well-formed, percent-encoded file URL on every
    # platform; a hand-built f"file://{path}" breaks on Windows drive paths
    # and on paths containing spaces or other reserved characters.
    webbrowser.open(report.resolve().as_uri())
    click.echo(f"Opening {report}")
127
+
128
+
129
@cli.command()
@click.argument("run_id_or_name")
def show(run_id_or_name: str):
    """Print a trace summary to the terminal."""
    # The file store is imported inside the command body, so it is only
    # loaded when `traceforge show` actually runs.
    from traceforge.storage.file_store import load_trace as _load_trace

    # Load the stored trace for the given identifier and let it render its
    # own summary.
    _load_trace(run_id_or_name).print_summary()
136
+
137
+
138
# Script entry point: hand control to the click group when this module is
# executed directly rather than imported.
if __name__ == "__main__":
    cli()
File without changes
@@ -0,0 +1,79 @@
1
+ """Anthropic AsyncAnthropic instrumentor."""
2
+ import time
3
+ from typing import TYPE_CHECKING, Optional
4
+
5
+ if TYPE_CHECKING:
6
+ from traceforge.tracer import RunContext
7
+
8
+
9
+ class _MockAnthropicResponse:
10
+ """Minimal mock Anthropic response for replay mode."""
11
+
12
+ def __init__(self, text: str):
13
+ self.content = [type("Block", (), {"text": text, "type": "text"})()]
14
+ self.usage = type("Usage", (), {"input_tokens": 0, "output_tokens": 0})()
15
+ self.stop_reason = "end_turn"
16
+
17
+
18
class AnthropicInstrumentor:
    """Wraps `client.messages.create` on an Anthropic async client.

    Usage:
        async with tracer.run() as run:
            instrumentor = AnthropicInstrumentor(run)
            client = instrumentor.instrument(AsyncAnthropic())
            # use client normally — every call is traced
    """

    def __init__(self, run: "RunContext", mock_interceptor=None):
        """Bind the instrumentor to a run.

        Args:
            run: Object whose ``record_llm_call(**fields)`` receives one call
                per traced request.
            mock_interceptor: Optional object with ``get(messages)``. When it
                returns a non-None value for a request, that value is used as
                the response text and the real API is not called.
        """
        self._run = run
        self._mock = mock_interceptor

    @staticmethod
    def _response_text(response) -> str:
        """Return the text of *response*, or ``str(response)`` as a fallback.

        Joins the ``text`` of every content block that has a string ``text``
        attribute, so a response whose first block is not a text block (for
        example a thinking or tool-use block) still yields its text rather
        than the repr of the whole response object.
        """
        try:
            parts = [
                block.text
                for block in response.content
                if isinstance(getattr(block, "text", None), str)
            ]
        except Exception:
            # Tracing is best-effort: an unexpected response shape must never
            # break the caller's successful API call.
            parts = []
        return "".join(parts) if parts else str(response)

    def instrument(self, client):
        """Replace ``client.messages.create`` with a tracing wrapper.

        The wrapper accepts keyword arguments only, forwards them unchanged
        to the original coroutine, and reports each call through
        ``run.record_llm_call``.

        Returns:
            The same *client* object, patched in place.
        """
        original_create = client.messages.create

        async def traced_create(**kwargs):
            messages = kwargs.get("messages", [])

            if self._mock is not None:
                cached = self._mock.get(messages)
                if cached is not None:
                    # Replay path: no network call, so latency is recorded
                    # as 0 and no token counts are reported.
                    self._run.record_llm_call(
                        provider="anthropic",
                        model=kwargs.get("model", "unknown"),
                        messages=messages,
                        response=cached,
                        system_prompt=kwargs.get("system"),
                        latency_ms=0,
                        temperature=kwargs.get("temperature"),
                    )
                    return _MockAnthropicResponse(cached)

            # perf_counter() is monotonic; wall-clock time.time() can jump
            # when the system clock is adjusted and distort the latency.
            start = time.perf_counter()
            response = await original_create(**kwargs)
            latency_ms = int((time.perf_counter() - start) * 1000)

            usage = getattr(response, "usage", None)

            self._run.record_llm_call(
                provider="anthropic",
                model=kwargs.get("model", "unknown"),
                messages=messages,
                response=self._response_text(response),
                system_prompt=kwargs.get("system"),
                input_tokens=getattr(usage, "input_tokens", None),
                output_tokens=getattr(usage, "output_tokens", None),
                latency_ms=latency_ms,
                temperature=kwargs.get("temperature"),
            )
            return response

        client.messages.create = traced_create
        return client
@@ -0,0 +1,75 @@
1
+ """LangChain instrumentation — manual.
2
+
3
+ Auto-patching LangChain runnables / chains is fragile across versions, so
4
+ TraceForge ships a *manual* helper: you call `record_chain_step` from your
5
+ LangChain callback handler (or anywhere you have a `RunContext`).
6
+
7
+ Example, inside a `BaseCallbackHandler.on_llm_end`:
8
+
9
+ from traceforge.integrations.langchain import LangChainInstrumentor
10
+
11
+ instrumentor = LangChainInstrumentor(run)
12
+ instrumentor.record_chain_step(
13
+ step_name="my_chain.llm_step",
14
+ inputs={"prompt": prompt},
15
+ outputs={"text": llm_result.generations[0][0].text},
16
+ )
17
+
18
+ No `langchain` import is required at module load — keeping this file safe to
19
+ import even when the optional `langchain` dependency is missing.
20
+ """
21
+ from typing import TYPE_CHECKING, Any, Optional
22
+
23
+ if TYPE_CHECKING:
24
+ from traceforge.tracer import RunContext
25
+
26
+
27
class LangChainInstrumentor:
    """Hand-driven bridge between LangChain code and a TraceForge run.

    Nothing is patched: callers invoke the ``record_*`` methods themselves.
    The constructor takes the same ``(run, mock_interceptor=None)`` arguments
    as the other instrumentors so they can be swapped freely.
    """

    def __init__(self, run: "RunContext", mock_interceptor=None):
        # mock_interceptor is stored but not consulted by any method here.
        self._mock = mock_interceptor
        self._run = run

    def record_chain_step(
        self,
        step_name: str,
        inputs: Any,
        outputs: Any = None,
        latency_ms: Optional[int] = None,
        error: Optional[str] = None,
    ):
        """Record one LangChain step as a tool-call span.

        Chain steps are reported through ``record_tool_call`` rather than
        ``record_llm_call`` because one step may bundle several LLM calls
        together with local logic.

        Returns:
            Whatever ``run.record_tool_call`` returns.
        """
        span_fields = {
            "tool_name": step_name,
            "tool_input": inputs,
            "tool_output": outputs,
            "latency_ms": latency_ms,
            "error": error,
        }
        return self._run.record_tool_call(**span_fields)

    def record_llm_step(
        self,
        model: str,
        messages: list[dict],
        response: str,
        provider: str = "langchain",
        **kwargs,
    ):
        """Record one underlying LLM call from inside a LangChain callback.

        Extra keyword arguments are forwarded untouched to
        ``run.record_llm_call``, whose result is returned.
        """
        record = self._run.record_llm_call
        return record(
            provider=provider,
            model=model,
            messages=messages,
            response=response,
            **kwargs,
        )
@@ -0,0 +1,79 @@
1
+ """OpenAI AsyncOpenAI instrumentor."""
2
+ import time
3
+ from typing import TYPE_CHECKING, Optional
4
+
5
+ if TYPE_CHECKING:
6
+ from traceforge.tracer import RunContext
7
+
8
+
9
+ class _MockOpenAIResponse:
10
+ def __init__(self, text: str):
11
+ self.choices = [
12
+ type("Choice", (), {
13
+ "message": type("Message", (), {"content": text, "role": "assistant"})(),
14
+ "finish_reason": "stop",
15
+ "index": 0,
16
+ })()
17
+ ]
18
+ self.usage = type("Usage", (), {
19
+ "prompt_tokens": 0,
20
+ "completion_tokens": 0,
21
+ "total_tokens": 0,
22
+ })()
23
+
24
+
25
class OpenAIInstrumentor:
    """Wraps `client.chat.completions.create` on an OpenAI async client."""

    def __init__(self, run: "RunContext", mock_interceptor=None):
        """Bind the instrumentor to a run.

        Args:
            run: Object whose ``record_llm_call(**fields)`` receives one call
                per traced request.
            mock_interceptor: Optional object with ``get(messages)``. When it
                returns a non-None value for a request, that value is used as
                the response text and the real API is not called.
        """
        self._run = run
        self._mock = mock_interceptor

    def instrument(self, client):
        """Replace ``client.chat.completions.create`` with a tracing wrapper.

        The wrapper accepts keyword arguments only, forwards them unchanged
        to the original coroutine, and reports each call through
        ``run.record_llm_call``.

        Returns:
            The same *client* object, patched in place.
        """
        original_create = client.chat.completions.create

        async def traced_create(**kwargs):
            messages = kwargs.get("messages", [])

            if self._mock is not None:
                cached = self._mock.get(messages)
                if cached is not None:
                    # Replay path: no network call, so latency is recorded
                    # as 0 and no token counts are reported.
                    self._run.record_llm_call(
                        provider="openai",
                        model=kwargs.get("model", "unknown"),
                        messages=messages,
                        response=cached,
                        latency_ms=0,
                        temperature=kwargs.get("temperature"),
                    )
                    return _MockOpenAIResponse(cached)

            # perf_counter() is monotonic; wall-clock time.time() can jump
            # when the system clock is adjusted and distort the latency.
            start = time.perf_counter()
            response = await original_create(**kwargs)
            latency_ms = int((time.perf_counter() - start) * 1000)

            response_text: Optional[str] = None
            try:
                response_text = response.choices[0].message.content
            except Exception:
                # Tracing is best-effort: an unexpected response shape must
                # never break the caller's successful API call.
                response_text = str(response)

            usage = getattr(response, "usage", None)

            self._run.record_llm_call(
                provider="openai",
                model=kwargs.get("model", "unknown"),
                messages=messages,
                response=response_text,
                input_tokens=getattr(usage, "prompt_tokens", None),
                output_tokens=getattr(usage, "completion_tokens", None),
                latency_ms=latency_ms,
                temperature=kwargs.get("temperature"),
            )
            return response

        client.chat.completions.create = traced_create
        return client
traceforge/naming.py ADDED
@@ -0,0 +1,19 @@
1
+ import random
2
+
3
# Word pools for human-friendly run names. Order is significant only in that
# a seeded ``random`` picks by index.
ADJECTIVES = [
    "brave", "stoic", "amber", "swift",
    "calm", "bold", "keen", "quiet",
    "sharp", "noble", "clear", "crisp",
    "firm", "warm", "vast", "deep",
    "light", "dark", "soft", "hard",
    "bright", "cool", "cold", "wise",
    "true", "pure", "free", "safe",
]

NOUNS = [
    # animals and birds
    "salmon", "crane", "wolf", "fox", "bear",
    "hawk", "owl", "raven", "tiger", "lion",
    "whale", "seal", "deer", "elk", "eagle",
    "heron", "finch", "robin", "wren", "swift",
    # trees
    "cedar", "maple", "birch", "pine",
    "oak", "ash", "elm",
]
16
+
17
+
18
def generate_run_name() -> str:
    """Return a random ``adjective-noun`` name built from the word pools."""
    # Draw the adjective first, then the noun, so a seeded ``random`` yields
    # the same sequence of names as before.
    adjective = random.choice(ADJECTIVES)
    noun = random.choice(NOUNS)
    return "-".join((adjective, noun))
traceforge/pricing.py ADDED
@@ -0,0 +1,81 @@
1
+ """Token pricing table for cost estimation.
2
+
3
+ Prices are USD per 1M tokens. Vendors change pricing every few months, so
4
+ the table here is *best effort* — override per-Tracer for production use:
5
+
6
+ from traceforge.pricing import ModelPrice
7
+ tracer = Tracer(pricing={"my-model": ModelPrice(input_per_million=2.0,
8
+ output_per_million=8.0)})
9
+
10
+ Unknown models cost 0.0 (with a one-shot warning), so cost never blocks a
11
+ trace from being saved.
12
+ """
13
+ from __future__ import annotations
14
+
15
+ import warnings
16
+ from dataclasses import dataclass
17
+ from typing import Optional
18
+
19
+
20
@dataclass(frozen=True)
class ModelPrice:
    """Immutable price entry for one model, in USD per one million tokens."""

    # USD charged per 1M input tokens.
    input_per_million: float
    # USD charged per 1M output tokens.
    output_per_million: float
24
+
25
+
26
# Best-effort published list prices as of mid-2026. Override per-Tracer for
# real production accounting.
DEFAULT_PRICING: dict[str, ModelPrice] = {
    # Anthropic
    "claude-opus-4-7": ModelPrice(input_per_million=15.00, output_per_million=75.00),
    "claude-opus-4-6": ModelPrice(input_per_million=15.00, output_per_million=75.00),
    "claude-sonnet-4-6": ModelPrice(input_per_million=3.00, output_per_million=15.00),
    "claude-sonnet-4-5": ModelPrice(input_per_million=3.00, output_per_million=15.00),
    "claude-haiku-4-5": ModelPrice(input_per_million=1.00, output_per_million=5.00),
    # OpenAI
    "gpt-4o": ModelPrice(input_per_million=2.50, output_per_million=10.00),
    "gpt-4o-mini": ModelPrice(input_per_million=0.15, output_per_million=0.60),
    "gpt-4-turbo": ModelPrice(input_per_million=10.00, output_per_million=30.00),
    "o1": ModelPrice(input_per_million=15.00, output_per_million=60.00),
    "o1-mini": ModelPrice(input_per_million=3.00, output_per_million=12.00),
    # Local / free
    "ollama": ModelPrice(input_per_million=0.0, output_per_million=0.0),
    "local": ModelPrice(input_per_million=0.0, output_per_million=0.0),
}

# Model names that have already triggered the "no pricing" warning, so each
# unknown model warns only once per process.
_WARNED_MODELS: set[str] = set()
47
+
48
+
49
+ def _lookup(model: str, table: dict[str, ModelPrice]) -> Optional[ModelPrice]:
50
+ if model in table:
51
+ return table[model]
52
+ # Prefix match: "claude-haiku-4-5-20251001" → "claude-haiku-4-5"
53
+ # Longest matching prefix wins so "claude-opus-4-7" beats "claude-opus".
54
+ candidates = [k for k in table if model.startswith(k)]
55
+ if candidates:
56
+ return table[max(candidates, key=len)]
57
+ return None
58
+
59
+
60
def estimate_cost(
    model: str,
    input_tokens: Optional[int],
    output_tokens: Optional[int],
    pricing: Optional[dict[str, ModelPrice]] = None,
) -> float:
    """Return USD cost for a single LLM call. Returns 0.0 if model unknown.

    Args:
        model: Model name, resolved against the table by ``_lookup``.
        input_tokens: Input token count; None counts as 0.
        output_tokens: Output token count; None counts as 0.
        pricing: Price table to use instead of ``DEFAULT_PRICING``.

    An unknown model emits one warning the first time it is seen and then
    silently costs 0.0 on later calls.
    """
    table = DEFAULT_PRICING if pricing is None else pricing
    price = _lookup(model, table)

    if price is not None:
        input_cost = (input_tokens or 0) / 1_000_000 * price.input_per_million
        output_cost = (output_tokens or 0) / 1_000_000 * price.output_per_million
        return input_cost + output_cost

    if model not in _WARNED_MODELS:
        _WARNED_MODELS.add(model)
        # stacklevel=2 attributes the warning to the caller of estimate_cost.
        warnings.warn(
            f"TraceForge: no pricing for model {model!r}; cost will be 0. "
            "Pass `pricing=` to `Tracer(...)` to override.",
            stacklevel=2,
        )
    return 0.0