PyPI - traceforge-llm - Versions diffs - 0.2.0__py3-none-any.whl - Mend

traceforge-llm 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

traceforge/__init__.py +19 -0
traceforge/cli.py +139 -0
traceforge/integrations/__init__.py +0 -0
traceforge/integrations/anthropic.py +79 -0
traceforge/integrations/langchain.py +75 -0
traceforge/integrations/openai.py +79 -0
traceforge/naming.py +19 -0
traceforge/pricing.py +81 -0
traceforge/pytest_plugin.py +206 -0
traceforge/replay.py +118 -0
traceforge/report/__init__.py +0 -0
traceforge/report/html_report.py +218 -0
traceforge/report/jsonl.py +26 -0
traceforge/report/terminal.py +83 -0
traceforge/serialiser.py +105 -0
traceforge/span.py +62 -0
traceforge/storage/__init__.py +3 -0
traceforge/storage/file_store.py +80 -0
traceforge/trace.py +94 -0
traceforge/tracer.py +270 -0
traceforge_llm-0.2.0.dist-info/METADATA +564 -0
traceforge_llm-0.2.0.dist-info/RECORD +26 -0
traceforge_llm-0.2.0.dist-info/WHEEL +5 -0
traceforge_llm-0.2.0.dist-info/entry_points.txt +5 -0
traceforge_llm-0.2.0.dist-info/licenses/LICENSE +201 -0
traceforge_llm-0.2.0.dist-info/top_level.txt +1 -0

traceforge/__init__.py ADDED Viewed

@@ -0,0 +1,19 @@
+"""TraceForge — agent runtime tracing and replay."""
+from traceforge.tracer import Tracer, RunContext, __version__
+from traceforge.serialiser import exclude, TraceSerialiseError
+from traceforge.span import Span, SpanType, LLMCallData, ToolCallData
+from traceforge.trace import Trace, TraceManifest
+# Public package API: every name listed here is re-exported by the imports
+# above, so `from traceforge import *` exposes exactly this set.
+__all__ = [
+    "Tracer",
+    "RunContext",
+    "exclude",
+    "TraceSerialiseError",
+    "Span",
+    "SpanType",
+    "LLMCallData",
+    "ToolCallData",
+    "Trace",
+    "TraceManifest",
+    "__version__",
+]

traceforge/cli.py ADDED Viewed

@@ -0,0 +1,139 @@
+"""TraceForge CLI: init, list, open, show."""
+from pathlib import Path
+import click
+# Root command group; the subcommands below register on it with
+# `@cli.command(...)`. The version option is configured with the
+# "traceforge-llm" distribution name and the "traceforge" program name.
+@click.group()
+@click.version_option(package_name="traceforge-llm", prog_name="traceforge")
+def cli():
+    """TraceForge — agent runtime tracing and replay."""
+@cli.command()
+def init():
+    """Scaffold traceforge.yaml, agent.py example, .gitignore entry."""
+    Path("traceforge.yaml").write_text(
+        "auto_save: true\n"
+        "store_dir: .traceforge/runs\n"
+        "slim: false\n"
+    )
+    Path("agent.py").write_text(
+        '"""TraceForge example agent.\nRun: python agent.py\n"""\n'
+        "import asyncio\n"
+        "from anthropic import AsyncAnthropic\n"
+        "from traceforge import Tracer\n"
+        "from traceforge.integrations.anthropic import AnthropicInstrumentor\n"
+        "\n"
+        "tracer = Tracer()\n"
+        "\n"
+        "\n"
+        "async def main():\n"
+        "    async with tracer.run() as run:\n"
+        "        client = AnthropicInstrumentor(run).instrument(AsyncAnthropic())\n"
+        "        response = await client.messages.create(\n"
+        '            model="claude-haiku-4-5-20251001",\n'
+        "            max_tokens=256,\n"
+        '            system="You are a helpful assistant.",\n'
+        '            messages=[{"role": "user", "content": "What is 2 + 2?"}],\n'
+        "        )\n"
+        "        print(response.content[0].text)\n"
+        "\n"
+        "    trace = run.trace\n"
+        "    trace.print_summary()\n"
+        '    print(f"\\nReport saved: .traceforge/runs/<run-id>-<run-name>/report.html")\n'
+        "\n"
+        "\n"
+        'if __name__ == "__main__":\n'
+        "    asyncio.run(main())\n"
+    )
+    gitignore = Path(".gitignore")
+    existing = gitignore.read_text() if gitignore.exists() else ""
+    if ".traceforge/" not in existing:
+        with gitignore.open("a") as f:
+            f.write("\n.traceforge/\n")
+    click.echo("Created traceforge.yaml")
+    click.echo("Created agent.py (example)")
+    click.echo("Updated .gitignore")
+    click.echo("\nNext: python agent.py")
+@cli.command(name="list")
+def list_runs():
+    """List all local traces."""
+    from rich.console import Console
+    from rich.table import Table
+    from rich import box
+    from traceforge.storage.file_store import list_traces
+    console = Console()
+    runs = list_traces()
+    if not runs:
+        console.print("[dim]No traces found. Run your agent first.[/dim]")
+        return
+    table = Table(box=box.SIMPLE, show_header=True, header_style="bold")
+    table.add_column("Run name")
+    table.add_column("Run ID")
+    table.add_column("Started")
+    table.add_column("Duration")
+    table.add_column("Spans", justify="right")
+    table.add_column("Errors", justify="right")
+    for run in runs:
+        table.add_row(
+            f"[cyan]{run['run_name']}[/cyan]",
+            f"[dim]{run['run_id'][:8]}...[/dim]",
+            str(run.get("started_at", ""))[:19],
+            f"{run.get('duration_ms', '?')}ms",
+            str(run.get("total_spans", "?")),
+            f"[red]{run.get('errors', 0)}[/red]" if run.get("errors") else "0",
+        )
+    console.print(table)
+@cli.command(name="open")
+@click.argument("run_id_or_name")
+def open_cmd(run_id_or_name: str):
+    """Open a trace HTML report in the browser."""
+    import webbrowser
+    from traceforge.storage.file_store import STORE_DIR
+    if not STORE_DIR.exists():
+        click.echo(f"Trace store {STORE_DIR} does not exist")
+        raise SystemExit(1)
+    matches = [
+        d for d in STORE_DIR.iterdir()
+        if d.is_dir() and run_id_or_name in d.name
+    ]
+    if not matches:
+        click.echo(f"No trace found matching {run_id_or_name!r}")
+        raise SystemExit(1)
+    report = matches[0] / "report.html"
+    if not report.exists():
+        click.echo(f"Report HTML not found for {matches[0].name}")
+        raise SystemExit(1)
+    webbrowser.open(f"file://{report.resolve()}")
+    click.echo(f"Opening {report}")
+@cli.command()
+@click.argument("run_id_or_name")
+def show(run_id_or_name: str):
+    """Print a trace summary to the terminal."""
+    from traceforge.storage.file_store import load_trace
+    trace = load_trace(run_id_or_name)
+    trace.print_summary()
+# Run the command group when this module is executed as a script.
+if __name__ == "__main__":
+    cli()

traceforge/integrations/__init__.py ADDED Viewed

File without changes

traceforge/integrations/anthropic.py ADDED Viewed

@@ -0,0 +1,79 @@
+"""Anthropic AsyncAnthropic instrumentor."""
+import time
+from typing import TYPE_CHECKING, Optional
+if TYPE_CHECKING:
+    from traceforge.tracer import RunContext
+class _MockAnthropicResponse:
+    """Minimal mock Anthropic response for replay mode."""
+    def __init__(self, text: str):
+        self.content = [type("Block", (), {"text": text, "type": "text"})()]
+        self.usage = type("Usage", (), {"input_tokens": 0, "output_tokens": 0})()
+        self.stop_reason = "end_turn"
+class AnthropicInstrumentor:
+    """Wraps `client.messages.create` on an Anthropic async client.
+    Usage:
+        async with tracer.run() as run:
+            instrumentor = AnthropicInstrumentor(run)
+            client = instrumentor.instrument(AsyncAnthropic())
+            # use client normally — every call is traced
+    """
+    def __init__(self, run: "RunContext", mock_interceptor=None):
+        self._run = run
+        self._mock = mock_interceptor
+    def instrument(self, client):
+        original_create = client.messages.create
+        async def traced_create(**kwargs):
+            messages = kwargs.get("messages", [])
+            if self._mock is not None:
+                cached = self._mock.get(messages)
+                if cached is not None:
+                    self._run.record_llm_call(
+                        provider="anthropic",
+                        model=kwargs.get("model", "unknown"),
+                        messages=messages,
+                        response=cached,
+                        system_prompt=kwargs.get("system"),
+                        latency_ms=0,
+                        temperature=kwargs.get("temperature"),
+                    )
+                    return _MockAnthropicResponse(cached)
+            start = time.time()
+            response = await original_create(**kwargs)
+            latency_ms = int((time.time() - start) * 1000)
+            response_text: Optional[str] = None
+            try:
+                response_text = response.content[0].text
+            except Exception:
+                response_text = str(response)
+            input_tokens = getattr(getattr(response, "usage", None), "input_tokens", None)
+            output_tokens = getattr(getattr(response, "usage", None), "output_tokens", None)
+            self._run.record_llm_call(
+                provider="anthropic",
+                model=kwargs.get("model", "unknown"),
+                messages=messages,
+                response=response_text,
+                system_prompt=kwargs.get("system"),
+                input_tokens=input_tokens,
+                output_tokens=output_tokens,
+                latency_ms=latency_ms,
+                temperature=kwargs.get("temperature"),
+            )
+            return response
+        client.messages.create = traced_create
+        return client

traceforge/integrations/langchain.py ADDED Viewed

@@ -0,0 +1,75 @@
+"""LangChain instrumentation — manual.
+Auto-patching LangChain runnables / chains is fragile across versions, so
+TraceForge ships a *manual* helper: you call `record_chain_step` from your
+LangChain callback handler (or anywhere you have a `RunContext`).
+Example, inside a `BaseCallbackHandler.on_llm_end`:
+    from traceforge.integrations.langchain import LangChainInstrumentor
+    instrumentor = LangChainInstrumentor(run)
+    instrumentor.record_chain_step(
+        step_name="my_chain.llm_step",
+        inputs={"prompt": prompt},
+        outputs={"text": llm_result.generations[0][0].text},
+    )
+No `langchain` import is required at module load — keeping this file safe to
+import even when the optional `langchain` dependency is missing.
+"""
+from typing import TYPE_CHECKING, Any, Optional
+if TYPE_CHECKING:
+    from traceforge.tracer import RunContext
+class LangChainInstrumentor:
+    """Manual LangChain bridge.
+    Constructor signature matches the other instrumentors so users can swap
+    them without learning a new API.
+    """
+    def __init__(self, run: "RunContext", mock_interceptor=None):
+        self._run = run
+        self._mock = mock_interceptor
+    def record_chain_step(
+        self,
+        step_name: str,
+        inputs: Any,
+        outputs: Any = None,
+        latency_ms: Optional[int] = None,
+        error: Optional[str] = None,
+    ):
+        """Record one LangChain step as a tool-call span.
+        We model chain steps as TOOL_CALL spans rather than LLM_CALL because
+        a single LangChain chain step may aggregate multiple LLM calls plus
+        local logic.
+        """
+        return self._run.record_tool_call(
+            tool_name=step_name,
+            tool_input=inputs,
+            tool_output=outputs,
+            latency_ms=latency_ms,
+            error=error,
+        )
+    def record_llm_step(
+        self,
+        model: str,
+        messages: list[dict],
+        response: str,
+        provider: str = "langchain",
+        **kwargs,
+    ):
+        """Record one underlying LLM call from inside a LangChain callback."""
+        return self._run.record_llm_call(
+            provider=provider,
+            model=model,
+            messages=messages,
+            response=response,
+            **kwargs,
+        )

traceforge/integrations/openai.py ADDED Viewed

@@ -0,0 +1,79 @@
+"""OpenAI AsyncOpenAI instrumentor."""
+import time
+from typing import TYPE_CHECKING, Optional
+if TYPE_CHECKING:
+    from traceforge.tracer import RunContext
+class _MockOpenAIResponse:
+    def __init__(self, text: str):
+        self.choices = [
+            type("Choice", (), {
+                "message": type("Message", (), {"content": text, "role": "assistant"})(),
+                "finish_reason": "stop",
+                "index": 0,
+            })()
+        ]
+        self.usage = type("Usage", (), {
+            "prompt_tokens": 0,
+            "completion_tokens": 0,
+            "total_tokens": 0,
+        })()
+class OpenAIInstrumentor:
+    """Wraps `client.chat.completions.create` on an OpenAI async client."""
+    def __init__(self, run: "RunContext", mock_interceptor=None):
+        self._run = run
+        self._mock = mock_interceptor
+    def instrument(self, client):
+        original_create = client.chat.completions.create
+        async def traced_create(**kwargs):
+            messages = kwargs.get("messages", [])
+            if self._mock is not None:
+                cached = self._mock.get(messages)
+                if cached is not None:
+                    self._run.record_llm_call(
+                        provider="openai",
+                        model=kwargs.get("model", "unknown"),
+                        messages=messages,
+                        response=cached,
+                        latency_ms=0,
+                        temperature=kwargs.get("temperature"),
+                    )
+                    return _MockOpenAIResponse(cached)
+            start = time.time()
+            response = await original_create(**kwargs)
+            latency_ms = int((time.time() - start) * 1000)
+            response_text: Optional[str] = None
+            try:
+                response_text = response.choices[0].message.content
+            except Exception:
+                response_text = str(response)
+            prompt_tokens = getattr(getattr(response, "usage", None), "prompt_tokens", None)
+            completion_tokens = getattr(
+                getattr(response, "usage", None), "completion_tokens", None
+            )
+            self._run.record_llm_call(
+                provider="openai",
+                model=kwargs.get("model", "unknown"),
+                messages=messages,
+                response=response_text,
+                input_tokens=prompt_tokens,
+                output_tokens=completion_tokens,
+                latency_ms=latency_ms,
+                temperature=kwargs.get("temperature"),
+            )
+            return response
+        client.chat.completions.create = traced_create
+        return client

traceforge/naming.py ADDED Viewed

@@ -0,0 +1,19 @@
+import random
+ADJECTIVES = [
+    "brave", "stoic", "amber", "swift", "calm", "bold", "keen",
+    "quiet", "sharp", "noble", "clear", "crisp", "firm", "warm",
+    "vast", "deep", "light", "dark", "soft", "hard", "bright",
+    "cool", "cold", "wise", "true", "pure", "free", "safe",
+]
+NOUNS = [
+    "salmon", "crane", "wolf", "fox", "bear", "hawk", "owl",
+    "raven", "tiger", "lion", "whale", "seal", "deer", "elk",
+    "eagle", "heron", "finch", "robin", "wren", "swift",
+    "cedar", "maple", "birch", "pine", "oak", "ash", "elm",
+]
+def generate_run_name() -> str:
+    return f"{random.choice(ADJECTIVES)}-{random.choice(NOUNS)}"

traceforge/pricing.py ADDED Viewed

@@ -0,0 +1,81 @@
+"""Token pricing table for cost estimation.
+Prices are USD per 1M tokens. Vendors change pricing every few months, so
+the table here is *best effort* — override per-Tracer for production use:
+    from traceforge.pricing import ModelPrice
+    tracer = Tracer(pricing={"my-model": ModelPrice(input_per_million=2.0,
+                                                    output_per_million=8.0)})
+Unknown models cost 0.0 (with a one-shot warning), so cost never blocks a
+trace from being saved.
+"""
+from __future__ import annotations
+import warnings
+from dataclasses import dataclass
+from typing import Optional
+@dataclass(frozen=True)
+class ModelPrice:
+    """Immutable price entry for one model, in USD per 1M tokens."""
+    # Price of 1,000,000 input tokens, in USD.
+    input_per_million: float
+    # Price of 1,000,000 output tokens, in USD.
+    output_per_million: float
+# Best-effort published list prices as of mid-2026. Override per-Tracer for
+# real production accounting.
+# Each entry is ModelPrice(input, output) in USD per 1M tokens. Keys also act
+# as prefixes when a model name has no exact entry (see `_lookup`).
+DEFAULT_PRICING: dict[str, ModelPrice] = {
+    # Anthropic
+    "claude-opus-4-7":           ModelPrice(15.00, 75.00),
+    "claude-opus-4-6":           ModelPrice(15.00, 75.00),
+    "claude-sonnet-4-6":         ModelPrice(3.00, 15.00),
+    "claude-sonnet-4-5":         ModelPrice(3.00, 15.00),
+    "claude-haiku-4-5":          ModelPrice(1.00, 5.00),
+    # OpenAI
+    "gpt-4o":                    ModelPrice(2.50, 10.00),
+    "gpt-4o-mini":               ModelPrice(0.15, 0.60),
+    "gpt-4-turbo":               ModelPrice(10.00, 30.00),
+    "o1":                        ModelPrice(15.00, 60.00),
+    "o1-mini":                   ModelPrice(3.00, 12.00),
+    # Local / free
+    "ollama":                    ModelPrice(0.0, 0.0),
+    "local":                     ModelPrice(0.0, 0.0),
+}
+# Model names `estimate_cost` has already warned about, so each unknown model
+# triggers the "no pricing" warning only once per process.
+_WARNED_MODELS: set[str] = set()
+def _lookup(model: str, table: dict[str, ModelPrice]) -> Optional[ModelPrice]:
+    if model in table:
+        return table[model]
+    # Prefix match: "claude-haiku-4-5-20251001" → "claude-haiku-4-5"
+    # Longest matching prefix wins so "claude-opus-4-7" beats "claude-opus".
+    candidates = [k for k in table if model.startswith(k)]
+    if candidates:
+        return table[max(candidates, key=len)]
+    return None
+def estimate_cost(
+    model: str,
+    input_tokens: Optional[int],
+    output_tokens: Optional[int],
+    pricing: Optional[dict[str, ModelPrice]] = None,
+) -> float:
+    """Return USD cost for a single LLM call. Returns 0.0 if model unknown."""
+    table = pricing if pricing is not None else DEFAULT_PRICING
+    price = _lookup(model, table)
+    if price is None:
+        if model not in _WARNED_MODELS:
+            _WARNED_MODELS.add(model)
+            warnings.warn(
+                f"TraceForge: no pricing for model {model!r}; cost will be 0. "
+                "Pass `pricing=` to `Tracer(...)` to override.",
+                stacklevel=2,
+            )
+        return 0.0
+    return (
+        (input_tokens or 0) / 1_000_000 * price.input_per_million
+        + (output_tokens or 0) / 1_000_000 * price.output_per_million
+    )