PyPI - trace-ai-python - Versions diffs - 0.1.0__py3-none-any.whl - Mend

trace-ai-python 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

trace_ai_python-0.1.0.dist-info/METADATA +144 -0
trace_ai_python-0.1.0.dist-info/RECORD +7 -0
trace_ai_python-0.1.0.dist-info/WHEEL +4 -0
traceai/__init__.py +6 -0
traceai/_cost.py +36 -0
traceai/langchain.py +284 -0
traceai/tracer.py +116 -0

trace_ai_python-0.1.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,144 @@
+Metadata-Version: 2.4
+Name: trace-ai-python
+Version: 0.1.0
+Summary: Observability for LLM workflows — tokens, latency, cost, and anomaly detection
+Project-URL: Homepage, https://use-trace-ai.vercel.app
+Project-URL: Repository, https://github.com/joshuakim314/trace
+Author-email: "trace.ai" <jjkk@umich.edu>
+License: MIT
+Keywords: anthropic,langchain,llm,observability,openai,tracing
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Requires-Python: >=3.9
+Provides-Extra: langchain
+Requires-Dist: langchain-core>=0.1.0; extra == 'langchain'
+Description-Content-Type: text/markdown
+# traceai
+Python SDK for [trace.ai](https://use-trace-ai.vercel.app) — observability for LLM workflows.
+Automatically captures tokens, latency, cost, and anomaly scores for every LLM call.
+## Installation
+```bash
+pip install trace-ai-python                # core — manual ingest()
+pip install "trace-ai-python[langchain]"   # + LangChain callback handler (Anthropic, OpenAI, etc.)
+```
+## LangChain (recommended)
+Attach `TraceAICallbackHandler` to any LangChain LLM — every call is traced automatically:
+```python
+from traceai import Tracer
+from traceai.langchain import TraceAICallbackHandler
+from langchain_anthropic import ChatAnthropic
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.output_parsers import StrOutputParser
+tracer  = Tracer(api_key="trace_...")
+handler = TraceAICallbackHandler(tracer)
+llm   = ChatAnthropic(model="claude-haiku-4-5-20251001", callbacks=[handler])
+chain = ChatPromptTemplate.from_template("Summarize: {text}") | llm | StrOutputParser()
+chain.invoke({"text": "..."})
+# → shows up in your dashboard automatically
+```
+Works with any LangChain-compatible provider: Anthropic, OpenAI, Gemini, Cohere, and more.
+## Step naming
+Pass `step_name` in config metadata to label steps in the dashboard:
+```python
+chain.invoke(
+    {"text": "..."},
+    config={"metadata": {"step_name": "summarize"}}
+)
+```
+Without a `step_name`, the step is labeled with the serialized class name of the LLM (e.g. `ChatAnthropic`), falling back to `llm_call` when no name is available.
+## Multi-step pipelines
+Steps inside a single `chain.invoke()` are automatically grouped into one run in the dashboard. Use `RunnableLambda` to wrap multi-step workflows:
+```python
+from langchain_core.runnables import RunnableLambda
+from langchain_core.messages import SystemMessage, HumanMessage
+def pipeline(inputs, config):
+    intent = llm.invoke(
+        [SystemMessage(content="Classify as: billing, technical, general."),
+         HumanMessage(content=inputs["message"])],
+        config={**config, "metadata": {"step_name": "classify"}},
+    )
+    reply = llm.invoke(
+        [SystemMessage(content="You are a support agent. Be concise."),
+         HumanMessage(content=inputs["message"])],
+        config={**config, "metadata": {"step_name": "generate"}},
+    )
+    return reply.content
+chain = RunnableLambda(pipeline)
+chain.invoke({"message": "..."}, config={"callbacks": [handler]})
+# → both steps appear under one run_id in the dashboard
+```
+## Manual ingest
+For models outside LangChain, or to record any custom step:
+```python
+import time, json
+start    = time.monotonic()
+response = my_model.generate(prompt)
+latency  = int((time.monotonic() - start) * 1000)
+tracer.ingest(
+    run_id        = "my-run-id",
+    step_name     = "generate",
+    step_index    = 0,
+    model         = "my-model",
+    prompt        = json.dumps({"messages": [{"role": "user", "content": prompt}]}),
+    input_tokens  = response.input_tokens,
+    output_tokens = response.output_tokens,
+    total_tokens  = response.total_tokens,
+    latency_ms    = latency,
+    cost          = 0.001,
+    status_success= True,
+    output_code   = response.text,
+)
+```
+`ingest()` fires in a background thread and never blocks your application.
+## Configuration
+```python
+import os
+from traceai import Tracer
+tracer = Tracer(
+    api_key = os.environ["TRACE_API_KEY"],
+    api_url = os.environ.get("TRACE_API_URL", "https://trace-production-940c.up.railway.app"),
+)
+```
+For local dev, add to `.env`:
+```
+TRACE_API_KEY=trace_...
+TRACE_API_URL=http://localhost:8000
+```
+## Links
+- [Dashboard](https://use-trace-ai.vercel.app)
+- [Documentation](https://use-trace-ai.vercel.app/docs)
+- [TypeScript SDK](../sdk/)

trace_ai_python-0.1.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,7 @@
+traceai/__init__.py,sha256=-OcE-D7c0d-rR8SeNydhWFjZKGV_zuD50VENX2VkZnw,146
+traceai/_cost.py,sha256=bNIKgx7Y1CUe5NJxJxZkgdHxZgv_28bKxkBzX2rEv2g,1555
+traceai/langchain.py,sha256=JKZQC-CL-RHLxIB0rADYd3xSa3qcZrG93VUqIGX30NU,10234
+traceai/tracer.py,sha256=qv6JtLmkvLegaFvBiuvDjNbEb9HgH7CjotyB-9hwaRc,4187
+trace_ai_python-0.1.0.dist-info/METADATA,sha256=jGyh_2TMNKh4AjBegcDZVddRaNzc9mIcTTeiuFVDeqA,4404
+trace_ai_python-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
+trace_ai_python-0.1.0.dist-info/RECORD,,

trace_ai_python-0.1.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: hatchling 1.30.1
+Root-Is-Purelib: true
+Tag: py3-none-any

traceai/__init__.py ADDED Viewed

@@ -0,0 +1,6 @@
+"""traceai — LLM observability for Python."""
+from .tracer import RunContext, Tracer
+__all__ = ["Tracer", "RunContext"]
+__version__ = "0.1.0"

traceai/_cost.py ADDED Viewed

@@ -0,0 +1,36 @@
+"""Pricing table for cost calculation — mirrors sdk/src/cost.ts."""
+from __future__ import annotations
+_PRICING: dict[str, tuple[float, float]] = {
+    # Keys are model identifiers; get_cost() looks them up by exact match, so
+    # an identifier that is not listed here is priced at 0.0.
+    # Values are (input_per_1m_usd, output_per_1m_usd)
+    # Anthropic
+    "claude-opus-4-8":                  (15.0,  75.0),
+    "claude-opus-4-8-20251101":         (15.0,  75.0),
+    "claude-sonnet-4-6":                (3.0,   15.0),
+    "claude-sonnet-4-6-20251001":       (3.0,   15.0),
+    "claude-haiku-4-5":                 (0.8,   4.0),
+    "claude-haiku-4-5-20251001":        (0.8,   4.0),
+    "claude-3-5-sonnet-20241022":       (3.0,   15.0),
+    "claude-3-5-haiku-20241022":        (0.8,   4.0),
+    "claude-3-opus-20240229":           (15.0,  75.0),
+    # OpenAI
+    "gpt-4o":                           (2.5,   10.0),
+    "gpt-4o-2024-11-20":                (2.5,   10.0),
+    "gpt-4o-mini":                      (0.15,  0.6),
+    "gpt-4o-mini-2024-07-18":           (0.15,  0.6),
+    "gpt-4-turbo":                      (10.0,  30.0),
+    "gpt-4":                            (30.0,  60.0),
+    "gpt-3.5-turbo":                    (0.5,   1.5),
+    "o1":                               (15.0,  60.0),
+    "o1-mini":                          (3.0,   12.0),
+    "o3-mini":                          (1.1,   4.4),
+}
+def get_cost(model: str, input_tokens: int, output_tokens: int) -> float:
+    pricing = _PRICING.get(model)
+    if not pricing:
+        return 0.0
+    input_per_1m, output_per_1m = pricing
+    return (input_tokens / 1_000_000) * input_per_1m + (output_tokens / 1_000_000) * output_per_1m

traceai/langchain.py ADDED Viewed

@@ -0,0 +1,284 @@
+"""LangChain callback handler for trace.ai.
+Attach to any LangChain LLM or chain — every LLM call is automatically traced:
+    from traceai import Tracer
+    from traceai.langchain import TraceAICallbackHandler
+    tracer  = Tracer(api_key="trace_...")
+    handler = TraceAICallbackHandler(tracer)
+    llm   = ChatAnthropic(model="claude-haiku-4-5-20251001", callbacks=[handler])
+    chain = prompt | llm | StrOutputParser()
+    chain.invoke({"topic": "AI safety"})
+Run grouping
+------------
+LangChain passes a `run_id` (UUID) to each LLM call and a `parent_run_id` for
+the chain that contains it. We use the immediate parent as the trace.ai run_id so
+all LLM calls inside a single chain.invoke() share one run in the dashboard.
+Step naming
+-----------
+Priority order:
+  1. metadata["step_name"] passed in invoke() / run_config
+  2. serialized["name"]  (e.g. "ChatAnthropic", "ChatOpenAI")
+  3. "llm_call"
+Thread safety
+-------------
+The handler can be shared across concurrent requests (threaded Flask, HTTPServer,
+etc.). All per-call state is protected by a single RLock.
+"""
+from __future__ import annotations
+import json
+import threading
+import time
+from typing import Any
+from uuid import UUID
+try:
+    from langchain_core.callbacks import BaseCallbackHandler
+    from langchain_core.messages import BaseMessage
+    from langchain_core.outputs import LLMResult
+except ImportError as e:
+    raise ImportError(
+        "langchain-core is required: pip install traceai[langchain]"
+    ) from e
+from ._cost import get_cost
+from .tracer import Tracer
+def _extract_tokens_anthropic(llm_output: dict) -> tuple[int, int]:
+    usage = llm_output.get("usage", {})
+    inp = usage.get("input_tokens") or usage.get("prompt_tokens") or 0
+    out = usage.get("output_tokens") or usage.get("completion_tokens") or 0
+    return int(inp), int(out)
+def _extract_tokens_openai(llm_output: dict) -> tuple[int, int]:
+    usage = llm_output.get("token_usage", {})
+    inp = usage.get("prompt_tokens") or 0
+    out = usage.get("completion_tokens") or 0
+    return int(inp), int(out)
+def _extract_tokens(llm_output: dict) -> tuple[int, int]:
+    inp, out = _extract_tokens_anthropic(llm_output)
+    if inp or out:
+        return inp, out
+    return _extract_tokens_openai(llm_output)
+def _extract_model(llm_output: dict, serialized: dict) -> str:
+    return (
+        llm_output.get("model")
+        or llm_output.get("model_name")
+        or llm_output.get("model_id")
+        or (serialized.get("kwargs") or {}).get("model")
+        or (serialized.get("kwargs") or {}).get("model_name")
+        or serialized.get("name", "unknown")
+    )
+def _serialize_messages(messages: list[list[BaseMessage]]) -> str:
+    out = []
+    for batch in messages:
+        for msg in batch:
+            role = getattr(msg, "type", "unknown")
+            role = {"human": "user", "ai": "assistant", "system": "system"}.get(role, role)
+            content = msg.content if isinstance(msg.content, str) else json.dumps(msg.content)
+            out.append({"role": role, "content": content})
+    return json.dumps({"messages": out})
+def _extract_output(response: LLMResult) -> str | None:
+    try:
+        gen = response.generations[0][0]
+        if hasattr(gen, "message"):
+            content = gen.message.content
+            if isinstance(content, str):
+                return content
+            if isinstance(content, list):
+                return " ".join(
+                    b.get("text", "") for b in content if isinstance(b, dict) and b.get("type") == "text"
+                )
+        return getattr(gen, "text", None)
+    except (IndexError, AttributeError):
+        return None
+class TraceAICallbackHandler(BaseCallbackHandler):
+    """Attach to any LangChain LLM or chain to automatically trace every call."""
+    def __init__(self, tracer: Tracer) -> None:
+        super().__init__()
+        self.tracer = tracer
+        self._lock = threading.RLock()
+        # run_id (LangChain UUID) → wall-clock start time
+        self._start_times: dict[UUID, float] = {}
+        # run_id → serialized dict (for model name extraction in on_llm_end)
+        self._serialized: dict[UUID, dict] = {}
+        # run_id → prompt string
+        self._prompts: dict[UUID, str] = {}
+        # run_id → step_name
+        self._step_names: dict[UUID, str] = {}
+        # trace_run_id (str) → step counter
+        self._step_counters: dict[str, int] = {}
+    # ── Helpers ───────────────────────────────────────────────────────────────
+    def _trace_run_id(self, lc_run_id: UUID, parent_run_id: UUID | None) -> str:
+        """Map LangChain's run hierarchy to a trace.ai run_id.
+        The immediate parent (chain's run_id) becomes the trace.ai run_id so
+        all LLM calls inside one chain.invoke() share a single run.
+        If there's no parent (bare LLM call), the LLM's own run_id is used.
+        """
+        return str(parent_run_id) if parent_run_id else str(lc_run_id)
+    def _next_step_index(self, trace_run_id: str) -> int:
+        with self._lock:
+            idx = self._step_counters.get(trace_run_id, 0)
+            self._step_counters[trace_run_id] = idx + 1
+        return idx
+    def _step_name(self, run_id: UUID, serialized: dict, metadata: dict | None) -> str:
+        if metadata and metadata.get("step_name"):
+            return str(metadata["step_name"])
+        return serialized.get("name") or "llm_call"
+    def _pop_start(self, run_id: UUID) -> float | None:
+        with self._lock:
+            return self._start_times.pop(run_id, None)
+    def _pop_state(self, run_id: UUID) -> tuple[dict, str, str]:
+        with self._lock:
+            serialized = self._serialized.pop(run_id, {})
+            prompt     = self._prompts.pop(run_id, "")
+            step_name  = self._step_names.pop(run_id, "llm_call")
+        return serialized, prompt, step_name
+    # ── LangChain callbacks ───────────────────────────────────────────────────
+    def on_chat_model_start(
+        self,
+        serialized: dict[str, Any],
+        messages: list[list[BaseMessage]],
+        *,
+        run_id: UUID,
+        parent_run_id: UUID | None = None,
+        metadata: dict[str, Any] | None = None,
+        **kwargs: Any,
+    ) -> None:
+        with self._lock:
+            self._start_times[run_id] = time.monotonic()
+            self._serialized[run_id]  = serialized
+            self._prompts[run_id]     = _serialize_messages(messages)
+            self._step_names[run_id]  = self._step_name(run_id, serialized, metadata)
+    def on_llm_start(
+        self,
+        serialized: dict[str, Any],
+        prompts: list[str],
+        *,
+        run_id: UUID,
+        parent_run_id: UUID | None = None,
+        metadata: dict[str, Any] | None = None,
+        **kwargs: Any,
+    ) -> None:
+        with self._lock:
+            self._start_times[run_id] = time.monotonic()
+            self._serialized[run_id]  = serialized
+            self._prompts[run_id]     = json.dumps({"messages": [{"role": "user", "content": p} for p in prompts]})
+            self._step_names[run_id]  = self._step_name(run_id, serialized, metadata)
+    def on_llm_end(
+        self,
+        response: LLMResult,
+        *,
+        run_id: UUID,
+        parent_run_id: UUID | None = None,
+        **kwargs: Any,
+    ) -> None:
+        start = self._pop_start(run_id)
+        latency_ms = int((time.monotonic() - start) * 1000) if start is not None else 0
+        serialized, prompt, step_name = self._pop_state(run_id)
+        llm_output            = response.llm_output or {}
+        input_tok, output_tok = _extract_tokens(llm_output)
+        total_tok  = input_tok + output_tok
+        model      = _extract_model(llm_output, serialized)
+        cost       = get_cost(model, input_tok, output_tok)
+        output     = _extract_output(response)
+        trace_run_id   = self._trace_run_id(run_id, parent_run_id)
+        step_index     = self._next_step_index(trace_run_id)
+        span_id        = str(run_id)
+        parent_span_id = str(parent_run_id) if parent_run_id and str(parent_run_id) != trace_run_id else None
+        self.tracer.ingest(
+            run_id=trace_run_id,
+            step_name=step_name,
+            step_index=step_index,
+            model=model,
+            prompt=prompt,
+            input_tokens=input_tok,
+            output_tokens=output_tok,
+            total_tokens=total_tok,
+            latency_ms=latency_ms,
+            cost=cost,
+            status_success=True,
+            output_code=output,
+            span_id=span_id,
+            parent_span_id=parent_span_id,
+        )
+    def on_llm_error(
+        self,
+        error: BaseException,
+        *,
+        run_id: UUID,
+        parent_run_id: UUID | None = None,
+        **kwargs: Any,
+    ) -> None:
+        start = self._pop_start(run_id)
+        latency_ms = int((time.monotonic() - start) * 1000) if start is not None else 0
+        serialized, prompt, step_name = self._pop_state(run_id)
+        model = _extract_model({}, serialized)
+        trace_run_id   = self._trace_run_id(run_id, parent_run_id)
+        step_index     = self._next_step_index(trace_run_id)
+        span_id        = str(run_id)
+        parent_span_id = str(parent_run_id) if parent_run_id and str(parent_run_id) != trace_run_id else None
+        self.tracer.ingest(
+            run_id=trace_run_id,
+            step_name=step_name,
+            step_index=step_index,
+            model=model,
+            prompt=prompt,
+            input_tokens=0,
+            output_tokens=0,
+            total_tokens=0,
+            latency_ms=latency_ms,
+            cost=0.0,
+            status_success=False,
+            error=str(error),
+            span_id=span_id,
+            parent_span_id=parent_span_id,
+        )
+    def on_chain_end(
+        self,
+        outputs: dict[str, Any],
+        *,
+        run_id: UUID,
+        **kwargs: Any,
+    ) -> None:
+        with self._lock:
+            self._step_counters.pop(str(run_id), None)

traceai/tracer.py ADDED Viewed

@@ -0,0 +1,116 @@
+"""Core Tracer — fire-and-forget ingest + run context management."""
+from __future__ import annotations
+import json
+import threading
+import uuid as _uuid
+from contextlib import contextmanager
+from contextvars import ContextVar
+from typing import Any, Generator
+from urllib import request as _urllib_request
+_DEFAULT_URL = "https://trace-production-940c.up.railway.app"
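+# ContextVars keep the active run_id / step index isolated per execution context:
+# asyncio tasks inherit the values current when they are created, while a newly
+# started thread does not inherit them by default and sees the defaults below.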
+_active_run_id: ContextVar[str | None] = ContextVar("traceai_run_id", default=None)
+_active_step_index: ContextVar[int] = ContextVar("traceai_step_index", default=0)
+def _new_uuid() -> str:
+    return str(_uuid.uuid4())
+class Tracer:
+    """
+    trace.ai Python client.
+    Usage::
+        tracer = Tracer(api_key="trace_...")
+        # Manual ingest (any framework)
+        tracer.ingest(
+            run_id="my-run",
+            step_name="classify",
+            step_index=0,
+            model="claude-haiku-4-5-20251001",
+            prompt=json.dumps({"messages": [...]}),
+            input_tokens=12,
+            output_tokens=4,
+            total_tokens=16,
+            latency_ms=84,
+            status_success=True,
+            output_code="billing",
+        )
+        # LangChain — see traceai.langchain.TraceAICallbackHandler
+    """
+    def __init__(self, api_key: str, api_url: str = "") -> None:
+        self.api_key = api_key
+        # Empty string falls back to default so that os.environ.get("TRACE_API_URL", "")
+        # behaves the same as not passing api_url at all.
+        self.api_url = (api_url or _DEFAULT_URL).rstrip("/")
+    # ── Ingest ────────────────────────────────────────────────────────────────
+    def ingest(self, **fields: Any) -> None:
+        """Fire-and-forget POST to /ingest. Never raises — failures are silent."""
+        threading.Thread(target=self._post, args=(fields,), daemon=True).start()
+    def _post(self, payload: dict[str, Any]) -> None:
+        try:
+            data = json.dumps(payload).encode()
+            req = _urllib_request.Request(
+                f"{self.api_url}/ingest",
+                data=data,
+                headers={
+                    "Content-Type": "application/json",
+                    "Authorization": f"Bearer {self.api_key}",
+                },
+                method="POST",
+            )
+            _urllib_request.urlopen(req, timeout=10)
+        except Exception:
+            pass  # never block the application
+    # ── Run context ───────────────────────────────────────────────────────────
+    @contextmanager
+    def run(self, run_id: str | None = None) -> Generator["RunContext", None, None]:
+        """Context manager that sets a run ID for the duration of a block.
+        Use this when you're not using LangChain and want to group manual
+        ingest() calls into a single run::
+            with tracer.run() as run:
+                tracer.ingest(run_id=run.run_id, step_name="step1", ...)
+                tracer.ingest(run_id=run.run_id, step_name="step2", ...)
+        """
+        rid = run_id or _new_uuid()
+        token_id  = _active_run_id.set(rid)
+        token_idx = _active_step_index.set(0)
+        ctx = RunContext(run_id=rid)
+        try:
+            yield ctx
+        finally:
+            _active_run_id.reset(token_id)
+            _active_step_index.reset(token_idx)
+    # ── Helpers for handlers ──────────────────────────────────────────────────
+    def get_active_run_id(self) -> str | None:
+        return _active_run_id.get()
+    def next_step_index(self) -> int:
+        idx = _active_step_index.get()
+        _active_step_index.set(idx + 1)
+        return idx
+class RunContext:
+    """Returned by Tracer.run() — holds the run_id for the current block."""
+    def __init__(self, run_id: str) -> None:
+        self.run_id = run_id