trace-sdk 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,43 @@
1
+ # Python bytecode and caches
2
+ __pycache__/
3
+ **/__pycache__/
4
+ *.py[cod]
5
+ *$py.class
6
+ .pytest_cache/
7
+ .mypy_cache/
8
+ .ruff_cache/
9
+ .coverage
10
+ htmlcov/
11
+
12
+ # Virtual environments
13
+ .venv/
14
+ .venv39/
15
+ venv/
16
+ env/
17
+
18
+ # Build and packaging output
19
+ build/
20
+ dist/
21
+ *.egg-info/
22
+
23
+ # Trace local runtime data
24
+ .trace/
25
+
26
+ # TraceHub frontend dependencies and build output
27
+ tracehub/node_modules/
28
+ tracehub/dist/
29
+
30
+ # Environment files and local secrets
31
+ .env
32
+ .env.*
33
+ !.env.example
34
+ *.local
35
+ pypiToken
36
+ pypiToken.txt
37
+ .pypirc
38
+
39
+ # Editor and OS noise
40
+ .idea/
41
+ .vscode/
42
+ .DS_Store
43
+ Thumbs.db
@@ -0,0 +1,57 @@
1
+ Metadata-Version: 2.4
2
+ Name: trace-sdk
3
+ Version: 0.1.1
4
+ Summary: Local version control for AI agent behavior.
5
+ Requires-Python: >=3.9
6
+ Requires-Dist: fastapi>=0.111
7
+ Requires-Dist: pydantic>=2.7
8
+ Requires-Dist: typer>=0.12
9
+ Requires-Dist: uvicorn>=0.30
10
+ Requires-Dist: watchfiles>=0.22
11
+ Provides-Extra: dev
12
+ Requires-Dist: httpx>=0.27; extra == 'dev'
13
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
14
+ Requires-Dist: pytest>=8.2; extra == 'dev'
15
+ Description-Content-Type: text/markdown
16
+
17
+ # Trace
18
+
19
+ Trace is local version control for AI agent behavior. It captures structured agent runs, compares behavior across runs, and restores the configuration that produced a trusted run.
20
+
21
+ ```python
22
+ import trace_sdk as trace
23
+
24
+ @trace.track(agent="planner", config={"model": "mock-v1"})
25
+ def run_agent(request):
26
+ tools = trace.wrap_tools({"lookup": lookup})
27
+ result = tools["lookup"](request)
28
+ trace.decision("lookup_done", status=result["status"])
29
+ return result
30
+ ```
31
+
32
+ ## Commands
33
+
34
+ ```bash
35
+ trace log
36
+ trace show <id>
37
+ trace diff <id_a> <id_b>
38
+ trace diff <id_a> <id_b> --json
39
+ trace revert <id>
40
+ trace revert <id> --agent budget_agent
41
+ trace serve
42
+ trace clear
43
+ ```
44
+
45
+ Runs are stored in `.trace/runs/`. Config restored by `trace revert` is stored in `.trace/config.json`.
46
+
47
+ ## Development
48
+
49
+ ```bash
50
+ uv sync --extra dev
51
+ uv run pytest
52
+ uv run python examples/demo_agent.py --variant stable
53
+ uv run python examples/demo_agent.py --variant drift
54
+ uv run trace log
55
+ ```
56
+
57
+ TraceHub lives in `tracehub/`. Build it with `npm install && npm run build`; `trace serve` serves the built assets from `tracehub/dist`.
@@ -0,0 +1,41 @@
1
+ # Trace
2
+
3
+ Trace is local version control for AI agent behavior. It captures structured agent runs, compares behavior across runs, and restores the configuration that produced a trusted run.
4
+
5
+ ```python
6
+ import trace_sdk as trace
7
+
8
+ @trace.track(agent="planner", config={"model": "mock-v1"})
9
+ def run_agent(request):
10
+ tools = trace.wrap_tools({"lookup": lookup})
11
+ result = tools["lookup"](request)
12
+ trace.decision("lookup_done", status=result["status"])
13
+ return result
14
+ ```
15
+
16
+ ## Commands
17
+
18
+ ```bash
19
+ trace log
20
+ trace show <id>
21
+ trace diff <id_a> <id_b>
22
+ trace diff <id_a> <id_b> --json
23
+ trace revert <id>
24
+ trace revert <id> --agent budget_agent
25
+ trace serve
26
+ trace clear
27
+ ```
28
+
29
+ Runs are stored in `.trace/runs/`. Config restored by `trace revert` is stored in `.trace/config.json`.
30
+
31
+ ## Development
32
+
33
+ ```bash
34
+ uv sync --extra dev
35
+ uv run pytest
36
+ uv run python examples/demo_agent.py --variant stable
37
+ uv run python examples/demo_agent.py --variant drift
38
+ uv run trace log
39
+ ```
40
+
41
+ TraceHub lives in `tracehub/`. Build it with `npm install && npm run build`; `trace serve` serves the built assets from `tracehub/dist`.
@@ -0,0 +1,106 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ from dataclasses import dataclass
5
+
6
+ import trace_sdk as trace
7
+
8
+
9
@dataclass
class TripRequest:
    """Input record handed to every demo agent's ``run`` method."""

    # Destination name; used to label the mock points of interest.
    destination: str
    # Trip length in days; forwarded to cost estimation and scheduling.
    days: int
    # Overall budget compared against the estimated minimum viable cost.
    total_budget: int
    # Trip flavour passed to POI selection (the mock selector does not branch on it).
    trip_type: str
15
+
16
+
17
def estimate_costs(destination: str, days: int, variant: str):
    """Return a mock cost estimate; any variant other than "stable" uses the higher floor."""
    if variant == "stable":
        minimum_viable = 32000
    else:
        minimum_viable = 54000
    estimate = {"minimum_viable": minimum_viable}
    estimate["destination"] = destination
    estimate["days"] = days
    return estimate
20
+
21
+
22
def split_budget(total: int, strategy: str):
    """Split ``total`` into an activities share and the remaining stay share."""
    # "balanced" spends 40% on activities; every other strategy spends 16%.
    if strategy == "balanced":
        share = 0.40
    else:
        share = 0.16
    activities = int(total * share)
    stay = total - activities
    return {"activities": activities, "stay": stay, "strategy": strategy}
25
+
26
+
27
def get_points_of_interest(destination: str):
    """Return fifteen mock points of interest with ids 1..15 and rising cost."""
    pois = []
    for index in range(1, 16):
        poi = {
            "id": index,
            "name": f"{destination} POI {index}",
            "cost": 1000 + index * 250,
        }
        pois.append(poi)
    return pois
29
+
30
+
31
def select_pois_by_trip_type(pois, trip_type: str, poi_budget: int):
    """Return the first 12 POIs when the POI budget is at least 18000, else the first 4.

    ``trip_type`` is accepted for interface parity but does not affect the result.
    """
    if poi_budget >= 18000:
        return pois[:12]
    return pois[:4]
34
+
35
+
36
def schedule_day_wise_itinerary(pois, days: int):
    """Return a mock itinerary holding the day count and the number of POIs."""
    poi_count = len(pois)
    return dict(days=days, pois=poi_count)
38
+
39
+
40
class BudgetAgent:
    """Demo agent that picks a budget strategy and splits the budget accordingly."""

    def __init__(self, variant: str):
        self.variant = variant
        restored = trace.load_config("budget_agent")
        # Fall back to the variant-specific mock model when nothing was restored.
        self.model = restored.get("model", f"mock-budget-{variant}")
        raw_tools = {
            # The variant is bound here so the traced tool only takes destination/days.
            "estimate_costs": lambda destination, days: estimate_costs(destination, days, variant),
            "split_budget": split_budget,
        }
        self.tools = trace.wrap_tools(raw_tools)

    @trace.track(agent="budget_agent", config=lambda self, *_args, **_kwargs: {"model": f"mock-budget-{self.variant}", "prompt_id": f"budget-{self.variant}"})
    def run(self, trip_request: TripRequest):
        """Estimate costs, record the budget decision, and return the allocation."""
        tools = self.tools
        costs = tools["estimate_costs"](destination=trip_request.destination, days=trip_request.days)
        if trip_request.total_budget >= costs["minimum_viable"]:
            trace.decision("budget_comfortable", surplus=trip_request.total_budget - costs["minimum_viable"])
            strategy = "balanced"
        else:
            trace.decision("budget_too_low", requested=trip_request.total_budget, minimum=costs["minimum_viable"])
            strategy = "shoestring"
        allocation = tools["split_budget"](total=trip_request.total_budget, strategy=strategy)
        return {"allocation": allocation, "strategy": strategy}
61
+
62
+
63
class PlannerAgent:
    """Demo agent that fetches, filters, and schedules points of interest."""

    def __init__(self):
        raw_tools = {
            "get_points_of_interest": get_points_of_interest,
            "select_pois_by_trip_type": select_pois_by_trip_type,
            "schedule_day_wise_itinerary": schedule_day_wise_itinerary,
        }
        self.tools = trace.wrap_tools(raw_tools)

    @trace.track(agent="planner_agent", config={"model": "mock-planner", "prompt_id": "planner-v1"})
    def run(self, trip_request: TripRequest, allocation):
        """Build an itinerary from the POIs affordable under ``allocation["activities"]``."""
        tools = self.tools
        candidates = tools["get_points_of_interest"](destination=trip_request.destination)
        chosen = tools["select_pois_by_trip_type"](
            pois=candidates,
            trip_type=trip_request.trip_type,
            poi_budget=allocation["activities"],
        )
        schedule = tools["schedule_day_wise_itinerary"](pois=chosen, days=trip_request.days)
        trace.decision("itinerary_built", pois=len(chosen), days=trip_request.days)
        return {"itinerary": schedule}
82
+
83
+
84
class TripOrchestrator:
    """Top-level demo pipeline: run the budget agent, then the planner agent."""

    def __init__(self, variant: str):
        self.budget_agent = BudgetAgent(variant)
        self.planner_agent = PlannerAgent()

    @trace.track(agent="orchestrator", config={"pipeline": "budget->planner"})
    def run(self, trip_request: TripRequest):
        """Return the chosen strategy together with the planned itinerary."""
        budget_result = self.budget_agent.run(trip_request)
        plan_result = self.planner_agent.run(trip_request, budget_result["allocation"])
        summary = {"strategy": budget_result["strategy"]}
        summary["itinerary"] = plan_result["itinerary"]
        return summary
94
+
95
+
96
def main():
    """Parse ``--variant``, run the demo pipeline once, and print its result."""
    cli = argparse.ArgumentParser()
    cli.add_argument("--variant", choices=["stable", "drift"], default="stable")
    variant = cli.parse_args().variant
    request = TripRequest(
        destination="Tokyo",
        days=4,
        total_budget=50000,
        trip_type="culture",
    )
    orchestrator = TripOrchestrator(variant)
    print(orchestrator.run(request))
103
+
104
+
105
+ if __name__ == "__main__":
106
+ main()
@@ -0,0 +1,21 @@
1
+ """Sketch for wiring Trace into the separate trip planner codebase.
2
+
3
+ This file intentionally does not import the real planner. Copy the pattern into
4
+ that repo after installing this package locally.
5
+ """
6
+
7
+ import trace_sdk as trace
8
+
9
+
10
class PlannerTraceAdapter:
    """Wrap an existing planner so its model client and tools are traced."""

    def __init__(self, planner):
        self.planner = planner
        traced_llm = trace.wrap(self.planner.llm)
        self.planner.llm = traced_llm
        traced_tools = trace.wrap_tools(self.planner.tools)
        self.planner.tools = traced_tools

    @trace.track(agent="planner_agent", config={"model": "gemini-2.5-flash", "prompt_id": "planner-v1"})
    def run(self, trip_request, allocation=None):
        """Apply any restored config to the planner, then delegate to it."""
        restored = trace.load_config("planner_agent")
        if restored:
            self.planner.apply_config(restored)
        return self.planner.run(trip_request, allocation)
@@ -0,0 +1,46 @@
1
+ [project]
2
+ name = "trace-sdk"
3
+ version = "0.1.1"
4
+ description = "Local version control for AI agent behavior."
5
+ readme = "README.md"
6
+ requires-python = ">=3.9"
7
+ dependencies = [
8
+ "fastapi>=0.111",
9
+ "pydantic>=2.7",
10
+ "typer>=0.12",
11
+ "uvicorn>=0.30",
12
+ "watchfiles>=0.22",
13
+ ]
14
+
15
+ [project.scripts]
16
+ trace = "trace_sdk.cli:app"
17
+
18
+ [project.optional-dependencies]
19
+ dev = [
20
+ "httpx>=0.27",
21
+ "pytest>=8.2",
22
+ "pytest-asyncio>=0.23",
23
+ ]
24
+
25
+ [build-system]
26
+ requires = ["hatchling"]
27
+ build-backend = "hatchling.build"
28
+
29
+ [tool.hatch.build.targets.wheel]
30
+ packages = ["trace_sdk"]
31
+
32
+ [tool.hatch.build.targets.sdist]
33
+ include = [
34
+ "/README.md",
35
+ "/pyproject.toml",
36
+ "/examples",
37
+ "/trace_sdk",
38
+ ]
39
+ exclude = [
40
+ "**/__pycache__",
41
+ "*.py[cod]",
42
+ ]
43
+
44
+ [tool.pytest.ini_options]
45
+ testpaths = ["tests"]
46
+ asyncio_mode = "auto"
@@ -0,0 +1,13 @@
1
+ from .capture import decision, span, wrap, wrap_tools
2
+ from .context import TraceError, track
3
+ from .store import load_config
4
+
5
+ __all__ = [
6
+ "TraceError",
7
+ "decision",
8
+ "load_config",
9
+ "span",
10
+ "track",
11
+ "wrap",
12
+ "wrap_tools",
13
+ ]
@@ -0,0 +1,118 @@
1
+ from __future__ import annotations
2
+
3
+ import functools
4
+ import hashlib
5
+ import inspect
6
+ from contextlib import contextmanager
7
+ from typing import Any, Callable
8
+
9
+ from .context import append_step, normalize_config, push_span
10
+
11
+
12
def summarize(value: Any, max_string: int = 120) -> Any:
    """Return a compact, JSON-friendly summary of ``value`` for storage in a trace.

    Args:
        value: Any Python object captured as a step input, output, or metadata.
        max_string: Maximum number of characters kept in a string ``preview``.
            The limit is applied to nested values as well.

    Returns:
        ``None``/bool/int/float unchanged; for everything else a dict with a
        ``"type"`` key plus shape information (length and hash for strings,
        count and a three-item sample for sequences, sorted keys and a few
        notable values for dicts). Objects exposing ``model_dump`` or public
        attributes are summarized through that dict view, with ``"type"`` set to
        the object's class name. Anything else is summarized via ``repr``.

    This function never raises: on any failure it returns ``{"type": <class name>}``.
    """
    notable_keys = (
        "status", "id", "name", "model", "prompt_id", "strategy", "selected",
        "count", "minimum_viable", "activities", "days", "total", "budget",
    )
    try:
        if value is None or isinstance(value, (bool, int, float)):
            return value
        if isinstance(value, str):
            digest = hashlib.sha256(value.encode("utf-8")).hexdigest()[:10]
            return {"type": "str", "len": len(value), "hash": digest, "preview": value[:max_string]}
        if isinstance(value, (list, tuple, set)):
            items = list(value)
            sample = [summarize(item, max_string) for item in items[:3]]
            return {"type": type(value).__name__, "count": len(items), "sample": sample}
        if isinstance(value, dict):
            notable = {key: summarize(value[key], max_string) for key in notable_keys if key in value}
            return {"type": "dict", "keys": sorted(map(str, value.keys())), **notable}
        if hasattr(value, "model_dump"):
            return summarize(value.model_dump(), max_string)
        if hasattr(value, "__dict__"):
            public = {k: v for k, v in vars(value).items() if not k.startswith("_")}
            if public:
                summary = summarize(public, max_string)
                # Set the class name last: the nested dict summary carries its
                # own "type": "dict" that would otherwise overwrite it.
                summary["type"] = type(value).__name__
                return summary
        return summarize(repr(value), max_string=max_string)
    except Exception:
        # Best effort: tracing must never break the traced program.
        return {"type": type(value).__name__}
38
+
39
+
40
def decision(label: str, **meta: Any) -> None:
    """Record a ``decision`` step named ``label`` with summarized metadata.

    Best effort: any failure while summarizing or recording is ignored.
    """
    try:
        summarized = {}
        for key, value in meta.items():
            summarized[key] = summarize(value)
        append_step("decision", label, meta=summarized)
    except Exception:
        pass
45
+
46
+
47
@contextmanager
def span(name: str):
    """Context manager that nests the enclosed steps under span ``name``."""
    scope = push_span(name)
    with scope:
        yield
51
+
52
+
53
def wrap_tools(tools_dict: dict[str, Callable]) -> dict[str, Callable]:
    """Return a copy of ``tools_dict`` whose callables record a ``tool_call`` step.

    Args:
        tools_dict: Mapping of tool name to sync or async callable.

    Returns:
        A new dict with the same keys. Each value calls the original tool and,
        after it returns, appends a step named after the dict key with
        summarized arguments and result. A tool that raises records no step.

    The original tool and its name are bound through closures rather than
    keyword-only defaults, so the wrapper's keyword namespace belongs entirely
    to the tool: a caller-supplied ``__func``/``__name`` kwarg is forwarded to
    the tool instead of silently replacing the wrapped function or step name.
    """

    def _trace_sync(name: str, func: Callable) -> Callable:
        @functools.wraps(func)
        def sync_tool(*args, **kwargs):
            result = func(*args, **kwargs)
            append_step("tool_call", name, input=summarize({"args": args, "kwargs": kwargs}), output=summarize(result))
            return result

        return sync_tool

    def _trace_async(name: str, func: Callable) -> Callable:
        @functools.wraps(func)
        async def async_tool(*args, **kwargs):
            result = await func(*args, **kwargs)
            append_step("tool_call", name, input=summarize({"args": args, "kwargs": kwargs}), output=summarize(result))
            return result

        return async_tool

    wrapped = {}
    for name, func in tools_dict.items():
        factory = _trace_async if inspect.iscoroutinefunction(func) else _trace_sync
        wrapped[name] = factory(name, func)
    return wrapped
75
+
76
+
77
class ClientProxy:
    """Transparent proxy that records a ``model_call`` step for each client call.

    Non-callable attributes are returned unchanged. Callable attributes are
    returned wrapped so that, after the call returns, a step is appended with
    summarized arguments and result. Calling the proxy itself is forwarded to
    the client and recorded the same way.
    """

    def __init__(self, client: Any):
        self._trace_client = client

    def __getattr__(self, attr: str) -> Any:
        # __getattr__ only runs when normal lookup fails, so being asked for
        # `_trace_client` means the proxy was never initialised (``__new__``,
        # copy, unpickle). Fail with AttributeError instead of recursing.
        if attr == "_trace_client":
            raise AttributeError(attr)
        client = self._trace_client
        target = getattr(client, attr)
        if not callable(target):
            return target
        if inspect.iscoroutinefunction(target):

            @functools.wraps(target)
            async def async_call(*args, **kwargs):
                result = await target(*args, **kwargs)
                append_step("model_call", _model_name(client, attr), input=summarize({"args": args, "kwargs": kwargs}), output=summarize(result))
                return result

            return async_call

        @functools.wraps(target)
        def sync_call(*args, **kwargs):
            result = target(*args, **kwargs)
            append_step("model_call", _model_name(client, attr), input=summarize({"args": args, "kwargs": kwargs}), output=summarize(result))
            return result

        return sync_call

    def __call__(self, *args, **kwargs):
        result = self._trace_client(*args, **kwargs)
        append_step("model_call", _model_name(self._trace_client, "__call__"), input=summarize({"args": args, "kwargs": kwargs}), output=summarize(result))
        return result
107
+
108
+
109
+ def _model_name(client: Any, method: str) -> str:
110
+ for attr in ("model", "model_name", "name"):
111
+ value = getattr(client, attr, None)
112
+ if value:
113
+ return str(value)
114
+ return f"{type(client).__name__}.{method}"
115
+
116
+
117
def wrap(client: Any) -> Any:
    """Return ``client`` wrapped in a ``ClientProxy`` so its calls are recorded."""
    proxy = ClientProxy(client)
    return proxy
@@ -0,0 +1,80 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from typing import Optional
5
+
6
+ import typer
7
+
8
+ from .diff import diff_runs, render_diff
9
+ from .store import clear_runs, list_runs, read_run, write_config_for_run
10
+
11
+ app = typer.Typer(no_args_is_help=True)
12
+
13
+
14
@app.command()
def log() -> None:
    """List captured runs."""
    header = f"{'ID':<8} {'AGENT':<16} {'TIME':<20} {'MODEL':<18} {'STATUS':<8} STEPS"
    typer.echo(header)
    for run in list_runs():
        cfg = run.config
        # Prefer the explicit model, then fall back to values stored in `extra`.
        model = cfg.model or cfg.extra.get("model") or cfg.extra.get("pipeline") or "-"
        stamp = run.timestamp.strftime("%Y-%m-%d %H:%M:%S")
        agent = run.agent or "-"
        row = f"{run.id:<8} {agent:<16} {stamp:<20} {str(model):<18} {run.status:<8} {len(run.steps)}"
        typer.echo(row)
22
+
23
+
24
@app.command()
def show(run_id: str) -> None:
    """Pretty-print a run."""
    run = read_run(run_id)
    typer.echo(f"Run {run.id} agent={run.agent} status={run.status}")
    typer.echo(f"started={run.started_at} ended={run.ended_at}")
    # Span labels are always non-empty strings, so None never matches the first one.
    previous_span = None
    for step in run.steps:
        span = step.span or "root"
        if span != previous_span:
            # Print a heading each time the span changes.
            typer.echo(f"span {span}")
            previous_span = span
        if step.output is None:
            detail = step.meta
        else:
            detail = step.output
        typer.echo(f" {step.index:02d} {step.type:<10} {step.name:<28} {detail}")
    typer.echo(f"output: {run.output}")
39
+
40
+
41
@app.command()
def diff(run_a: str, run_b: str, json_output: bool = typer.Option(False, "--json", help="Emit machine-readable JSON.")) -> None:
    """Diff two runs behaviorally."""
    left = read_run(run_a)
    right = read_run(run_b)
    result = diff_runs(left, right)
    if not json_output:
        typer.echo(render_diff(result))
        return
    typer.echo(json.dumps(result, indent=2, sort_keys=True, default=str))
49
+
50
+
51
@app.command()
def revert(run_id: str, agent: Optional[str] = typer.Option(None, "--agent", help="Restore only one logical agent.")) -> None:
    """Restore config from a run."""
    run = read_run(run_id)
    try:
        restored = write_config_for_run(run, agent=agent)
    except KeyError as exc:
        # Surface the KeyError text as a CLI usage error.
        raise typer.BadParameter(str(exc)) from exc
    names = ", ".join(restored)
    summary = f"Restored configuration for {names} from run {run.id}."
    caveat = (
        f"Restored the configuration (model + prompt + tools) for `{names}` from run `{run.id}`. "
        "This restores the setup that produced that behavior - it does not guarantee identical behavior "
        "if the model provider or tool data has changed upstream."
    )
    typer.echo(summary)
    typer.echo(caveat)
66
+
67
+
68
@app.command()
def clear() -> None:
    """Wipe captured runs."""
    clear_runs()
    confirmation = "Cleared .trace/runs."
    typer.echo(confirmation)
73
+
74
+
75
@app.command()
def serve(host: str = "127.0.0.1", port: int = 7000) -> None:
    """Launch TraceHub."""
    # Imported here rather than at module level, so other commands never load uvicorn.
    import uvicorn

    server_options = {"host": host, "port": port, "reload": False}
    uvicorn.run("trace_sdk.web.server:app", **server_options)
@@ -0,0 +1,217 @@
1
+ from __future__ import annotations
2
+
3
+ import contextvars
4
+ import functools
5
+ import hashlib
6
+ import inspect
7
+ import json
8
+ import uuid
9
+ from contextlib import contextmanager
10
+ from dataclasses import dataclass, field
11
+ from datetime import datetime, timezone
12
+ from typing import Any, Callable, Optional, Union
13
+
14
+ from .schema import Config, Run, Step
15
+ from .store import write_head, write_run
16
+
17
+
18
class TraceError(RuntimeError):
    """Raised when tracing detects misuse, such as two different agents sharing a name in one run."""
20
+
21
+
22
@dataclass
class AgentRegistration:
    """Bookkeeping for one agent name registered within a run."""

    # SHA-256 hex digest of the agent's config, used to detect conflicting reuse of a name.
    config_fingerprint: str
    # Identifier of the code that registered the name ("module.qualname" for tracked functions).
    call_site: str
    # Number of times the name has been registered so far; used as the "#n" span suffix.
    count: int = 0
27
+
28
+
29
@dataclass
class RunState:
    """Mutable state of the run currently being captured."""

    # The run record that steps are appended to and that is written to disk.
    run: Run
    # Agent name -> registration info for every agent seen in this run.
    agent_registry: dict[str, AgentRegistration] = field(default_factory=dict)
33
+
34
+
35
+ _current_state: contextvars.ContextVar[Optional[RunState]] = contextvars.ContextVar("trace_state", default=None)
36
+ _span_stack: contextvars.ContextVar[tuple[str, ...]] = contextvars.ContextVar("trace_span_stack", default=())
37
+
38
+
39
def current_state() -> Optional[RunState]:
    """Return the active run state for this context, or ``None`` outside a tracked run."""
    state = _current_state.get()
    return state
41
+
42
+
43
def current_span() -> Optional[str]:
    """Return the active span path joined with ``/``, or ``None`` when no span is open."""
    stack = _span_stack.get()
    if not stack:
        return None
    return "/".join(stack)
46
+
47
+
48
def normalize_config(config: Optional[Union[dict[str, Any], Config]]) -> Config:
    """Coerce a user-supplied config into a ``Config`` model.

    Args:
        config: ``None``, an existing ``Config``, or a plain dict.

    Returns:
        The same ``Config`` instance when one is passed, an empty ``Config``
        for ``None``, otherwise a ``Config`` built from the dict: ``model``,
        ``prompt_id`` and ``tools`` map to the fields of the same name and
        every other key is collected into ``extra``.

    A dict-valued ``"extra"`` key is merged into ``extra`` rather than also
    being stored as ``extra["extra"]``. A dumped ``Config`` therefore
    round-trips without growing a nested copy each time. A non-dict
    ``"extra"`` value is kept as an ordinary extra key.
    """
    if isinstance(config, Config):
        return config
    if config is None:
        return Config()
    known = {key: config[key] for key in ("model", "prompt_id", "tools") if key in config}
    nested = config.get("extra")
    merge_nested = isinstance(nested, dict)
    extra = {
        key: value
        for key, value in config.items()
        if key not in known and not (merge_nested and key == "extra")
    }
    if merge_nested:
        # Nested values win over same-named top-level keys.
        extra.update(nested)
    return Config(**known, extra=extra)
58
+
59
+
60
+ def _fingerprint(config: Config) -> str:
61
+ payload = json.dumps(config.model_dump(mode="json"), sort_keys=True, default=str)
62
+ return hashlib.sha256(payload.encode("utf-8")).hexdigest()
63
+
64
+
65
def register_agent(name: str, config: Config, call_site: str) -> str:
    """Register an agent invocation in the active run and return its span label.

    Returns ``name`` unchanged when no run is active; otherwise ``"name#n"``,
    where ``n`` counts registrations of that name within the run.

    Raises:
        TraceError: if ``name`` was already registered with a different config
            fingerprint or from a different call site.
    """
    state = current_state()
    if state is None:
        return name

    fingerprint = _fingerprint(config)
    registry = state.agent_registry
    if name not in registry:
        # First sighting: remember the registration and the config for this name.
        registry[name] = AgentRegistration(fingerprint, call_site, count=1)
        state.run.span_configs.setdefault(name, config)
        return f"{name}#1"

    existing = registry[name]
    same_agent = existing.config_fingerprint == fingerprint and existing.call_site == call_site
    if not same_agent:
        raise TraceError(
            f'duplicate agent name "{name}": two different agents share it. '
            "Agent names must be unique within a run so revert can target them. "
            f'Rename one (e.g. "{name}_a" / "{name}_b").'
        )
    existing.count += 1
    return f"{name}#{existing.count}"
84
+
85
+
86
def append_step(step_type: str, name: str, input: Any = None, output: Any = None, meta: Optional[dict[str, Any]] = None) -> None:
    """Append a step to the active run and persist the run.

    Does nothing outside a tracked run. Best effort: any failure while building
    or writing the step is ignored.
    """
    state = current_state()
    if state is None:
        return
    try:
        run = state.run
        step = Step(
            index=len(run.steps),
            type=step_type,
            name=name,
            input=input,
            output=output,
            span=current_span(),
            meta=meta or {},
        )
        run.steps.append(step)
        # The run file is rewritten after every step.
        write_run(run)
    except Exception:
        pass
105
+
106
+
107
@contextmanager
def push_span(name: str, config: Optional[Config] = None, call_site: Optional[str] = None):
    """Push a span onto the span stack for the duration of the ``with`` block.

    When ``config`` is given, the span is registered as an agent first (which
    may raise ``TraceError``) and the registered label is pushed instead of
    ``name``. The stack is restored on exit whenever a push happened.
    """
    token = None
    try:
        if config is None:
            label = name
        else:
            label = register_agent(name, config, call_site or name)
        token = _span_stack.set(_span_stack.get() + (label,))
        yield
    finally:
        if token is not None:
            _span_stack.reset(token)
120
+
121
+
122
def _new_run(agent: Optional[str], config: Config, input_value: Any) -> Run:
    """Create a run record with a six-hex-character id, stamped with the current UTC time."""
    started = datetime.now(timezone.utc)
    run_id = uuid.uuid4().hex[:6]
    fields = {
        "id": run_id,
        "timestamp": started,
        "agent": agent,
        "config": config,
        "input": input_value,
        "started_at": started,
    }
    return Run(**fields)
132
+
133
+
134
def track(agent: Optional[str] = None, config: Optional[Union[dict[str, Any], Config, Callable[..., Any]]] = None):
    """Decorator that captures calls to a sync or async function as a trace run.

    Args:
        agent: Logical agent name. For nested calls it defaults to the
            function's qualified name; for a top-level run it is stored as-is.
        config: A ``Config``, a plain dict, or a callable that receives the
            call's arguments and returns one of those.

    Behaviour:
        * Called while another tracked run is active, the function runs inside
          a span registered under the agent name and no new run is created.
        * Otherwise a new run is created, written before the call, updated with
          the summarized result or the error, written again, and recorded as
          HEAD.

    Any exception, including ``asyncio.CancelledError`` and
    ``KeyboardInterrupt``, marks the run as ``"error"`` before being re-raised,
    so an interrupted run is not left as ``"running"``. The tracing context
    variables are reset even when persisting the final run state fails.
    """

    def decorator(fn: Callable):
        call_site = f"{fn.__module__}.{fn.__qualname__}"

        def resolve_config(args: tuple[Any, ...], kwargs: dict[str, Any]) -> Config:
            # A callable config is evaluated per call with the call's arguments.
            if callable(config):
                return normalize_config(config(*args, **kwargs))
            return normalize_config(config)

        def begin_run(config_model: Config, args: tuple[Any, ...], kwargs: dict[str, Any]):
            # Create the run record and install it as the active tracing state.
            from .capture import summarize  # local import: capture imports this module

            run = _new_run(agent, config_model, summarize({"args": args, "kwargs": kwargs}))
            state = RunState(run=run)
            state.agent_registry[run.agent or "orchestrator"] = AgentRegistration(_fingerprint(config_model), call_site, 1)
            state_token = _current_state.set(state)
            span_token = _span_stack.set(())
            return run, state_token, span_token

        def record_success(run: Run, result: Any) -> None:
            # Store the summarized return value and mark the run successful.
            from .capture import summarize

            run.output = summarize(result)
            run.status = "success"

        def record_failure(run: Run, exc: BaseException) -> None:
            # Mark the run failed; fall back to the class name for empty messages.
            run.status = "error"
            run.error = str(exc) or type(exc).__name__

        def finish_run(run: Run, state_token, span_token) -> None:
            # Persist the final run state, then always restore the context variables.
            try:
                run.ended_at = datetime.now(timezone.utc)
                write_run(run)
                write_head(run.id)
            finally:
                _span_stack.reset(span_token)
                _current_state.reset(state_token)

        async def run_async(*args, **kwargs):
            config_model = resolve_config(args, kwargs)
            if current_state() is not None:
                with push_span(agent or fn.__qualname__, config_model, call_site):
                    return await fn(*args, **kwargs)

            run, state_token, span_token = begin_run(config_model, args, kwargs)
            try:
                write_run(run)
                result = await fn(*args, **kwargs)
                record_success(run, result)
                return result
            except BaseException as exc:
                record_failure(run, exc)
                raise
            finally:
                finish_run(run, state_token, span_token)

        def run_sync(*args, **kwargs):
            config_model = resolve_config(args, kwargs)
            if current_state() is not None:
                with push_span(agent or fn.__qualname__, config_model, call_site):
                    return fn(*args, **kwargs)

            run, state_token, span_token = begin_run(config_model, args, kwargs)
            try:
                write_run(run)
                result = fn(*args, **kwargs)
                record_success(run, result)
                return result
            except BaseException as exc:
                record_failure(run, exc)
                raise
            finally:
                finish_run(run, state_token, span_token)

        if inspect.iscoroutinefunction(fn):
            return functools.wraps(fn)(run_async)
        return functools.wraps(fn)(run_sync)

    return decorator
@@ -0,0 +1,168 @@
1
+ from __future__ import annotations
2
+
3
+ from collections import Counter, defaultdict
4
+ from difflib import SequenceMatcher
5
+ from typing import Any, Optional
6
+
7
+ from .schema import Config, Run, Step
8
+
9
+
10
def diff_runs(a: Run, b: Run) -> dict[str, Any]:
    """Compare two runs and return config, per-span, and outcome differences.

    The result also names a ``root_cause`` span derived from the span diffs.
    """
    report: dict[str, Any] = {"a": a.id, "b": b.id}
    report["config"] = _diff_configs(a, b)
    report["spans"] = _diff_spans(a, b)
    report["outcome"] = _diff_outcome(a, b)
    report["root_cause"] = _root_cause(report["spans"])
    return report
23
+
24
+
25
def render_diff(diff: dict[str, Any]) -> str:
    """Render a ``diff_runs`` result as human-readable text with config, behavior, and outcome sections."""

    def path_lines(items):
        # Shared layout for the config and outcome sections.
        if not items:
            return [" = unchanged"]
        return [f" ~ {item['path']} {item['a']} -> {item['b']}" for item in items]

    lines = [f"diff {diff['a']} -> {diff['b']}", "config:"]
    lines.extend(path_lines(diff["config"]))

    lines.append("behavior:")
    if not diff["spans"]:
        lines.append(" = unchanged")
    for span in diff["spans"]:
        lines.append(f" span {span['span']}:")
        if not span["changes"]:
            lines.append(" = control flow unchanged")
            continue
        lines.extend(f" {change['kind']} {change['detail']}" for change in span["changes"])

    lines.append("outcome:")
    lines.extend(path_lines(diff["outcome"]))

    root_cause = diff.get("root_cause")
    if root_cause:
        lines.append(f"root cause: {root_cause}")
    return "\n".join(lines)
55
+
56
+
57
+ def _config_dump(config: Config) -> dict[str, Any]:
58
+ return config.model_dump(mode="json")
59
+
60
+
61
def _diff_configs(a: Run, b: Run) -> list[dict[str, Any]]:
    """List field-level config differences for every agent name seen in either run.

    The run's own config is keyed by its agent name (``"orchestrator"`` when
    unset); an agent missing from one run is compared as an empty ``Config``.
    """
    configs_a = {a.agent or "orchestrator": a.config, **a.span_configs}
    configs_b = {b.agent or "orchestrator": b.config, **b.span_configs}
    changes = []
    for name in sorted(set(configs_a) | set(configs_b)):
        left = _config_dump(configs_a.get(name, Config()))
        right = _config_dump(configs_b.get(name, Config()))
        changes.extend(
            {"path": f"{name}.{key}", "a": left.get(key), "b": right.get(key)}
            for key in ("model", "prompt_id", "tools", "extra")
            if left.get(key) != right.get(key)
        )
    return changes
73
+
74
+
75
+ def _logical_span(span: Optional[str]) -> str:
76
+ if not span:
77
+ return "root"
78
+ parts = []
79
+ for part in span.split("/"):
80
+ parts.append(part.split("#", 1)[0])
81
+ return "/".join(parts)
82
+
83
+
84
def _by_span(steps: list[Step]) -> dict[str, list[Step]]:
    """Group steps by their logical span, keeping the original step order in each group."""
    grouped: dict[str, list[Step]] = defaultdict(list)
    for step in steps:
        key = _logical_span(step.span)
        grouped[key].append(step)
    return grouped
89
+
90
+
91
+ def _token(step: Step) -> str:
92
+ return f"{step.type}:{step.name}"
93
+
94
+
95
def _diff_spans(a: Run, b: Run) -> list[dict[str, Any]]:
    """Compare the two runs span by span and list behavioural changes.

    For every logical span present in either run this reports:

    * per-token count changes (``type:name`` occurrences),
    * structural changes from a sequence alignment of the token lists
      (``-`` deleted, ``+`` inserted, ``~`` replaced),
    * output changes, and metadata changes for decisions, between steps that
      the alignment matched.

    Value comparisons follow the alignment rather than raw positions, so an
    inserted or removed step does not shift every later comparison onto the
    wrong partner. Spans are ordered by first step index, then by name, so
    ties sort the same way on every run.

    Returns:
        One ``{"span", "changes", "input_changed"}`` dict per span that has at
        least one change; unchanged spans are omitted.
    """
    grouped_a = _by_span(a.steps)
    grouped_b = _by_span(b.steps)
    spans = sorted(
        set(grouped_a) | set(grouped_b),
        key=lambda name: (_first_index(name, grouped_a, grouped_b), name),
    )
    result = []
    for span in spans:
        steps_a = grouped_a.get(span, [])
        steps_b = grouped_b.get(span, [])
        tokens_a = [_token(step) for step in steps_a]
        tokens_b = [_token(step) for step in steps_b]
        changes = []

        counts_a = Counter(tokens_a)
        counts_b = Counter(tokens_b)
        for token in sorted(set(counts_a) | set(counts_b)):
            if counts_a[token] != counts_b[token]:
                changes.append({"kind": "~", "detail": f"{token} count {counts_a[token]} -> {counts_b[token]}"})

        # autojunk is disabled because step tokens repeat heavily and the
        # popularity heuristic would treat frequent tokens as junk on long runs.
        opcodes = SequenceMatcher(a=tokens_a, b=tokens_b, autojunk=False).get_opcodes()
        for tag, i1, i2, j1, j2 in opcodes:
            if tag == "equal":
                continue
            if tag == "delete":
                changes.append({"kind": "-", "detail": ", ".join(tokens_a[i1:i2])})
            elif tag == "insert":
                changes.append({"kind": "+", "detail": ", ".join(tokens_b[j1:j2])})
            else:
                changes.append({"kind": "~", "detail": f"{tokens_a[i1:i2]} -> {tokens_b[j1:j2]}"})

        # Only steps the alignment paired up (same token on both sides) are
        # compared by value.
        for tag, i1, i2, j1, j2 in opcodes:
            if tag != "equal":
                continue
            for left, right in zip(steps_a[i1:i2], steps_b[j1:j2]):
                if left.output != right.output:
                    changes.append({"kind": "~", "detail": f"{_token(left)} output {left.output} -> {right.output}"})
                if left.type == "decision" and left.meta != right.meta:
                    changes.append({"kind": "~", "detail": f"decision:{left.name} meta {left.meta} -> {right.meta}"})

        if changes:
            result.append({"span": span, "changes": _dedupe(changes), "input_changed": _span_input(steps_a) != _span_input(steps_b)})
    return result
133
+
134
+
135
+ def _first_index(name: str, grouped_a: dict[str, list[Step]], grouped_b: dict[str, list[Step]]) -> int:
136
+ indexes = [steps[0].index for steps in (grouped_a.get(name, []), grouped_b.get(name, [])) if steps]
137
+ return min(indexes) if indexes else 10**9
138
+
139
+
140
+ def _span_input(steps: list[Step]) -> Any:
141
+ return steps[0].input if steps else None
142
+
143
+
144
+ def _dedupe(changes: list[dict[str, Any]]) -> list[dict[str, Any]]:
145
+ seen = set()
146
+ result = []
147
+ for change in changes:
148
+ key = (change["kind"], str(change["detail"]))
149
+ if key not in seen:
150
+ seen.add(key)
151
+ result.append(change)
152
+ return result
153
+
154
+
155
+ def _diff_outcome(a: Run, b: Run) -> list[dict[str, Any]]:
156
+ changes = []
157
+ if a.status != b.status:
158
+ changes.append({"path": "status", "a": a.status, "b": b.status})
159
+ if a.output != b.output:
160
+ changes.append({"path": "output", "a": a.output, "b": b.output})
161
+ return changes
162
+
163
+
164
+ def _root_cause(span_diffs: list[dict[str, Any]]) -> Optional[str]:
165
+ for item in span_diffs:
166
+ if not item.get("input_changed"):
167
+ return item["span"]
168
+ return span_diffs[0]["span"] if span_diffs else None
@@ -0,0 +1,38 @@
1
+ from __future__ import annotations
2
+
3
+ from datetime import datetime
4
+ from typing import Any, Literal, Optional
5
+
6
+ from pydantic import BaseModel, Field
7
+
8
+
9
class Step(BaseModel):
    """One recorded event inside a run."""

    # Position of the step within the run's step list.
    index: int
    # Kind of event that was recorded.
    type: Literal["model_call", "tool_call", "decision"]
    # Tool name, model name, or decision label.
    name: str
    # Captured input for the call, if any.
    input: Any = None
    # Captured output for the call, if any.
    output: Any = None
    # "/"-joined span path active when the step was recorded; None at the run root.
    span: Optional[str] = None
    # Free-form metadata; decisions store their keyword arguments here.
    meta: dict[str, Any] = Field(default_factory=dict)
17
+
18
+
19
class Config(BaseModel):
    """Agent configuration captured with a run."""

    # Model identifier, if one was supplied.
    model: Optional[str] = None
    # Prompt identifier, if one was supplied.
    prompt_id: Optional[str] = None
    # Tool names, if supplied.
    tools: list[str] = Field(default_factory=list)
    # Any additional configuration keys that are not dedicated fields.
    extra: dict[str, Any] = Field(default_factory=dict)
24
+
25
+
26
class Run(BaseModel):
    """A captured agent run as persisted to a run JSON file."""

    # Short run identifier; also the stem of the run's file name.
    id: str
    # Creation time; used to order runs when listing them.
    timestamp: datetime
    # Name of the top-level agent, if one was given.
    agent: Optional[str] = None
    # Configuration of the top-level agent.
    config: Config
    # Configurations of nested agents, keyed by agent name.
    span_configs: dict[str, Config] = Field(default_factory=dict)
    # Captured input of the top-level call.
    input: Any = None
    # Steps recorded during the run, in order.
    steps: list[Step] = Field(default_factory=list)
    # Captured output of the top-level call.
    output: Any = None
    # "running" until the top-level call finishes.
    status: Literal["success", "error", "running"] = "running"
    # Error message when the run failed.
    error: Optional[str] = None
    # Start and end times of the top-level call.
    started_at: Optional[datetime] = None
    ended_at: Optional[datetime] = None
@@ -0,0 +1,113 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import shutil
5
+ from pathlib import Path
6
+ from typing import Any, Optional
7
+
8
+ from .schema import Config, Run
9
+
10
+
11
def trace_root() -> Path:
    """Return the ``.trace`` directory under the current working directory."""
    working_dir = Path.cwd()
    return working_dir.joinpath(".trace")
13
+
14
+
15
def runs_dir() -> Path:
    """Return the ``runs`` directory inside the trace root."""
    return trace_root().joinpath("runs")
17
+
18
+
19
def ensure_trace_dirs() -> None:
    """Create the runs directory (and its parents) if it does not exist yet."""
    target = runs_dir()
    target.mkdir(parents=True, exist_ok=True)
21
+
22
+
23
def run_path(run_id: str) -> Path:
    """Return the path of the JSON file that stores the run ``run_id``."""
    filename = f"{run_id}.json"
    return runs_dir().joinpath(filename)
25
+
26
+
27
def write_run(run: Run) -> None:
    """Serialize ``run`` to its JSON file, creating the runs directory if needed.

    The file is written as UTF-8, indented by two spaces with sorted keys.
    """
    ensure_trace_dirs()
    destination = run_path(run.id)
    serialized = json.dumps(run.model_dump(mode="json"), indent=2, sort_keys=True)
    destination.write_text(serialized, encoding="utf-8")
31
+
32
+
33
def write_head(run_id: str) -> None:
    """Record ``run_id`` in the ``HEAD`` file under the trace root."""
    ensure_trace_dirs()
    head_file = trace_root() / "HEAD"
    head_file.write_text(run_id, encoding="utf-8")
36
+
37
+
38
def read_run(run_id: str) -> Run:
    """Load a stored run by its exact id or by a unique id prefix.

    An exact ``<run_id>.json`` file is preferred. When it does not exist, the
    runs directory is searched for files whose name starts with ``run_id``,
    and the single match, if any, is used.

    Raises:
        FileNotFoundError: if no stored run matches ``run_id``, or if the
            prefix matches more than one stored run.
    """
    path = run_path(run_id)
    if not path.exists():
        directory = runs_dir()
        matches = sorted(directory.glob(f"{run_id}*.json")) if directory.exists() else []
        if not matches:
            raise FileNotFoundError(f'no run found matching "{run_id}"')
        if len(matches) > 1:
            # Report the ambiguity instead of failing later with a misleading
            # "no such file" error for the exact path.
            candidates = ", ".join(match.stem for match in matches)
            raise FileNotFoundError(
                f'run id "{run_id}" is ambiguous; it matches: {candidates}'
            )
        path = matches[0]
    return Run.model_validate_json(path.read_text(encoding="utf-8"))
45
+
46
+
47
def list_runs() -> list[Run]:
    """Return every readable stored run, newest ``timestamp`` first.

    Files that cannot be read or validated are skipped. A missing runs
    directory yields an empty list.
    """
    directory = runs_dir()
    if not directory.exists():
        return []
    loaded: list[Run] = []
    for candidate in directory.glob("*.json"):
        try:
            raw = candidate.read_text(encoding="utf-8")
            loaded.append(Run.model_validate_json(raw))
        except Exception:
            # Best effort: one unreadable file must not hide the other runs.
            continue
    loaded.sort(key=lambda run: run.timestamp, reverse=True)
    return loaded
57
+
58
+
59
def config_path() -> Path:
    """Return the path of ``config.json`` inside the trace root."""
    return trace_root().joinpath("config.json")
61
+
62
+
63
+ def _config_to_block(config: Config) -> dict[str, Any]:
64
+ block = config.model_dump(mode="json", exclude_none=True)
65
+ if not block.get("tools"):
66
+ block.pop("tools", None)
67
+ if not block.get("extra"):
68
+ block.pop("extra", None)
69
+ return block
70
+
71
+
72
def write_config_for_run(run: Run, agent: Optional[str] = None) -> list[str]:
    """Write the configuration captured by ``run`` into ``config.json``.

    Existing entries in the file are kept unless overwritten. With ``agent``
    set, only that agent's block is written. Otherwise the run's own config
    (under ``run.agent``, or ``"orchestrator"`` when unset) and every span
    config are written.

    Returns:
        The names whose blocks were written.

    Raises:
        KeyError: if ``agent`` is given but is not a name known to this run.
    """
    ensure_trace_dirs()
    target = config_path()
    current = load_config() if target.exists() else {}
    if not isinstance(current, dict):
        # A non-object JSON root cannot hold per-agent blocks; start over.
        current = {}

    root_name = run.agent or "orchestrator"

    if agent is None:
        current[root_name] = _config_to_block(run.config)
        for span_name, span_config in run.span_configs.items():
            current[span_name] = _config_to_block(span_config)
        restored = [root_name, *sorted(run.span_configs)]
    else:
        candidates = {root_name: run.config, **run.span_configs}
        if agent not in candidates:
            names = ", ".join(sorted(candidates))
            raise KeyError(f'unknown agent "{agent}". Available agents: {names}')
        current[agent] = _config_to_block(candidates[agent])
        restored = [agent]

    serialized = json.dumps(current, indent=2, sort_keys=True)
    target.write_text(serialized, encoding="utf-8")
    return restored
95
+
96
+
97
def load_config(span: Optional[str] = None) -> Any:
    """Read ``config.json`` from the trace root.

    Args:
        span: Name of a single block to return. When omitted, the whole parsed
            file is returned.

    Returns:
        ``{}`` when the file does not exist. Without ``span``, the parsed JSON
        document as-is. With ``span``, that block, or ``{}`` when the block is
        absent or the document root is not a JSON object.
    """
    path = config_path()
    if not path.exists():
        return {}
    data = json.loads(path.read_text(encoding="utf-8"))
    if span is None:
        return data
    if not isinstance(data, dict):
        # Mirror write_config_for_run, which treats a non-object root as empty,
        # instead of failing on the missing .get attribute.
        return {}
    return data.get(span, {})
105
+
106
+
107
def clear_runs() -> None:
    """Delete all stored runs and the ``HEAD`` marker.

    The runs directory is removed if present and then recreated empty.
    """
    directory = runs_dir()
    if directory.exists():
        shutil.rmtree(directory)
    directory.mkdir(parents=True, exist_ok=True)

    head_marker = trace_root() / "HEAD"
    if not head_marker.exists():
        return
    head_marker.unlink()
@@ -0,0 +1 @@
1
+
@@ -0,0 +1,91 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import json
5
+ from pathlib import Path
6
+ from typing import AsyncIterator
7
+
8
+ from fastapi import FastAPI, HTTPException
9
+ from fastapi.responses import HTMLResponse, StreamingResponse
10
+ from fastapi.staticfiles import StaticFiles
11
+ from watchfiles import awatch
12
+
13
+ from trace_sdk.diff import diff_runs
14
+ from trace_sdk.store import list_runs, read_run, runs_dir
15
+
16
# FastAPI application exposing the TraceHub API and, when built, the frontend.
app = FastAPI(title="TraceHub", version="0.1.0")

# Expected location of the built frontend, resolved relative to this module.
DIST = Path(__file__).resolve().parents[2] / "tracehub" / "dist"
# Mount only when the assets directory itself is present: StaticFiles validates
# its directory on construction, so a dist folder without "assets" would
# otherwise make importing this module fail.
if (DIST / "assets").is_dir():
    app.mount("/assets", StaticFiles(directory=DIST / "assets"), name="assets")
21
+
22
+
23
+ def _summary(run):
24
+ duration = None
25
+ if run.started_at and run.ended_at:
26
+ duration = (run.ended_at - run.started_at).total_seconds()
27
+ spans = []
28
+ for step in run.steps:
29
+ if step.span and step.span not in spans:
30
+ spans.append(step.span)
31
+ return {
32
+ "id": run.id,
33
+ "agent": run.agent,
34
+ "status": run.status,
35
+ "started_at": run.started_at.isoformat() if run.started_at else None,
36
+ "ended_at": run.ended_at.isoformat() if run.ended_at else None,
37
+ "duration": duration,
38
+ "steps": len(run.steps),
39
+ "spans": spans,
40
+ }
41
+
42
+
43
@app.get("/runs")
def runs():
    # List a summary of every stored run, in the order list_runs() returns them.
    return list(map(_summary, list_runs()))
46
+
47
+
48
@app.get("/runs/{run_id}")
def run_detail(run_id: str):
    # Return the full JSON form of one run; an unknown id becomes a 404.
    try:
        found = read_run(run_id)
    except FileNotFoundError as exc:
        raise HTTPException(status_code=404, detail="Run not found") from exc
    return found.model_dump(mode="json")
54
+
55
+
56
@app.get("/diff")
def diff(a: str, b: str):
    # Compare two stored runs, identified by the query parameters ``a`` and ``b``.
    # A missing run is reported as a 404, matching the /runs/{run_id} endpoint,
    # instead of surfacing as an unhandled server error.
    try:
        run_a = read_run(a)
        run_b = read_run(b)
    except FileNotFoundError as exc:
        raise HTTPException(status_code=404, detail="Run not found") from exc
    return diff_runs(run_a, run_b)
59
+
60
+
61
@app.get("/stream")
async def stream():
    # Server-sent events endpoint: emits a "ready" event first, then one
    # "run_updated" event for each changed .json file in the runs directory.

    def _frame(payload) -> str:
        """Format ``payload`` as one ``data:`` event frame."""
        return f"data: {json.dumps(payload)}\n\n"

    async def events() -> AsyncIterator[str]:
        """Yield event frames for as long as the directory watcher runs."""
        yield _frame({"event": "ready"})
        runs_dir().mkdir(parents=True, exist_ok=True)
        async for batch in awatch(runs_dir()):
            for _change, changed_path in batch:
                if not str(changed_path).endswith(".json"):
                    continue
                yield _frame({"event": "run_updated", "id": Path(changed_path).stem})
            # Hand control back to the event loop between batches.
            await asyncio.sleep(0)

    return StreamingResponse(events(), media_type="text/event-stream")
74
+
75
+
76
@app.get("/", response_class=HTMLResponse)
def index():
    # Serve the built frontend entry page when DIST/index.html exists.
    index_path = DIST / "index.html"
    if index_path.exists():
        return index_path.read_text(encoding="utf-8")
    # Otherwise return a small static page explaining how to build the
    # frontend and pointing at the /docs API documentation.
    return """
<!doctype html>
<html>
<head><title>TraceHub</title><style>body{font-family:system-ui;background:#101114;color:#f5f5f5;padding:32px}a{color:#7dd3fc}</style></head>
<body>
<h1>TraceHub</h1>
<p>React assets are not built yet. Run <code>cd tracehub && npm install && npm run build</code>, then restart <code>trace serve</code>.</p>
<p>API docs are available at <a href="/docs">/docs</a>.</p>
</body>
</html>
"""