swarmtrace 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,46 @@
1
+ Metadata-Version: 2.4
2
+ Name: swarmtrace
3
+ Version: 0.1.0
4
+ Summary: pytest for AI agents — trace, debug and catch regressions in LLM swarms
5
+ Author: Ravi
6
+ Requires-Python: >=3.8
7
+ Description-Content-Type: text/markdown
8
+ Requires-Dist: litai
9
+ Requires-Dist: click
10
+ Requires-Dist: rich
11
+ Dynamic: author
12
+ Dynamic: description
13
+ Dynamic: description-content-type
14
+ Dynamic: requires-dist
15
+ Dynamic: requires-python
16
+ Dynamic: summary
17
+
18
+ # Tracely
19
+ > Open-source AI observability framework. Trace, debug and detect regressions in LLM apps with one decorator.
20
+
21
+ ## Install
22
+ pip install tracely
23
+
24
+ ## Usage
25
+ from tracely import observe
26
+
27
+ @observe
28
+ def my_agent(question):
29
+ return llm.chat(question)
30
+
31
+ ## Features
32
+ - One decorator, zero config
33
+ - Latency tracking
34
+ - Error capture
35
+ - AI-powered regression detection
36
+ - CLI: tracely view
37
+ - Works with any LLM
38
+
39
+ ## Why Not LangSmith?
40
+ | | LangSmith | Tracely |
41
+ |---|---|---|
42
+ | Open Source | No | Yes |
43
+ | Any Framework | No | Yes |
44
+ | Self-hosted | No | Yes |
45
+ | Regression Detection | No | Yes |
46
+ | Setup | Complex | One decorator |
@@ -0,0 +1,29 @@
1
+ # Tracely
2
+ > Open-source AI observability framework. Trace, debug and detect regressions in LLM apps with one decorator.
3
+
4
+ ## Install
5
+ pip install swarmtrace
6
+
7
+ ## Usage
8
+ from tracely import observe
9
+
10
+ @observe
11
+ def my_agent(question):
12
+ return llm.chat(question)
13
+
14
+ ## Features
15
+ - One decorator, zero config
16
+ - Latency tracking
17
+ - Error capture
18
+ - AI-powered regression detection
19
+ - CLI: `swarmtrace` prints recent traces
20
+ - Works with any LLM
21
+
22
+ ## Why Not LangSmith?
23
+ | | LangSmith | Tracely |
24
+ |---|---|---|
25
+ | Open Source | No | Yes |
26
+ | Any Framework | No | Yes |
27
+ | Self-hosted | No | Yes |
28
+ | Regression Detection | No | Yes |
29
+ | Setup | Complex | One decorator |
@@ -0,0 +1,3 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0"]
3
+ build-backend = "setuptools.build_meta"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,14 @@
1
+ from setuptools import setup, find_packages
2
+
3
# Read the long description with an explicit encoding and inside a context
# manager, so the build does not depend on the locale's default encoding and
# the file handle is closed deterministically.
with open("README.md", encoding="utf-8") as readme_file:
    long_description = readme_file.read()

setup(
    name="swarmtrace",
    version="0.1.0",
    description="pytest for AI agents — trace, debug and catch regressions in LLM swarms",
    long_description=long_description,
    long_description_content_type="text/markdown",
    author="Ravi",
    packages=find_packages(),
    install_requires=["litai", "click", "rich"],
    # The installed command is `swarmtrace`; it calls tracely.cli.view().
    entry_points={"console_scripts": ["swarmtrace=tracely.cli:view"]},
    python_requires=">=3.8",
)
@@ -0,0 +1,46 @@
1
+ Metadata-Version: 2.4
2
+ Name: swarmtrace
3
+ Version: 0.1.0
4
+ Summary: pytest for AI agents — trace, debug and catch regressions in LLM swarms
5
+ Author: Ravi
6
+ Requires-Python: >=3.8
7
+ Description-Content-Type: text/markdown
8
+ Requires-Dist: litai
9
+ Requires-Dist: click
10
+ Requires-Dist: rich
11
+ Dynamic: author
12
+ Dynamic: description
13
+ Dynamic: description-content-type
14
+ Dynamic: requires-dist
15
+ Dynamic: requires-python
16
+ Dynamic: summary
17
+
18
+ # Tracely
19
+ > Open-source AI observability framework. Trace, debug and detect regressions in LLM apps with one decorator.
20
+
21
+ ## Install
22
+ pip install tracely
23
+
24
+ ## Usage
25
+ from tracely import observe
26
+
27
+ @observe
28
+ def my_agent(question):
29
+ return llm.chat(question)
30
+
31
+ ## Features
32
+ - One decorator, zero config
33
+ - Latency tracking
34
+ - Error capture
35
+ - AI-powered regression detection
36
+ - CLI: tracely view
37
+ - Works with any LLM
38
+
39
+ ## Why Not LangSmith?
40
+ | | LangSmith | Tracely |
41
+ |---|---|---|
42
+ | Open Source | No | Yes |
43
+ | Any Framework | No | Yes |
44
+ | Self-hosted | No | Yes |
45
+ | Regression Detection | No | Yes |
46
+ | Setup | Complex | One decorator |
@@ -0,0 +1,15 @@
1
+ README.md
2
+ pyproject.toml
3
+ setup.py
4
+ swarmtrace.egg-info/PKG-INFO
5
+ swarmtrace.egg-info/SOURCES.txt
6
+ swarmtrace.egg-info/dependency_links.txt
7
+ swarmtrace.egg-info/entry_points.txt
8
+ swarmtrace.egg-info/requires.txt
9
+ swarmtrace.egg-info/top_level.txt
10
+ tracely/__init__.py
11
+ tracely/cli.py
12
+ tracely/regression.py
13
+ tracely/replay.py
14
+ tracely/storage.py
15
+ tracely/tracer.py
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ swarmtrace = tracely.cli:view
@@ -0,0 +1,3 @@
1
+ litai
2
+ click
3
+ rich
@@ -0,0 +1 @@
1
+ tracely
@@ -0,0 +1,5 @@
1
+ from tracely.tracer import observe
2
+ from tracely.storage import get_traces, save_trace
3
+
4
+ __version__ = "0.1.0"
5
+ __all__ = ["observe", "get_traces"]
@@ -0,0 +1,53 @@
1
+ import sqlite3
2
+ import os
3
+ import sys
4
+
5
+ DB_PATH = os.path.expanduser("~/.tracely.db")
6
+
7
def get_traces():
    """Return the 20 most recent rows of the ``traces`` table, newest first.

    Returns:
        A list of row tuples as produced by ``SELECT *``.

    Raises:
        sqlite3.Error: if the database at ``DB_PATH`` cannot be opened or
            queried (for example when the ``traces`` table does not exist).
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        return conn.execute(
            "SELECT * FROM traces ORDER BY timestamp DESC LIMIT 20"
        ).fetchall()
    finally:
        # Close even when the query raises so the handle is never leaked.
        conn.close()
12
+
13
def print_tree(traces, parent_id=None, indent=0):
    """Print every trace whose parent is ``parent_id``, recursing into each.

    Args:
        traces: sequence of 11-item rows (id, parent id, function, args,
            output, latency, error, timestamp, input tokens, output tokens,
            cost).
        parent_id: only rows whose second field equals this value are printed
            at this level; ``None`` selects the root rows.
        indent: nesting depth of the rows printed at this level.
    """
    # Select this level's rows up front, before anything is printed.
    matching = list(filter(lambda row: row[1] == parent_id, traces))
    branch = "└── " if indent > 0 else ""
    lead = " " * indent + branch
    for row in matching:
        (trace_id, _parent, name, _call_args, _output, seconds, failure,
         _stamp, tokens_in, tokens_out, usd) = row
        outcome = "ERROR" if failure else "OK"
        print(f"{lead}{name}() [{trace_id}] {seconds}s | {tokens_in}in/{tokens_out}out tokens | ${usd} | {outcome}")
        # Descend into the rows that name this trace as their parent.
        print_tree(traces, trace_id, indent + 1)
21
+
22
def view():
    """Print a table, a call tree and a summary of the most recent traces.

    This is the target of the ``swarmtrace`` console script. When the
    database cannot be read, or holds no traces, it prints
    ``No traces found.`` and returns.
    """
    try:
        traces = get_traces()
    except sqlite3.Error:
        # A missing or unreadable database (or no ``traces`` table yet) is
        # treated as "nothing recorded". Only database errors are caught, so
        # Ctrl-C and programming errors still surface.
        traces = []

    if not traces:
        print("No traces found.")
        return

    # Columns 8, 9 and 10 hold input tokens, output tokens and cost; any of
    # them may be NULL, so missing values count as zero.
    total_cost = sum(t[10] for t in traces if t[10])
    total_tokens = sum((t[8] or 0) + (t[9] or 0) for t in traces)

    print("\n=== Tracely Trace View ===")
    print(f"{'ID':<10} {'FUNCTION':<20} {'LATENCY':<10} {'TOKENS':<15} {'COST':<12} {'STATUS'}")
    print("-" * 80)
    for t in traces:
        id_, parent_id, func, args, output, latency, error, timestamp, in_tok, out_tok, cost = t
        status = "ERROR" if error else "OK"
        tokens_str = f"{in_tok or 0}in/{out_tok or 0}out"
        print(f"{id_:<10} {func:<20} {str(latency)+'s':<10} {tokens_str:<15} ${cost or 0:<11} {status}")

    print("\n=== Tree View ===")
    print_tree(traces)

    print("\n=== Summary ===")
    print(f"Total traces : {len(traces)}")
    print(f"Total tokens : {total_tokens}")
    print(f"Total cost : ${round(total_cost, 6)}")
51
+
52
+ if __name__ == "__main__":
53
+ view()
@@ -0,0 +1,68 @@
1
+ import sys
2
+ sys.path.insert(0, "/teamspace/studios/this_studio/tracely")
3
+
4
+ import os
5
+ from litai import LLM
6
+ from tracely.storage import get_traces, save_trace
7
+ import uuid
8
+ from datetime import datetime
9
+
10
+ llm = LLM(model="anthropic/claude-haiku-4-5-20251001", api_key=os.environ.get("LIGHTNING_API_KEY"))
11
+
12
def score_similarity(output_a: str, output_b: str) -> float:
    """Ask the module-level ``llm`` how similar two outputs are.

    Only the first 300 characters of each output are sent to the model.

    Args:
        output_a: first output to compare.
        output_b: second output to compare.

    Returns:
        A score in the range 0.0 to 1.0, where 1.0 means identical meaning.
        When the reply cannot be parsed as a number (or parses to NaN) the
        neutral value 0.5 is returned.
    """
    prompt = (
        "Compare these two AI outputs and return ONLY a number between 0.0 and 1.0.\n"
        "1.0 = identical meaning. 0.0 = completely different.\n"
        "\n"
        f"Output A: {output_a[:300]}\n"
        f"Output B: {output_b[:300]}\n"
        "\n"
        "Reply with just the number, nothing else."
    )

    score = llm.chat(prompt).strip()
    try:
        value = float(score)
    except ValueError:
        # The model replied with something other than a bare number.
        return 0.5
    if value != value:
        # "nan" parses as a float but compares false against any threshold.
        return 0.5
    # Clamp so out-of-range replies ("1.5", "-0.2", "inf") honour the
    # documented 0.0-1.0 contract.
    return min(1.0, max(0.0, value))
27
+
28
def compare(func, inputs: list, version_a_prompt: str, version_b_prompt: str):
    """Compare two prompt versions against the same inputs and print a report.

    For every input, ``func`` is called once with each prompt version, both
    calls are timed, and the two outputs are scored with
    ``score_similarity``. A similarity below 0.6 is counted as a regression.

    Args:
        func: callable invoked as ``func(input_text, prompt)``.
        inputs: list of input strings to run through both versions.
        version_a_prompt: baseline prompt (v1).
        version_b_prompt: candidate prompt (v2).
    """
    # Imported once per call rather than on every loop iteration.
    import time

    print(f"\n[Tracely Regression] Comparing v1 vs v2 on {len(inputs)} inputs...\n")
    print(f"{'INPUT':<30} {'V1 LATENCY':<12} {'V2 LATENCY':<12} {'SIMILARITY':<12} {'REGRESSION?'}")
    print("-" * 85)

    regressions = 0

    for input_text in inputs:
        # Run v1. perf_counter is monotonic, so the measured interval is not
        # affected by system clock adjustments.
        start = time.perf_counter()
        out_a = func(input_text, version_a_prompt)
        lat_a = round(time.perf_counter() - start, 2)

        # Run v2
        start = time.perf_counter()
        out_b = func(input_text, version_b_prompt)
        lat_b = round(time.perf_counter() - start, 2)

        # Score similarity
        similarity = score_similarity(out_a, out_b)
        regressed = similarity < 0.6

        if regressed:
            regressions += 1

        flag = "🔴 YES" if regressed else "✅ NO"
        # Keep the INPUT column within its 30-character width.
        short_input = input_text[:28] + ".." if len(input_text) > 28 else input_text
        print(f"{short_input:<30} {str(lat_a)+'s':<12} {str(lat_b)+'s':<12} {str(similarity):<12} {flag}")

    print(f"\n{'='*85}")
    print(f"Result: {regressions}/{len(inputs)} regressions detected")
    if regressions > 0:
        print("⚠️ WARNING: Your new prompt may have regressed!")
    else:
        print("✅ No regressions. Safe to ship.")
    print()
@@ -0,0 +1,40 @@
1
+ import sys
2
+ sys.path.insert(0, "/teamspace/studios/this_studio/tracely")
3
+
4
+ from tracely.storage import get_by_id, get_traces
5
+
6
def replay(trace_id: str):
    """Print every stored field of the trace identified by ``trace_id``.

    Prints a "not found" message and returns when ``get_by_id`` yields no
    row for the given id.
    """
    record = get_by_id(trace_id)
    if not record:
        print(f"Trace {trace_id} not found.")
        return

    (_ident, parent, name, call_args, result, seconds, failure,
     stamp, tokens_in, tokens_out, usd) = record

    print("".join(("\n=== Tracely Replay: ", trace_id, " ===")))
    # (label, rendered value) pairs, printed in this order.
    report = (
        ("Function : ", str(name)),
        ("Timestamp : ", str(stamp)),
        ("Args : ", str(call_args)),
        ("Output : ", str(result)),
        ("Latency : ", str(seconds) + "s"),
        ("Tokens : ", str(tokens_in) + " in / " + str(tokens_out) + " out"),
        ("Cost : $", str(usd)),
        ("Error : ", str(failure or "None")),
        ("Parent : ", str(parent or "root")),
    )
    for label, rendered in report:
        print(label + rendered)
24
+
25
def show_failures():
    """Print a table of the failed traces among the 50 most recent ones.

    A trace counts as failed when its error field (column 6) is truthy.
    """
    failures = []
    for row in get_traces(limit=50):
        if row[6]:
            failures.append(row)

    if len(failures) == 0:
        print("No failures found.")
        return

    print("\n=== Failed Traces ===")
    print(f"{'ID':<10} {'FUNCTION':<20} {'ERROR':<40} {'TIMESTAMP'}")
    print("-" * 90)
    for row in failures:
        (trace_id, _parent, name, _call_args, _output, _seconds, failure,
         stamp, _tokens_in, _tokens_out, _usd) = row
        # Truncate the message so it fits the 40-character ERROR column.
        clipped = str(failure)[:38]
        print(f"{trace_id:<10} {name:<20} {clipped:<40} {stamp}")
    print(f"\nTotal failures: {len(failures)}")
    print("\nReplay any failure: from tracely.replay import replay; replay('id')")
@@ -0,0 +1,63 @@
1
+ import sqlite3
2
+ import os
3
+
4
+ DB_PATH = os.path.expanduser("~/.tracely.db")
5
+
6
def init_db():
    """Create the ``traces`` table in the database at ``DB_PATH`` if missing.

    Raises:
        sqlite3.Error: if the database cannot be opened or the statement
            fails.
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        conn.execute("""
            CREATE TABLE IF NOT EXISTS traces (
                id TEXT PRIMARY KEY,
                parent_id TEXT,
                function TEXT,
                args TEXT,
                output TEXT,
                latency_sec REAL,
                error TEXT,
                timestamp TEXT,
                input_tokens INTEGER,
                output_tokens INTEGER,
                cost_usd REAL
            )
        """)
        conn.commit()
    finally:
        # Close even when the statement raises so the handle is never leaked.
        conn.close()
25
+
26
def save_trace(trace: dict):
    """Insert one trace row, replacing any existing row with the same id.

    Args:
        trace: mapping that supplies the named SQL parameters ``id``,
            ``parent_id``, ``function``, ``args``, ``output``,
            ``latency_sec``, ``error``, ``timestamp``, ``input_tokens``,
            ``output_tokens`` and ``cost_usd``.

    Raises:
        sqlite3.Error: if the database cannot be opened or the insert fails
            (including when a named parameter is missing from ``trace``).
    """
    init_db()
    conn = sqlite3.connect(DB_PATH)
    try:
        conn.execute("""
            INSERT OR REPLACE INTO traces
            VALUES (:id, :parent_id, :function, :args, :output,
                    :latency_sec, :error, :timestamp,
                    :input_tokens, :output_tokens, :cost_usd)
        """, trace)
        conn.commit()
    finally:
        # Close even when the insert raises so the handle is never leaked.
        conn.close()
37
+
38
def get_traces(limit=20):
    """Return up to ``limit`` trace rows, newest timestamp first.

    Args:
        limit: maximum number of rows to return.

    Returns:
        A list of row tuples as produced by ``SELECT *``.
    """
    init_db()
    conn = sqlite3.connect(DB_PATH)
    try:
        return conn.execute(
            "SELECT * FROM traces ORDER BY timestamp DESC LIMIT ?", (limit,)
        ).fetchall()
    finally:
        # Close even when the query raises so the handle is never leaked.
        conn.close()
46
+
47
def get_by_id(trace_id):
    """Return the trace row whose id equals ``trace_id``, or ``None``.

    Args:
        trace_id: value matched against the ``id`` column.
    """
    init_db()
    conn = sqlite3.connect(DB_PATH)
    try:
        return conn.execute(
            "SELECT * FROM traces WHERE id = ?", (trace_id,)
        ).fetchone()
    finally:
        # Close even when the query raises so the handle is never leaked.
        conn.close()
55
+
56
def get_tree(parent_id=None):
    """Return the traces whose parent is ``parent_id``, oldest first.

    Args:
        parent_id: parent trace id to match. The query compares with ``IS``,
            so the default ``None`` selects rows whose ``parent_id`` is NULL.

    Returns:
        A list of row tuples as produced by ``SELECT *``.
    """
    init_db()
    conn = sqlite3.connect(DB_PATH)
    try:
        return conn.execute(
            "SELECT * FROM traces WHERE parent_id IS ? ORDER BY timestamp ASC", (parent_id,)
        ).fetchall()
    finally:
        # Close even when the query raises so the handle is never leaked.
        conn.close()
@@ -0,0 +1,137 @@
1
+ import time
2
+ import uuid
3
+ import functools
4
+ import threading
5
+ import asyncio
6
+ from datetime import datetime
7
+ from tracely.storage import save_trace
8
+
9
+ _local = threading.local()
10
+
11
# Pricing in USD per million tokens, keyed by model name. The "default" entry
# is the fallback rate used by get_cost() for models that are not listed.
PRICING = {
    # Claude Haiku 4.5 list price; the previous 0.80 / 4.00 figures were the
    # older 3.5 Haiku rates.
    "anthropic/claude-haiku-4-5-20251001": {"input": 1.00, "output": 5.00},
    "openai/gpt-4o-mini": {"input": 0.15, "output": 0.60},
    "openai/gpt-4o": {"input": 2.50, "output": 10.00},
    "default": {"input": 1.00, "output": 3.00},
}
18
+
19
def get_cost(model, input_tokens, output_tokens):
    """Return the cost of a call, rounded to 8 decimal places.

    Rates are looked up in ``PRICING`` (per million tokens); models that are
    not listed use the ``"default"`` entry.
    """
    fallback = PRICING["default"]
    rates = PRICING.get(model, fallback)
    input_cost = input_tokens * rates["input"] / 1_000_000
    output_cost = output_tokens * rates["output"] / 1_000_000
    return round(input_cost + output_cost, 8)
25
+
26
def extract_real_tokens(result):
    """Try to extract real token counts from an LLM response.

    Supports responses that expose ``usage`` either as an attribute or as a
    dict key, with either naming scheme for the counters:
    ``input_tokens`` / ``output_tokens`` or
    ``prompt_tokens`` / ``completion_tokens``.

    Args:
        result: the value returned by the traced function.

    Returns:
        ``(input_tokens, output_tokens)`` when at least one counter is
        present (a missing counterpart is reported as 0), otherwise
        ``(None, None)`` so the caller can fall back to an estimate.
    """
    if isinstance(result, dict):
        usage = result.get("usage")
    else:
        usage = getattr(result, "usage", None)
    if usage is None:
        return None, None

    def _read(name):
        # ``usage`` may itself be a dict or an object with attributes.
        if isinstance(usage, dict):
            return usage.get(name)
        return getattr(usage, name, None)

    def _first_present(names):
        for name in names:
            value = _read(name)
            if value is not None:
                return value
        return None

    # Check both naming schemes per counter. Testing only ``hasattr(result,
    # "usage")`` first made the prompt/completion names unreachable and
    # reported such responses as an exact (0, 0).
    input_tokens = _first_present(("input_tokens", "prompt_tokens"))
    output_tokens = _first_present(("output_tokens", "completion_tokens"))
    if input_tokens is None and output_tokens is None:
        return None, None
    return input_tokens or 0, output_tokens or 0
40
+
41
def _get_parent_id():
    """Return the trace id stored on the thread-local ``_local``, or None."""
    try:
        return _local.current_trace_id
    except AttributeError:
        # Nothing has been stored on this thread yet.
        return None
43
+
44
def _set_parent_id(trace_id):
    """Store ``trace_id`` on the thread-local ``_local`` as the current id."""
    setattr(_local, "current_trace_id", trace_id)
46
+
47
def _save(trace_id, parent_id, func_name, args, result, latency, error, model="default"):
    """Print a one-line summary of a finished call and persist it.

    Args:
        trace_id: id of the trace being recorded.
        parent_id: id of the enclosing trace, or None for a root call.
        func_name: name of the traced function.
        args: positional arguments of the call (stored truncated to 200
            characters of their ``str`` form).
        result: return value of the call, or None if it did not return.
        latency: elapsed time in seconds.
        error: the exception raised by the call, or None.
        model: key into ``PRICING`` used to compute the cost.
    """
    from datetime import timezone

    # Distinguish "no result" from falsy results: 0, False and "" are real
    # outputs and must not be recorded as missing.
    output_text = None if result is None else str(result)

    # Try real tokens first, fall back to estimate
    input_tokens, output_tokens = extract_real_tokens(result)
    source = "exact"
    if input_tokens is None:
        # Rough heuristic: about four characters per token.
        input_tokens = max(1, len(str(args)) // 4)
        output_tokens = max(1, len(output_text) // 4) if output_text else 0
        source = "estimated"

    cost = get_cost(model, input_tokens, output_tokens)

    if error:
        # Exceptions raised without a message stringify to "", which readers
        # of the stored row would treat as "no error"; fall back to the
        # exception type name.
        error_text = str(error) or type(error).__name__
    else:
        error_text = None

    indent = " " if parent_id else ""
    status = "✗ FAILED" if error else "✓ done"
    print(f"[Tracely] {indent}{status}: {func_name} | {latency}s | {input_tokens}in/{output_tokens}out ({source}) | ${cost}")

    # datetime.utcnow() is deprecated; build the same naive-UTC value from an
    # aware "now" so the stored ISO string keeps its existing format.
    timestamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    save_trace({
        "id": trace_id,
        "parent_id": parent_id,
        "function": func_name,
        "args": str(args)[:200],
        "output": output_text[:200] if output_text is not None else None,
        "latency_sec": latency,
        "error": error_text,
        "timestamp": timestamp,
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
        "cost_usd": cost,
    })
75
+
76
def observe(func=None, model="default"):
    """Decorator that traces a sync or async function call.

    Each call gets a short id, is linked to the enclosing traced call (if
    any) as its parent, is timed, and is recorded through ``_save`` whether
    it returns or raises. Exceptions are re-raised unchanged.

    Usage:
        @observe
        def my_agent(): ...

        @observe(model="anthropic/claude-haiku-4-5-20251001")
        def my_agent(): ...

    Args:
        func: the function to wrap when used as a bare ``@observe``.
        model: pricing key passed on to ``_save`` for cost calculation.
    """
    def decorator(f):
        if asyncio.iscoroutinefunction(f):
            @functools.wraps(f)
            async def async_wrapper(*args, **kwargs):
                trace_id = str(uuid.uuid4())[:8]
                parent_id = _get_parent_id()
                _set_parent_id(trace_id)
                start = time.time()
                error = None
                result = None
                indent = " " if parent_id else ""
                print(f"[Tracely] {indent}▶ {f.__name__} started (id={trace_id})")
                try:
                    result = await f(*args, **kwargs)
                    return result
                except BaseException as exc:
                    # BaseException also covers task cancellation and
                    # KeyboardInterrupt, which would otherwise be recorded
                    # as a successful call.
                    error = exc
                    # Bare raise keeps the original traceback intact.
                    raise
                finally:
                    latency = round(time.time() - start, 3)
                    try:
                        _save(trace_id, parent_id, f.__name__, args, result, latency, error, model)
                    finally:
                        # Restore the parent even if saving fails, so later
                        # calls are not nested under this finished trace.
                        _set_parent_id(parent_id)
            return async_wrapper

        @functools.wraps(f)
        def sync_wrapper(*args, **kwargs):
            trace_id = str(uuid.uuid4())[:8]
            parent_id = _get_parent_id()
            _set_parent_id(trace_id)
            start = time.time()
            error = None
            result = None
            indent = " " if parent_id else ""
            print(f"[Tracely] {indent}▶ {f.__name__} started (id={trace_id})")
            try:
                result = f(*args, **kwargs)
                return result
            except BaseException as exc:
                # Record KeyboardInterrupt/SystemExit as failures too.
                error = exc
                # Bare raise keeps the original traceback intact.
                raise
            finally:
                latency = round(time.time() - start, 3)
                try:
                    _save(trace_id, parent_id, f.__name__, args, result, latency, error, model)
                finally:
                    # Restore the parent even if saving fails.
                    _set_parent_id(parent_id)
        return sync_wrapper

    # Bare ``@observe`` passes the function directly; ``@observe(...)`` must
    # return the decorator itself.
    if func is not None:
        return decorator(func)
    return decorator