tracellm 0.2.0__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tracellm
3
- Version: 0.2.0
3
+ Version: 0.2.2
4
4
  Summary: Local-first observability for LLM applications
5
5
  Requires-Python: >=3.9
6
6
  Description-Content-Type: text/markdown
@@ -13,45 +13,78 @@ every API interaction logged locally, queryable from your terminal.
13
13
  No backend. No signup. Nothing leaves your machine.
14
14
 
15
15
  ## Install
16
-
16
+ ```bash
17
17
  pip install tracellm
18
+ ```
18
19
 
19
20
  ## Usage
20
-
21
+ ```python
21
22
  from tracellm import trace
22
23
  import groq
23
24
 
24
25
  client = groq.Groq(api_key="your-key")
25
26
 
26
27
  @trace
27
- def llm_call(model="llama-3.1-8b-instant", messages=[{"role": "user", "content": "hello"}]):
28
+ def ask(model, messages):
28
29
  return client.chat.completions.create(model=model, messages=messages)
29
30
 
30
- llm_call()
31
-
32
- That's it. Every call is traced automatically.
31
+ ask(
32
+ model="llama-3.1-8b-instant",
33
+ messages=[{"role": "user", "content": "Explain black holes in one line"}]
34
+ )
35
+ ```
33
36
 
34
- ## Query traces from terminal
37
+ Every call is traced automatically. No try/except. No setup.
35
38
 
39
+ ## Query traces
40
+ ```bash
36
41
  python -m tracellm.cli --Status failed
37
42
  python -m tracellm.cli --Latency 2.0
38
43
  python -m tracellm.cli --Model llama-3.1-8b-instant
39
44
  python -m tracellm.cli --Status failed --Latency 1.5
45
+ python -m tracellm.cli --Time "2026-04-03"
46
+ ```
40
47
 
41
- ## What gets captured
48
+ ## Cost tracking
49
+ ```bash
50
+ # cost per trace
51
+ python -m tracellm.cli --Cost
52
+
53
+ # full summary by model
54
+ python -m tracellm.cli --Cost Summary
55
+ ```
56
+
57
+ Output:
58
+ === Cost Summary ===
59
+ llama-3.1-8b-instant
60
+ Calls : 8
61
+ Tokens : 405
62
+ Cost : $0.000020
42
63
 
64
+ Total calls made : 8
65
+ Total tokens used: 405
66
+ Total cost : $0.000020
67
+
68
+ ## What gets captured
43
69
  - Model, prompt, response
44
70
  - Tokens used, latency, finish reason
45
71
  - Error type and message on failures
46
72
  - Timestamp for every call
47
73
 
48
- ## Limitations
74
+ ## Pricing
75
+ Default pricing is bundled. To override, create `~/.tracellm/pricing.json`:
76
+ ```json
77
+ {
78
+ "my-custom-model": 0.05
79
+ }
80
+ ```
81
+ Values are per million tokens.
49
82
 
50
- Storage is append-only JSON lines. Latency query supports >=
51
- for latency, exact match for everything else. Early days.
83
+ ## Limitations
84
+ Storage is append-only JSON lines. The `--Latency` and `--Time` filters match
85
+ values `>=` the one given; every other filter is an exact match. Early days.
52
86
 
53
87
  ## Roadmap
54
-
55
88
  - Binary storage for faster querying at scale
56
- - Cost calculation per model
89
+ - Async tracing support
57
90
  - Terminal dashboard
@@ -0,0 +1,83 @@
1
+ # tracellm
2
+
3
+ Lightweight tracing for LLM applications. One decorator —
4
+ every API interaction logged locally, queryable from your terminal.
5
+
6
+ No backend. No signup. Nothing leaves your machine.
7
+
8
+ ## Install
9
+ ```bash
10
+ pip install tracellm
11
+ ```
12
+
13
+ ## Usage
14
+ ```python
15
+ from tracellm import trace
16
+ import groq
17
+
18
+ client = groq.Groq(api_key="your-key")
19
+
20
+ @trace
21
+ def ask(model, messages):
22
+ return client.chat.completions.create(model=model, messages=messages)
23
+
24
+ ask(
25
+ model="llama-3.1-8b-instant",
26
+ messages=[{"role": "user", "content": "Explain black holes in one line"}]
27
+ )
28
+ ```
29
+
30
+ Every call is traced automatically. No try/except. No setup.
31
+
32
+ ## Query traces
33
+ ```bash
34
+ python -m tracellm.cli --Status failed
35
+ python -m tracellm.cli --Latency 2.0
36
+ python -m tracellm.cli --Model llama-3.1-8b-instant
37
+ python -m tracellm.cli --Status failed --Latency 1.5
38
+ python -m tracellm.cli --Time "2026-04-03"
39
+ ```
40
+
41
+ ## Cost tracking
42
+ ```bash
43
+ # cost per trace
44
+ python -m tracellm.cli --Cost
45
+
46
+ # full summary by model
47
+ python -m tracellm.cli --Cost Summary
48
+ ```
49
+
50
+ Output:
51
+ === Cost Summary ===
52
+ llama-3.1-8b-instant
53
+ Calls : 8
54
+ Tokens : 405
55
+ Cost : $0.000020
56
+
57
+ Total calls made : 8
58
+ Total tokens used: 405
59
+ Total cost : $0.000020
60
+
61
+ ## What gets captured
62
+ - Model, prompt, response
63
+ - Tokens used, latency, finish reason
64
+ - Error type and message on failures
65
+ - Timestamp for every call
66
+
67
+ ## Pricing
68
+ Default pricing is bundled. To override, create `~/.tracellm/pricing.json`:
69
+ ```json
70
+ {
71
+ "my-custom-model": 0.05
72
+ }
73
+ ```
74
+ Values are per million tokens.
75
+
76
+ ## Limitations
77
+ Storage is append-only JSON lines. The `--Latency` and `--Time` filters match
78
+ values `>=` the one given; every other filter is an exact match. Early days.
79
+
80
+ ## Roadmap
81
+ - Binary storage for faster querying at scale
82
+ - Async tracing support
83
+ - Terminal dashboard
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "tracellm"
7
- version = "0.2.0"
7
+ version = "0.2.2"
8
8
  description = "Local-first observability for LLM applications"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -0,0 +1,3 @@
1
+ from .tracer import Tracer
2
+ from .decorator import trace
3
+ from .pricing import load_pricing
@@ -0,0 +1,108 @@
1
+ import argparse
2
+ import json
3
+ import os
4
+ from datetime import datetime
5
+ from tracellm import load_pricing
6
+
7
TRACE_FILE = os.path.join(os.path.dirname(__file__), '..', 'trace.txt')

# Layout of the 'Timestamp' field that the --Time filter parses from a trace.
TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S"

# Not read anywhere in this module; retained so existing imports keep working.
Running_total = {"Model": None, "Calls": 0, "Tokens": 0, "Cost": 0}

parser = argparse.ArgumentParser()
parser.add_argument(
    "-S", "--Status",
    help="Based on what status do filter",
    choices=["success", "failed"],
)
# type=float makes argparse reject a non-numeric threshold with a usage error
# instead of failing later, inside the filter, with a traceback.
parser.add_argument(
    "-L", "--Latency", type=float,
    help="Based on what latency do filter",
)
parser.add_argument("-M", "--Model", help="Based on what model do filter")
parser.add_argument("-E", "--Error", help="Based on what type of Error do filter")
parser.add_argument("-T", "--Time", help="Based on what time do filter")
# nargs="?" lets a bare `--Cost` select the per-trace report (const="Cost");
# `--Cost Summary` still selects the aggregated report.
parser.add_argument(
    "-C", "--Cost", nargs="?", const="Cost",
    help="Calculates the token cost , use word 'Summary' for cost summary",
)


def parse_time_input(s):
    """Parse a ``--Time`` argument into a ``datetime``.

    Accepts ``YYYY-MM-DD HH:MM:SS`` or a bare ``YYYY-MM-DD`` (which parses to
    midnight of that day).

    Raises:
        ValueError: if ``s`` matches neither format.
    """
    for fmt in (TIMESTAMP_FORMAT, "%Y-%m-%d"):
        try:
            return datetime.strptime(s, fmt)
        except ValueError:
            continue
    raise ValueError(f"Time format not recognised: {s}")


def _latency_at_least(trace, floor):
    """Return True when the trace has a numeric 'Latency' that is >= floor."""
    latency = trace.get('Latency')
    # A trace without a numeric latency simply does not match.
    return isinstance(latency, (int, float)) and latency >= floor


def _timestamp_at_least(trace, query_time):
    """Return True when the trace's 'Timestamp' parses and is >= query_time."""
    try:
        stamp = datetime.strptime(trace.get('Timestamp', ''), TIMESTAMP_FORMAT)
    except (TypeError, ValueError):
        # Missing, non-string or malformed timestamp: treat as a non-match.
        return False
    return stamp >= query_time


def _build_conditions(args):
    """Turn parsed CLI arguments into a list of ``trace -> bool`` predicates.

    Raises:
        ValueError: if ``args.Time`` is in an unrecognised format.
    """
    conditions = []
    if args.Status:
        conditions.append(lambda t, want=args.Status: t.get('Status') == want)
    # `is not None` so that an explicit threshold of 0 still installs a filter.
    if args.Latency is not None:
        conditions.append(
            lambda t, floor=args.Latency: _latency_at_least(t, floor)
        )
    if args.Model:
        conditions.append(lambda t, want=args.Model: t.get('Model') == want)
    if args.Error:
        conditions.append(lambda t, want=args.Error: t.get('Error Type') == want)
    if args.Time:
        query_time = parse_time_input(args.Time)
        conditions.append(lambda t, qt=query_time: _timestamp_at_least(t, qt))
    return conditions


def _read_traces(path):
    """Yield one decoded JSON object per non-blank line of the file at ``path``."""
    with open(path, 'r') as trace_file:
        for line in trace_file:
            line = line.strip()
            if line:
                yield json.loads(line)


def _print_matching_traces(traces, conditions):
    """Print every trace that satisfies all of ``conditions``."""
    for trace in traces:
        if all(condition(trace) for condition in conditions):
            print("\n--- Trace ---")
            for key, value in trace.items():
                print(f" {key}: {value}")
            print("-------------")


def _report_costs(traces, pricing, summary):
    """Print token costs for ``traces`` and return the per-model breakdown.

    Args:
        traces: iterable of trace dicts.
        pricing: mapping of model name to price per million tokens.
        summary: when False, print one entry per priced trace; when True,
            print per-model totals followed by grand totals.

    Returns:
        dict mapping model name to its 'Total Calls', 'Total Tokens' and
        'Total Cost'. It is empty when ``summary`` is False.
    """
    total_calls = 0
    total_tokens = 0
    total_cost = 0
    model_breakdown = {}
    for trace in traces:
        tokens = trace.get('Tokens used')
        # Traces with no numeric token count cannot be priced; skip them.
        if not isinstance(tokens, (int, float)):
            continue
        model = trace.get('Model')
        if model not in pricing:
            print(f"Model :{model} is not stored.\nPlease config the pricing file and add the model ")
            continue
        cost = tokens * pricing[model] / 1000000
        if not summary:
            print(f"Model : {model}")
            print(f"Tokens Used : {tokens}")
            print(f"Cost : {cost}")
            print("---------------------\n")
            continue
        total_cost += cost
        total_calls += 1
        total_tokens += tokens
        entry = model_breakdown.setdefault(
            model, {'Total Calls': 0, 'Total Tokens': 0, 'Total Cost': 0}
        )
        entry['Total Calls'] += 1
        entry['Total Tokens'] += tokens
        entry['Total Cost'] += cost
    if summary:
        print("\n=== Cost Summary ===")
        for model, data in model_breakdown.items():
            print(f"\n {model}")
            print(f" Calls : {data['Total Calls']}")
            print(f" Tokens : {data['Total Tokens']}")
            print(f" Cost : ${data['Total Cost']:.6f}")
        print("====================")
        print(f"\n Total calls made : {total_calls}")
        print(f" Total Tokens used : {total_tokens}")
        print(f" Total cost: ${total_cost:.6f}")
        print("====================")
    return model_breakdown


def main(argv=None):
    """Run the trace query / cost CLI.

    Args:
        argv: argument list to parse; ``None`` means ``sys.argv[1:]``.

    Filters (if any were given) are applied first and matching traces are
    printed; then, if ``--Cost`` was given, the cost report is printed.
    Exits with a usage error on an unparseable ``--Time`` value and with a
    short message when the trace file does not exist.
    """
    args = parser.parse_args(argv)
    try:
        conditions = _build_conditions(args)
    except ValueError as exc:
        parser.error(str(exc))
    # Load pricing outside the try below so that only a missing *trace* file
    # is reported as such.
    pricing = load_pricing() if args.Cost else None
    try:
        if conditions:
            _print_matching_traces(_read_traces(TRACE_FILE), conditions)
        if args.Cost:
            _report_costs(_read_traces(TRACE_FILE), pricing, args.Cost == 'Summary')
    except FileNotFoundError:
        raise SystemExit(f"Trace file not found: {TRACE_FILE}") from None


if __name__ == "__main__":
    main()
105
+
106
+
107
+
108
+
@@ -0,0 +1,33 @@
1
+ import os
2
+ import json
3
# Bundled default prices, keyed by model name. Values are per million tokens.
Pricing = {
    "gpt-oss-20B (low)": 0.0675,
    "gpt-oss-20B (high)": 1.25,
    "Gemini 3.1 Pro Preview": 1.15,
    "GPT-5.4 (xhigh)": 12.50,
    "GPT-5.3 Codex (xhigh)": 11.50,
    "Claude Opus 4.6 (max)": 11.25,
    "Mercury 2": 0.04,
    "Qwen3.5 0.8B": 0.02,
    "Granite 4.0 H Small": 0.03,
    "Granite 3.3 8B": 0.04,
    "DeepSeek R1 Distill Qwen 32B": 0.09,
    "NVIDIA Nemotron 3 Nano": 0.03,
    "Gemma 3n E4B": 0.03,
    "Qwen3.5 2B": 0.04,
    "Llama 4 Scout": 0.05,
    "Grok 4.20 Beta 0309": 0.45,
    "Gemini 2.0 Pro Experimental": 1.10,
    "llama-3.1-8b-instant": 0.05,
}

# Location of the optional per-user override file: ~/.tracellm/pricing.json
user_home = os.path.expanduser('~')
config_dirc = os.path.join(user_home, '.tracellm')
personal_dict = os.path.join(config_dirc, 'pricing.json')


def load_pricing():
    """Return the effective model -> price table.

    Starts from the bundled ``Pricing`` defaults and layers the entries of the
    user's ``pricing.json`` (if that file exists) on top, so a user file can
    add models or override individual prices without losing the defaults.

    Returns:
        A new dict; mutating it does not affect the module-level ``Pricing``.

    Raises:
        ValueError: if the user file is not valid JSON (``json.JSONDecodeError``
            is a ``ValueError``) or its top-level value is not a JSON object.
    """
    try:
        with open(personal_dict, 'r') as f:
            overrides = json.load(f)
    except FileNotFoundError:
        # No user file: fall back to the bundled table (copied, so callers
        # cannot accidentally mutate the shared default).
        return dict(Pricing)
    if not isinstance(overrides, dict):
        raise ValueError(
            f"{personal_dict} must contain a JSON object mapping model names to prices"
        )
    return {**Pricing, **overrides}
33
+
@@ -2,6 +2,7 @@ import json
2
2
  import time
3
3
  from datetime import datetime
4
4
  import os
5
+
5
6
  TRACE_FILE = os.path.join(os.path.dirname(__file__), '..', 'trace.txt')
6
7
 
7
8
  class Tracer:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tracellm
3
- Version: 0.2.0
3
+ Version: 0.2.2
4
4
  Summary: Local-first observability for LLM applications
5
5
  Requires-Python: >=3.9
6
6
  Description-Content-Type: text/markdown
@@ -13,45 +13,78 @@ every API interaction logged locally, queryable from your terminal.
13
13
  No backend. No signup. Nothing leaves your machine.
14
14
 
15
15
  ## Install
16
-
16
+ ```bash
17
17
  pip install tracellm
18
+ ```
18
19
 
19
20
  ## Usage
20
-
21
+ ```python
21
22
  from tracellm import trace
22
23
  import groq
23
24
 
24
25
  client = groq.Groq(api_key="your-key")
25
26
 
26
27
  @trace
27
- def llm_call(model="llama-3.1-8b-instant", messages=[{"role": "user", "content": "hello"}]):
28
+ def ask(model, messages):
28
29
  return client.chat.completions.create(model=model, messages=messages)
29
30
 
30
- llm_call()
31
-
32
- That's it. Every call is traced automatically.
31
+ ask(
32
+ model="llama-3.1-8b-instant",
33
+ messages=[{"role": "user", "content": "Explain black holes in one line"}]
34
+ )
35
+ ```
33
36
 
34
- ## Query traces from terminal
37
+ Every call is traced automatically. No try/except. No setup.
35
38
 
39
+ ## Query traces
40
+ ```bash
36
41
  python -m tracellm.cli --Status failed
37
42
  python -m tracellm.cli --Latency 2.0
38
43
  python -m tracellm.cli --Model llama-3.1-8b-instant
39
44
  python -m tracellm.cli --Status failed --Latency 1.5
45
+ python -m tracellm.cli --Time "2026-04-03"
46
+ ```
40
47
 
41
- ## What gets captured
48
+ ## Cost tracking
49
+ ```bash
50
+ # cost per trace
51
+ python -m tracellm.cli --Cost
52
+
53
+ # full summary by model
54
+ python -m tracellm.cli --Cost Summary
55
+ ```
56
+
57
+ Output:
58
+ === Cost Summary ===
59
+ llama-3.1-8b-instant
60
+ Calls : 8
61
+ Tokens : 405
62
+ Cost : $0.000020
42
63
 
64
+ Total calls made : 8
65
+ Total tokens used: 405
66
+ Total cost : $0.000020
67
+
68
+ ## What gets captured
43
69
  - Model, prompt, response
44
70
  - Tokens used, latency, finish reason
45
71
  - Error type and message on failures
46
72
  - Timestamp for every call
47
73
 
48
- ## Limitations
74
+ ## Pricing
75
+ Default pricing is bundled. To override, create `~/.tracellm/pricing.json`:
76
+ ```json
77
+ {
78
+ "my-custom-model": 0.05
79
+ }
80
+ ```
81
+ Values are per million tokens.
49
82
 
50
- Storage is append-only JSON lines. Latency query supports >=
51
- for latency, exact match for everything else. Early days.
83
+ ## Limitations
84
+ Storage is append-only JSON lines. The `--Latency` and `--Time` filters match
85
+ values `>=` the one given; every other filter is an exact match. Early days.
52
86
 
53
87
  ## Roadmap
54
-
55
88
  - Binary storage for faster querying at scale
56
- - Cost calculation per model
89
+ - Async tracing support
57
90
  - Terminal dashboard
@@ -3,6 +3,7 @@ pyproject.toml
3
3
  tracellm/__init__.py
4
4
  tracellm/cli.py
5
5
  tracellm/decorator.py
6
+ tracellm/pricing.py
6
7
  tracellm/tracer.py
7
8
  tracellm.egg-info/PKG-INFO
8
9
  tracellm.egg-info/SOURCES.txt
tracellm-0.2.0/README.md DELETED
@@ -1,50 +0,0 @@
1
- # tracellm
2
-
3
- Lightweight tracing for LLM applications. One decorator —
4
- every API interaction logged locally, queryable from your terminal.
5
-
6
- No backend. No signup. Nothing leaves your machine.
7
-
8
- ## Install
9
-
10
- pip install tracellm
11
-
12
- ## Usage
13
-
14
- from tracellm import trace
15
- import groq
16
-
17
- client = groq.Groq(api_key="your-key")
18
-
19
- @trace
20
- def llm_call(model="llama-3.1-8b-instant", messages=[{"role": "user", "content": "hello"}]):
21
- return client.chat.completions.create(model=model, messages=messages)
22
-
23
- llm_call()
24
-
25
- That's it. Every call is traced automatically.
26
-
27
- ## Query traces from terminal
28
-
29
- python -m tracellm.cli --Status failed
30
- python -m tracellm.cli --Latency 2.0
31
- python -m tracellm.cli --Model llama-3.1-8b-instant
32
- python -m tracellm.cli --Status failed --Latency 1.5
33
-
34
- ## What gets captured
35
-
36
- - Model, prompt, response
37
- - Tokens used, latency, finish reason
38
- - Error type and message on failures
39
- - Timestamp for every call
40
-
41
- ## Limitations
42
-
43
- Storage is append-only JSON lines. Latency query supports >=
44
- for latency, exact match for everything else. Early days.
45
-
46
- ## Roadmap
47
-
48
- - Binary storage for faster querying at scale
49
- - Cost calculation per model
50
- - Terminal dashboard
@@ -1,2 +0,0 @@
1
- from .tracer import Tracer
2
- from .decorator import trace
@@ -1,33 +0,0 @@
1
- import argparse
2
- import json
3
- import os
4
- TRACE_FILE = os.path.join(os.path.dirname(__file__), '..', 'trace.txt')
5
-
6
- parser = argparse.ArgumentParser()
7
- parser.add_argument("-S" , "--Status" , help = "Based on what status do filter" , choices = ["success" , "failed"])
8
- parser.add_argument("-L" , "--Latency" , help = "Based on what latency do filter")
9
- parser.add_argument("-M" , "--Model" , help = "Based on what model do filter")
10
- parser.add_argument("-E" , "--Error" , help = "Based on what type of Error do filter")
11
- parser.add_argument("-T" , "--Time" , help = "Based on what time do filter")
12
- args = parser.parse_args()
13
-
14
-
15
- conditions = []
16
- if args.Status:
17
- conditions.append(lambda t : t.get('Status') == args.Status)
18
- if args.Latency:
19
- conditions.append(lambda t : t.get('Latency') >= float(args.Latency))
20
- if args.Model:
21
- conditions.append(lambda t : t.get('Model') == args.Model)
22
- if args.Error:
23
- conditions.append(lambda t : t.get('Error Type' , None) == args.Error)
24
- if args.Time:
25
- conditions.append(lambda t : t.get('Start Time') >= float(args.Time))
26
- with open(TRACE_FILE , 'r') as trace_file:
27
- for line in trace_file:
28
- trace = json.loads(line)
29
- if all(condition(trace) for condition in conditions):
30
- print("\n--- Trace ---")
31
- for key, value in trace.items():
32
- print(f" {key}: {value}")
33
- print("-------------")
File without changes
File without changes