tracellm 0.2.1__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tracellm
3
- Version: 0.2.1
3
+ Version: 0.2.2
4
4
  Summary: Local-first observability for LLM applications
5
5
  Requires-Python: >=3.9
6
6
  Description-Content-Type: text/markdown
@@ -32,46 +32,59 @@ ask(
32
32
  model="llama-3.1-8b-instant",
33
33
  messages=[{"role": "user", "content": "Explain black holes in one line"}]
34
34
  )
35
- ...
36
35
  ```
37
- Output on query:
38
- ```
39
- --- Trace ---
40
- Model: llama-3.1-8b-instant
41
- Prompt: Explain black holes in one line
42
- Response: A black hole is a region where gravity...
43
- Tokens: 43
44
- Latency: 0.847
45
- Status: success
46
- Timestamp: 2026-04-03 19:46:27
47
- -------------
48
- ...
49
- ```
50
- That's it. Every call is traced automatically.
51
36
 
52
- ## Query traces from terminal
37
+ Every call is traced automatically. No try/except. No setup.
38
+
39
+ ## Query traces
53
40
  ```bash
54
41
  python -m tracellm.cli --Status failed
55
42
  python -m tracellm.cli --Latency 2.0
56
43
  python -m tracellm.cli --Model llama-3.1-8b-instant
57
44
  python -m tracellm.cli --Status failed --Latency 1.5
58
- ...
45
+ python -m tracellm.cli --Time "2026-04-03"
59
46
  ```
60
47
 
61
- ## What gets captured
48
+ ## Cost tracking
49
+ ```bash
50
+ # cost per trace
51
+ python -m tracellm.cli --Cost
52
+
53
+ # full summary by model
54
+ python -m tracellm.cli --Cost Summary
55
+ ```
62
56
 
57
+ Output:
58
+ === Cost Summary ===
59
+ llama-3.1-8b-instant
60
+ Calls : 8
61
+ Tokens : 405
62
+ Cost : $0.000020
63
+
64
+ Total calls made : 8
65
+ Total tokens used: 405
66
+ Total cost : $0.000020
67
+
68
+ ## What gets captured
63
69
  - Model, prompt, response
64
70
  - Tokens used, latency, finish reason
65
71
  - Error type and message on failures
66
72
  - Timestamp for every call
67
73
 
68
- ## Limitations
74
+ ## Pricing
75
+ Default pricing is bundled. To override, create `~/.tracellm/pricing.json`:
76
+ ```json
77
+ {
78
+ "my-custom-model": 0.05
79
+ }
80
+ ```
81
+ Values are per million tokens.
69
82
 
70
- Storage is append-only JSON lines. Latency query supports >=
71
- for latency, exact match for everything else. Early days.
83
+ ## Limitations
84
+ Storage is append-only JSON lines. The latency and time filters match with `>=`;
85
+ every other filter is an exact match. Early days.
72
86
 
73
87
  ## Roadmap
74
-
75
88
  - Binary storage for faster querying at scale
76
- - Cost calculation per model
89
+ - Async tracing support
77
90
  - Terminal dashboard
@@ -25,46 +25,59 @@ ask(
25
25
  model="llama-3.1-8b-instant",
26
26
  messages=[{"role": "user", "content": "Explain black holes in one line"}]
27
27
  )
28
- ...
29
28
  ```
30
- Output on query:
31
- ```
32
- --- Trace ---
33
- Model: llama-3.1-8b-instant
34
- Prompt: Explain black holes in one line
35
- Response: A black hole is a region where gravity...
36
- Tokens: 43
37
- Latency: 0.847
38
- Status: success
39
- Timestamp: 2026-04-03 19:46:27
40
- -------------
41
- ...
42
- ```
43
- That's it. Every call is traced automatically.
44
29
 
45
- ## Query traces from terminal
30
+ Every call is traced automatically. No try/except. No setup.
31
+
32
+ ## Query traces
46
33
  ```bash
47
34
  python -m tracellm.cli --Status failed
48
35
  python -m tracellm.cli --Latency 2.0
49
36
  python -m tracellm.cli --Model llama-3.1-8b-instant
50
37
  python -m tracellm.cli --Status failed --Latency 1.5
51
- ...
38
+ python -m tracellm.cli --Time "2026-04-03"
52
39
  ```
53
40
 
54
- ## What gets captured
41
+ ## Cost tracking
42
+ ```bash
43
+ # cost per trace
44
+ python -m tracellm.cli --Cost
45
+
46
+ # full summary by model
47
+ python -m tracellm.cli --Cost Summary
48
+ ```
55
49
 
50
+ Output:
51
+ === Cost Summary ===
52
+ llama-3.1-8b-instant
53
+ Calls : 8
54
+ Tokens : 405
55
+ Cost : $0.000020
56
+
57
+ Total calls made : 8
58
+ Total tokens used: 405
59
+ Total cost : $0.000020
60
+
61
+ ## What gets captured
56
62
  - Model, prompt, response
57
63
  - Tokens used, latency, finish reason
58
64
  - Error type and message on failures
59
65
  - Timestamp for every call
60
66
 
61
- ## Limitations
67
+ ## Pricing
68
+ Default pricing is bundled. To override, create `~/.tracellm/pricing.json`:
69
+ ```json
70
+ {
71
+ "my-custom-model": 0.05
72
+ }
73
+ ```
74
+ Values are per million tokens.
62
75
 
63
- Storage is append-only JSON lines. Latency query supports >=
64
- for latency, exact match for everything else. Early days.
76
+ ## Limitations
77
+ Storage is append-only JSON lines. The latency and time filters match with `>=`;
78
+ every other filter is an exact match. Early days.
65
79
 
66
80
  ## Roadmap
67
-
68
81
  - Binary storage for faster querying at scale
69
- - Cost calculation per model
82
+ - Async tracing support
70
83
  - Terminal dashboard
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "tracellm"
7
- version = "0.2.1"
7
+ version = "0.2.2"
8
8
  description = "Local-first observability for LLM applications"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -0,0 +1,3 @@
1
+ from .tracer import Tracer
2
+ from .decorator import trace
3
+ from .pricing import load_pricing
@@ -0,0 +1,108 @@
1
+ import argparse
2
+ import json
3
+ import os
4
+ from datetime import datetime
5
+ from tracellm import load_pricing
6
+
7
TRACE_FILE = os.path.join(os.path.dirname(__file__), '..', 'trace.txt')

# Kept only so the module-level name survives; nothing in this file reads it.
Running_total = {"Model": None, "Calls": 0, "Tokens": 0, "Cost": 0}

# Layout of the 'Timestamp' field that the time filter parses from each trace.
TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S"


def parse_time_input(s):
    """Parse a ``--Time`` value into a ``datetime``.

    Accepts ``YYYY-MM-DD HH:MM:SS`` or a bare ``YYYY-MM-DD`` (which parses to
    midnight of that day).

    Raises:
        ValueError: if ``s`` matches neither layout.
    """
    for fmt in (TIMESTAMP_FORMAT, "%Y-%m-%d"):
        try:
            return datetime.strptime(s, fmt)
        except ValueError:
            continue
    raise ValueError(f"Time format not recognised: {s}")


def build_parser():
    """Return the argument parser for the trace query CLI."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-S", "--Status", help="Based on what status do filter",
                        choices=["success", "failed"])
    # type=float makes argparse reject a non-numeric threshold up front
    # instead of failing later while a trace is being filtered.
    parser.add_argument("-L", "--Latency", type=float,
                        help="Based on what latency do filter")
    parser.add_argument("-M", "--Model", help="Based on what model do filter")
    parser.add_argument("-E", "--Error", help="Based on what type of Error do filter")
    # A ValueError from parse_time_input is reported by argparse as a usage error.
    parser.add_argument("-T", "--Time", type=parse_time_input,
                        help="Based on what time do filter")
    # nargs="?" lets a bare `--Cost` work: without it argparse demands a
    # value, so the per-trace mode could only be reached with a dummy word.
    parser.add_argument("-C", "--Cost", nargs="?", const="Cost",
                        help="Calculates the token cost , use word 'Summary' for cost summary")
    return parser


def latency_at_least(value, minimum):
    """Return True when ``value`` is a number that is >= ``minimum``.

    A missing or non-numeric latency counts as "no match" rather than raising.
    """
    return isinstance(value, (int, float)) and value >= minimum


def timestamp_not_before(value, moment):
    """Return True when the trace timestamp ``value`` is at or after ``moment``.

    A missing or malformed timestamp counts as "no match" rather than raising.
    """
    try:
        return datetime.strptime(value, TIMESTAMP_FORMAT) >= moment
    except (TypeError, ValueError):
        return False


def build_conditions(args):
    """Turn parsed CLI arguments into a list of per-trace predicates.

    Each predicate takes one trace dict and returns a bool; a trace is shown
    only when every predicate accepts it. An empty list means no filter was
    requested.
    """
    conditions = []
    if args.Status:
        conditions.append(lambda t: t.get('Status') == args.Status)
    if args.Latency is not None:
        conditions.append(lambda t: latency_at_least(t.get('Latency'), args.Latency))
    if args.Model:
        conditions.append(lambda t: t.get('Model') == args.Model)
    if args.Error:
        conditions.append(lambda t: t.get('Error Type', None) == args.Error)
    if args.Time is not None:
        conditions.append(lambda t: timestamp_not_before(t.get('Timestamp'), args.Time))
    return conditions


def iter_traces(path):
    """Yield one decoded trace dict per non-blank line of the file at ``path``.

    Raises:
        FileNotFoundError: if ``path`` does not exist (raised on first iteration).
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    with open(path, 'r') as trace_file:
        for line in trace_file:
            if line.strip():  # a blank line would make json.loads raise
                yield json.loads(line)


def print_matching_traces(traces, conditions):
    """Print every trace in ``traces`` that satisfies all ``conditions``."""
    for trace in traces:
        if all(condition(trace) for condition in conditions):
            print("\n--- Trace ---")
            for key, value in trace.items():
                print(f" {key}: {value}")
            print("-------------")


def price_trace(trace, pricing):
    """Return ``(model, tokens, cost)`` for a trace, or None if it cannot be priced.

    ``pricing`` maps model name to a price per million tokens. Traces without
    a numeric 'Tokens used' value are skipped silently; traces whose model is
    absent from ``pricing`` are skipped after printing a notice.
    """
    tokens = trace.get('Tokens used')
    if not isinstance(tokens, (int, float)):
        return None
    model = trace.get('Model')
    if model not in pricing:
        print(f"Model :{model} is not stored.\nPlease config the pricing file and add the model ")
        return None
    return model, tokens, tokens * pricing[model] / 1000000


def print_trace_costs(traces, pricing):
    """Print model, token count and cost for each priceable trace."""
    for trace in traces:
        priced = price_trace(trace, pricing)
        if priced is None:
            continue
        model, tokens, cost = priced
        print(f"Model : {model}")
        print(f"Tokens Used : {tokens}")
        print(f"Cost : {cost}")
        print("---------------------\n")


def summarise_costs(traces, pricing):
    """Aggregate calls, tokens and cost over all priceable traces.

    Returns:
        ``(totals, breakdown)`` where ``totals`` is a dict with the keys
        'Total Calls', 'Total Tokens' and 'Total Cost', and ``breakdown``
        maps each model name to a dict with those same keys.
    """
    totals = {'Total Calls': 0, 'Total Tokens': 0, 'Total Cost': 0}
    breakdown = {}
    for trace in traces:
        priced = price_trace(trace, pricing)
        if priced is None:
            continue
        model, tokens, cost = priced
        per_model = breakdown.setdefault(
            model, {'Total Calls': 0, 'Total Tokens': 0, 'Total Cost': 0}
        )
        for bucket in (totals, per_model):
            bucket['Total Calls'] += 1
            bucket['Total Tokens'] += tokens
            bucket['Total Cost'] += cost
    return totals, breakdown


def print_cost_summary(totals, breakdown):
    """Print the per-model breakdown followed by the grand totals."""
    print("\n=== Cost Summary ===")
    for model, data in breakdown.items():
        print(f"\n {model}")
        print(f" Calls : {data['Total Calls']}")
        print(f" Tokens : {data['Total Tokens']}")
        print(f" Cost : ${data['Total Cost']:.6f}")
        # NOTE(review): the nesting of this divider is not recoverable from the
        # flattened source; it is emitted once per model here - confirm.
        print("====================")
    print(f"\n Total calls made : {totals['Total Calls']}")
    print(f" Total Tokens used : {totals['Total Tokens']}")
    print(f" Total cost: ${totals['Total Cost']:.6f}")
    print("====================")


def main(argv=None):
    """Run the CLI: print traces matching the filters, then any cost report.

    Args:
        argv: argument list to parse; None means the process arguments.

    The filters and ``--Cost`` are independent: the cost report always covers
    every trace in the file, not just the filtered ones.

    Raises:
        SystemExit: on invalid arguments, or when the trace file is missing.
    """
    args = build_parser().parse_args(argv)
    conditions = build_conditions(args)
    pricing = load_pricing() if args.Cost else None
    try:
        if conditions:
            print_matching_traces(iter_traces(TRACE_FILE), conditions)
        if args.Cost:
            traces = iter_traces(TRACE_FILE)
            if args.Cost == 'Summary':
                print_cost_summary(*summarise_costs(traces, pricing))
            else:
                print_trace_costs(traces, pricing)
    except FileNotFoundError:
        # Report a missing trace file in one line instead of a traceback.
        raise SystemExit(f"No trace file found at {TRACE_FILE}") from None


# Guarded so that importing this module no longer parses the host process's
# arguments or reads the trace file; `python -m tracellm.cli` still runs main().
if __name__ == "__main__":
    main()
105
+
106
+
107
+
108
+
@@ -0,0 +1,33 @@
1
+ import os
2
+ import json
3
# Bundled default prices keyed by model name, per million tokens.
Pricing = {
    "gpt-oss-20B (low)": 0.0675,
    "gpt-oss-20B (high)": 1.25,
    "Gemini 3.1 Pro Preview": 1.15,
    "GPT-5.4 (xhigh)": 12.50,
    "GPT-5.3 Codex (xhigh)": 11.50,
    "Claude Opus 4.6 (max)": 11.25,
    "Mercury 2": 0.04,
    "Qwen3.5 0.8B": 0.02,
    "Granite 4.0 H Small": 0.03,
    "Granite 3.3 8B": 0.04,
    "DeepSeek R1 Distill Qwen 32B": 0.09,
    "NVIDIA Nemotron 3 Nano": 0.03,
    "Gemma 3n E4B": 0.03,
    "Qwen3.5 2B": 0.04,
    "Llama 4 Scout": 0.05,
    "Grok 4.20 Beta 0309": 0.45,
    "Gemini 2.0 Pro Experimental": 1.10,
    "llama-3.1-8b-instant": 0.05,
}

# Location of the optional per-user override file: ~/.tracellm/pricing.json
user_home = os.path.expanduser('~')
config_dirc = os.path.join(user_home, '.tracellm')
personal_dict = os.path.join(config_dirc, 'pricing.json')


def load_pricing(path=None):
    """Return the price table: bundled defaults with user overrides applied.

    Args:
        path: JSON file holding a ``{model: price}`` object. None (the
            default) means the per-user file named by ``personal_dict``.

    Returns:
        A new dict, so callers may modify it without touching ``Pricing``.
        Entries from the file replace or extend the bundled ones; a file that
        lists a single custom model therefore keeps every bundled model
        priced. If the file does not exist, a copy of the defaults is
        returned.

    Raises:
        ValueError: if the file's top-level JSON value is not an object.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    pricing = dict(Pricing)
    override_path = personal_dict if path is None else path
    try:
        with open(override_path, 'r') as f:
            overrides = json.load(f)
    except FileNotFoundError:
        return pricing
    if not isinstance(overrides, dict):
        raise ValueError(
            f"{override_path} must contain a JSON object mapping model names to prices"
        )
    pricing.update(overrides)
    return pricing
33
+
@@ -2,6 +2,7 @@ import json
2
2
  import time
3
3
  from datetime import datetime
4
4
  import os
5
+
5
6
  TRACE_FILE = os.path.join(os.path.dirname(__file__), '..', 'trace.txt')
6
7
 
7
8
  class Tracer:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tracellm
3
- Version: 0.2.1
3
+ Version: 0.2.2
4
4
  Summary: Local-first observability for LLM applications
5
5
  Requires-Python: >=3.9
6
6
  Description-Content-Type: text/markdown
@@ -32,46 +32,59 @@ ask(
32
32
  model="llama-3.1-8b-instant",
33
33
  messages=[{"role": "user", "content": "Explain black holes in one line"}]
34
34
  )
35
- ...
36
35
  ```
37
- Output on query:
38
- ```
39
- --- Trace ---
40
- Model: llama-3.1-8b-instant
41
- Prompt: Explain black holes in one line
42
- Response: A black hole is a region where gravity...
43
- Tokens: 43
44
- Latency: 0.847
45
- Status: success
46
- Timestamp: 2026-04-03 19:46:27
47
- -------------
48
- ...
49
- ```
50
- That's it. Every call is traced automatically.
51
36
 
52
- ## Query traces from terminal
37
+ Every call is traced automatically. No try/except. No setup.
38
+
39
+ ## Query traces
53
40
  ```bash
54
41
  python -m tracellm.cli --Status failed
55
42
  python -m tracellm.cli --Latency 2.0
56
43
  python -m tracellm.cli --Model llama-3.1-8b-instant
57
44
  python -m tracellm.cli --Status failed --Latency 1.5
58
- ...
45
+ python -m tracellm.cli --Time "2026-04-03"
59
46
  ```
60
47
 
61
- ## What gets captured
48
+ ## Cost tracking
49
+ ```bash
50
+ # cost per trace
51
+ python -m tracellm.cli --Cost
52
+
53
+ # full summary by model
54
+ python -m tracellm.cli --Cost Summary
55
+ ```
62
56
 
57
+ Output:
58
+ === Cost Summary ===
59
+ llama-3.1-8b-instant
60
+ Calls : 8
61
+ Tokens : 405
62
+ Cost : $0.000020
63
+
64
+ Total calls made : 8
65
+ Total tokens used: 405
66
+ Total cost : $0.000020
67
+
68
+ ## What gets captured
63
69
  - Model, prompt, response
64
70
  - Tokens used, latency, finish reason
65
71
  - Error type and message on failures
66
72
  - Timestamp for every call
67
73
 
68
- ## Limitations
74
+ ## Pricing
75
+ Default pricing is bundled. To override, create `~/.tracellm/pricing.json`:
76
+ ```json
77
+ {
78
+ "my-custom-model": 0.05
79
+ }
80
+ ```
81
+ Values are per million tokens.
69
82
 
70
- Storage is append-only JSON lines. Latency query supports >=
71
- for latency, exact match for everything else. Early days.
83
+ ## Limitations
84
+ Storage is append-only JSON lines. The latency and time filters match with `>=`;
85
+ every other filter is an exact match. Early days.
72
86
 
73
87
  ## Roadmap
74
-
75
88
  - Binary storage for faster querying at scale
76
- - Cost calculation per model
89
+ - Async tracing support
77
90
  - Terminal dashboard
@@ -3,6 +3,7 @@ pyproject.toml
3
3
  tracellm/__init__.py
4
4
  tracellm/cli.py
5
5
  tracellm/decorator.py
6
+ tracellm/pricing.py
6
7
  tracellm/tracer.py
7
8
  tracellm.egg-info/PKG-INFO
8
9
  tracellm.egg-info/SOURCES.txt
@@ -1,2 +0,0 @@
1
- from .tracer import Tracer
2
- from .decorator import trace
@@ -1,45 +0,0 @@
1
- import argparse
2
- import json
3
- import os
4
- from datetime import datetime
5
- TRACE_FILE = os.path.join(os.path.dirname(__file__), '..', 'trace.txt')
6
-
7
- parser = argparse.ArgumentParser()
8
- parser.add_argument("-S" , "--Status" , help = "Based on what status do filter" , choices = ["success" , "failed"])
9
- parser.add_argument("-L" , "--Latency" , help = "Based on what latency do filter")
10
- parser.add_argument("-M" , "--Model" , help = "Based on what model do filter")
11
- parser.add_argument("-E" , "--Error" , help = "Based on what type of Error do filter")
12
- parser.add_argument("-T" , "--Time" , help = "Based on what time do filter")
13
- args = parser.parse_args()
14
-
15
- def parse_time_input(s):
16
- for fmt in ("%Y-%m-%d %H:%M:%S", "%Y-%m-%d"):
17
- try:
18
- return datetime.strptime(s, fmt)
19
- except ValueError:
20
- continue
21
- raise ValueError(f"Time format not recognised: {s}")
22
- conditions = []
23
- if args.Status:
24
- conditions.append(lambda t : t.get('Status') == args.Status)
25
- if args.Latency:
26
- conditions.append(lambda t : t.get('Latency') >= float(args.Latency))
27
- if args.Model:
28
- conditions.append(lambda t : t.get('Model') == args.Model)
29
- if args.Error:
30
- conditions.append(lambda t : t.get('Error Type' , None) == args.Error)
31
- if args.Time:
32
- query_time = parse_time_input(args.Time)
33
- conditions.append(
34
- lambda t, qt=query_time: datetime.strptime(
35
- t.get('Timestamp', ''), "%Y-%m-%d %H:%M:%S"
36
- ) >= qt
37
- )
38
- with open(TRACE_FILE , 'r') as trace_file:
39
- for line in trace_file:
40
- trace = json.loads(line)
41
- if all(condition(trace) for condition in conditions):
42
- print("\n--- Trace ---")
43
- for key, value in trace.items():
44
- print(f" {key}: {value}")
45
- print("-------------")
File without changes
File without changes