tsave 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
+ from .client import TokenSaverClient
2
+
3
+ __all__ = ["TokenSaverClient"]
token_saver/cli.py ADDED
@@ -0,0 +1,32 @@
1
+ """CLI entry point: tsave scan <file.py>"""
2
+
3
+ import sys
4
+ from pathlib import Path
5
+
6
+ from .core.static_analyzer import scan_file
7
+
8
+
9
def main():
    """Run ``tsave scan <file.py> [file2.py ...]`` and exit with a status code.

    Each path given after ``scan`` is analyzed with ``scan_file`` and its
    formatted report is printed to stdout.

    Exit status:
        0 -- every file was scanned and no findings were reported.
        1 -- wrong usage, at least one finding, or at least one path that
             could not be scanned (missing, unreadable, or not valid UTF-8).
    """
    if len(sys.argv) < 3 or sys.argv[1] != "scan":
        print("Usage: tsave scan <file.py> [file2.py ...]")
        sys.exit(1)

    files = sys.argv[2:]
    total_findings = 0
    # Paths that could not be scanned. They must influence the exit status,
    # otherwise a run where every path is wrong would look like a clean scan.
    failed = 0

    for f in files:
        p = Path(f)
        if not p.exists():
            print(f"tsave: {f} -- file not found", file=sys.stderr)
            failed += 1
            continue
        try:
            report = scan_file(p)
        except (OSError, UnicodeDecodeError) as exc:
            # e.g. the path is a directory, is not readable, or is not UTF-8.
            print(f"tsave: {f} -- cannot read file: {exc}", file=sys.stderr)
            failed += 1
            continue
        print(report.format())
        total_findings += len(report.findings)
        if len(files) > 1:
            # Blank separator line between the reports of several files.
            print()

    sys.exit(1 if total_findings > 0 or failed > 0 else 0)


if __name__ == "__main__":
    main()
token_saver/client.py ADDED
@@ -0,0 +1,160 @@
1
+ """TokenSaverClient — drop-in replacement for anthropic.Anthropic with built-in cost tracking."""
2
+
3
from __future__ import annotations

from dataclasses import dataclass, field

import anthropic

from .core.analyzer import AnalysisReport, analyze
from .core.compressor import CompressedResult, compress
from .core.tokenizer import (
    CACHE_READ_DISCOUNT,
    CACHE_WRITE_MULTIPLIER,
    PRICING,
    CostEstimate,
    TokenCount,
    count_tokens,
    estimate_cost,
    monthly_projection,
)
19
+
20
+
21
@dataclass
class UsageRecord:
    """Token usage of one tracked request, with derived dollar costs.

    Rates are looked up in ``PRICING`` as ``(input, output)`` dollars per
    million tokens; a model missing from the table is priced with the
    ``(3.00, 15.00)`` fallback.
    """

    model: str
    input_tokens: int
    output_tokens: int
    cache_read_tokens: int = 0
    cache_creation_tokens: int = 0

    @property
    def input_cost(self) -> float:
        """Dollar cost of the input side, including prompt-cache traffic.

        Cache reads are billed at ``CACHE_READ_DISCOUNT`` times the input rate
        and cache writes at ``CACHE_WRITE_MULTIPLIER`` times the input rate.
        With both cache counters at zero this equals
        ``input_tokens * rate / 1_000_000``.
        """
        rate_in, _ = PRICING.get(self.model, (3.00, 15.00))
        # NOTE(review): per the Anthropic usage documentation, `input_tokens`
        # does not include cache reads/writes, so they are added on top here.
        # The single write multiplier presumably models the default cache TTL.
        billable_tokens = (
            self.input_tokens
            + self.cache_read_tokens * CACHE_READ_DISCOUNT
            + self.cache_creation_tokens * CACHE_WRITE_MULTIPLIER
        )
        return billable_tokens * rate_in / 1_000_000

    @property
    def output_cost(self) -> float:
        """Dollar cost of the generated output tokens."""
        _, rate_out = PRICING.get(self.model, (3.00, 15.00))
        return self.output_tokens * rate_out / 1_000_000

    @property
    def total_cost(self) -> float:
        """Sum of ``input_cost`` and ``output_cost``."""
        return self.input_cost + self.output_cost
42
+
43
+
44
class TokenSaverClient:
    """Wraps anthropic.Anthropic with token counting, cost tracking, analysis, and compression.

    Every call made through :meth:`create` whose response carries a ``usage``
    object is recorded as a ``UsageRecord``; the aggregate properties and
    :meth:`usage_summary` are computed from those records.
    """

    def __init__(self, **kwargs):
        """Create the wrapped client; ``kwargs`` go to ``anthropic.Anthropic`` unchanged."""
        self._client = anthropic.Anthropic(**kwargs)
        self._history: list[UsageRecord] = []

    @property
    def raw(self) -> anthropic.Anthropic:
        """The underlying ``anthropic.Anthropic`` instance."""
        return self._client

    @property
    def history(self) -> list[UsageRecord]:
        """A shallow copy of the recorded usage, oldest first."""
        return list(self._history)

    @property
    def total_cost(self) -> float:
        """Total dollar cost over all recorded requests."""
        return sum(r.total_cost for r in self._history)

    @property
    def total_input_tokens(self) -> int:
        """Sum of ``input_tokens`` over all recorded requests."""
        return sum(r.input_tokens for r in self._history)

    @property
    def total_output_tokens(self) -> int:
        """Sum of ``output_tokens`` over all recorded requests."""
        return sum(r.output_tokens for r in self._history)

    def create(self, **kwargs) -> anthropic.types.Message:
        """Call ``messages.create`` on the wrapped client and record its usage.

        ``kwargs`` are forwarded unchanged and the response is returned as-is.
        A response without a ``usage`` attribute is returned without being
        recorded instead of raising after the request has already been sent.
        """
        response = self._client.messages.create(**kwargs)
        usage = getattr(response, "usage", None)
        if usage is None:
            # NOTE(review): e.g. the event stream returned for stream=True has
            # no aggregate usage on the returned object -- confirm against the
            # SDK version in use.
            return response
        record = UsageRecord(
            model=kwargs.get("model", response.model),
            input_tokens=usage.input_tokens,
            output_tokens=usage.output_tokens,
            # The cache counters may be absent or None; treat both as zero.
            cache_read_tokens=getattr(usage, "cache_read_input_tokens", 0) or 0,
            cache_creation_tokens=getattr(usage, "cache_creation_input_tokens", 0) or 0,
        )
        self._history.append(record)
        return response

    def count_tokens(
        self,
        *,
        model: str,
        messages: list[dict],
        system: str | list[dict] | None = None,
        tools: list[dict] | None = None,
    ) -> TokenCount:
        """Count input tokens for a prospective request (delegates to ``count_tokens``)."""
        return count_tokens(self._client, model=model, messages=messages, system=system, tools=tools)

    def estimate_cost(
        self,
        *,
        model: str,
        messages: list[dict],
        estimated_output_tokens: int = 1000,
        system: str | list[dict] | None = None,
        tools: list[dict] | None = None,
    ) -> CostEstimate:
        """Estimate the cost of a prospective request (delegates to ``estimate_cost``)."""
        return estimate_cost(
            self._client,
            model=model,
            messages=messages,
            estimated_output_tokens=estimated_output_tokens,
            system=system,
            tools=tools,
        )

    def analyze(
        self,
        *,
        model: str,
        messages: list[dict],
        system: str | list[dict] | None = None,
        tools: list[dict] | None = None,
    ) -> AnalysisReport:
        """Produce optimization suggestions for a prospective request (delegates to ``analyze``)."""
        return analyze(self._client, model=model, messages=messages, system=system, tools=tools)

    def compress(
        self,
        *,
        model: str,
        messages: list[dict],
        target_reduction: float = 0.5,
        query: str | None = None,
        keep_last_n: int = 4,
    ) -> CompressedResult:
        """Compress a conversation history (delegates to ``compress``)."""
        return compress(
            self._client,
            model=model,
            messages=messages,
            target_reduction=target_reduction,
            query=query,
            keep_last_n=keep_last_n,
        )

    def monthly_projection(self, requests_per_day: int, days: int = 30):
        """Project spend from the average recorded cost per request.

        With no recorded requests the projection uses a cost of ``0.0``.
        Returns whatever the module-level ``monthly_projection`` returns.
        """
        avg_cost = self.total_cost / len(self._history) if self._history else 0.0
        return monthly_projection(avg_cost, requests_per_day, days)

    def usage_summary(self) -> str:
        """Return a multi-line, human-readable summary of the recorded usage.

        The "Models used" line is only included when more than one distinct
        model appears in the history.
        """
        n = len(self._history)
        if n == 0:
            return "No requests tracked yet."
        lines = [
            f"=== Usage Summary ({n} requests) ===",
            f"Total input tokens: {self.total_input_tokens:,}",
            f"Total output tokens: {self.total_output_tokens:,}",
            f"Total cost: ${self.total_cost:.4f}",
            f"Avg cost/request: ${self.total_cost / n:.4f}",
        ]
        models_used = {r.model for r in self._history}
        if len(models_used) > 1:
            lines.append(f"Models used: {', '.join(sorted(models_used))}")
        return "\n".join(lines)
File without changes
@@ -0,0 +1,167 @@
1
+ """Pre-send prescriptive analysis with optimization suggestions."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+
7
+ import anthropic
8
+
9
+ from .tokenizer import PRICING, count_tokens
10
+
11
+
12
@dataclass
class Suggestion:
    """One optimization hint produced by the pre-send analysis."""

    # Short machine-friendly tag, e.g. "large-message" or "no-caching".
    category: str
    # Human-readable explanation shown in the report.
    message: str
    # Rough percentage saving; 0.0 means "no estimate" and is not printed.
    estimated_saving_pct: float = 0.0
17
+
18
+
19
@dataclass
class AnalysisReport:
    """Result of a pre-send analysis: token count, suggestions and cheaper models."""

    model: str
    input_tokens: int
    suggestions: list[Suggestion] = field(default_factory=list)
    # Each entry is a dict with the keys "model", "cost" and "saving".
    alternative_models: list[dict] = field(default_factory=list)

    @property
    def input_cost(self) -> float:
        """Dollar cost of the input tokens at the model's ``PRICING`` input rate."""
        input_rate = PRICING.get(self.model, (3.00, 15.00))[0]
        return self.input_tokens * input_rate / 1_000_000

    @property
    def potential_savings_pct(self) -> float:
        """Largest single ``estimated_saving_pct`` among the suggestions, or 0.0."""
        return max((item.estimated_saving_pct for item in self.suggestions), default=0.0)

    def format(self) -> str:
        """Render the report as plain text, one section per paragraph."""
        out = [
            "=== Analysis Report ===",
            f"Model: {self.model}",
            f"Input tokens: {self.input_tokens:,}",
            f"Estimated input cost: ${self.input_cost:.4f}",
        ]

        if not self.suggestions:
            out.append("\nNo optimization suggestions -- looks good!")
        else:
            out.append(f"\nSuggestions ({len(self.suggestions)}):")
            for number, item in enumerate(self.suggestions, 1):
                pct = item.estimated_saving_pct
                # A zero estimate is omitted rather than printed as "~0%".
                tail = f" (~{pct:.0f}% saving)" if pct else ""
                out.append(f" {number}. [{item.category}] {item.message}{tail}")

        if self.alternative_models:
            out.append("\nAlternative models:")
            out.extend(
                f" - {alt['model']}: ${alt['cost']:.4f} (save ${alt['saving']:.4f})"
                for alt in self.alternative_models
            )

        return "\n".join(out)
58
+
59
+
60
def _check_message_length(messages: list[dict]) -> list[Suggestion]:
    """Flag individual messages whose text exceeds 50,000 characters.

    Content may be a plain string or a list of content blocks. For a list,
    the lengths of the string ``"text"`` values of its dict blocks are summed,
    so large messages sent in block form are measured as well. Any other
    content shape counts as zero characters.

    Returns one "large-message" suggestion per oversized message, in order.
    """
    suggestions = []
    for i, msg in enumerate(messages):
        content = msg.get("content", "")
        if isinstance(content, str):
            size = len(content)
        elif isinstance(content, list):
            size = sum(
                len(block["text"])
                for block in content
                if isinstance(block, dict) and isinstance(block.get("text"), str)
            )
        else:
            size = 0
        if size > 50_000:
            suggestions.append(Suggestion(
                category="large-message",
                message=f"Message {i} has {size:,} chars — consider compressing or chunking",
                estimated_saving_pct=30.0,
            ))
    return suggestions
71
+
72
+
73
def _check_system_prompt(system: str | list[dict] | None) -> list[Suggestion]:
    """Flag a system prompt longer than 10,000 characters.

    A block-form prompt is measured as the space-joined ``"text"`` values of
    its dict blocks. Returns an empty list when there is no prompt or it is
    within the limit.
    """
    if system is None:
        return []

    if isinstance(system, str):
        prompt_text = system
    else:
        prompt_text = " ".join(
            part.get("text", "") for part in system if isinstance(part, dict)
        )

    if len(prompt_text) <= 10_000:
        return []

    return [
        Suggestion(
            category="large-system-prompt",
            message=f"System prompt is {len(prompt_text):,} chars — consider trimming or using caching",
            estimated_saving_pct=20.0,
        )
    ]
87
+
88
+
89
def _check_redundant_turns(messages: list[dict]) -> list[Suggestion]:
    """Flag conversations with more than 20 messages as candidates for summarizing."""
    turn_count = len(messages)
    if turn_count <= 20:
        return []
    return [
        Suggestion(
            category="long-conversation",
            message=f"Conversation has {turn_count} turns — consider summarizing older turns",
            estimated_saving_pct=40.0,
        )
    ]
98
+
99
+
100
def _check_caching(system: str | list[dict] | None, tools: list[dict] | None) -> list[Suggestion]:
    """Suggest prompt caching for a large system prompt or many tools.

    A request counts as already cached when any system block or any tool dict
    has a top-level ``"cache_control"`` key. Otherwise a "no-caching"
    suggestion is returned when the system text is longer than 2048
    characters or more than three tools are supplied.
    """
    def _marked(entries) -> bool:
        # True when at least one dict entry carries a cache_control key.
        return any(isinstance(entry, dict) and "cache_control" in entry for entry in entries)

    system_cached = isinstance(system, list) and _marked(system)
    tools_cached = bool(tools) and _marked(tools)

    if isinstance(system, str):
        system_text = system
    elif isinstance(system, list):
        system_text = " ".join(part.get("text", "") for part in system if isinstance(part, dict))
    else:
        system_text = ""

    worth_caching = len(system_text) > 2048 or (tools and len(tools) > 3)
    if system_cached or tools_cached or not worth_caching:
        return []

    return [
        Suggestion(
            category="no-caching",
            message="Large system prompt or many tools without cache_control — enable prompt caching for 90% input cost reduction on cache hits",
            estimated_saving_pct=50.0,
        )
    ]
127
+
128
+
129
def _find_cheaper_models(model: str, input_tokens: int) -> list[dict]:
    """List the ``PRICING`` models whose input rate is below the current model's.

    Returns dicts with the keys ``"model"``, ``"cost"`` and ``"saving"``
    (input-side dollars only), ordered by ascending input rate. A model that
    is not in ``PRICING`` is compared using the 3.00 fallback input rate.
    """
    base_rate = PRICING.get(model, (3.00, 15.00))[0]
    base_cost = input_tokens * base_rate / 1_000_000

    cheaper = []
    by_input_rate = sorted(PRICING.items(), key=lambda entry: entry[1][0])
    for name, rates in by_input_rate:
        rate = rates[0]
        if name == model or rate >= base_rate:
            continue
        cost = input_tokens * rate / 1_000_000
        cheaper.append({
            "model": name,
            "cost": cost,
            "saving": base_cost - cost,
        })
    return cheaper
142
+
143
+
144
def analyze(
    client: anthropic.Anthropic,
    *,
    model: str,
    messages: list[dict],
    system: str | list[dict] | None = None,
    tools: list[dict] | None = None,
) -> AnalysisReport:
    """Count the request's input tokens and collect optimization suggestions.

    The token count comes from ``count_tokens`` (one call through ``client``);
    the suggestions come from the local ``_check_*`` heuristics, in the order
    message length, system prompt, turn count, caching.
    """
    token_count = count_tokens(client, model=model, messages=messages, system=system, tools=tools)

    findings: list[Suggestion] = [
        *_check_message_length(messages),
        *_check_system_prompt(system),
        *_check_redundant_turns(messages),
        *_check_caching(system, tools),
    ]

    cheaper = _find_cheaper_models(model, token_count.input_tokens)

    return AnalysisReport(
        model=model,
        input_tokens=token_count.input_tokens,
        suggestions=findings,
        alternative_models=cheaper,
    )
@@ -0,0 +1,184 @@
1
+ """Semantic compressor with relevance scoring for conversation history."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+
7
+ import anthropic
8
+
9
+
10
@dataclass
class CompressedResult:
    """Before/after view of a compressed conversation with token counts."""

    original_messages: list[dict]
    compressed_messages: list[dict]
    original_tokens: int
    compressed_tokens: int

    @property
    def reduction_pct(self) -> float:
        """Percentage of tokens removed; 0.0 when the original had no tokens."""
        if self.original_tokens == 0:
            # Avoid dividing by zero for an empty original.
            return 0.0
        return (1 - self.compressed_tokens / self.original_tokens) * 100

    def format(self) -> str:
        """Return a three-line summary: original, compressed, reduction."""
        before = f"Original: {self.original_tokens:,} tokens ({len(self.original_messages)} messages)"
        after = f"Compressed: {self.compressed_tokens:,} tokens ({len(self.compressed_messages)} messages)"
        change = f"Reduction: {self.reduction_pct:.1f}%"
        return "\n".join((before, after, change))
29
+
30
+
31
def _score_message_relevance(message: dict, query: str | None) -> float:
    """Score how worth keeping a message is, from 0.5 up to a cap of 1.0.

    Scoring:
        * content that is neither a string nor a list, and any message with
          role ``"system"``, scores 1.0 outright;
        * otherwise start at 0.5, add 0.1 for an assistant message, add 0.2
          when the content list contains a ``tool_use`` / ``tool_result``
          block, and add up to 0.3 in proportion to the share of
          whitespace-separated query words found in the message text.
    """
    raw = message.get("content", "")

    text = raw
    if isinstance(raw, list):
        # Only text blocks contribute to the searchable text.
        text = " ".join(
            part.get("text", "")
            for part in raw
            if isinstance(part, dict) and part.get("type") == "text"
        )
    if not isinstance(text, str):
        return 1.0

    role = message.get("role", "")
    if role == "system":
        return 1.0

    score = 0.5
    if role == "assistant":
        score += 0.1

    uses_tools = isinstance(raw, list) and any(
        isinstance(part, dict) and part.get("type") in ("tool_use", "tool_result")
        for part in raw
    )
    if uses_tools:
        score += 0.2

    if query:
        wanted = set(query.lower().split())
        if wanted:
            present = set(text.lower().split())
            score += 0.3 * (len(wanted & present) / len(wanted))

    return min(score, 1.0)
67
+
68
+
69
def _summarize_messages(
    client: anthropic.Anthropic,
    messages_to_summarize: list[dict],
    model: str,
) -> str:
    """Ask the model for a 2-3 sentence summary of the given messages.

    Only textual content is sent: string content as-is, list content as the
    space-joined text blocks. Messages with no non-blank text are skipped.
    Returns ``""`` without calling the API when nothing is left to summarize;
    otherwise returns the ``text`` of the first content block of the reply.
    """
    transcript = []
    for entry in messages_to_summarize:
        body = entry.get("content", "")
        if isinstance(body, list):
            body = " ".join(
                part.get("text", "")
                for part in body
                if isinstance(part, dict) and part.get("type") == "text"
            )
        if not isinstance(body, str) or not body.strip():
            continue
        speaker = entry.get("role", "unknown")
        transcript.append(f"[{speaker}]: {body}")

    if not transcript:
        return ""

    prompt = (
        "Compress this conversation into 2-3 sentences max. "
        "Keep only facts, decisions, and key terms. No filler.\n\n"
        + "\n".join(transcript)
    )
    reply = client.messages.create(
        model=model,
        max_tokens=512,
        messages=[{"role": "user", "content": prompt}],
    )
    return reply.content[0].text
104
+
105
+
106
def compress(
    client: anthropic.Anthropic,
    *,
    model: str,
    messages: list[dict],
    target_reduction: float = 0.5,
    query: str | None = None,
    keep_last_n: int = 4,
) -> CompressedResult:
    """Replace the older part of a conversation with a model-written summary.

    The last ``keep_last_n`` messages are kept verbatim. Everything before
    them is summarized through ``_summarize_messages`` and re-inserted as a
    ``[Prior context]`` user message followed by an ``"Understood."``
    assistant message. The result is then normalized so that it starts with a
    user message and adjacent same-role string messages are merged.

    Args:
        client: Anthropic client used for token counting and summarizing.
        model: Model used for both the token counts and the summary call.
        messages: Conversation to compress; it is not modified.
        target_reduction: Accepted for interface compatibility; the current
            implementation does not read it.
        query: Accepted for interface compatibility; the current
            implementation does not read it.
        keep_last_n: Number of trailing messages to keep verbatim. ``0``
            summarizes the whole conversation; negative values act as ``0``.

    Returns:
        A ``CompressedResult`` holding the original and compressed messages
        with their token counts as reported by ``messages.count_tokens``.
    """
    keep = max(keep_last_n, 0)

    if len(messages) <= keep:
        # Nothing old enough to summarize: return the conversation unchanged.
        tc = client.messages.count_tokens(model=model, messages=messages)
        return CompressedResult(
            original_messages=messages,
            compressed_messages=list(messages),
            original_tokens=tc.input_tokens,
            compressed_tokens=tc.input_tokens,
        )

    original_tc = client.messages.count_tokens(model=model, messages=messages)

    # Split by index rather than with negative slices: `messages[-0:]` is the
    # whole list, so keep_last_n=0 would otherwise protect every message.
    split = len(messages) - keep
    candidates = messages[:split]
    protected = messages[split:]

    summary = _summarize_messages(client, candidates, model)

    compressed_messages = []
    if summary:
        compressed_messages.append({
            "role": "user",
            "content": f"[Prior context] {summary}",
        })
        compressed_messages.append({
            "role": "assistant",
            "content": "Understood.",
        })
    compressed_messages.extend(protected)

    if not compressed_messages:
        # No summarizable text and nothing protected (keep_last_n == 0):
        # fall back to the untouched conversation instead of an empty one.
        return CompressedResult(
            original_messages=messages,
            compressed_messages=list(messages),
            original_tokens=original_tc.input_tokens,
            compressed_tokens=original_tc.input_tokens,
        )

    if compressed_messages[0].get("role") != "user":
        # Make sure the conversation opens with a user turn.
        compressed_messages.insert(0, {
            "role": "user",
            "content": "[Conversation continues from earlier context]",
        })

    # Merge adjacent same-role messages when both contents are plain strings;
    # block-form contents are left as separate messages.
    # NOTE(review): a protected message may reference a tool call that was
    # summarized away -- verify this is acceptable for tool-using histories.
    final = []
    prev_role = None
    for msg in compressed_messages:
        role = msg.get("role")
        if role == prev_role and role in ("user", "assistant"):
            existing = final[-1].get("content", "")
            new_content = msg.get("content", "")
            if isinstance(existing, str) and isinstance(new_content, str):
                final[-1] = {**final[-1], "content": f"{existing}\n\n{new_content}"}
            else:
                final.append(msg)
        else:
            final.append(msg)
        prev_role = role

    compressed_tc = client.messages.count_tokens(model=model, messages=final)

    return CompressedResult(
        original_messages=messages,
        compressed_messages=final,
        original_tokens=original_tc.input_tokens,
        compressed_tokens=compressed_tc.input_tokens,
    )
@@ -0,0 +1,273 @@
1
+ """Static analyzer: scans Python source for token-wasting patterns before execution."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import ast
6
+ import textwrap
7
+ from dataclasses import dataclass, field
8
+ from pathlib import Path
9
+
10
+
11
@dataclass
class Finding:
    """A single token-waste issue located in a scanned source file."""

    file: str
    line: int
    rule: str
    message: str
    estimated_waste_tokens: int
    # Suggested replacement code, shown indented under "Fix:".
    fix: str

    def format(self) -> str:
        """Render the finding as an indented, multi-line text block."""
        header = "\n".join([
            f" {self.file}:{self.line} [{self.rule}]",
            f" {self.message}",
            f" ~{self.estimated_waste_tokens:,} tokens wasted per call",
            " Fix:",
        ])
        return header + "\n" + textwrap.indent(self.fix, " ")
28
+
29
+
30
@dataclass
class ScanReport:
    """All findings for one scanned file."""

    file: str
    findings: list[Finding] = field(default_factory=list)

    @property
    def total_estimated_waste(self) -> int:
        """Sum of ``estimated_waste_tokens`` over all findings."""
        return sum(item.estimated_waste_tokens for item in self.findings)

    def format(self) -> str:
        """Render the report: a header, each finding, then the total."""
        count = len(self.findings)
        if count == 0:
            return f"tsave: {self.file} -- no issues found"

        parts = [f"tsave: {self.file} -- {count} issue(s)\n"]
        for item in self.findings:
            # An empty entry after each finding yields a blank separator line.
            parts.extend((item.format(), ""))
        parts.append(f"Total estimated waste: ~{self.total_estimated_waste:,} tokens/call")
        return "\n".join(parts)
48
+
49
+
50
# Method names treated as API calls. Matching is by attribute name only, so
# any `<expr>.create(...)`, `.stream(...)` or `.count_tokens(...)` qualifies.
_API_CALL_ATTRS = {
    "create", "stream", "count_tokens",
}

# Models the "no-model-routing" rule treats as expensive. "claude-mythos-5"
# is included because this package's pricing table lists it at the same rate
# as "claude-fable-5".
_EXPENSIVE_MODELS = {
    "claude-opus-4-8",
    "claude-opus-4-7",
    "claude-opus-4-6",
    "claude-fable-5",
    "claude-mythos-5",
}


def _is_api_call(node: ast.Call) -> bool:
    """Return True when ``node`` is an attribute call named in ``_API_CALL_ATTRS``."""
    return isinstance(node.func, ast.Attribute) and node.func.attr in _API_CALL_ATTRS
61
+
62
+
63
def _get_string_value(node: ast.expr) -> str | None:
    """Return the value of a string-literal node, or None for any other node."""
    is_str_literal = isinstance(node, ast.Constant) and isinstance(node.value, str)
    return node.value if is_str_literal else None
67
+
68
+
69
def _get_keyword(call: ast.Call, name: str) -> ast.keyword | None:
    """Return the first keyword argument of ``call`` named ``name``, or None."""
    return next((kw for kw in call.keywords if kw.arg == name), None)
74
+
75
+
76
class _Visitor(ast.NodeVisitor):
    """AST visitor that collects token-waste findings for one source file.

    Call ``visit(tree)`` and then ``finalize()``; the results are in
    ``findings``. All rules are heuristics based on names and literal shapes.
    """

    def __init__(self, filename: str, source_lines: list[str]):
        self.filename = filename
        self.source_lines = source_lines
        self.findings: list[Finding] = []
        # Loop nodes currently being visited; non-empty means "inside a loop".
        self._loop_stack: list[ast.AST] = []
        # Line numbers of assignments to names containing "system".
        self._system_assignments: list[int] = []
        # (line, model) for every API call with a literal `model=` argument.
        self._seen_models: list[tuple[int, str]] = []

    def _in_loop(self) -> bool:
        """Return True while the visitor is inside at least one loop."""
        return bool(self._loop_stack)

    def _visit_loop(self, node: ast.AST):
        """Visit a loop's children with the loop pushed on the loop stack."""
        self._loop_stack.append(node)
        try:
            self.generic_visit(node)
        finally:
            self._loop_stack.pop()

    def visit_For(self, node: ast.For):
        self._visit_loop(node)

    def visit_AsyncFor(self, node: ast.AsyncFor):
        # `async for` repeats its body just like `for`, so track it as a loop.
        self._visit_loop(node)

    def visit_While(self, node: ast.While):
        self._visit_loop(node)

    def visit_Call(self, node: ast.Call):
        """Run every per-call rule on API calls, then descend into children."""
        if _is_api_call(node):
            self._check_api_in_loop(node)
            self._check_file_read_in_call(node)
            self._check_model_routing(node)
            self._check_no_caching(node)
        self.generic_visit(node)

    def visit_Assign(self, node: ast.Assign):
        """Record assignments to plain names that contain "system"."""
        for target in node.targets:
            if isinstance(target, ast.Name) and "system" in target.id.lower():
                self._system_assignments.append(node.lineno)
        self.generic_visit(node)

    def _check_api_in_loop(self, node: ast.Call):
        """Rule "api-in-loop": an API call made while inside a loop."""
        if not self._in_loop():
            return
        self.findings.append(Finding(
            file=self.filename,
            line=node.lineno,
            rule="api-in-loop",
            message="API call inside a loop — each iteration sends a full request",
            estimated_waste_tokens=5000,
            fix=textwrap.dedent("""\
                # Batch messages or collect results, then make one call
                results = []
                for item in items:
                    results.append(item)
                response = client.messages.create(
                    model="claude-haiku-4-5",
                    messages=[{"role": "user", "content": "\\n".join(results)}],
                )"""),
        ))

    def _check_file_read_in_call(self, node: ast.Call):
        """Rule "full-file-per-call": a ``.read()`` / ``.read_text()`` call
        nested anywhere inside the API call. Reported at most once per call.
        """
        for child in ast.walk(node):
            if child is node or not isinstance(child, ast.Call):
                continue
            if isinstance(child.func, ast.Attribute) and child.func.attr in ("read", "read_text"):
                self.findings.append(Finding(
                    file=self.filename,
                    line=node.lineno,
                    rule="full-file-per-call",
                    message="Entire file read and passed in every API call -- chunk or summarize first",
                    estimated_waste_tokens=10000,
                    fix=textwrap.dedent("""\
                        # Read once, chunk, send only relevant parts
                        content = Path("doc.txt").read_text()
                        chunks = [content[i:i+4000] for i in range(0, len(content), 4000)]
                        response = client.messages.create(
                            messages=[{"role": "user", "content": chunks[0]}],
                        )"""),
                ))
                return

    def _check_model_routing(self, node: ast.Call):
        """Rule "no-model-routing": an expensive model used for a simple call.

        A call counts as simple only when ``messages`` is a list literal with
        at most two elements and no ``tools`` argument is passed. Calls whose
        shape cannot be determined statically are not flagged.
        """
        model_kw = _get_keyword(node, "model")
        if model_kw is None:
            return
        model_val = _get_string_value(model_kw.value)
        if model_val is None:
            return
        self._seen_models.append((node.lineno, model_val))
        if model_val not in _EXPENSIVE_MODELS:
            return

        msg_kw = _get_keyword(node, "messages")
        is_simple = (
            msg_kw is not None
            and isinstance(msg_kw.value, ast.List)
            and len(msg_kw.value.elts) <= 2
            and _get_keyword(node, "tools") is None
        )
        if not is_simple:
            return

        self.findings.append(Finding(
            file=self.filename,
            line=node.lineno,
            rule="no-model-routing",
            message=f"Using {model_val} for a simple call — Haiku may suffice",
            estimated_waste_tokens=0,
            fix=textwrap.dedent(f"""\
                # Route by complexity
                model = "claude-haiku-4-5" # simple tasks
                # model = "{model_val}" # complex tasks only"""),
        ))

    def _check_no_caching(self, node: ast.Call):
        """Rule "uncached-system-prompt": a ``system=`` argument passed inside
        a loop without a literal ``"cache_control"`` key in a list-of-dicts
        system value. Any other form of ``system`` counts as uncached.
        """
        sys_kw = _get_keyword(node, "system")
        if sys_kw is None:
            return

        has_cache = False
        if isinstance(sys_kw.value, ast.List):
            has_cache = any(
                isinstance(key, ast.Constant) and key.value == "cache_control"
                for elt in sys_kw.value.elts
                if isinstance(elt, ast.Dict)
                for key in elt.keys
            )

        if has_cache or not self._in_loop():
            return
        self.findings.append(Finding(
            file=self.filename,
            line=node.lineno,
            rule="uncached-system-prompt",
            message="System prompt sent in loop without cache_control — reparsed every call",
            estimated_waste_tokens=2000,
            fix=textwrap.dedent("""\
                system=[{
                    "type": "text",
                    "text": system_prompt,
                    "cache_control": {"type": "ephemeral"},
                }]"""),
        ))

    def finalize(self):
        """Run the whole-file rules; call once after ``visit``."""
        if len(self._system_assignments) > 1:
            self.findings.append(Finding(
                file=self.filename,
                line=self._system_assignments[-1],
                rule="system-prompt-redefined",
                message=f"System prompt assigned {len(self._system_assignments)} times — define once and cache",
                estimated_waste_tokens=2000,
                fix=textwrap.dedent("""\
                    # Define once at module level with cache_control
                    SYSTEM = [{"type": "text", "text": prompt, "cache_control": {"type": "ephemeral"}}]"""),
            ))

        self._check_uncompressed_history()

    def _check_uncompressed_history(self):
        """Rule "uncompressed-history": text-based scan for ``messages.append``
        near a loop with no compression in sight. Reported at most once.
        """
        for i, line in enumerate(self.source_lines, 1):
            stripped = line.strip()
            if ".append(" not in stripped or "messages" not in stripped.lower():
                continue

            # A window of source lines around the append (i is 1-based).
            context_start = max(0, i - 5)
            context = "\n".join(self.source_lines[context_start:i + 5])
            lowered = context.lower()
            if "compres" in lowered or "summar" in lowered or "compact" in lowered:
                continue

            has_loop = any(
                kw in context for kw in ("for ", "while ", "def chat", "def conversation")
            )
            if not has_loop:
                continue

            self.findings.append(Finding(
                file=self.filename,
                line=i,
                rule="uncompressed-history",
                message="Messages appended in a loop without compression — history grows unbounded",
                estimated_waste_tokens=8000,
                fix=textwrap.dedent("""\
                    # Compress history when it grows large
                    if len(messages) > 20:
                        result = client.compress(model=model, messages=messages)
                        messages = result.compressed_messages"""),
            ))
            return
255
+
256
+
257
def scan_source(source: str, filename: str = "<stdin>") -> ScanReport:
    """Parse ``source`` and return the findings of the static analysis.

    Source that cannot be parsed is not reported as clean: the report then
    contains a single ``"syntax-error"`` finding (zero estimated waste) so
    the caller can see that the file was not actually analyzed.

    Args:
        source: Python source code to analyze.
        filename: Name used in the report and in each finding.
    """
    try:
        tree = ast.parse(source)
    except (SyntaxError, ValueError) as exc:
        # ValueError covers source that ast.parse rejects before parsing on
        # some Python versions (e.g. embedded null bytes).
        reason = getattr(exc, "msg", None) or str(exc)
        unparsed = Finding(
            file=filename,
            line=getattr(exc, "lineno", None) or 0,
            rule="syntax-error",
            message=f"File could not be parsed and was not analyzed: {reason}",
            estimated_waste_tokens=0,
            fix="# Fix the syntax error, then re-run the scan",
        )
        return ScanReport(file=filename, findings=[unparsed])

    lines = source.splitlines()
    visitor = _Visitor(filename, lines)
    visitor.visit(tree)
    visitor.finalize()
    return ScanReport(file=filename, findings=visitor.findings)
268
+
269
+
270
def scan_file(path: str | Path) -> ScanReport:
    """Read ``path`` as UTF-8 text and scan it, using the path as the report's file name."""
    target = Path(path)
    return scan_source(target.read_text(encoding="utf-8"), str(target))
@@ -0,0 +1,130 @@
1
+ """Token counting, cost estimation, and monthly projection using the Anthropic API."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+
7
+ import anthropic
8
+
9
# Price table: model name -> (input rate, output rate). The cost properties
# in this module divide by 1_000_000, i.e. rates are per million tokens.
# Entry order is preserved on purpose; code that sorts the table relies on it
# for a stable ordering among equal rates.
PRICING: dict[str, tuple[float, float]] = {
    "claude-fable-5": (10.00, 50.00),
    "claude-mythos-5": (10.00, 50.00),
    "claude-opus-4-8": (5.00, 25.00),
    "claude-opus-4-7": (5.00, 25.00),
    "claude-opus-4-6": (5.00, 25.00),
    "claude-sonnet-4-6": (3.00, 15.00),
    "claude-haiku-4-5": (1.00, 5.00),
}

# Multipliers applied to the input rate for prompt-cache traffic.
CACHE_READ_DISCOUNT = 0.1
CACHE_WRITE_MULTIPLIER = 1.25
21
+
22
+
23
@dataclass
class TokenCount:
    """Input-token count for one prospective request."""

    input_tokens: int
    model: str

    @property
    def input_cost(self) -> float:
        """Dollar cost of the input tokens; unknown models use the 3.00 fallback rate."""
        input_rate = PRICING.get(self.model, (3.00, 15.00))[0]
        return self.input_tokens * input_rate / 1_000_000

    def format(self) -> str:
        """Return a one-line summary of the count and its estimated cost."""
        tokens, cost = self.input_tokens, self.input_cost
        return f"{tokens:,} input tokens | est. ${cost:.4f}"
35
+
36
+
37
@dataclass
class CostEstimate:
    """Estimated cost of a request: counted input plus assumed output tokens."""

    input_tokens: int
    estimated_output_tokens: int
    model: str

    def _rates(self) -> tuple[float, float]:
        # (input, output) rates; unknown models fall back to (3.00, 15.00).
        return PRICING.get(self.model, (3.00, 15.00))

    @property
    def input_cost(self) -> float:
        """Dollar cost of the input tokens."""
        return self.input_tokens * self._rates()[0] / 1_000_000

    @property
    def output_cost(self) -> float:
        """Dollar cost of the estimated output tokens."""
        return self.estimated_output_tokens * self._rates()[1] / 1_000_000

    @property
    def total_cost(self) -> float:
        """Sum of ``input_cost`` and ``output_cost``."""
        return self.input_cost + self.output_cost

    def format(self) -> str:
        """Return a three-line breakdown: input, output (estimated), total."""
        rows = (
            f"Input: {self.input_tokens:>10,} tokens ${self.input_cost:.4f}",
            f"Output: {self.estimated_output_tokens:>10,} tokens ${self.output_cost:.4f} (est.)",
            f"Total: {'':>10} ${self.total_cost:.4f}",
        )
        return "\n".join(rows)
63
+
64
+
65
@dataclass
class MonthlyProjection:
    """Spend projection from a per-request cost and a daily request volume."""

    cost_per_request: float
    requests_per_day: int
    days: int = 30

    @property
    def daily_cost(self) -> float:
        """Cost of one day: per-request cost times requests per day."""
        return self.cost_per_request * self.requests_per_day

    @property
    def monthly_cost(self) -> float:
        """Cost over ``days`` days at the daily rate."""
        return self.daily_cost * self.days

    def format(self) -> str:
        """Return a three-line summary: per request, daily, and over ``days``."""
        rows = [
            f"Per request: ${self.cost_per_request:.4f}",
            f"Daily ({self.requests_per_day} req/day): ${self.daily_cost:.2f}",
            f"Monthly ({self.days} days): ${self.monthly_cost:.2f}",
        ]
        return "\n".join(rows)
85
+
86
+
87
def count_tokens(
    client: anthropic.Anthropic,
    *,
    model: str,
    messages: list[dict],
    system: str | list[dict] | None = None,
    tools: list[dict] | None = None,
) -> TokenCount:
    """Count the input tokens of a prospective request via ``client.messages.count_tokens``.

    ``system`` and ``tools`` are only forwarded when they are not None.
    """
    optional = {"system": system, "tools": tools}
    request: dict = {
        "model": model,
        "messages": messages,
        **{key: value for key, value in optional.items() if value is not None},
    }
    counted = client.messages.count_tokens(**request)
    return TokenCount(input_tokens=counted.input_tokens, model=model)
102
+
103
+
104
def estimate_cost(
    client: anthropic.Anthropic,
    *,
    model: str,
    messages: list[dict],
    estimated_output_tokens: int = 1000,
    system: str | list[dict] | None = None,
    tools: list[dict] | None = None,
) -> CostEstimate:
    """Estimate a request's cost from its counted input tokens and an assumed output size."""
    counted = count_tokens(
        client,
        model=model,
        messages=messages,
        system=system,
        tools=tools,
    )
    estimate = CostEstimate(
        input_tokens=counted.input_tokens,
        estimated_output_tokens=estimated_output_tokens,
        model=model,
    )
    return estimate
119
+
120
+
121
def monthly_projection(
    cost_per_request: float,
    requests_per_day: int,
    days: int = 30,
) -> MonthlyProjection:
    """Build a ``MonthlyProjection`` from the given cost and request volume."""
    projection = MonthlyProjection(
        cost_per_request=cost_per_request,
        requests_per_day=requests_per_day,
        days=days,
    )
    return projection
@@ -0,0 +1,9 @@
1
+ Metadata-Version: 2.4
2
+ Name: tsave
3
+ Version: 0.1.1
4
+ Summary: Drop-in Anthropic client wrapper with token counting, cost analysis, and semantic compression
5
+ License-File: LICENSE
6
+ Requires-Python: >=3.10
7
+ Requires-Dist: anthropic>=0.40.0
8
+ Provides-Extra: dev
9
+ Requires-Dist: pytest>=8.0; extra == 'dev'
@@ -0,0 +1,13 @@
1
+ token_saver/__init__.py,sha256=xfH9FtGMJ6tEywgI96niLPtULSXTAf5JOnfp4n-AR6U,69
2
+ token_saver/cli.py,sha256=qR7N0EXka9KLyfQMuxcWmL5wJN7Ig4inIFQmDt6kgN0,718
3
+ token_saver/client.py,sha256=YdWSdT9y4juI0AHTAomFMQYMYc0DnMP7xDVlJ2XWc9Q,4894
4
+ token_saver/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ token_saver/core/analyzer.py,sha256=yLqCenRJL30pXJVOM2kYrGnEFApU-eaE1ZmaW_SeWek,5731
6
+ token_saver/core/compressor.py,sha256=slk8aVHITfU-Du6BamqSqyF1JDjXXpXZGue7_IqzSr8,5659
7
+ token_saver/core/static_analyzer.py,sha256=XFP0ZqCRYisTolR_yn46TdpEgUv5oIwfND3lqydhMPw,10289
8
+ token_saver/core/tokenizer.py,sha256=et2ROwTnFmAPSRRlYZ_LhU5WIY1Qzye7hu1zu3zXvdE,3595
9
+ tsave-0.1.1.dist-info/METADATA,sha256=qiAcFMNhhFW21h9gQcITPNPu-47bY2o2ABbqB1GcqLY,294
10
+ tsave-0.1.1.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
11
+ tsave-0.1.1.dist-info/entry_points.txt,sha256=VLggJfmMFeHKMlOxCSChaMX4YJkv-oIqtP6rZ4cym4I,47
12
+ tsave-0.1.1.dist-info/licenses/LICENSE,sha256=Ix-GirRZv0ZcXBD7Xzo8OipKG5xK3CXRBp6xnv14Ryw,1068
13
+ tsave-0.1.1.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ tsave = token_saver.cli:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Remo Pulcini
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.