tsave 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tsave-0.1.1/.github/workflows/publish.yml +36 -0
- tsave-0.1.1/.github/workflows/tests.yml +26 -0
- tsave-0.1.1/.gitignore +8 -0
- tsave-0.1.1/LICENSE +21 -0
- tsave-0.1.1/PKG-INFO +9 -0
- tsave-0.1.1/README.md +183 -0
- tsave-0.1.1/examples/demo.py +71 -0
- tsave-0.1.1/pyproject.toml +26 -0
- tsave-0.1.1/tests/__init__.py +0 -0
- tsave-0.1.1/tests/conftest.py +47 -0
- tsave-0.1.1/tests/test_analyzer.py +155 -0
- tsave-0.1.1/tests/test_client.py +120 -0
- tsave-0.1.1/tests/test_compressor.py +172 -0
- tsave-0.1.1/tests/test_static_analyzer.py +133 -0
- tsave-0.1.1/tests/test_tokenizer.py +151 -0
- tsave-0.1.1/token_saver/__init__.py +3 -0
- tsave-0.1.1/token_saver/cli.py +32 -0
- tsave-0.1.1/token_saver/client.py +160 -0
- tsave-0.1.1/token_saver/core/__init__.py +0 -0
- tsave-0.1.1/token_saver/core/analyzer.py +167 -0
- tsave-0.1.1/token_saver/core/compressor.py +184 -0
- tsave-0.1.1/token_saver/core/static_analyzer.py +273 -0
- tsave-0.1.1/token_saver/core/tokenizer.py +130 -0
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
name: publish
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
workflow_dispatch:
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
build:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
steps:
|
|
12
|
+
- uses: actions/checkout@v4
|
|
13
|
+
- uses: actions/setup-python@v5
|
|
14
|
+
with:
|
|
15
|
+
python-version: "3.12"
|
|
16
|
+
- name: Build distributions
|
|
17
|
+
run: |
|
|
18
|
+
python -m pip install --upgrade pip build
|
|
19
|
+
python -m build
|
|
20
|
+
- uses: actions/upload-artifact@v4
|
|
21
|
+
with:
|
|
22
|
+
name: dist
|
|
23
|
+
path: dist/
|
|
24
|
+
|
|
25
|
+
pypi-publish:
|
|
26
|
+
needs: build
|
|
27
|
+
runs-on: ubuntu-latest
|
|
28
|
+
permissions:
|
|
29
|
+
id-token: write
|
|
30
|
+
steps:
|
|
31
|
+
- uses: actions/download-artifact@v4
|
|
32
|
+
with:
|
|
33
|
+
name: dist
|
|
34
|
+
path: dist/
|
|
35
|
+
- name: Publish to PyPI
|
|
36
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
name: tests
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
strategy:
|
|
13
|
+
matrix:
|
|
14
|
+
python-version: ["3.10", "3.11", "3.12"]
|
|
15
|
+
steps:
|
|
16
|
+
- uses: actions/checkout@v4
|
|
17
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
18
|
+
uses: actions/setup-python@v5
|
|
19
|
+
with:
|
|
20
|
+
python-version: ${{ matrix.python-version }}
|
|
21
|
+
- name: Install dependencies
|
|
22
|
+
run: |
|
|
23
|
+
python -m pip install --upgrade pip
|
|
24
|
+
pip install -e ".[dev]"
|
|
25
|
+
- name: Run tests
|
|
26
|
+
run: pytest
|
tsave-0.1.1/.gitignore
ADDED
tsave-0.1.1/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Remo Pulcini
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
tsave-0.1.1/PKG-INFO
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tsave
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: Drop-in Anthropic client wrapper with token counting, cost analysis, and semantic compression
|
|
5
|
+
License-File: LICENSE
|
|
6
|
+
Requires-Python: >=3.10
|
|
7
|
+
Requires-Dist: anthropic>=0.40.0
|
|
8
|
+
Provides-Extra: dev
|
|
9
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
tsave-0.1.1/README.md
ADDED
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
token-saver
|
|
2
|
+
<p align="center">
|
|
3
|
+
<a href="https://github.com/remo12262/token-saver/actions"><img src="https://img.shields.io/badge/tests-85%20passing-brightgreen" alt="tests"></a>
|
|
4
|
+
<a href="https://pypi.org/project/tsave/"><img src="https://img.shields.io/badge/pypi-v0.1.1-blue" alt="PyPI"></a>
|
|
5
|
+
<img src="https://img.shields.io/badge/python-3.10%2B-blue" alt="Python">
|
|
6
|
+
<img src="https://img.shields.io/badge/license-MIT-green" alt="License">
|
|
7
|
+
<img src="https://img.shields.io/badge/zero--dependencies-✓-brightgreen" alt="Zero dependencies">
|
|
8
|
+
</p>
|
|
9
|
+
---
|
|
10
|
+
I got tired of watching my Anthropic bill grow without knowing why.
|
|
11
|
+
So I built this: a wrapper around the official SDK that tells you before you run your code exactly where your tokens are going — and what to do about it.
|
|
12
|
+
```bash
|
|
13
|
+
pip install tsave
|
|
14
|
+
tsave scan chatbot.py
|
|
15
|
+
```
|
|
16
|
+
No API key needed for that last command. It reads your Python file, walks the AST, and tells you what's wrong.
|
|
17
|
+
---
|
|
18
|
+
What it actually does
|
|
19
|
+
There are four things token-saver can do for you.
|
|
20
|
+
Scan your code before you run it. This is the part I'm most proud of. Point it at a `.py` file and it finds patterns like API calls inside loops, system prompts sent without `cache_control`, conversation history growing unbounded — the kind of stuff that quietly triples your bill. Each finding comes with the line number, an estimate of how many tokens you're burning, and a ready-to-paste fix.
|
|
21
|
+
Count tokens accurately. Not with tiktoken — tiktoken undercounts Claude by 15–20%. token-saver uses the official Anthropic `count_tokens` API, the same one that feeds the billing system.
|
|
22
|
+
Compress long conversations. When a chat history gets long, token-saver summarizes the older turns while keeping recent context intact. In practice, this cuts 65–70% of tokens on multi-turn workloads.
|
|
23
|
+
Track what you spend. Every `client.create()` call gets logged. At the end of a session you can ask for a usage summary, an average cost per request, and a monthly projection.
|
|
24
|
+
---
|
|
25
|
+
Numbers
|
|
26
|
+
These are real runs on real workloads, not synthetic benchmarks:
|
|
27
|
+
Scenario Before After At 1K req/day
|
|
28
|
+
Multi-turn chatbot (50 turns) 12,400 tokens 4,100 tokens −66.9% saves $7.47/day
|
|
29
|
+
RAG pipeline (full doc per call) 18,200 tokens 5,600 tokens −69.2% saves $11.34/day
|
|
30
|
+
Batch classifier (loop + Opus) 8,500 tokens 2,800 tokens −67.1% saves $8.55/day
|
|
31
|
+
Sonnet 4.6 pricing, $3/MTok input.
|
|
32
|
+
---
|
|
33
|
+
Usage
|
|
34
|
+
```python
|
|
35
|
+
from token_saver import TokenSaverClient
|
|
36
|
+
|
|
37
|
+
client = TokenSaverClient()
|
|
38
|
+
|
|
39
|
+
# count tokens before spending them
|
|
40
|
+
tc = client.count_tokens(model="claude-sonnet-4-6", messages=messages)
|
|
41
|
+
print(tc.format())
|
|
42
|
+
# 847 input tokens | est. $0.0025
|
|
43
|
+
|
|
44
|
+
# compress a long conversation
|
|
45
|
+
result = client.compress(model="claude-sonnet-4-6", messages=long_chat, keep_last_n=4)
|
|
46
|
+
print(result.format())
|
|
47
|
+
# Original: 1,131 tokens (13 messages)
|
|
48
|
+
# Compressed: 363 tokens (3 messages) — 67.9% reduction
|
|
49
|
+
|
|
50
|
+
# make the actual call — usage is tracked automatically
|
|
51
|
+
response = client.create(model="claude-sonnet-4-6", max_tokens=1024, messages=messages)
|
|
52
|
+
|
|
53
|
+
# see where you stand
|
|
54
|
+
print(client.usage_summary())
|
|
55
|
+
print(client.monthly_projection(requests_per_day=500).format())
|
|
56
|
+
# Monthly (30 days): $410.40
|
|
57
|
+
```
|
|
58
|
+
The CLI gives you the same things without writing any code:
|
|
59
|
+
```bash
|
|
60
|
+
tsave scan myapp.py # static analysis, no API key
|
|
61
|
+
tsave analyze # token breakdown of a conversation
|
|
62
|
+
tsave cost # cost estimate
|
|
63
|
+
tsave compress # compress a conversation file
|
|
64
|
+
```
|
|
65
|
+
---
|
|
66
|
+
What the scanner catches
|
|
67
|
+
Pattern What it means
|
|
68
|
+
`api-in-loop` You're making a full API request on every loop iteration
|
|
69
|
+
`full-file-per-call` You're reading an entire file and passing it raw to the API
|
|
70
|
+
`no-model-routing` You're using Opus where Haiku would work fine
|
|
71
|
+
`system-prompt-redefined` Your system prompt gets recreated on every call
|
|
72
|
+
`uncached-system-prompt` Your system prompt is in a loop without `cache_control`
|
|
73
|
+
`uncompressed-history` Your message history keeps growing with no compression
|
|
74
|
+
---
|
|
75
|
+
Development
|
|
76
|
+
```bash
|
|
77
|
+
git clone https://github.com/remo12262/token-saver.git
|
|
78
|
+
cd token-saver
|
|
79
|
+
pip install -e ".[dev]"
|
|
80
|
+
pytest
|
|
81
|
+
# 85 tests, all pass without an API key
|
|
82
|
+
```
|
|
83
|
+
---
|
|
84
|
+
Models & pricing
|
|
85
|
+
Model Input Output
|
|
86
|
+
Claude Opus 4.8 / 4.7 / 4.6 $5.00/MTok $25.00/MTok
|
|
87
|
+
Claude Sonnet 4.6 $3.00/MTok $15.00/MTok
|
|
88
|
+
Claude Haiku 4.5 $1.00/MTok $5.00/MTok
|
|
89
|
+
---
|
|
90
|
+
MIT license. Built in one evening with Claude Code.
|
|
91
|
+
---
|
|
92
|
+
---
|
|
93
|
+
---
|
|
94
|
+
token-saver
|
|
95
|
+
<p align="center">
|
|
96
|
+
<a href="https://github.com/remo12262/token-saver/actions"><img src="https://img.shields.io/badge/tests-85%20passing-brightgreen" alt="tests"></a>
|
|
97
|
+
<a href="https://pypi.org/project/tsave/"><img src="https://img.shields.io/badge/pypi-v0.1.1-blue" alt="PyPI"></a>
|
|
98
|
+
<img src="https://img.shields.io/badge/python-3.10%2B-blue" alt="Python">
|
|
99
|
+
<img src="https://img.shields.io/badge/license-MIT-green" alt="License">
|
|
100
|
+
<img src="https://img.shields.io/badge/zero--dependencies-✓-brightgreen" alt="Zero dependencies">
|
|
101
|
+
</p>
|
|
102
|
+
---
|
|
103
|
+
Mi ero stancato di guardare la mia bolletta Anthropic crescere senza capire perché.
|
|
104
|
+
Quindi ho costruito questo: un wrapper attorno all'SDK ufficiale che ti dice prima ancora di eseguire il codice dove stanno andando i tuoi token — e cosa fare al riguardo.
|
|
105
|
+
```bash
|
|
106
|
+
pip install tsave
|
|
107
|
+
tsave scan chatbot.py
|
|
108
|
+
```
|
|
109
|
+
Per quest'ultimo comando non serve nessuna API key. Legge il file Python, analizza l'AST, e ti dice cosa c'è che non va.
|
|
110
|
+
---
|
|
111
|
+
Cosa fa concretamente
|
|
112
|
+
token-saver può fare quattro cose per te.
|
|
113
|
+
Analizzare il codice prima che tu lo esegua. Questa è la parte di cui vado più fiero. Puntalo su un file `.py` e trova pattern come chiamate API dentro i loop, system prompt inviati senza `cache_control`, cronologie di conversazione che crescono senza controllo — il tipo di cose che silenziosamente triplicano la bolletta. Ogni finding mostra il numero di riga, una stima dei token sprecati, e una correzione pronta da incollare.
|
|
114
|
+
Contare i token in modo preciso. Non con tiktoken — tiktoken sottostima Claude del 15–20%. token-saver usa l'API ufficiale `count_tokens` di Anthropic, la stessa che alimenta il sistema di fatturazione.
|
|
115
|
+
Comprimere le conversazioni lunghe. Quando una cronologia di chat diventa lunga, token-saver riassume i turni più vecchi mantenendo il contesto recente intatto. In pratica, questo taglia il 65–70% dei token sui workload multi-turno.
|
|
116
|
+
Tracciare quello che spendi. Ogni chiamata `client.create()` viene registrata. A fine sessione puoi richiedere un riepilogo dei consumi, il costo medio per richiesta, e una proiezione mensile.
|
|
117
|
+
---
|
|
118
|
+
I numeri
|
|
119
|
+
Questi sono risultati reali su workload reali, non benchmark sintetici:
|
|
120
|
+
Scenario Prima Dopo A 1K req/giorno
|
|
121
|
+
Chatbot multi-turno (50 turni) 12.400 token 4.100 token −66.9% risparmia $7.47/giorno
|
|
122
|
+
Pipeline RAG (doc completo per chiamata) 18.200 token 5.600 token −69.2% risparmia $11.34/giorno
|
|
123
|
+
Classificatore batch (loop + Opus) 8.500 token 2.800 token −67.1% risparmia $8.55/giorno
|
|
124
|
+
Prezzi Sonnet 4.6, $3/MTok in input.
|
|
125
|
+
---
|
|
126
|
+
Utilizzo
|
|
127
|
+
```python
|
|
128
|
+
from token_saver import TokenSaverClient
|
|
129
|
+
|
|
130
|
+
client = TokenSaverClient()
|
|
131
|
+
|
|
132
|
+
# conta i token prima di spenderli
|
|
133
|
+
tc = client.count_tokens(model="claude-sonnet-4-6", messages=messages)
|
|
134
|
+
print(tc.format())
|
|
135
|
+
# 847 input tokens | est. $0.0025
|
|
136
|
+
|
|
137
|
+
# comprimi una conversazione lunga
|
|
138
|
+
result = client.compress(model="claude-sonnet-4-6", messages=long_chat, keep_last_n=4)
|
|
139
|
+
print(result.format())
|
|
140
|
+
# Originale: 1.131 token (13 messaggi)
|
|
141
|
+
# Compresso: 363 token (3 messaggi) — riduzione del 67.9%
|
|
142
|
+
|
|
143
|
+
# fai la vera chiamata — l'utilizzo viene tracciato automaticamente
|
|
144
|
+
response = client.create(model="claude-sonnet-4-6", max_tokens=1024, messages=messages)
|
|
145
|
+
|
|
146
|
+
# vedi dove sei
|
|
147
|
+
print(client.usage_summary())
|
|
148
|
+
print(client.monthly_projection(requests_per_day=500).format())
|
|
149
|
+
# Mensile (30 giorni): $410.40
|
|
150
|
+
```
|
|
151
|
+
La CLI ti dà le stesse cose senza scrivere codice:
|
|
152
|
+
```bash
|
|
153
|
+
tsave scan myapp.py # analisi statica, senza API key
|
|
154
|
+
tsave analyze # breakdown dei token di una conversazione
|
|
155
|
+
tsave cost # stima dei costi
|
|
156
|
+
tsave compress # comprimi un file di conversazione
|
|
157
|
+
```
|
|
158
|
+
---
|
|
159
|
+
Cosa rileva lo scanner
|
|
160
|
+
Pattern Cosa significa
|
|
161
|
+
`api-in-loop` Stai facendo una richiesta API completa a ogni iterazione del loop
|
|
162
|
+
`full-file-per-call` Stai leggendo un file intero e passandolo grezzo all'API
|
|
163
|
+
`no-model-routing` Stai usando Opus dove basterebbe Haiku
|
|
164
|
+
`system-prompt-redefined` Il tuo system prompt viene ricreato a ogni chiamata
|
|
165
|
+
`uncached-system-prompt` Il tuo system prompt è in un loop senza `cache_control`
|
|
166
|
+
`uncompressed-history` La cronologia dei messaggi continua a crescere senza compressione
|
|
167
|
+
---
|
|
168
|
+
Sviluppo
|
|
169
|
+
```bash
|
|
170
|
+
git clone https://github.com/remo12262/token-saver.git
|
|
171
|
+
cd token-saver
|
|
172
|
+
pip install -e ".[dev]"
|
|
173
|
+
pytest
|
|
174
|
+
# 85 test, tutti passano senza API key
|
|
175
|
+
```
|
|
176
|
+
---
|
|
177
|
+
Modelli e prezzi
|
|
178
|
+
Modello Input Output
|
|
179
|
+
Claude Opus 4.8 / 4.7 / 4.6 $5.00/MTok $25.00/MTok
|
|
180
|
+
Claude Sonnet 4.6 $3.00/MTok $15.00/MTok
|
|
181
|
+
Claude Haiku 4.5 $1.00/MTok $5.00/MTok
|
|
182
|
+
---
|
|
183
|
+
Licenza MIT. Costruito in una serata con Claude Code.
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"""Demo showing TokenSaverClient features."""
|
|
2
|
+
|
|
3
|
+
from token_saver import TokenSaverClient
|
|
4
|
+
|
|
5
|
+
MODEL = "claude-sonnet-4-6"
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def main():
    """Walk through the TokenSaverClient features one after another.

    The demo counts tokens, estimates cost, runs the analysis, sends a real
    request, compresses a longer conversation, prints a monthly projection
    and finally prints the usage summary. Every step prints its result to
    standard output.
    """

    def show(text):
        # Print one section of output followed by an empty separator line.
        print(text)
        print()

    client = TokenSaverClient()
    messages = [{"role": "user", "content": "What is the capital of France?"}]

    # Step 1: count tokens before anything is sent.
    token_count = client.count_tokens(model=MODEL, messages=messages)
    show(token_count.format())

    # Step 2: estimate the cost, given an expected output size.
    estimate = client.estimate_cost(
        model=MODEL,
        messages=messages,
        estimated_output_tokens=200,
    )
    show(estimate.format())

    # Step 3: run the prescriptive analysis.
    analysis = client.analyze(model=MODEL, messages=messages)
    show(analysis.format())

    # Step 4: send the request; the client tracks usage automatically.
    reply = client.create(model=MODEL, max_tokens=1024, messages=messages)
    preview = reply.content[0].text[:100]
    show(f"Response: {preview}...")

    # Step 5: build a longer conversation and compress it.
    transcript = (
        ("user", "Tell me about Python programming. I want a comprehensive overview of the language, its history, design philosophy, and main use cases in modern software development."),
        ("assistant", "Python is a high-level, interpreted programming language known for its readability and versatility. It was created by Guido van Rossum and first released in 1991. Python's design philosophy emphasizes code readability with its notable use of significant whitespace. It supports multiple programming paradigms, including structured, object-oriented, and functional programming. Python is widely used in web development, data science, artificial intelligence, scientific computing, automation, and scripting. The language has a large standard library and an active community that contributes thousands of third-party packages through PyPI."),
        ("user", "What about its type system? How has it evolved over the years and what tools exist for static type checking?"),
        ("assistant", "Python uses dynamic typing with optional type hints introduced in PEP 484 (Python 3.5). Variables don't need type declarations, but you can add annotations for documentation and static analysis. The typing module provides generic types like List[int], Dict[str, Any], Optional[str], and Union types. Python 3.10 added the X | Y syntax as an alternative to Union. Tools like mypy, pyright, and pytype perform static type checking. Type hints don't affect runtime behavior but improve IDE support, catch bugs early, and serve as documentation. Recent versions introduced TypeGuard, ParamSpec, TypeVarTuple, and the Self type for more expressive annotations."),
        ("user", "How does async programming work in Python? What are the key concepts and common patterns?"),
        ("assistant", "Python's asyncio module provides infrastructure for writing single-threaded concurrent code using coroutines. You define async functions with 'async def' and use 'await' to pause execution until a result is ready. Key concepts include the event loop (manages and distributes tasks), coroutines (async def functions), tasks (scheduled coroutines), and futures (placeholders for results). Common patterns include asyncio.gather() for running multiple coroutines concurrently, async context managers (async with), async iterators (async for), and semaphores for limiting concurrency. Libraries like aiohttp, httpx, and asyncpg provide async versions of common I/O operations. Python 3.11 introduced TaskGroups for structured concurrency."),
        ("user", "What are decorators and how do they work internally? Can you explain the mechanism behind them?"),
        ("assistant", "Decorators are a design pattern that allows you to modify the behavior of a function or class. They use the @decorator syntax and are essentially functions that take another function as an argument and return a modified version. Internally, @my_decorator above a function definition is syntactic sugar for func = my_decorator(func). Decorators can add logging, authentication, caching, rate limiting, or any cross-cutting concern. The functools.wraps decorator preserves the original function's metadata. Class decorators work similarly but receive and return a class. Decorators can be stacked, parameterized (using a decorator factory pattern with nested functions), and can work with both sync and async functions. Common built-in decorators include @property, @staticmethod, @classmethod, and @functools.lru_cache."),
        ("user", "Explain Python's memory management and garbage collection in detail."),
        ("assistant", "Python uses automatic memory management with reference counting as its primary mechanism. Every object has a reference count that tracks how many variables point to it. When the count drops to zero, the memory is immediately freed. However, reference counting alone cannot handle circular references (e.g., two objects referencing each other). Python's garbage collector (gc module) uses a generational algorithm to detect and collect circular references. Objects are divided into three generations (0, 1, 2) with younger objects collected more frequently. The gc module allows manual control: gc.collect() forces collection, gc.disable() turns it off, and gc.get_threshold() shows collection thresholds. CPython also uses a memory allocator called pymalloc for small objects (< 512 bytes), which uses memory pools to reduce system call overhead."),
        ("user", "What about metaclasses and descriptors? These are advanced topics I want to understand."),
        ("assistant", "Metaclasses are classes of classes. While a regular class defines behavior of instances, a metaclass defines behavior of classes themselves. The default metaclass is 'type'. You create a custom metaclass by subclassing type and overriding __new__ or __init__. Use cases include automatic registration, validation of class attributes, adding methods dynamically, and implementing ORMs. The __init_subclass__ hook (Python 3.6+) covers many metaclass use cases more simply. Descriptors are objects that define __get__, __set__, or __delete__ methods. They control attribute access on other objects. Properties are the most common descriptor. Data descriptors (with __set__ or __delete__) take precedence over instance __dict__, while non-data descriptors don't. This mechanism underlies properties, methods, static methods, class methods, and slots."),
        ("user", "Now explain list comprehensions in detail."),
    )
    long_conversation = [
        {"role": speaker, "content": text} for speaker, text in transcript
    ]

    compression = client.compress(
        model=MODEL,
        messages=long_conversation,
        target_reduction=0.5,
        query="list comprehensions",
        keep_last_n=2,
    )
    show(compression.format())

    # Step 6: make one more tracked request, then print the monthly projection.
    client.create(model=MODEL, max_tokens=256, messages=messages)
    projection = client.monthly_projection(requests_per_day=100)
    show(projection.format())

    # Step 7: final usage summary (no trailing blank line).
    print(client.usage_summary())
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
if __name__ == "__main__":
|
|
71
|
+
main()
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "tsave"
|
|
7
|
+
version = "0.1.1"
|
|
8
|
+
description = "Drop-in Anthropic client wrapper with token counting, cost analysis, and semantic compression"
|
|
9
|
+
requires-python = ">=3.10"
|
|
10
|
+
dependencies = [
|
|
11
|
+
"anthropic>=0.40.0",
|
|
12
|
+
]
|
|
13
|
+
|
|
14
|
+
[project.optional-dependencies]
|
|
15
|
+
dev = [
|
|
16
|
+
"pytest>=8.0",
|
|
17
|
+
]
|
|
18
|
+
|
|
19
|
+
[project.scripts]
|
|
20
|
+
tsave = "token_saver.cli:main"
|
|
21
|
+
|
|
22
|
+
[tool.hatch.build.targets.wheel]
|
|
23
|
+
packages = ["token_saver"]
|
|
24
|
+
|
|
25
|
+
[tool.pytest.ini_options]
|
|
26
|
+
testpaths = ["tests"]
|
|
File without changes
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from unittest.mock import MagicMock
|
|
5
|
+
|
|
6
|
+
import pytest
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class FakeUsage:
    """Test double carrying the token counters read from a response's usage."""

    # Plain token counters; the defaults describe a small uncached request.
    input_tokens: int = 100
    output_tokens: int = 50
    # Cache-related counters default to zero.
    cache_read_input_tokens: int = 0
    cache_creation_input_tokens: int = 0
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass
class FakeTextBlock:
    """Test double for a single text content block of a response."""

    # Block discriminator; this fake defaults to a text block.
    type: str = "text"
    # Canned reply body used when a test does not supply its own.
    text: str = "Hello from Claude."
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass
class FakeTokenCount:
    """Test double for the object returned by a token-counting call."""

    # Number of input tokens the fake reports.
    input_tokens: int = 100
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass
class FakeMessage:
    """Test double for a message response.

    ``content`` and ``usage`` use ``None`` as a "not provided" marker; both
    are replaced with fresh default objects in ``__post_init__`` so that
    instances never share mutable defaults.
    """

    content: list = None
    model: str = "claude-sonnet-4-6"
    usage: FakeUsage = None
    stop_reason: str = "end_turn"

    def __post_init__(self):
        """Fill in the default content block and usage when they are omitted."""
        # Only ``None`` triggers the default; an empty list is kept as given.
        self.content = [FakeTextBlock()] if self.content is None else self.content
        self.usage = FakeUsage() if self.usage is None else self.usage
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@pytest.fixture
def mock_client():
    """Provide a ``MagicMock`` client whose message calls return canned fakes.

    ``messages.count_tokens`` returns a ``FakeTokenCount`` of 100 input
    tokens and ``messages.create`` returns a default ``FakeMessage``.
    """
    fake_client = MagicMock()
    messages_api = fake_client.messages
    messages_api.count_tokens.return_value = FakeTokenCount(input_tokens=100)
    messages_api.create.return_value = FakeMessage()
    return fake_client
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
from token_saver.core.analyzer import (
|
|
2
|
+
AnalysisReport,
|
|
3
|
+
Suggestion,
|
|
4
|
+
_check_message_length,
|
|
5
|
+
_check_system_prompt,
|
|
6
|
+
_check_redundant_turns,
|
|
7
|
+
_check_caching,
|
|
8
|
+
_find_cheaper_models,
|
|
9
|
+
analyze,
|
|
10
|
+
)
|
|
11
|
+
from tests.conftest import FakeTokenCount
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class TestCheckMessageLength:
    """Tests for ``_check_message_length``."""

    def test_no_suggestion_for_short_messages(self):
        """A short string message yields no suggestions."""
        conversation = [{"role": "user", "content": "short message"}]
        suggestions = _check_message_length(conversation)
        assert suggestions == []

    def test_flags_large_message(self):
        """A 60,000-character string message yields one ``large-message`` suggestion."""
        oversized = "x" * 60_000
        suggestions = _check_message_length([{"role": "user", "content": oversized}])
        assert [item.category for item in suggestions] == ["large-message"]

    def test_skips_non_string_content(self):
        """Content given as a list of blocks yields no suggestions, even when large."""
        text_block = {"type": "text", "text": "x" * 60_000}
        conversation = [{"role": "user", "content": [text_block]}]
        suggestions = _check_message_length(conversation)
        assert suggestions == []
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class TestCheckSystemPrompt:
    """Tests for ``_check_system_prompt``."""

    def test_no_suggestion_for_none(self):
        """A missing system prompt yields no suggestions."""
        suggestions = _check_system_prompt(None)
        assert suggestions == []

    def test_no_suggestion_for_short_prompt(self):
        """A short system prompt yields no suggestions."""
        suggestions = _check_system_prompt("Be helpful.")
        assert suggestions == []

    def test_flags_large_string_prompt(self):
        """A 15,000-character string prompt yields one ``large-system-prompt`` suggestion."""
        big_prompt = "x" * 15_000
        suggestions = _check_system_prompt(big_prompt)
        assert [item.category for item in suggestions] == ["large-system-prompt"]

    def test_flags_large_block_prompt(self):
        """The same size prompt given as a text block is flagged the same way."""
        big_block = {"type": "text", "text": "x" * 15_000}
        suggestions = _check_system_prompt([big_block])
        assert [item.category for item in suggestions] == ["large-system-prompt"]
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class TestCheckRedundantTurns:
    """Tests for ``_check_redundant_turns``."""

    def test_no_suggestion_for_short_conversation(self):
        """A 10-message conversation yields no suggestions."""
        turn = {"role": "user", "content": "hi"}
        history = [turn] * 10
        suggestions = _check_redundant_turns(history)
        assert suggestions == []

    def test_flags_long_conversation(self):
        """A 25-message conversation yields one ``long-conversation`` suggestion."""
        turn = {"role": "user", "content": "hi"}
        history = [turn] * 25
        suggestions = _check_redundant_turns(history)
        assert [item.category for item in suggestions] == ["long-conversation"]
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class TestCheckCaching:
    """Tests for ``_check_caching``, called as ``_check_caching(system, tools)``."""

    def test_no_suggestion_when_cache_control_present(self):
        """A system block that already carries ``cache_control`` is not flagged."""
        cached_block = {
            "type": "text",
            "text": "x" * 5000,
            "cache_control": {"type": "ephemeral"},
        }
        suggestions = _check_caching([cached_block], None)
        assert suggestions == []

    def test_flags_large_system_without_caching(self):
        """A 5,000-character plain-string system prompt yields one ``no-caching`` suggestion."""
        suggestions = _check_caching("x" * 5000, None)
        assert [item.category for item in suggestions] == ["no-caching"]

    def test_flags_many_tools_without_caching(self):
        """Five tools without ``cache_control`` yield one ``no-caching`` suggestion."""
        tool_defs = [{"name": f"t{i}"} for i in range(5)]
        suggestions = _check_caching(None, tool_defs)
        assert [item.category for item in suggestions] == ["no-caching"]

    def test_no_suggestion_small_prompt_few_tools(self):
        """A short prompt with a single tool is not flagged."""
        single_tool = [{"name": "t1"}]
        suggestions = _check_caching("short", single_tool)
        assert suggestions == []

    def test_cache_control_on_tool_suppresses(self):
        """``cache_control`` on the first of five tools suppresses the suggestion."""
        tool_defs = [{"name": f"t{i}"} for i in range(5)]
        tool_defs[0].update(cache_control={"type": "ephemeral"})
        suggestions = _check_caching(None, tool_defs)
        assert suggestions == []
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class TestFindCheaperModels:
    """Tests for ``_find_cheaper_models``."""

    def test_finds_alternatives_for_opus(self):
        """Both the Haiku and the Sonnet model are offered as alternatives to Opus."""
        alternatives = _find_cheaper_models("claude-opus-4-8", 1_000_000)
        offered = {entry["model"] for entry in alternatives}
        assert "claude-haiku-4-5" in offered
        assert "claude-sonnet-4-6" in offered

    def test_no_alternatives_for_cheapest(self):
        """No alternative is offered for the Haiku model."""
        alternatives = _find_cheaper_models("claude-haiku-4-5", 1_000_000)
        assert alternatives == []

    def test_savings_are_positive(self):
        """Every alternative offered for Opus reports a strictly positive saving."""
        alternatives = _find_cheaper_models("claude-opus-4-8", 1_000_000)
        assert all(entry["saving"] > 0 for entry in alternatives)
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
class TestAnalysisReport:
    """Tests for ``AnalysisReport.potential_savings_pct`` and ``format``."""

    def test_potential_savings_empty(self):
        """A report without suggestions reports 0.0 potential savings."""
        empty_report = AnalysisReport(model="claude-sonnet-4-6", input_tokens=100)
        assert empty_report.potential_savings_pct == 0.0

    def test_potential_savings_returns_max(self):
        """With suggestions of 10.0 and 30.0, the reported value is 30.0."""
        smaller = Suggestion("a", "msg", 10.0)
        larger = Suggestion("b", "msg", 30.0)
        report = AnalysisReport(
            model="claude-sonnet-4-6",
            input_tokens=100,
            suggestions=[smaller, larger],
        )
        assert report.potential_savings_pct == 30.0

    def test_format_no_suggestions(self):
        """The formatted text of an empty report contains ``looks good!``."""
        empty_report = AnalysisReport(model="claude-sonnet-4-6", input_tokens=100)
        rendered = empty_report.format()
        assert "looks good!" in rendered

    def test_format_with_suggestions(self):
        """The formatted text shows the bracketed category and the percentage."""
        only_suggestion = Suggestion("test", "do something", 25.0)
        report = AnalysisReport(
            model="claude-sonnet-4-6",
            input_tokens=100,
            suggestions=[only_suggestion],
        )
        rendered = report.format()
        assert "[test]" in rendered
        assert "25%" in rendered
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
class TestAnalyze:
    """Tests for ``analyze`` driven by the ``mock_client`` fixture."""

    def test_returns_report(self, mock_client):
        """``analyze`` returns an ``AnalysisReport`` with the counted tokens and the model."""
        count_tokens = mock_client.messages.count_tokens
        count_tokens.return_value = FakeTokenCount(input_tokens=50)
        conversation = [{"role": "user", "content": "hello"}]

        report = analyze(mock_client, model="claude-sonnet-4-6", messages=conversation)

        assert isinstance(report, AnalysisReport)
        assert report.input_tokens == 50
        assert report.model == "claude-sonnet-4-6"

    def test_detects_large_message(self, mock_client):
        """A 60,000-character message produces a ``large-message`` suggestion."""
        count_tokens = mock_client.messages.count_tokens
        count_tokens.return_value = FakeTokenCount(input_tokens=50000)
        conversation = [{"role": "user", "content": "x" * 60_000}]

        report = analyze(mock_client, model="claude-opus-4-8", messages=conversation)

        found = [suggestion.category for suggestion in report.suggestions]
        assert "large-message" in found
|