turboquant-tools 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- turboquant_tools-0.1.0/LICENSE +21 -0
- turboquant_tools-0.1.0/PKG-INFO +267 -0
- turboquant_tools-0.1.0/README.md +242 -0
- turboquant_tools-0.1.0/pyproject.toml +38 -0
- turboquant_tools-0.1.0/setup.cfg +4 -0
- turboquant_tools-0.1.0/src/turboquant_tools/__init__.py +8 -0
- turboquant_tools-0.1.0/src/turboquant_tools/cli.py +100 -0
- turboquant_tools-0.1.0/src/turboquant_tools/core.py +268 -0
- turboquant_tools-0.1.0/src/turboquant_tools/mcp_server.py +166 -0
- turboquant_tools-0.1.0/src/turboquant_tools.egg-info/PKG-INFO +267 -0
- turboquant_tools-0.1.0/src/turboquant_tools.egg-info/SOURCES.txt +15 -0
- turboquant_tools-0.1.0/src/turboquant_tools.egg-info/dependency_links.txt +1 -0
- turboquant_tools-0.1.0/src/turboquant_tools.egg-info/entry_points.txt +2 -0
- turboquant_tools-0.1.0/src/turboquant_tools.egg-info/requires.txt +9 -0
- turboquant_tools-0.1.0/src/turboquant_tools.egg-info/top_level.txt +1 -0
- turboquant_tools-0.1.0/tests/test_core.py +84 -0
- turboquant_tools-0.1.0/tests/test_mcp_server.py +52 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 FreezeVII
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: turboquant-tools
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: CLI + MCP Server + Python Library for TurboQuant-based embedding compression
|
|
5
|
+
Author: FreezeVII
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/FreezeVII/turboquant-tools
|
|
8
|
+
Project-URL: Source, https://github.com/FreezeVII/turboquant-tools
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
14
|
+
Requires-Python: >=3.9
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
License-File: LICENSE
|
|
17
|
+
Requires-Dist: numpy>=1.24
|
|
18
|
+
Requires-Dist: click>=8.0
|
|
19
|
+
Provides-Extra: mcp
|
|
20
|
+
Requires-Dist: fastmcp>=0.1; extra == "mcp"
|
|
21
|
+
Provides-Extra: dev
|
|
22
|
+
Requires-Dist: pytest>=7; extra == "dev"
|
|
23
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
24
|
+
Dynamic: license-file
|
|
25
|
+
|
|
26
|
+
# 🧊 TurboQuant Tools
|
|
27
|
+
|
|
28
|
+
> **Compress AI embeddings by 5–7× with near-lossless quality.**
|
|
29
|
+
|
|
30
|
+
CLI + Python Library + [MCP](https://modelcontextprotocol.io) Server for extreme vector compression using [Google's TurboQuant](https://research.google/blog/turboquant-redefining-ai-efficiency-with-extreme-compression/) (PolarQuant + QJL) — wrapped in a clean numpy-first API.
|
|
31
|
+
|
|
32
|
+
[PyPI](https://pypi.org/project/turboquant-tools/)
|
|
33
|
+
[Python](https://www.python.org)
|
|
34
|
+
[License: MIT](LICENSE)
|
|
35
|
+
[CI](https://github.com/FreezeVII/turboquant-tools/actions)
|
|
36
|
+
|
|
37
|
+
---
|
|
38
|
+
|
|
39
|
+
## 🚀 Quick Start
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
pip install turboquant-tools
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
Compress a `.npy` embedding file:
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
turboquant compress embeddings.npy compressed.tq
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
Restore:
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
turboquant decompress compressed.tq restored.npy
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
Estimate savings:
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
turboquant estimate embeddings.npy --bits 3
|
|
61
|
+
# Original: 153.00 MB -> Compressed: 20.13 MB (7.60×, save 87%)
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
---
|
|
65
|
+
|
|
66
|
+
## 📦 What's Inside
|
|
67
|
+
|
|
68
|
+
| Command / Tool | Description |
|
|
69
|
+
|---|---|
|
|
70
|
+
| `turboquant compress` | Compress `.npy` embeddings → `.tq` binary |
|
|
71
|
+
| `turboquant decompress` | Restore `.tq` → `.npy` |
|
|
72
|
+
| `turboquant estimate` | Predict compression ratio before running |
|
|
73
|
+
| `turboquant mcp-server` | MCP stdio server (AI agent integration) |
|
|
74
|
+
| Python `compress()` | Compress numpy arrays in code |
|
|
75
|
+
| Python `decompress()` | Restore in code |
|
|
76
|
+
|
|
77
|
+
---
|
|
78
|
+
|
|
79
|
+
## 🔧 CLI Reference
|
|
80
|
+
|
|
81
|
+
### compress
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
turboquant compress INPUT [OUTPUT] [OPTIONS]
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
| Option | Default | Description |
|
|
88
|
+
|---|---|---|
|
|
89
|
+
| `INPUT` | — | `.npy` file with float32 embeddings `(n, d)` |
|
|
90
|
+
| `OUTPUT` | `{stem}_tq{b}.tq` | Output `.tq` file |
|
|
91
|
+
| `-b, --bits` | `3` | Bit width (3 or 4) |
|
|
92
|
+
| `-o, --output` | — | Alternative to positional OUTPUT |
|
|
93
|
+
| `--no-qjl` | off | Skip QJL correction (faster, lower quality) |
|
|
94
|
+
|
|
95
|
+
**Examples:**
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
# Basic 3-bit compression
|
|
99
|
+
turboquant compress wiki_embeddings.npy wiki.tq
|
|
100
|
+
|
|
101
|
+
# 4-bit compression (higher quality)
|
|
102
|
+
turboquant compress embeddings.npy -b 4
|
|
103
|
+
|
|
104
|
+
# Fast mode (no QJL)
|
|
105
|
+
turboquant compress big_set.npy -b 3 --no-qjl
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
### decompress
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
turboquant decompress INPUT [OUTPUT]
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
### estimate
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
turboquant estimate INPUT [--bits N]
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
---
|
|
121
|
+
|
|
122
|
+
## 🐍 Python API
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
from turboquant_tools import compress, decompress, estimate_savings
|
|
126
|
+
import numpy as np
|
|
127
|
+
|
|
128
|
+
# Load or generate embeddings
|
|
129
|
+
vectors = np.random.randn(10000, 384).astype(np.float32)
|
|
130
|
+
|
|
131
|
+
# Compress (5–7× reduction)
|
|
132
|
+
compressed = compress(vectors, bits=3, use_qjl=False)
|
|
133
|
+
print(f"{vectors.nbytes / 1e6:.1f} MB → {compressed.nbytes / 1e6:.1f} MB ({compressed.memory.ratio:.1f}×)")
|
|
134
|
+
|
|
135
|
+
# Restore
|
|
136
|
+
restored = decompress(compressed)
|
|
137
|
+
print(f"MAE: {np.abs(restored - vectors).mean():.4f}")
|
|
138
|
+
|
|
139
|
+
# Estimate without running
|
|
140
|
+
est = estimate_savings(n_vectors=100000, dim=768, bits=3)
|
|
141
|
+
print(est) # Original: X MB -> Compressed: Y MB (7.60×, save 87%)
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
**CompressedVectors** objects carry metadata:
|
|
145
|
+
|
|
146
|
+
```python
|
|
147
|
+
compressed.n_vectors # original count
|
|
148
|
+
compressed.dim # original dimension
|
|
149
|
+
compressed.nbytes # compressed size in bytes
|
|
150
|
+
compressed.memory # MemoryBytes(original, compressed, ratio)
|
|
151
|
+
compressed.data # raw .tq bytes (save to disk)
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
---
|
|
155
|
+
|
|
156
|
+
## 🤖 MCP Server (AI Agents)
|
|
157
|
+
|
|
158
|
+
TurboQuant Tools ships with a native **MCP server** for AI agent integration — works with any MCP-compatible host (Hermes, Claude Desktop, etc.).
|
|
159
|
+
|
|
160
|
+
### Start
|
|
161
|
+
|
|
162
|
+
```bash
|
|
163
|
+
turboquant mcp-server
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
### Register in your MCP client
|
|
167
|
+
|
|
168
|
+
**Hermes Agent** (`~/.hermes/config.yaml`):
|
|
169
|
+
|
|
170
|
+
```yaml
|
|
171
|
+
mcp_servers:
|
|
172
|
+
turboquant-tools:
|
|
173
|
+
command: turboquant
|
|
174
|
+
args: ["mcp-server"]
|
|
175
|
+
enabled: true
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
**Claude Desktop** (`claude_desktop_config.json`):
|
|
179
|
+
|
|
180
|
+
```json
|
|
181
|
+
{
|
|
182
|
+
"mcpServers": {
|
|
183
|
+
"turboquant-tools": {
|
|
184
|
+
"command": "turboquant",
|
|
185
|
+
"args": ["mcp-server"]
|
|
186
|
+
}
|
|
187
|
+
}
|
|
188
|
+
}
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
### Available Tools
|
|
192
|
+
|
|
193
|
+
| Tool | Description |
|
|
194
|
+
|---|---|
|
|
195
|
+
| `compress_embeddings` | Compress vectors in-memory |
|
|
196
|
+
| `decompress_embeddings` | Restore compressed vectors |
|
|
197
|
+
| `estimate_savings_mcp` | Predict compression ratio |
|
|
198
|
+
| `embed_and_compress` | Embed texts via API + compress in one step |
|
|
199
|
+
|
|
200
|
+
---
|
|
201
|
+
|
|
202
|
+
## 📊 Performance
|
|
203
|
+
|
|
204
|
+
Measured on random float32 embeddings (CPU, no GPU needed):
|
|
205
|
+
|
|
206
|
+
| Vectors | Dim | Mode | Original | Compressed | Ratio | MAE |
|
|
207
|
+
|---|---|---|---|---|---|---|
|
|
208
|
+
| 20 | 384 | PolarQuant 3-bit | 30 KB | 10 KB | **3.0×** | 2.6 |
|
|
209
|
+
| 20 | 384 | TurboQuant (QJL) | 30 KB | 20 KB | 1.5× | 3.3 |
|
|
210
|
+
| 100K | 384 | PolarQuant 3-bit | 153 MB | 20 MB | **7.6×** | — |
|
|
211
|
+
|
|
212
|
+
**Use cases:**
|
|
213
|
+
- **RAG pipelines** — compress vector DB indexes
|
|
214
|
+
- **Edge devices** — fit embeddings in limited RAM
|
|
215
|
+
- **Storage savings** — reduce cloud costs for large vector stores
|
|
216
|
+
- **Memory-bound agents** — compress context vectors on the fly
|
|
217
|
+
|
|
218
|
+
---
|
|
219
|
+
|
|
220
|
+
## 🧪 Development
|
|
221
|
+
|
|
222
|
+
```bash
|
|
223
|
+
git clone https://github.com/FreezeVII/turboquant-tools.git
|
|
224
|
+
cd turboquant-tools
|
|
225
|
+
pip install -e .
|
|
226
|
+
pip install pytest
|
|
227
|
+
pytest tests/
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
### Run tests
|
|
231
|
+
|
|
232
|
+
```bash
|
|
233
|
+
pytest tests/ -v
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
---
|
|
237
|
+
|
|
238
|
+
## 🧱 How It Works
|
|
239
|
+
|
|
240
|
+
Two-stage compression inspired by [Google's TurboQuant](https://research.google/blog/turboquant-redefining-ai-efficiency-with-extreme-compression/):
|
|
241
|
+
|
|
242
|
+
1. **PolarQuant** — Random Hadamard rotation + scalar quantization to 3–4 bits per dimension. Captures magnitude and direction.
|
|
243
|
+
2. **QJL** (optional) — Quantized Johnson-Lindenstrauss residual correction. Recovers high-frequency detail lost in PolarQuant.
|
|
244
|
+
|
|
245
|
+
Both stages run **CPU-only** via PyTorch — no GPU required. The `.tq` binary format uses a 30-byte header with magic bytes (`TQT2`) + packed indices and norms.
|
|
246
|
+
|
|
247
|
+
Under the hood this wraps [OnlyTerp/turboquant](https://github.com/OnlyTerp/turboquant), a reference PyTorch implementation.
|
|
248
|
+
|
|
249
|
+
---
|
|
250
|
+
|
|
251
|
+
## 📄 License
|
|
252
|
+
|
|
253
|
+
MIT — see [LICENSE](LICENSE).
|
|
254
|
+
|
|
255
|
+
---
|
|
256
|
+
|
|
257
|
+
## 🙌 Contributing
|
|
258
|
+
|
|
259
|
+
PRs welcome! Ideas:
|
|
260
|
+
- FAISS index compression (`compress_faiss`)
|
|
261
|
+
- ONNX / numpy-only backend (no PyTorch dep)
|
|
262
|
+
- Streaming compression for billion-scale datasets
|
|
263
|
+
- Pre-built wheels for faster install
|
|
264
|
+
|
|
265
|
+
---
|
|
266
|
+
|
|
267
|
+
<p align="center">Made with 🧊 for the vector search community.</p>
|
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
# 🧊 TurboQuant Tools
|
|
2
|
+
|
|
3
|
+
> **Compress AI embeddings by 5–7× with near-lossless quality.**
|
|
4
|
+
|
|
5
|
+
CLI + Python Library + [MCP](https://modelcontextprotocol.io) Server for extreme vector compression using [Google's TurboQuant](https://research.google/blog/turboquant-redefining-ai-efficiency-with-extreme-compression/) (PolarQuant + QJL) — wrapped in a clean numpy-first API.
|
|
6
|
+
|
|
7
|
+
[PyPI](https://pypi.org/project/turboquant-tools/)
|
|
8
|
+
[Python](https://www.python.org)
|
|
9
|
+
[License: MIT](LICENSE)
|
|
10
|
+
[CI](https://github.com/FreezeVII/turboquant-tools/actions)
|
|
11
|
+
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
## 🚀 Quick Start
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
pip install turboquant-tools
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
Compress a `.npy` embedding file:
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
turboquant compress embeddings.npy compressed.tq
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
Restore:
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
turboquant decompress compressed.tq restored.npy
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
Estimate savings:
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
turboquant estimate embeddings.npy --bits 3
|
|
36
|
+
# Original: 153.00 MB -> Compressed: 20.13 MB (7.60×, save 87%)
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
---
|
|
40
|
+
|
|
41
|
+
## 📦 What's Inside
|
|
42
|
+
|
|
43
|
+
| Command / Tool | Description |
|
|
44
|
+
|---|---|
|
|
45
|
+
| `turboquant compress` | Compress `.npy` embeddings → `.tq` binary |
|
|
46
|
+
| `turboquant decompress` | Restore `.tq` → `.npy` |
|
|
47
|
+
| `turboquant estimate` | Predict compression ratio before running |
|
|
48
|
+
| `turboquant mcp-server` | MCP stdio server (AI agent integration) |
|
|
49
|
+
| Python `compress()` | Compress numpy arrays in code |
|
|
50
|
+
| Python `decompress()` | Restore in code |
|
|
51
|
+
|
|
52
|
+
---
|
|
53
|
+
|
|
54
|
+
## 🔧 CLI Reference
|
|
55
|
+
|
|
56
|
+
### compress
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
turboquant compress INPUT [OUTPUT] [OPTIONS]
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
| Option | Default | Description |
|
|
63
|
+
|---|---|---|
|
|
64
|
+
| `INPUT` | — | `.npy` file with float32 embeddings `(n, d)` |
|
|
65
|
+
| `OUTPUT` | `{stem}_tq{b}.tq` | Output `.tq` file |
|
|
66
|
+
| `-b, --bits` | `3` | Bit width (3 or 4) |
|
|
67
|
+
| `-o, --output` | — | Alternative to positional OUTPUT |
|
|
68
|
+
| `--no-qjl` | off | Skip QJL correction (faster, lower quality) |
|
|
69
|
+
|
|
70
|
+
**Examples:**
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
# Basic 3-bit compression
|
|
74
|
+
turboquant compress wiki_embeddings.npy wiki.tq
|
|
75
|
+
|
|
76
|
+
# 4-bit compression (higher quality)
|
|
77
|
+
turboquant compress embeddings.npy -b 4
|
|
78
|
+
|
|
79
|
+
# Fast mode (no QJL)
|
|
80
|
+
turboquant compress big_set.npy -b 3 --no-qjl
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
### decompress
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
turboquant decompress INPUT [OUTPUT]
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
### estimate
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
turboquant estimate INPUT [--bits N]
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
---
|
|
96
|
+
|
|
97
|
+
## 🐍 Python API
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
from turboquant_tools import compress, decompress, estimate_savings
|
|
101
|
+
import numpy as np
|
|
102
|
+
|
|
103
|
+
# Load or generate embeddings
|
|
104
|
+
vectors = np.random.randn(10000, 384).astype(np.float32)
|
|
105
|
+
|
|
106
|
+
# Compress (5–7× reduction)
|
|
107
|
+
compressed = compress(vectors, bits=3, use_qjl=False)
|
|
108
|
+
print(f"{vectors.nbytes / 1e6:.1f} MB → {compressed.nbytes / 1e6:.1f} MB ({compressed.memory.ratio:.1f}×)")
|
|
109
|
+
|
|
110
|
+
# Restore
|
|
111
|
+
restored = decompress(compressed)
|
|
112
|
+
print(f"MAE: {np.abs(restored - vectors).mean():.4f}")
|
|
113
|
+
|
|
114
|
+
# Estimate without running
|
|
115
|
+
est = estimate_savings(n_vectors=100000, dim=768, bits=3)
|
|
116
|
+
print(est) # Original: X MB -> Compressed: Y MB (7.60×, save 87%)
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
**CompressedVectors** objects carry metadata:
|
|
120
|
+
|
|
121
|
+
```python
|
|
122
|
+
compressed.n_vectors # original count
|
|
123
|
+
compressed.dim # original dimension
|
|
124
|
+
compressed.nbytes # compressed size in bytes
|
|
125
|
+
compressed.memory # MemoryBytes(original, compressed, ratio)
|
|
126
|
+
compressed.data # raw .tq bytes (save to disk)
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
---
|
|
130
|
+
|
|
131
|
+
## 🤖 MCP Server (AI Agents)
|
|
132
|
+
|
|
133
|
+
TurboQuant Tools ships with a native **MCP server** for AI agent integration — works with any MCP-compatible host (Hermes, Claude Desktop, etc.).
|
|
134
|
+
|
|
135
|
+
### Start
|
|
136
|
+
|
|
137
|
+
```bash
|
|
138
|
+
turboquant mcp-server
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
### Register in your MCP client
|
|
142
|
+
|
|
143
|
+
**Hermes Agent** (`~/.hermes/config.yaml`):
|
|
144
|
+
|
|
145
|
+
```yaml
|
|
146
|
+
mcp_servers:
|
|
147
|
+
turboquant-tools:
|
|
148
|
+
command: turboquant
|
|
149
|
+
args: ["mcp-server"]
|
|
150
|
+
enabled: true
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
**Claude Desktop** (`claude_desktop_config.json`):
|
|
154
|
+
|
|
155
|
+
```json
|
|
156
|
+
{
|
|
157
|
+
"mcpServers": {
|
|
158
|
+
"turboquant-tools": {
|
|
159
|
+
"command": "turboquant",
|
|
160
|
+
"args": ["mcp-server"]
|
|
161
|
+
}
|
|
162
|
+
}
|
|
163
|
+
}
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
### Available Tools
|
|
167
|
+
|
|
168
|
+
| Tool | Description |
|
|
169
|
+
|---|---|
|
|
170
|
+
| `compress_embeddings` | Compress vectors in-memory |
|
|
171
|
+
| `decompress_embeddings` | Restore compressed vectors |
|
|
172
|
+
| `estimate_savings_mcp` | Predict compression ratio |
|
|
173
|
+
| `embed_and_compress` | Embed texts via API + compress in one step |
|
|
174
|
+
|
|
175
|
+
---
|
|
176
|
+
|
|
177
|
+
## 📊 Performance
|
|
178
|
+
|
|
179
|
+
Measured on random float32 embeddings (CPU, no GPU needed):
|
|
180
|
+
|
|
181
|
+
| Vectors | Dim | Mode | Original | Compressed | Ratio | MAE |
|
|
182
|
+
|---|---|---|---|---|---|---|
|
|
183
|
+
| 20 | 384 | PolarQuant 3-bit | 30 KB | 10 KB | **3.0×** | 2.6 |
|
|
184
|
+
| 20 | 384 | TurboQuant (QJL) | 30 KB | 20 KB | 1.5× | 3.3 |
|
|
185
|
+
| 100K | 384 | PolarQuant 3-bit | 153 MB | 20 MB | **7.6×** | — |
|
|
186
|
+
|
|
187
|
+
**Use cases:**
|
|
188
|
+
- **RAG pipelines** — compress vector DB indexes
|
|
189
|
+
- **Edge devices** — fit embeddings in limited RAM
|
|
190
|
+
- **Storage savings** — reduce cloud costs for large vector stores
|
|
191
|
+
- **Memory-bound agents** — compress context vectors on the fly
|
|
192
|
+
|
|
193
|
+
---
|
|
194
|
+
|
|
195
|
+
## 🧪 Development
|
|
196
|
+
|
|
197
|
+
```bash
|
|
198
|
+
git clone https://github.com/FreezeVII/turboquant-tools.git
|
|
199
|
+
cd turboquant-tools
|
|
200
|
+
pip install -e .
|
|
201
|
+
pip install pytest
|
|
202
|
+
pytest tests/
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
### Run tests
|
|
206
|
+
|
|
207
|
+
```bash
|
|
208
|
+
pytest tests/ -v
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
---
|
|
212
|
+
|
|
213
|
+
## 🧱 How It Works
|
|
214
|
+
|
|
215
|
+
Two-stage compression inspired by [Google's TurboQuant](https://research.google/blog/turboquant-redefining-ai-efficiency-with-extreme-compression/):
|
|
216
|
+
|
|
217
|
+
1. **PolarQuant** — Random Hadamard rotation + scalar quantization to 3–4 bits per dimension. Captures magnitude and direction.
|
|
218
|
+
2. **QJL** (optional) — Quantized Johnson-Lindenstrauss residual correction. Recovers high-frequency detail lost in PolarQuant.
|
|
219
|
+
|
|
220
|
+
Both stages run **CPU-only** via PyTorch — no GPU required. The `.tq` binary format uses a 30-byte header with magic bytes (`TQT2`) + packed indices and norms.
|
|
221
|
+
|
|
222
|
+
Under the hood this wraps [OnlyTerp/turboquant](https://github.com/OnlyTerp/turboquant), a reference PyTorch implementation.
|
|
223
|
+
|
|
224
|
+
---
|
|
225
|
+
|
|
226
|
+
## 📄 License
|
|
227
|
+
|
|
228
|
+
MIT — see [LICENSE](LICENSE).
|
|
229
|
+
|
|
230
|
+
---
|
|
231
|
+
|
|
232
|
+
## 🙌 Contributing
|
|
233
|
+
|
|
234
|
+
PRs welcome! Ideas:
|
|
235
|
+
- FAISS index compression (`compress_faiss`)
|
|
236
|
+
- ONNX / numpy-only backend (no PyTorch dep)
|
|
237
|
+
- Streaming compression for billion-scale datasets
|
|
238
|
+
- Pre-built wheels for faster install
|
|
239
|
+
|
|
240
|
+
---
|
|
241
|
+
|
|
242
|
+
<p align="center">Made with 🧊 for the vector search community.</p>
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=64", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "turboquant-tools"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "CLI + MCP Server + Python Library for TurboQuant-based embedding compression"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = {text = "MIT"}
|
|
11
|
+
requires-python = ">=3.9"
|
|
12
|
+
authors = [{name = "FreezeVII"}]
|
|
13
|
+
classifiers = [
|
|
14
|
+
"Development Status :: 3 - Alpha",
|
|
15
|
+
"Intended Audience :: Developers",
|
|
16
|
+
"License :: OSI Approved :: MIT License",
|
|
17
|
+
"Programming Language :: Python :: 3",
|
|
18
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
19
|
+
]
|
|
20
|
+
dependencies = [
|
|
21
|
+
"numpy>=1.24",
|
|
22
|
+
"click>=8.0",
|
|
23
|
+
]
|
|
24
|
+
|
|
25
|
+
[project.urls]
|
|
26
|
+
Homepage = "https://github.com/FreezeVII/turboquant-tools"
|
|
27
|
+
Source = "https://github.com/FreezeVII/turboquant-tools"
|
|
28
|
+
|
|
29
|
+
[project.scripts]
|
|
30
|
+
turboquant = "turboquant_tools.cli:main"
|
|
31
|
+
|
|
32
|
+
[project.optional-dependencies]
|
|
33
|
+
mcp = ["fastmcp>=0.1"]
|
|
34
|
+
dev = ["pytest>=7", "pytest-cov"]
|
|
35
|
+
|
|
36
|
+
[tool.pytest.ini_options]
|
|
37
|
+
minversion = "7.0"
|
|
38
|
+
testpaths = ["tests"]
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CLI for turboquant-tools.
|
|
3
|
+
"""
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
import sys
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
import click
|
|
8
|
+
import numpy as np
|
|
9
|
+
from turboquant_tools import compress, decompress, estimate_savings
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@click.group()
def main():
    """TurboQuant Tools - compress AI embeddings with 5x memory reduction."""
    # Intentionally empty: this function only anchors the click command
    # group. The docstring above is what click prints as the top-level help
    # text, and the sub-commands attach themselves via ``@main.command``.
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# The command name is given explicitly: click only strips the ``_cmd`` suffix
# from derived names since 8.2, so on older releases the derived name would be
# ``compress-cmd`` instead of ``compress``.
@main.command("compress")
@click.argument("input", type=click.Path(exists=True, dir_okay=False))
@click.argument("output", type=click.Path(dir_okay=False), required=False)
@click.option("--bits", "-b", default=3, type=int, help="Target bit width (default: 3)")
# The option is stored under its own name (``output_opt``). Sharing the name
# ``output`` with the positional argument makes the two parameters overwrite
# each other during parsing.
@click.option(
    "--output",
    "-o",
    "output_opt",
    default=None,
    help="Output .tq file path (alternative to positional OUTPUT)",
)
@click.option("--no-qjl", is_flag=True, default=False, help="Skip QJL correction (faster but lower quality)")
def compress_cmd(input, output, bits, no_qjl, output_opt=None):
    """Compress .npy embedding vectors to .tq format.

    INPUT is a .npy file with float32 embeddings (n_vectors x dimensions).
    OUTPUT is the destination .tq file. If omitted, auto-names based on input.
    """
    # Parameters (kept out of the docstring, which click shows as help text):
    #   input      -- path of an existing file, loaded with ``np.load``.
    #   output     -- optional positional destination path.
    #   bits       -- bit width forwarded to ``compress``.
    #   no_qjl     -- when set, ``compress`` is called with ``use_qjl=False``.
    #   output_opt -- destination given via ``--output/-o``; used only when
    #                 the positional OUTPUT is absent.
    # Exits with status 1 when the loaded array is not 2-D.
    vectors = np.load(input)
    if vectors.ndim != 2:
        click.echo(f"Error: expected 2D array, got {vectors.ndim}D", err=True)
        sys.exit(1)

    n, d = vectors.shape
    # Progress information goes to stderr; results below go to stdout.
    click.echo(f"Vectors: {n} x {d} ({vectors.nbytes / 1e6:.2f} MB)", err=True)

    compressed = compress(vectors, bits=bits, use_qjl=not no_qjl)

    # Positional OUTPUT wins over --output; with neither, derive a name from
    # the input file's stem and the bit width (written to the current
    # working directory).
    out_path = output or output_opt
    if out_path is None:
        out_path = f"{Path(input).stem}_tq{bits}.tq"

    with open(out_path, "wb") as f:
        f.write(compressed.data)

    click.echo(f"Compressed: {compressed.nbytes / 1e6:.2f} MB ({compressed.memory.ratio:.1f}x)")
    click.echo(f"Saved to: {out_path}")
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
# The command name is given explicitly: click only strips the ``_cmd`` suffix
# from derived names since 8.2, so on older releases the derived name would be
# ``decompress-cmd`` instead of ``decompress``.
@main.command("decompress")
@click.argument("input", type=click.Path(exists=True, dir_okay=False))
@click.argument("output", type=click.Path(dir_okay=False), required=False)
# The option is stored under its own name (``output_opt``). Sharing the name
# ``output`` with the positional argument makes the two parameters overwrite
# each other during parsing.
@click.option(
    "--output",
    "-o",
    "output_opt",
    default=None,
    help="Output .npy file path (alternative to positional OUTPUT)",
)
def decompress_cmd(input, output, output_opt=None):
    """Restore compressed .tq file to .npy.

    INPUT is a .tq compressed file.
    OUTPUT is the destination .npy file. If omitted, auto-names based on input.
    """
    # Parameters (kept out of the docstring, which click shows as help text):
    #   input      -- path of an existing file whose bytes are read in full.
    #   output     -- optional positional destination path.
    #   output_opt -- destination given via ``--output/-o``; used only when
    #                 the positional OUTPUT is absent.
    # Exits with status 1 when the file does not start with the magic bytes.
    from turboquant_tools.core import CompressedVectors

    with open(input, "rb") as f:
        data = f.read()

    # Slice comparison instead of struct.unpack_from: a file shorter than four
    # bytes then fails this check cleanly rather than raising struct.error.
    if data[:4] != b"TQT2":
        click.echo("Error: not a valid .tq file", err=True)
        sys.exit(1)

    # NOTE(review): shape and bits are passed as placeholders here; presumably
    # decompress() recovers the real values from the header inside ``data``.
    # Confirm against turboquant_tools.core.
    compressed = CompressedVectors(data=data, shape=(0, 0), bits=0)
    restored = decompress(compressed)

    # Positional OUTPUT wins over --output; with neither, derive a name from
    # the input file's stem (written to the current working directory).
    out_path = output or output_opt
    if out_path is None:
        out_path = f"{Path(input).stem}_restored.npy"

    np.save(out_path, restored)
    click.echo(f"Restored: {restored.shape} ({restored.nbytes / 1e6:.2f} MB)")
    click.echo(f"Saved to: {out_path}")
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
# The command name is given explicitly: click only strips the ``_cmd`` suffix
# from derived names since 8.2, so on older releases the derived name would be
# ``estimate-cmd`` instead of ``estimate``.
@main.command("estimate")
@click.argument("input", type=click.Path(exists=True, dir_okay=False))
@click.option("--bits", "-b", default=3, type=int, help="Target bit width (default: 3)")
def estimate_cmd(input, bits):
    """Estimate compression savings without running the algorithm."""
    # Parameters (kept out of the docstring, which click shows as help text):
    #   input -- path of an existing file, opened with ``np.load``.
    #   bits  -- bit width forwarded to ``estimate_savings``.
    # Exits with status 1 when the array is not 2-D.

    # Memory-map the file: only the array's shape is needed, not its contents.
    arr = np.load(input, mmap_mode="r")
    if arr.ndim != 2:
        click.echo("Error: expected 2D array", err=True)
        sys.exit(1)

    n, d = arr.shape
    # Drop the reference to the memory map before computing the estimate.
    del arr
    click.echo(str(estimate_savings(n, d, bits=bits)))
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
@main.command()
def mcp_server():
    """Start the MCP protocol server (stdio transport for Hermes AI agents)."""
    # The server module is imported only when this command runs. Any
    # ImportError raised by the import or while the server runs is reported
    # as a missing ``mcp`` extra, and the process exits with status 1.
    try:
        from turboquant_tools.mcp_server import run_server as start_server

        start_server()
    except ImportError:
        click.echo("MCP server requires: pip install turboquant-tools[mcp]", err=True)
        raise SystemExit(1)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
if __name__ == "__main__":
|
|
100
|
+
main()
|