turboquant-tools 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 FreezeVII
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,267 @@
1
+ Metadata-Version: 2.4
2
+ Name: turboquant-tools
3
+ Version: 0.1.0
4
+ Summary: CLI + MCP Server + Python Library for TurboQuant-based embedding compression
5
+ Author: FreezeVII
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/FreezeVII/turboquant-tools
8
+ Project-URL: Source, https://github.com/FreezeVII/turboquant-tools
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
14
+ Requires-Python: >=3.9
15
+ Description-Content-Type: text/markdown
16
+ License-File: LICENSE
17
+ Requires-Dist: numpy>=1.24
18
+ Requires-Dist: click>=8.0
19
+ Provides-Extra: mcp
20
+ Requires-Dist: fastmcp>=0.1; extra == "mcp"
21
+ Provides-Extra: dev
22
+ Requires-Dist: pytest>=7; extra == "dev"
23
+ Requires-Dist: pytest-cov; extra == "dev"
24
+ Dynamic: license-file
25
+
26
+ # 🧊 TurboQuant Tools
27
+
28
+ > **Compress AI embeddings by 5–7× with near-lossless quality.**
29
+
30
+ CLI + Python Library + [MCP](https://modelcontextprotocol.io) Server for extreme vector compression using [Google's TurboQuant](https://research.google/blog/turboquant-redefining-ai-efficiency-with-extreme-compression/) (PolarQuant + QJL) — wrapped in a clean numpy-first API.
31
+
32
+ [![PyPI](https://img.shields.io/pypi/v/turboquant-tools)](https://pypi.org/project/turboquant-tools/)
33
+ [![Python](https://img.shields.io/pypi/pyversions/turboquant-tools)](https://www.python.org)
34
+ [![License](https://img.shields.io/github/license/FreezeVII/turboquant-tools)](LICENSE)
35
+ [![Tests](https://github.com/FreezeVII/turboquant-tools/actions/workflows/python-tests.yml/badge.svg)](https://github.com/FreezeVII/turboquant-tools/actions)
36
+
37
+ ---
38
+
39
+ ## 🚀 Quick Start
40
+
41
+ ```bash
42
+ pip install turboquant-tools
43
+ ```
44
+
45
+ Compress a `.npy` embedding file:
46
+
47
+ ```bash
48
+ turboquant compress embeddings.npy compressed.tq
49
+ ```
50
+
51
+ Restore:
52
+
53
+ ```bash
54
+ turboquant decompress compressed.tq restored.npy
55
+ ```
56
+
57
+ Estimate savings:
58
+
59
+ ```bash
60
+ turboquant estimate embeddings.npy --bits 3
61
+ # Original: 153.00 MB -> Compressed: 20.13 MB (7.60×, save 87%)
62
+ ```
63
+
64
+ ---
65
+
66
+ ## 📦 What's Inside
67
+
68
+ | Command / Tool | Description |
69
+ |---|---|
70
+ | `turboquant compress` | Compress `.npy` embeddings → `.tq` binary |
71
+ | `turboquant decompress` | Restore `.tq` → `.npy` |
72
+ | `turboquant estimate` | Predict compression ratio before running |
73
+ | `turboquant mcp-server` | MCP stdio server (AI agent integration) |
74
+ | Python `compress()` | Compress numpy arrays in code |
75
+ | Python `decompress()` | Restore in code |
76
+
77
+ ---
78
+
79
+ ## 🔧 CLI Reference
80
+
81
+ ### compress
82
+
83
+ ```bash
84
+ turboquant compress INPUT [OUTPUT] [OPTIONS]
85
+ ```
86
+
87
+ | Option | Default | Description |
88
+ |---|---|---|
89
+ | `INPUT` | — | `.npy` file with float32 embeddings `(n, d)` |
90
+ | `OUTPUT` | `{stem}_tq{b}.tq` | Output `.tq` file |
91
+ | `-b, --bits` | `3` | Bit width (3 or 4) |
92
+ | `-o, --output` | — | Alternative to positional OUTPUT |
93
+ | `--no-qjl` | off | Skip QJL correction (faster, lower quality) |
94
+
95
+ **Examples:**
96
+
97
+ ```bash
98
+ # Basic 3-bit compression
99
+ turboquant compress wiki_embeddings.npy wiki.tq
100
+
101
+ # 4-bit compression (higher quality)
102
+ turboquant compress embeddings.npy -b 4
103
+
104
+ # Fast mode (no QJL)
105
+ turboquant compress big_set.npy -b 3 --no-qjl
106
+ ```
107
+
108
+ ### decompress
109
+
110
+ ```bash
111
+ turboquant decompress INPUT [OUTPUT]
112
+ ```
113
+
114
+ ### estimate
115
+
116
+ ```bash
117
+ turboquant estimate INPUT [--bits N]
118
+ ```
119
+
120
+ ---
121
+
122
+ ## 🐍 Python API
123
+
124
+ ```python
125
+ from turboquant_tools import compress, decompress, estimate_savings
126
+ import numpy as np
127
+
128
+ # Load or generate embeddings
129
+ vectors = np.random.randn(10000, 384).astype(np.float32)
130
+
131
+ # Compress (5–7× reduction)
132
+ compressed = compress(vectors, bits=3, use_qjl=False)
133
+ print(f"{vectors.nbytes / 1e6:.1f} MB → {compressed.nbytes / 1e6:.1f} MB ({compressed.memory.ratio:.1f}×)")
134
+
135
+ # Restore
136
+ restored = decompress(compressed)
137
+ print(f"MAE: {np.abs(restored - vectors).mean():.4f}")
138
+
139
+ # Estimate without running
140
+ est = estimate_savings(n_vectors=100000, dim=768, bits=3)
141
+ print(est) # Original: X MB -> Compressed: Y MB (7.60×, save 87%)
142
+ ```
143
+
144
+ **CompressedVectors** objects carry metadata:
145
+
146
+ ```python
147
+ compressed.n_vectors # original count
148
+ compressed.dim # original dimension
149
+ compressed.nbytes # compressed size in bytes
150
+ compressed.memory # MemoryBytes(original, compressed, ratio)
151
+ compressed.data # raw .tq bytes (save to disk)
152
+ ```
153
+
154
+ ---
155
+
156
+ ## 🤖 MCP Server (AI Agents)
157
+
158
+ TurboQuant Tools ships with a native **MCP server** for AI agent integration — works with any MCP-compatible host (Hermes, Claude Desktop, etc.). The server needs the optional `mcp` extra: `pip install "turboquant-tools[mcp]"`.
159
+
160
+ ### Start
161
+
162
+ ```bash
163
+ turboquant mcp-server
164
+ ```
165
+
166
+ ### Register in your MCP client
167
+
168
+ **Hermes Agent** (`~/.hermes/config.yaml`):
169
+
170
+ ```yaml
171
+ mcp_servers:
172
+ turboquant-tools:
173
+ command: turboquant
174
+ args: ["mcp-server"]
175
+ enabled: true
176
+ ```
177
+
178
+ **Claude Desktop** (`claude_desktop_config.json`):
179
+
180
+ ```json
181
+ {
182
+ "mcpServers": {
183
+ "turboquant-tools": {
184
+ "command": "turboquant",
185
+ "args": ["mcp-server"]
186
+ }
187
+ }
188
+ }
189
+ ```
190
+
191
+ ### Available Tools
192
+
193
+ | Tool | Description |
194
+ |---|---|
195
+ | `compress_embeddings` | Compress vectors in-memory |
196
+ | `decompress_embeddings` | Restore compressed vectors |
197
+ | `estimate_savings_mcp` | Predict compression ratio |
198
+ | `embed_and_compress` | Embed texts via API + compress in one step |
199
+
200
+ ---
201
+
202
+ ## 📊 Performance
203
+
204
+ Measured on random float32 embeddings (CPU, no GPU needed):
205
+
206
+ | Vectors | Dim | Mode | Original | Compressed | Ratio | MAE |
207
+ |---|---|---|---|---|---|---|
208
+ | 20 | 384 | PolarQuant 3-bit | 30 KB | 10 KB | **3.0×** | 2.6 |
209
+ | 20 | 384 | TurboQuant (QJL) | 30 KB | 20 KB | 1.5× | 3.3 |
210
+ | 100K | 384 | PolarQuant 3-bit | 153 MB | 20 MB | **7.6×** | — |
211
+
212
+ **Use cases:**
213
+ - **RAG pipelines** — compress vector DB indexes
214
+ - **Edge devices** — fit embeddings in limited RAM
215
+ - **Storage savings** — reduce cloud costs for large vector stores
216
+ - **Memory-bound agents** — compress context vectors on the fly
217
+
218
+ ---
219
+
220
+ ## 🧪 Development
221
+
222
+ ```bash
223
+ git clone https://github.com/FreezeVII/turboquant-tools.git
224
+ cd turboquant-tools
225
+ pip install -e .
226
+ pip install pytest
227
+ pytest tests/
228
+ ```
229
+
230
+ ### Run tests
231
+
232
+ ```bash
233
+ pytest tests/ -v
234
+ ```
235
+
236
+ ---
237
+
238
+ ## 🧱 How It Works
239
+
240
+ Two-stage compression inspired by [Google's TurboQuant](https://research.google/blog/turboquant-redefining-ai-efficiency-with-extreme-compression/):
241
+
242
+ 1. **PolarQuant** — Random Hadamard rotation + scalar quantization to 3–4 bits per dimension. Captures magnitude and direction.
243
+ 2. **QJL** (optional) — Quantized Johnson-Lindenstrauss residual correction. Recovers high-frequency detail lost in PolarQuant.
244
+
245
+ Both stages run **CPU-only** via PyTorch — no GPU required. The `.tq` binary format uses a 30-byte header with magic bytes (`TQT2`) + packed indices and norms.
246
+
247
+ Under the hood this wraps [OnlyTerp/turboquant](https://github.com/OnlyTerp/turboquant), a reference PyTorch implementation.
248
+
249
+ ---
250
+
251
+ ## 📄 License
252
+
253
+ MIT — see [LICENSE](LICENSE).
254
+
255
+ ---
256
+
257
+ ## 🙌 Contributing
258
+
259
+ PRs welcome! Ideas:
260
+ - FAISS index compression (`compress_faiss`)
261
+ - ONNX / numpy-only backend (no PyTorch dep)
262
+ - Streaming compression for billion-scale datasets
263
+ - Pre-built wheels for faster install
264
+
265
+ ---
266
+
267
+ <p align="center">Made with 🧊 for the vector search community.</p>
@@ -0,0 +1,242 @@
1
+ # 🧊 TurboQuant Tools
2
+
3
+ > **Compress AI embeddings by 5–7× with near-lossless quality.**
4
+
5
+ CLI + Python Library + [MCP](https://modelcontextprotocol.io) Server for extreme vector compression using [Google's TurboQuant](https://research.google/blog/turboquant-redefining-ai-efficiency-with-extreme-compression/) (PolarQuant + QJL) — wrapped in a clean numpy-first API.
6
+
7
+ [![PyPI](https://img.shields.io/pypi/v/turboquant-tools)](https://pypi.org/project/turboquant-tools/)
8
+ [![Python](https://img.shields.io/pypi/pyversions/turboquant-tools)](https://www.python.org)
9
+ [![License](https://img.shields.io/github/license/FreezeVII/turboquant-tools)](LICENSE)
10
+ [![Tests](https://github.com/FreezeVII/turboquant-tools/actions/workflows/python-tests.yml/badge.svg)](https://github.com/FreezeVII/turboquant-tools/actions)
11
+
12
+ ---
13
+
14
+ ## 🚀 Quick Start
15
+
16
+ ```bash
17
+ pip install turboquant-tools
18
+ ```
19
+
20
+ Compress a `.npy` embedding file:
21
+
22
+ ```bash
23
+ turboquant compress embeddings.npy compressed.tq
24
+ ```
25
+
26
+ Restore:
27
+
28
+ ```bash
29
+ turboquant decompress compressed.tq restored.npy
30
+ ```
31
+
32
+ Estimate savings:
33
+
34
+ ```bash
35
+ turboquant estimate embeddings.npy --bits 3
36
+ # Original: 153.00 MB -> Compressed: 20.13 MB (7.60×, save 87%)
37
+ ```
38
+
39
+ ---
40
+
41
+ ## 📦 What's Inside
42
+
43
+ | Command / Tool | Description |
44
+ |---|---|
45
+ | `turboquant compress` | Compress `.npy` embeddings → `.tq` binary |
46
+ | `turboquant decompress` | Restore `.tq` → `.npy` |
47
+ | `turboquant estimate` | Predict compression ratio before running |
48
+ | `turboquant mcp-server` | MCP stdio server (AI agent integration) |
49
+ | Python `compress()` | Compress numpy arrays in code |
50
+ | Python `decompress()` | Restore in code |
51
+
52
+ ---
53
+
54
+ ## 🔧 CLI Reference
55
+
56
+ ### compress
57
+
58
+ ```bash
59
+ turboquant compress INPUT [OUTPUT] [OPTIONS]
60
+ ```
61
+
62
+ | Option | Default | Description |
63
+ |---|---|---|
64
+ | `INPUT` | — | `.npy` file with float32 embeddings `(n, d)` |
65
+ | `OUTPUT` | `{stem}_tq{b}.tq` | Output `.tq` file |
66
+ | `-b, --bits` | `3` | Bit width (3 or 4) |
67
+ | `-o, --output` | — | Alternative to positional OUTPUT |
68
+ | `--no-qjl` | off | Skip QJL correction (faster, lower quality) |
69
+
70
+ **Examples:**
71
+
72
+ ```bash
73
+ # Basic 3-bit compression
74
+ turboquant compress wiki_embeddings.npy wiki.tq
75
+
76
+ # 4-bit compression (higher quality)
77
+ turboquant compress embeddings.npy -b 4
78
+
79
+ # Fast mode (no QJL)
80
+ turboquant compress big_set.npy -b 3 --no-qjl
81
+ ```
82
+
83
+ ### decompress
84
+
85
+ ```bash
86
+ turboquant decompress INPUT [OUTPUT]
87
+ ```
88
+
89
+ ### estimate
90
+
91
+ ```bash
92
+ turboquant estimate INPUT [--bits N]
93
+ ```
94
+
95
+ ---
96
+
97
+ ## 🐍 Python API
98
+
99
+ ```python
100
+ from turboquant_tools import compress, decompress, estimate_savings
101
+ import numpy as np
102
+
103
+ # Load or generate embeddings
104
+ vectors = np.random.randn(10000, 384).astype(np.float32)
105
+
106
+ # Compress (5–7× reduction)
107
+ compressed = compress(vectors, bits=3, use_qjl=False)
108
+ print(f"{vectors.nbytes / 1e6:.1f} MB → {compressed.nbytes / 1e6:.1f} MB ({compressed.memory.ratio:.1f}×)")
109
+
110
+ # Restore
111
+ restored = decompress(compressed)
112
+ print(f"MAE: {np.abs(restored - vectors).mean():.4f}")
113
+
114
+ # Estimate without running
115
+ est = estimate_savings(n_vectors=100000, dim=768, bits=3)
116
+ print(est) # Original: X MB -> Compressed: Y MB (7.60×, save 87%)
117
+ ```
118
+
119
+ **CompressedVectors** objects carry metadata:
120
+
121
+ ```python
122
+ compressed.n_vectors # original count
123
+ compressed.dim # original dimension
124
+ compressed.nbytes # compressed size in bytes
125
+ compressed.memory # MemoryBytes(original, compressed, ratio)
126
+ compressed.data # raw .tq bytes (save to disk)
127
+ ```
128
+
129
+ ---
130
+
131
+ ## 🤖 MCP Server (AI Agents)
132
+
133
+ TurboQuant Tools ships with a native **MCP server** for AI agent integration — works with any MCP-compatible host (Hermes, Claude Desktop, etc.). The server needs the optional `mcp` extra: `pip install "turboquant-tools[mcp]"`.
134
+
135
+ ### Start
136
+
137
+ ```bash
138
+ turboquant mcp-server
139
+ ```
140
+
141
+ ### Register in your MCP client
142
+
143
+ **Hermes Agent** (`~/.hermes/config.yaml`):
144
+
145
+ ```yaml
146
+ mcp_servers:
147
+ turboquant-tools:
148
+ command: turboquant
149
+ args: ["mcp-server"]
150
+ enabled: true
151
+ ```
152
+
153
+ **Claude Desktop** (`claude_desktop_config.json`):
154
+
155
+ ```json
156
+ {
157
+ "mcpServers": {
158
+ "turboquant-tools": {
159
+ "command": "turboquant",
160
+ "args": ["mcp-server"]
161
+ }
162
+ }
163
+ }
164
+ ```
165
+
166
+ ### Available Tools
167
+
168
+ | Tool | Description |
169
+ |---|---|
170
+ | `compress_embeddings` | Compress vectors in-memory |
171
+ | `decompress_embeddings` | Restore compressed vectors |
172
+ | `estimate_savings_mcp` | Predict compression ratio |
173
+ | `embed_and_compress` | Embed texts via API + compress in one step |
174
+
175
+ ---
176
+
177
+ ## 📊 Performance
178
+
179
+ Measured on random float32 embeddings (CPU, no GPU needed):
180
+
181
+ | Vectors | Dim | Mode | Original | Compressed | Ratio | MAE |
182
+ |---|---|---|---|---|---|---|
183
+ | 20 | 384 | PolarQuant 3-bit | 30 KB | 10 KB | **3.0×** | 2.6 |
184
+ | 20 | 384 | TurboQuant (QJL) | 30 KB | 20 KB | 1.5× | 3.3 |
185
+ | 100K | 384 | PolarQuant 3-bit | 153 MB | 20 MB | **7.6×** | — |
186
+
187
+ **Use cases:**
188
+ - **RAG pipelines** — compress vector DB indexes
189
+ - **Edge devices** — fit embeddings in limited RAM
190
+ - **Storage savings** — reduce cloud costs for large vector stores
191
+ - **Memory-bound agents** — compress context vectors on the fly
192
+
193
+ ---
194
+
195
+ ## 🧪 Development
196
+
197
+ ```bash
198
+ git clone https://github.com/FreezeVII/turboquant-tools.git
199
+ cd turboquant-tools
200
+ pip install -e .
201
+ pip install pytest
202
+ pytest tests/
203
+ ```
204
+
205
+ ### Run tests
206
+
207
+ ```bash
208
+ pytest tests/ -v
209
+ ```
210
+
211
+ ---
212
+
213
+ ## 🧱 How It Works
214
+
215
+ Two-stage compression inspired by [Google's TurboQuant](https://research.google/blog/turboquant-redefining-ai-efficiency-with-extreme-compression/):
216
+
217
+ 1. **PolarQuant** — Random Hadamard rotation + scalar quantization to 3–4 bits per dimension. Captures magnitude and direction.
218
+ 2. **QJL** (optional) — Quantized Johnson-Lindenstrauss residual correction. Recovers high-frequency detail lost in PolarQuant.
219
+
220
+ Both stages run **CPU-only** via PyTorch — no GPU required. The `.tq` binary format uses a 30-byte header with magic bytes (`TQT2`) + packed indices and norms.
221
+
222
+ Under the hood this wraps [OnlyTerp/turboquant](https://github.com/OnlyTerp/turboquant), a reference PyTorch implementation.
223
+
224
+ ---
225
+
226
+ ## 📄 License
227
+
228
+ MIT — see [LICENSE](LICENSE).
229
+
230
+ ---
231
+
232
+ ## 🙌 Contributing
233
+
234
+ PRs welcome! Ideas:
235
+ - FAISS index compression (`compress_faiss`)
236
+ - ONNX / numpy-only backend (no PyTorch dep)
237
+ - Streaming compression for billion-scale datasets
238
+ - Pre-built wheels for faster install
239
+
240
+ ---
241
+
242
+ <p align="center">Made with 🧊 for the vector search community.</p>
@@ -0,0 +1,38 @@
1
+ [build-system]
2
+ requires = ["setuptools>=64", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "turboquant-tools"
7
+ version = "0.1.0"
8
+ description = "CLI + MCP Server + Python Library for TurboQuant-based embedding compression"
9
+ readme = "README.md"
10
+ license = {text = "MIT"}
11
+ requires-python = ">=3.9"
12
+ authors = [{name = "FreezeVII"}]
13
+ classifiers = [
14
+ "Development Status :: 3 - Alpha",
15
+ "Intended Audience :: Developers",
16
+ "License :: OSI Approved :: MIT License",
17
+ "Programming Language :: Python :: 3",
18
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
19
+ ]
20
+ dependencies = [
21
+ "numpy>=1.24",
22
+ "click>=8.0",
23
+ ]
24
+
25
+ [project.urls]
26
+ Homepage = "https://github.com/FreezeVII/turboquant-tools"
27
+ Source = "https://github.com/FreezeVII/turboquant-tools"
28
+
29
+ [project.scripts]
30
+ turboquant = "turboquant_tools.cli:main"
31
+
32
+ [project.optional-dependencies]
33
+ mcp = ["fastmcp>=0.1"]
34
+ dev = ["pytest>=7", "pytest-cov"]
35
+
36
+ [tool.pytest.ini_options]
37
+ minversion = "7.0"
38
+ testpaths = ["tests"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,8 @@
1
+ """
2
+ turboquant_tools — CLI + MCP Server + Library for TurboQuant embedding compression.
3
+ """
4
+
5
+ from .core import compress, decompress, estimate_savings
6
+ from .core import CompressedVectors, MemoryBytes
7
+
8
+ __version__ = "0.1.0"
@@ -0,0 +1,100 @@
1
+ """
2
+ CLI for turboquant-tools.
3
+ """
4
+ from __future__ import annotations
5
+ import sys
6
+ from pathlib import Path
7
+ import click
8
+ import numpy as np
9
+ from turboquant_tools import compress, decompress, estimate_savings
10
+
11
+
12
# Root command group. Subcommands attach themselves through ``@main.command``;
# the docstring below is what click prints as the top-level ``--help`` text,
# so it is user-facing output rather than internal documentation.
@click.group()
def main():
    """TurboQuant Tools - compress AI embeddings with 5x memory reduction."""
16
+
17
+
18
# The command name is pinned explicitly: click releases before 8.2 derive it
# from the function name ("compress-cmd"), which would not match the
# documented ``turboquant compress``.
@main.command("compress")
@click.argument("input", type=click.Path(exists=True, dir_okay=False))
@click.argument("output", type=click.Path(dir_okay=False), required=False)
@click.option("--bits", "-b", default=3, type=int, help="Target bit width (default: 3)")
# ``output_opt`` gives the option its own destination. Sharing the name
# "output" with the positional argument lets one value clobber the other.
@click.option("--output", "-o", "output_opt", default=None, help="Output .tq file path (alternative to positional OUTPUT)")
@click.option("--no-qjl", is_flag=True, default=False, help="Skip QJL correction (faster but lower quality)")
def compress_cmd(input, output, output_opt, bits, no_qjl):
    """Compress .npy embedding vectors to .tq format.

    INPUT is a .npy file with float32 embeddings (n_vectors x dimensions).
    OUTPUT is the destination .tq file. If omitted, auto-names based on input.
    """
    # NOTE: the docstring above is rendered verbatim by ``--help``.
    #
    # Parameters (filled in by click):
    #   input      -- path of an existing .npy file
    #   output     -- positional destination path, or None
    #   output_opt -- destination given through --output/-o, or None
    #   bits       -- integer bit width forwarded to compress()
    #   no_qjl     -- True when --no-qjl was passed; inverted into use_qjl
    #
    # Exits with status 1 when the loaded array is not two-dimensional.
    vectors = np.load(input)
    if vectors.ndim != 2:
        click.echo(f"Error: expected 2D array, got {vectors.ndim}D", err=True)
        sys.exit(1)

    n, d = vectors.shape
    # Progress information goes to stderr; the result summary goes to stdout.
    click.echo(f"Vectors: {n} x {d} ({vectors.nbytes / 1e6:.2f} MB)", err=True)
    compressed = compress(vectors, bits=bits, use_qjl=not no_qjl)

    # The positional OUTPUT takes precedence over --output. With neither, the
    # name is derived from the input stem and lands in the current directory.
    out_path = output or output_opt
    if not out_path:
        out_path = f"{Path(input).stem}_tq{bits}.tq"

    with open(out_path, "wb") as f:
        f.write(compressed.data)
    click.echo(f"Compressed: {compressed.nbytes / 1e6:.2f} MB ({compressed.memory.ratio:.1f}x)")
    click.echo(f"Saved to: {out_path}")
44
+
45
+
46
# The command name is pinned explicitly: click releases before 8.2 derive it
# from the function name ("decompress-cmd"), which would not match the
# documented ``turboquant decompress``.
@main.command("decompress")
@click.argument("input", type=click.Path(exists=True, dir_okay=False))
@click.argument("output", type=click.Path(dir_okay=False), required=False)
# ``output_opt`` gives the option its own destination. Sharing the name
# "output" with the positional argument lets one value clobber the other.
@click.option("--output", "-o", "output_opt", default=None, help="Output .npy file path (alternative to positional OUTPUT)")
def decompress_cmd(input, output, output_opt):
    """Restore compressed .tq file to .npy.

    INPUT is a .tq compressed file.
    OUTPUT is the destination .npy file. If omitted, auto-names based on input.
    """
    # NOTE: the docstring above is rendered verbatim by ``--help``.
    #
    # Parameters (filled in by click):
    #   input      -- path of an existing .tq file
    #   output     -- positional destination path, or None
    #   output_opt -- destination given through --output/-o, or None
    #
    # Exits with status 1 when the file does not start with the TQT2 magic.
    from turboquant_tools.core import CompressedVectors

    with open(input, "rb") as f:
        data = f.read()

    # A slice comparison also rejects files shorter than four bytes, where
    # struct.unpack_from would raise struct.error instead of a clean message.
    if data[:4] != b"TQT2":
        click.echo("Error: not a valid .tq file", err=True)
        sys.exit(1)

    # shape/bits are passed as placeholders here; presumably decompress()
    # recovers the real values from the header inside ``data`` -- TODO confirm
    # against turboquant_tools.core.
    compressed = CompressedVectors(data=data, shape=(0, 0), bits=0)
    restored = decompress(compressed)

    # The positional OUTPUT takes precedence over --output.
    out_path = output or output_opt
    if not out_path:
        out_path = f"{Path(input).stem}_restored.npy"
    elif not str(out_path).endswith(".npy"):
        # np.save appends ".npy" on its own when it is missing; do it here so
        # the "Saved to" message names the file that is actually written.
        out_path = f"{out_path}.npy"

    np.save(out_path, restored)
    click.echo(f"Restored: {restored.shape} ({restored.nbytes / 1e6:.2f} MB)")
    click.echo(f"Saved to: {out_path}")
72
+
73
+
74
# The command name is pinned explicitly: click releases before 8.2 derive it
# from the function name ("estimate-cmd"), which would not match the
# documented ``turboquant estimate``.
@main.command("estimate")
@click.argument("input", type=click.Path(exists=True, dir_okay=False))
@click.option("--bits", "-b", default=3, type=int, help="Target bit width (default: 3)")
def estimate_cmd(input, bits):
    """Estimate compression savings without running the algorithm."""
    # NOTE: the docstring above is rendered verbatim by ``--help``.
    #
    # Parameters (filled in by click):
    #   input -- path of an existing .npy file
    #   bits  -- integer bit width forwarded to estimate_savings()
    #
    # Exits with status 1 when the array is not two-dimensional.

    # Memory-map the file: only the shape is needed, not the array contents.
    arr = np.load(input, mmap_mode="r")
    if arr.ndim != 2:
        click.echo("Error: expected 2D array", err=True)
        sys.exit(1)

    n, d = arr.shape
    # Drop the memory map before doing anything else with the result.
    del arr
    click.echo(str(estimate_savings(n, d, bits=bits)))
86
+
87
+
88
# The name is spelled out so the registered command does not depend on how a
# given click release derives names from function identifiers.
@main.command("mcp-server")
def mcp_server():
    """Start the MCP protocol server (stdio transport for Hermes AI agents)."""
    # NOTE: the docstring above is rendered verbatim by ``--help``.
    #
    # The import is deferred so the rest of the CLI works without the optional
    # "mcp" extra. Exits with status 1 when an ImportError is raised while
    # importing or running the server.
    try:
        from turboquant_tools.mcp_server import run_server
        run_server()
    except ImportError as exc:
        click.echo("MCP server requires: pip install turboquant-tools[mcp]", err=True)
        # Show the underlying cause too: an ImportError from something other
        # than the missing extra would otherwise be indistinguishable.
        click.echo(f"(import failed: {exc})", err=True)
        sys.exit(1)
97
+
98
+
99
+ if __name__ == "__main__":
100
+ main()