zeusdb-vector-database 0.1.2-cp311-cp311-musllinux_1_2_armv7l.whl → 0.2.1-cp311-cp311-musllinux_1_2_armv7l.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zeusdb_vector_database/__init__.py +1 -1
- zeusdb_vector_database/vector_database.py +272 -13
- zeusdb_vector_database/zeusdb_vector_database.cpython-311-arm-linux-musleabihf.so +0 -0
- {zeusdb_vector_database-0.1.2.dist-info → zeusdb_vector_database-0.2.1.dist-info}/METADATA +208 -11
- zeusdb_vector_database-0.2.1.dist-info/RECORD +10 -0
- {zeusdb_vector_database-0.1.2.dist-info → zeusdb_vector_database-0.2.1.dist-info}/WHEEL +1 -1
- zeusdb_vector_database-0.1.2.dist-info/RECORD +0 -10
- {zeusdb_vector_database-0.1.2.dist-info → zeusdb_vector_database-0.2.1.dist-info}/licenses/LICENSE +0 -0
- {zeusdb_vector_database-0.1.2.dist-info → zeusdb_vector_database-0.2.1.dist-info}/licenses/NOTICE +0 -0
zeusdb_vector_database/vector_database.py:

````diff
@@ -1,16 +1,24 @@
 """
 vector_database.py
 
-Factory for creating vector indexes with support for multiple types.
+Factory for creating vector indexes with support for multiple types and quantization.
 Currently supports HNSW (Hierarchical Navigable Small World) with extensible design.
 """
-from typing import Callable, Dict, Any
+from typing import Callable, Dict, Any, Optional, TypedDict
 from .zeusdb_vector_database import HNSWIndex
 # from .zeusdb_vector_database import HNSWIndex, IVFIndex, LSHIndex, AnnoyIndex, FlatIndex  # Future support planned
 
+class MemoryInfo(TypedDict):
+    """Type definition for quantization memory information."""
+    centroid_storage_mb: float
+    compression_ratio: float
+    centroids_per_subvector: int
+    total_centroids: int
+    calculated_training_size: int
+
 class VectorDatabase:
     """
-    Factory for creating various types of vector indexes.
+    Factory for creating various types of vector indexes with optional quantization.
     Each index type is registered via _index_constructors.
     """
 
````
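The new `MemoryInfo` TypedDict only describes the shape of the stats dictionary that the validator attaches to the config. As a quick illustration, here is a standalone sketch (re-declaring the type rather than importing the package) of a conforming value; the numbers follow from the formulas later in this diff for the default 8-subvector, 8-bit setup on 1536-dim vectors, and a type checker such as mypy would flag a missing key or a mistyped value:

```python
from typing import TypedDict

class MemoryInfo(TypedDict):
    centroid_storage_mb: float
    compression_ratio: float
    centroids_per_subvector: int
    total_centroids: int
    calculated_training_size: int

# 8 subvectors x 2^8 centroids = 2048 centroids of 192 float32s each -> 1.5 MB;
# codes-only compression is (1536 * 4 bytes) / 8 code bytes = 768x.
info: MemoryInfo = {
    "centroid_storage_mb": 1.5,
    "compression_ratio": 768.0,
    "centroids_per_subvector": 256,
    "total_centroids": 2048,
    "calculated_training_size": 10000,
}
print(info)
```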
vector_database.py (continued):

````diff
@@ -26,12 +34,13 @@ class VectorDatabase:
         """Initialize the vector database factory."""
         pass
 
-    def create(self, index_type: str = "hnsw", **kwargs) -> Any:
+    def create(self, index_type: str = "hnsw", quantization_config: Optional[Dict[str, Any]] = None, **kwargs) -> Any:
         """
-        Create a vector index of the specified type.
+        Create a vector index of the specified type with optional quantization.
 
         Args:
             index_type: The type of index to create (case-insensitive: "hnsw", "ivf", etc.)
+            quantization_config: Optional quantization configuration dictionary
             **kwargs: Parameters specific to the chosen index type (validated by Rust backend)
 
             For "hnsw", supported parameters are:
````
````diff
@@ -41,22 +50,57 @@
             - ef_construction (int): Construction candidate list size (default: 200)
             - expected_size (int): Expected number of vectors (default: 10000)
 
+        Quantization config format:
+            {
+                'type': 'pq',                     # Currently only 'pq' (Product Quantization) supported
+                'subvectors': 8,                  # Number of subvectors (must divide dim evenly, default: 8)
+                'bits': 8,                        # Bits per subvector (1-8, controls centroids, default: 8)
+                'training_size': None,            # Auto-calculated based on subvectors & bits (or specify manually)
+                'max_training_vectors': None,     # Optional limit on training vectors used
+                'storage_mode': 'quantized_only'  # Storage mode for quantized vectors (or 'quantized_with_raw')
+            }
+
+        Note: Quantization reduces memory usage (typically 4-32x compression) but may
+        slightly degrade recall accuracy. Training triggers automatically on the first
+        .add() call that reaches the training_size threshold.
+
         Returns:
             An instance of the created vector index.
 
         Examples:
-            # HNSW index with defaults
+            # HNSW index with defaults (no quantization)
             vdb = VectorDatabase()
             index = vdb.create("hnsw", dim=1536)
 
-            # HNSW index with
-
+            # HNSW index with Product Quantization (auto-calculated training size)
+            quantization_config = {
+                'type': 'pq',
+                'subvectors': 8,
+                'bits': 8
+            }
+            index = vdb.create(
+                index_type="hnsw",
+                dim=1536,
+                quantization_config=quantization_config
+            )
 
-            #
-
+            # Memory-optimized configuration with manual training size
+            memory_optimized_config = {
+                'type': 'pq',
+                'subvectors': 16,         # More subvectors = better compression
+                'bits': 6,                # Fewer bits = less memory per centroid
+                'training_size': 75000,   # Override auto-calculation
+                'storage_mode': 'quantized_only'  # Only store quantized vectors
+            }
+            index = vdb.create(
+                index_type="hnsw",
+                dim=1536,
+                quantization_config=memory_optimized_config,
+                expected_size=1000000     # Large dataset
+            )
 
         Raises:
-            ValueError: If index_type is not supported.
+            ValueError: If index_type is not supported or quantization config is invalid.
             RuntimeError: If index creation fails due to backend validation.
         """
         index_type = (index_type or "").strip().lower()
````
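To make the docstring's constraints concrete, here is a minimal standalone sketch (a hypothetical `check_pq_config` helper, not part of the package) applying the same two hard rules spelled out above: `subvectors` must divide `dim` evenly, and `bits` must lie between 1 and 8:

```python
def check_pq_config(dim: int, subvectors: int, bits: int) -> None:
    """Apply the two hard PQ constraints from the docstring above."""
    if dim % subvectors != 0:
        raise ValueError(f"subvectors ({subvectors}) must divide dim ({dim}) evenly")
    if not 1 <= bits <= 8:
        raise ValueError(f"bits must be between 1 and 8, got {bits}")

check_pq_config(1536, 8, 8)      # OK: 1536 / 8 = 192 dims per subvector
try:
    check_pq_config(1536, 7, 8)  # fails: 1536 is not divisible by 7
except ValueError as e:
    print(e)
```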
vector_database.py (continued):

````diff
@@ -65,9 +109,16 @@
             available = ', '.join(sorted(self._index_constructors.keys()))
             raise ValueError(f"Unknown index type '{index_type}'. Available: {available}")
 
+        # Centralize dim early to ensure consistency
+        dim = kwargs.get('dim', 1536)
+
+        # Validate and process quantization config
+        if quantization_config is not None:
+            quantization_config = self._validate_quantization_config(quantization_config, dim)
+
         # Apply index-specific defaults
         if index_type == "hnsw":
-            kwargs.setdefault("dim",
+            kwargs.setdefault("dim", dim)
             kwargs.setdefault("space", "cosine")
             kwargs.setdefault("m", 16)
             kwargs.setdefault("ef_construction", 200)
````
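The dim handling above follows a read-then-setdefault pattern: `dim` is read out of `kwargs` (falling back to the 1536 default) before validation runs, then written back with `setdefault` so the validator and the backend constructor always see the same value. A trimmed-down sketch of just that pattern (hypothetical `create_like`, for illustration):

```python
def create_like(**kwargs):
    # Read dim first (with the documented 1536 default) ...
    dim = kwargs.get('dim', 1536)
    # ... then write it back so later consumers see the same value.
    kwargs.setdefault('dim', dim)
    return kwargs

print(create_like())          # {'dim': 1536}
print(create_like(dim=3072))  # {'dim': 3072}
```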
vector_database.py (continued):

````diff
@@ -76,11 +127,219 @@
         constructor = self._index_constructors[index_type]
 
         try:
-
+            # Always pass quantization_config parameter
+            if quantization_config is not None:
+                # Remove keys with None values and internal keys
+                clean_config = {k: v for k, v in quantization_config.items() if not k.startswith('_') and v is not None}
+            else:
+                clean_config = None
+
+            return constructor(quantization_config=clean_config, **kwargs)
         except Exception as e:
             raise RuntimeError(f"Failed to create {index_type.upper()} index: {e}") from e
+
+
+
+    def _validate_quantization_config(self, config: Dict[str, Any], dim: int) -> Dict[str, Any]:
+        """
+        Validate and normalize quantization configuration.
+
+        Args:
+            config: Raw quantization configuration
+            dim: Vector dimension for validation
 
+        Returns:
+            Validated and normalized configuration
+
+        Raises:
+            ValueError: If configuration is invalid
+        """
+        if not isinstance(config, dict):
+            raise ValueError("quantization_config must be a dictionary")
+
+        # Create a copy to avoid modifying the original
+        validated_config = config.copy()
+
+        # Validate quantization type
+        qtype = validated_config.get('type', '').lower()
+        if qtype != 'pq':
+            raise ValueError(f"Unsupported quantization type: '{qtype}'. Currently only 'pq' is supported.")
+
+        validated_config['type'] = 'pq'
+
+        # Validate subvectors
+        subvectors = validated_config.get('subvectors', 8)
+        if not isinstance(subvectors, int) or subvectors <= 0:
+            raise ValueError(f"subvectors must be a positive integer, got {subvectors}")
+
+        if dim % subvectors != 0:
+            raise ValueError(
+                f"subvectors ({subvectors}) must divide dimension ({dim}) evenly. "
+                f"Consider using subvectors: {', '.join(map(str, self._suggest_subvector_divisors(dim)))}"
+            )
+
+        if subvectors > dim:
+            raise ValueError(f"subvectors ({subvectors}) cannot exceed dimension ({dim})")
+
+        validated_config['subvectors'] = subvectors
+
+        # Validate bits per subvector
+        bits = validated_config.get('bits', 8)
+        if not isinstance(bits, int) or bits < 1 or bits > 8:
+            raise ValueError(f"bits must be an integer between 1 and 8, got {bits}")
+
+        validated_config['bits'] = bits
+
+        # Calculate smart training size if not provided
+        training_size = validated_config.get('training_size')
+        if training_size is None:
+            training_size = self._calculate_smart_training_size(subvectors, bits)
+        else:
+            if not isinstance(training_size, int) or training_size < 1000:
+                raise ValueError(f"training_size must be at least 1000 for stable k-means clustering, got {training_size}")
+
+        validated_config['training_size'] = training_size
+
+        # Validate max training vectors if provided
+        max_training_vectors = validated_config.get('max_training_vectors')
+        if max_training_vectors is not None:
+            if not isinstance(max_training_vectors, int) or max_training_vectors < training_size:
+                raise ValueError(
+                    f"max_training_vectors ({max_training_vectors}) must be >= training_size ({training_size})"
+                )
+            validated_config['max_training_vectors'] = max_training_vectors
+
+        # Validate storage mode
+        storage_mode = str(validated_config.get('storage_mode', 'quantized_only')).lower()
+        valid_modes = {'quantized_only', 'quantized_with_raw'}
+        if storage_mode not in valid_modes:
+            raise ValueError(
+                f"Invalid storage_mode: '{storage_mode}'. Supported modes: {', '.join(sorted(valid_modes))}"
+            )
+
+        validated_config['storage_mode'] = storage_mode
+
+        # Calculate and warn about memory usage
+        self._check_memory_usage(validated_config, dim)
+
+        # Add helpful warnings about storage mode
+        if storage_mode == 'quantized_with_raw':
+            import warnings
+            compression_ratio = validated_config.get('__memory_info__', {}).get('compression_ratio', 1.0)
+            warnings.warn(
+                f"storage_mode='quantized_with_raw' will use ~{compression_ratio:.1f}x more memory "
+                f"than 'quantized_only' but enables exact vector reconstruction.",
+                UserWarning,
+                stacklevel=2
+            )
+
+        # Final safety check: ensure all expected keys are present
+        # This is a final defensive programming - all the keys should already be set above, but added just in case
+        validated_config.setdefault('type', 'pq')
+        validated_config.setdefault('subvectors', 8)
+        validated_config.setdefault('bits', 8)
+        validated_config.setdefault('max_training_vectors', None)
+        validated_config.setdefault('storage_mode', 'quantized_only')
+
+        return validated_config
+
+    def _calculate_smart_training_size(self, subvectors: int, bits: int) -> int:
+        """
+        Calculate optimal training size based on quantization parameters.
+
+        Args:
+            subvectors: Number of subvectors
+            bits: Bits per subvector
+
+        Returns:
+            Recommended training size for stable k-means clustering
+        """
+        # Statistical requirement: need enough samples per centroid for stable clustering
+        # Training is done per subvector, so we need (2^bits * min_samples) total
+        centroids_per_subvector = 2 ** bits
+        min_samples_per_centroid = 20  # Statistical guideline for k-means stability
+
+        # Calculate minimum samples needed for stable clustering across all subvectors
+        statistical_minimum = centroids_per_subvector * min_samples_per_centroid
+
+        # Practical bounds
+        reasonable_minimum = 10000   # Always need at least this for diversity
+        reasonable_maximum = 200000  # Diminishing returns beyond this point
+
+        return min(max(statistical_minimum, reasonable_minimum), reasonable_maximum)
+
+
+    def _suggest_subvector_divisors(self, dim: int) -> list[int]:
+        """Return valid subvector counts that divide the dimension evenly (up to 32)."""
+        return [i for i in range(1, min(33, dim + 1)) if dim % i == 0]
+
+
+
+
+
+    def _check_memory_usage(self, config: Dict[str, Any], dim: int) -> None:
+        """
+        Calculate and warn about memory usage for the quantization configuration.
+
+        Args:
+            config: Validated quantization configuration
+            dim: Vector dimension
+        """
+        subvectors = config['subvectors']
+        bits = config['bits']
+        sub_dim = dim // subvectors
+
+        # Calculate centroid storage requirements
+        num_centroids_per_subvector = 2 ** bits
+        total_centroids = subvectors * num_centroids_per_subvector
+        centroid_memory_mb = (total_centroids * sub_dim * 4) / (1024 * 1024)  # 4 bytes per float32
+
+        # Calculate compression ratio
+        original_bytes_per_vector = dim * 4  # float32
+        compressed_bytes_per_vector = subvectors  # 1 byte per subvector code
+        compression_ratio = original_bytes_per_vector / compressed_bytes_per_vector
+
+        # Add memory info to config for user reference (internal)
+        memory_info: MemoryInfo = {
+            'centroid_storage_mb': round(centroid_memory_mb, 2),
+            'compression_ratio': round(compression_ratio, 1),
+            'centroids_per_subvector': num_centroids_per_subvector,
+            'total_centroids': total_centroids,
+            'calculated_training_size': config['training_size']
+        }
+        config['__memory_info__'] = memory_info
+        # Warn about large memory usage
+        if centroid_memory_mb > 100:
+            import warnings
+            warnings.warn(
+                f"Large centroid storage required: {centroid_memory_mb:.1f}MB. "
+                f"Consider reducing bits ({bits}) or subvectors ({subvectors}) for memory efficiency.",
+                UserWarning,
+                stacklevel=2
+            )
+
+        # Warn about low compression
+        if compression_ratio < 4:
+            import warnings
+            warnings.warn(
+                f"Low compression ratio: {compression_ratio:.1f}x. "
+                f"Consider increasing subvectors ({subvectors}) or reducing bits ({bits}) for better compression.",
+                UserWarning,
+                stacklevel=2
+            )
+
+        # Warn about extremely high compression
+        if compression_ratio > 50:
+            import warnings
+            warnings.warn(
+                f"Very high compression ratio: {compression_ratio:.1f}x may significantly impact recall quality. "
+                f"Consider reducing subvectors ({subvectors}) or increasing bits ({bits}) for better accuracy.",
+                UserWarning,
+                stacklevel=2
+            )
+
     @classmethod
     def available_index_types(cls) -> list[str]:
         """Return list of all supported index types."""
         return sorted(cls._index_constructors.keys())
+
````
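The helpers added above are pure arithmetic, so their behavior is easy to reproduce outside the package. A standalone sketch of the same formulas (note the compression figure counts only the one-byte-per-subvector codes, exactly as `_check_memory_usage` does, so it excludes centroid storage and index overhead):

```python
def smart_training_size(subvectors: int, bits: int) -> int:
    # 2^bits centroids per subvector, ~20 samples per centroid,
    # clamped to the practical range [10_000, 200_000].
    return min(max((2 ** bits) * 20, 10_000), 200_000)

def memory_info(dim: int, subvectors: int, bits: int) -> tuple[float, float]:
    sub_dim = dim // subvectors
    total_centroids = subvectors * (2 ** bits)
    centroid_mb = total_centroids * sub_dim * 4 / (1024 * 1024)  # float32 centroids
    compression = (dim * 4) / subvectors  # raw float32 bytes vs. 1 code byte per subvector
    return centroid_mb, compression

print(smart_training_size(8, 8))  # 10000 (256 * 20 = 5120, raised to the 10k floor)
print(memory_info(1536, 8, 8))    # (1.5, 768.0)
```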
zeusdb_vector_database/zeusdb_vector_database.cpython-311-arm-linux-musleabihf.so: binary file changed (no diff shown).
{zeusdb_vector_database-0.1.2.dist-info → zeusdb_vector_database-0.2.1.dist-info}/METADATA:

````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: zeusdb-vector-database
-Version: 0.1.2
+Version: 0.2.1
 Classifier: Programming Language :: Rust
 Classifier: Programming Language :: Python :: Implementation :: CPython
 Requires-Dist: numpy>=2.2.6,<3.0.0
@@ -11,7 +11,7 @@ License-File: LICENSE
 License-File: NOTICE
 Summary: Blazing-fast vector DB with real-time similarity search and metadata filtering.
 Author-email: ZeusDB <contact@zeusdb.com>
-License: Apache-2.0
+License-Expression: Apache-2.0
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
 Project-URL: Repository, https://github.com/zeusdb/zeusdb-vector-database
````
````diff
@@ -56,22 +56,30 @@ ZeusDB leverages the HNSW (Hierarchical Navigable Small World) algorithm for spe
 
 ## ⭐ Features
 
-
+🐍 User-friendly Python API for adding vectors and running similarity searches
 
-
+🔥 High-performance Rust backend optimized for speed and concurrency
 
-
+🔍 Approximate Nearest Neighbor (ANN) search using HNSW for fast, accurate results
 
-
+📦 Product Quantization (PQ) for compact storage, faster distance computations, and scalability for Big Data
+
+📥 Flexible input formats, including native Python types and zero-copy NumPy arrays
 
-🗂️ Metadata-aware filtering
+🗂️ Metadata-aware filtering for precise and contextual querying
 
-
+
+
+
+<!--
+📋 Supports multiple distance metrics: `cosine`, `L1`, `L2`
+
+📥 Supports multiple input formats using a single, easy-to-use Python method
 
 ⚡ Smart multi-threaded inserts that automatically speed up large batch uploads
 
 🚀 Fast, concurrent searches so you can run multiple queries at the same time
-
+-->
 
 <br/>
 
````
````diff
@@ -215,10 +223,11 @@ index = vdb.create(
 |------------------|--------|-----------|-----------------------------------------------------------------------------|
 | `index_type` | `str` | `"hnsw"` | The type of vector index to create. Currently supports `"hnsw"`. Future options include `"ivf"`, `"flat"`, etc. Case-insensitive. |
 | `dim` | `int` | `1536` | Dimensionality of the vectors to be indexed. Each vector must have this length. The default dim=1536 is chosen to match the output dimensionality of OpenAI's text-embedding-ada-002 model. |
-| `space` | `str` | `"cosine"`| Distance metric used for similarity search. Options include `"cosine"
+| `space` | `str` | `"cosine"`| Distance metric used for similarity search. Options include `"cosine"`, `"L1"` and `"L2"`.|
 | `m` | `int` | `16` | Number of bi-directional connections created for each new node. Higher `m` improves recall but increases index size and build time. |
 | `ef_construction`| `int` | `200` | Size of the dynamic list used during index construction. Larger values increase indexing time and memory, but improve quality. |
 | `expected_size` | `int` | `10000` | Estimated number of elements to be inserted. Used for preallocating internal data structures. Not a hard limit. |
+| `quantization_config` | `dict` | `None` | Product Quantization configuration for memory-efficient vector compression. |
 
 <br/>
 
````
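Tying the table together, here is a sketch of a call that spells out every documented default plus the new `quantization_config` parameter (in practice you would pass only the values you want to override):

```python
from zeusdb_vector_database import VectorDatabase

vdb = VectorDatabase()
index = vdb.create(
    index_type="hnsw",         # only "hnsw" today; case-insensitive
    dim=1536,                  # must match your embedding size
    space="cosine",            # or "L1" / "L2"
    m=16,                      # graph connectivity
    ef_construction=200,       # build-time candidate list size
    expected_size=10000,       # preallocation hint, not a hard limit
    quantization_config=None,  # or a PQ dict, as in the Product Quantization section
)
```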
METADATA (continued):

````diff
@@ -411,7 +420,7 @@ results = index.search(vector=query_vector, top_k=3)
 print(results)
 ```
 
-#### 🔍 Search Example 6 - Batch Search with
+#### 🔍 Search Example 6 - Batch Search with metadata filter
 
 Performs similarity search on multiple query vectors with metadata filtering, returning filtered results for each query.
 
````
````diff
@@ -555,6 +564,194 @@ print(partial)
 
 ⚠️ `get_records()` only returns results for IDs that exist in the index. Missing IDs are silently skipped.
 
+<br />
+
+
+## 🗜️ Product Quantization
+
+Product Quantization (PQ) is a vector compression technique that significantly reduces memory usage while preserving high search accuracy. Commonly used in HNSW-based vector databases, PQ works by dividing each vector into subvectors and quantizing them independently. This enables compression ratios of 4× to 256×, making it ideal for large-scale, high-dimensional datasets.
+
+ZeusDB Vector Database's PQ implementation features:
+
+✅ Intelligent Training – PQ model trains automatically at defined thresholds
+
+✅ Efficient Memory Use – Store 4× to 256× more vectors in the same RAM footprint
+
+✅ Fast Approximate Search – Uses Asymmetric Distance Computation (ADC) for high-speed search computation
+
+✅ Seamless Operation – Index automatically switches from raw to quantized storage modes
+
+<br />
+
+### 📘 Quantization Configuration Parameters
+
+To enable PQ, pass a `quantization_config` dictionary to the `.create()` index method:
+
+| Parameter | Type | Description | Valid Range | Default |
+|-----------|------|-------------|-------------|---------|
+| `type` | `str` | Quantization algorithm type | `"pq"` | *required* |
+| `subvectors` | `int` | Number of vector subspaces (must divide dimension evenly) | 1 to dimension | `8` |
+| `bits` | `int` | Bits per quantized code (controls centroids per subvector) | 1-8 | `8` |
+| `training_size` | `int` | Minimum vectors needed for stable k-means clustering | ≥ 1000 | 1000 |
+| `max_training_vectors` | `int` | Maximum vectors used during training (optional limit) | ≥ training_size | `None` |
+| `storage_mode` | `str` | Storage strategy: "quantized_only" (memory optimized) or "quantized_with_raw" (keep raw vectors for exact reconstruction) | "quantized_only", "quantized_with_raw" | `"quantized_only"` |
+
+
+<br/>
+
+
+### 🔧 Usage Example 1
+
+```python
+from zeusdb_vector_database import VectorDatabase
+import numpy as np
+
+# Create index with product quantization
+vdb = VectorDatabase()
+
+# Configure quantization for memory efficiency
+quantization_config = {
+    'type': 'pq',                  # `pq` for Product Quantization
+    'subvectors': 8,               # Divide 1536-dim vectors into 8 subvectors of 192 dims each
+    'bits': 8,                     # 256 centroids per subvector (2^8)
+    'training_size': 10000,        # Train when 10k vectors are collected
+    'max_training_vectors': 50000  # Use max 50k vectors for training
+}
+
+# Create index with quantization
+# This will automatically handle training when enough vectors are added
+index = vdb.create(
+    index_type="hnsw",
+    dim=1536,                                # OpenAI `text-embedding-3-small` dimension
+    quantization_config=quantization_config  # Add the compression configuration
+)
+
+# Add vectors - training triggers automatically at threshold
+documents = [
+    {
+        "id": f"doc_{i}",
+        "values": np.random.rand(1536).astype(float).tolist(),
+        "metadata": {"category": "tech", "year": 2026}
+    }
+    for i in range(15000)
+]
+
+# Training will trigger automatically when 10k vectors are added
+result = index.add(documents)
+print(f"Added {result.total_inserted} vectors")
+
+# Check quantization status
+print(f"Training progress: {index.get_training_progress():.1f}%")
+print(f"Storage mode: {index.get_storage_mode()}")
+print(f"Is quantized: {index.is_quantized()}")
+
+# Get compression statistics
+quant_info = index.get_quantization_info()
+if quant_info:
+    print(f"Compression ratio: {quant_info['compression_ratio']:.1f}x")
+    print(f"Memory usage: {quant_info['memory_mb']:.1f} MB")
+
+# Search works seamlessly with quantized storage
+query_vector = np.random.rand(1536).astype(float).tolist()
+results = index.search(vector=query_vector, top_k=3)
+
+# Simply print raw results
+print(results)
+```
+
+Results
+```python
+[
+ {'id': 'doc_9719', 'score': 0.5133496522903442, 'metadata': {'category': 'tech', 'year': 2026}},
+ {'id': 'doc_8148', 'score': 0.5139288306236267, 'metadata': {'category': 'tech', 'year': 2026}},
+ {'id': 'doc_7822', 'score': 0.5151920914649963, 'metadata': {'category': 'tech', 'year': 2026}},
+]
+```
+<br />
+
+### 🔧 Usage Example 2 - with explicit storage mode
+
+```python
+from zeusdb_vector_database import VectorDatabase
+import numpy as np
+
+# Create index with product quantization
+vdb = VectorDatabase()
+
+# Configure quantization for memory efficiency
+quantization_config = {
+    'type': 'pq',                    # `pq` for Product Quantization
+    'subvectors': 8,                 # Divide 3072-dim vectors into 8 subvectors of 384 dims each
+    'bits': 8,                       # 256 centroids per subvector (2^8)
+    'training_size': 10000,          # Train when 10k vectors are collected
+    'max_training_vectors': 50000,   # Use max 50k vectors for training
+    'storage_mode': 'quantized_only' # Explicitly set storage mode to only keep quantized values
+}
+
+# Create index with quantization
+# This will automatically handle training when enough vectors are added
+index = vdb.create(
+    index_type="hnsw",
+    dim=3072,                                # OpenAI `text-embedding-3-large` dimension
+    quantization_config=quantization_config  # Add the compression configuration
+)
+
+```
+
+<br />
+
+### ⚙️ Configuration Guidelines
+
+For Balanced Memory & Accuracy (recommended starting point):
+```python
+quantization_config = {
+    'type': 'pq',
+    'subvectors': 8,         # Balanced: moderate compression, good accuracy
+    'bits': 8,               # 256 centroids per subvector (high precision)
+    'training_size': 10000,  # Or higher for large datasets
+    'storage_mode': 'quantized_only'  # Default, memory efficient
+}
+# Achieves ~16x–32x compression with strong recall for most applications
+```
+
+
+For Memory Optimization:
+```python
+quantization_config = {
+    'type': 'pq',
+    'subvectors': 16,        # More subvectors = better compression
+    'bits': 6,               # Fewer bits = less memory per centroid
+    'training_size': 20000,
+    'storage_mode': 'quantized_only'
+}
+# Achieves ~32x compression ratio
+```
+
+For Accuracy Optimization:
+```python
+quantization_config = {
+    'type': 'pq',
+    'subvectors': 4,         # Fewer subvectors = better accuracy
+    'bits': 8,               # More bits = more precise quantization
+    'training_size': 50000,  # More training data = better centroids
+    'storage_mode': 'quantized_with_raw'  # Keep raw vectors for exact recall
+}
+# Achieves ~4x compression ratio with minimal accuracy loss
+```
+
+### 📊 Performance Characteristics
+
+- Training: Occurs once when threshold is reached (typically 1-5 minutes for 50k vectors)
+- Memory Reduction: 4x-256x depending on configuration
+- Search Speed: Comparable or faster than raw vectors due to ADC optimization
+- Accuracy Impact: Typically 1-5% recall reduction with proper tuning
+
+Quantization is ideal for production deployments with large vector datasets (100k+ vectors) where memory efficiency is critical.
+
+`"quantized_only"` is recommended for most use cases and maximizes memory savings.
+
+`"quantized_with_raw"` keeps both quantized and raw vectors for exact reconstruction, but uses more memory.
+
 
 <br/>
 
````
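Because `subvectors` must divide the vector dimension evenly, it is worth checking candidate values before committing to one of the guideline configs above. This sketch mirrors the `_suggest_subvector_divisors` helper from the vector_database.py diff earlier in this page:

```python
def divisors_up_to_32(dim: int) -> list[int]:
    # Same rule as _suggest_subvector_divisors: divisors of dim, capped at 32.
    return [i for i in range(1, min(33, dim + 1)) if dim % i == 0]

for dim in (1024, 1536, 3072):
    print(dim, divisors_up_to_32(dim))
# 1024 -> [1, 2, 4, 8, 16, 32]
# 1536 -> [1, 2, 3, 4, 6, 8, 12, 16, 24, 32]
# 3072 -> [1, 2, 3, 4, 6, 8, 12, 16, 24, 32]
```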
zeusdb_vector_database-0.2.1.dist-info/RECORD (new file):

````diff
@@ -0,0 +1,10 @@
+zeusdb_vector_database-0.2.1.dist-info/METADATA,sha256=OmbOu55sPIUBfAUtO2Wz1UksOrOQPw7MhFvhH1IO4VE,31415
+zeusdb_vector_database-0.2.1.dist-info/WHEEL,sha256=1KLx1bwTImE5-jtZSbxSob3xd9PdtxkVWD0RVE5Y4y8,107
+zeusdb_vector_database-0.2.1.dist-info/licenses/LICENSE,sha256=82Hi3E_KqpDOBk00HrY6fGiErqL3QJquGQ6dUu9wJzE,11336
+zeusdb_vector_database-0.2.1.dist-info/licenses/NOTICE,sha256=GDGZ9V3p4Uvaj-1RT9Pbeczps-rSeZz8q8wSxb_Q13o,971
+zeusdb_vector_database.libs/libgcc_s-5b5488a6.so.1,sha256=HGKUsVmTeNAxEdSy7Ua5Vh_I9FN3RCbPWzvZ7H_TrwE,2749061
+zeusdb_vector_database/__init__.py,sha256=ywLk8n8oB_zUCZCuFZAfAIBc7vh3sQ3EJA1YEo7oVQw,202
+zeusdb_vector_database/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+zeusdb_vector_database/vector_database.py,sha256=x7Log7dtdsldHCc9ogRTzi7reEqJ1e-5HG1ABo1Py14,15102
+zeusdb_vector_database/zeusdb_vector_database.cpython-311-arm-linux-musleabihf.so,sha256=MBq09FgtGwml4dnBUXNZiJDENfO0gFTNVnZaatTA31I,4563157
+zeusdb_vector_database-0.2.1.dist-info/RECORD,,
````

zeusdb_vector_database-0.1.2.dist-info/RECORD (removed):

````diff
@@ -1,10 +0,0 @@
-zeusdb_vector_database-0.1.2.dist-info/METADATA,sha256=0bduX_0CGyHp8S6Yl0k1Cd6ELU9XlnoZR6zg-OKTMq4,23893
-zeusdb_vector_database-0.1.2.dist-info/WHEEL,sha256=A0NA77PxChGQFi1IYP9OCFEqCQA-RdpKrrZdN1LBaAI,107
-zeusdb_vector_database-0.1.2.dist-info/licenses/LICENSE,sha256=82Hi3E_KqpDOBk00HrY6fGiErqL3QJquGQ6dUu9wJzE,11336
-zeusdb_vector_database-0.1.2.dist-info/licenses/NOTICE,sha256=GDGZ9V3p4Uvaj-1RT9Pbeczps-rSeZz8q8wSxb_Q13o,971
-zeusdb_vector_database.libs/libgcc_s-5b5488a6.so.1,sha256=HGKUsVmTeNAxEdSy7Ua5Vh_I9FN3RCbPWzvZ7H_TrwE,2749061
-zeusdb_vector_database/__init__.py,sha256=k6Jt99gmCjTCce4CfzFmZcFt1JbICHK4ya5KDJwN3js,202
-zeusdb_vector_database/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-zeusdb_vector_database/vector_database.py,sha256=7Gs9FiasO5suqc3E8mxxtW1ORIXBh-8AXvp0pQo1VYc,3463
-zeusdb_vector_database/zeusdb_vector_database.cpython-311-arm-linux-musleabihf.so,sha256=9rCO1w_S051JnCMRbzsjsULbVwLMj7aLdIMb1btwlVU,4391109
-zeusdb_vector_database-0.1.2.dist-info/RECORD,,
````
{zeusdb_vector_database-0.1.2.dist-info → zeusdb_vector_database-0.2.1.dist-info}/licenses/LICENSE: renamed, file without changes.

{zeusdb_vector_database-0.1.2.dist-info → zeusdb_vector_database-0.2.1.dist-info}/licenses/NOTICE: renamed, file without changes.