zeusdb-vector-database 0.1.2__cp311-cp311-macosx_11_0_arm64.whl → 0.2.0__cp311-cp311-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
  """
  ZeusDB Vector Database Module
  """
- __version__ = "0.1.2"
+ __version__ = "0.2.0"

  from .vector_database import VectorDatabase  # imports the VectorDatabase class from the vector_database.py file

@@ -1,16 +1,24 @@
  """
  vector_database.py

- Factory for creating vector indexes with support for multiple types.
+ Factory for creating vector indexes with support for multiple types and quantization.
  Currently supports HNSW (Hierarchical Navigable Small World) with extensible design.
  """
- from typing import Callable, Dict, Any
+ from typing import Callable, Dict, Any, Optional, TypedDict
  from .zeusdb_vector_database import HNSWIndex
  # from .zeusdb_vector_database import HNSWIndex, IVFIndex, LSHIndex, AnnoyIndex, FlatIndex  # Future support planned

+ class MemoryInfo(TypedDict):
+     """Type definition for quantization memory information."""
+     centroid_storage_mb: float
+     compression_ratio: float
+     centroids_per_subvector: int
+     total_centroids: int
+     calculated_training_size: int
+
  class VectorDatabase:
      """
-     Factory for creating various types of vector indexes.
+     Factory for creating various types of vector indexes with optional quantization.
      Each index type is registered via _index_constructors.
      """

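For illustration, the formulas in `_check_memory_usage` (later in this diff) give the following `MemoryInfo` for a `dim=1536`, `subvectors=8`, `bits=8` configuration with `training_size` omitted: 2^8 = 256 centroids per subvector, 8 × 256 = 2048 centroids in total, each of length 1536 / 8 = 192 floats.

```python
# Hypothetical values, computed from the formulas in _check_memory_usage:
memory_info = {
    "centroid_storage_mb": 1.5,         # 2048 centroids * 192 dims * 4 bytes / 1024^2
    "compression_ratio": 768.0,         # (1536 dims * 4 bytes) / 8 one-byte codes
    "centroids_per_subvector": 256,     # 2^bits
    "total_centroids": 2048,            # subvectors * 2^bits
    "calculated_training_size": 10000,  # see _calculate_smart_training_size
}
```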
@@ -26,12 +34,13 @@ class VectorDatabase:
      """Initialize the vector database factory."""
      pass

-     def create(self, index_type: str = "hnsw", **kwargs) -> Any:
+     def create(self, index_type: str = "hnsw", quantization_config: Optional[Dict[str, Any]] = None, **kwargs) -> Any:
          """
-         Create a vector index of the specified type.
+         Create a vector index of the specified type with optional quantization.

          Args:
              index_type: The type of index to create (case-insensitive: "hnsw", "ivf", etc.)
+             quantization_config: Optional quantization configuration dictionary
              **kwargs: Parameters specific to the chosen index type (validated by Rust backend)

          For "hnsw", supported parameters are:
@@ -41,22 +50,55 @@ class VectorDatabase:
              - ef_construction (int): Construction candidate list size (default: 200)
              - expected_size (int): Expected number of vectors (default: 10000)

+         Quantization config format:
+             {
+                 'type': 'pq',                 # Currently only 'pq' (Product Quantization) supported
+                 'subvectors': 8,              # Number of subvectors (must divide dim evenly, default: 8)
+                 'bits': 8,                    # Bits per subvector (1-8, controls centroids, default: 8)
+                 'training_size': None,        # Auto-calculated based on subvectors & bits (or specify manually)
+                 'max_training_vectors': None  # Optional limit on training vectors used
+             }
+
+         Note: Quantization reduces memory usage (typically 4-32x compression) but may
+         slightly degrade recall accuracy. Training triggers automatically on the first
+         .add() call that reaches the training_size threshold.
+
          Returns:
              An instance of the created vector index.

          Examples:
-             # HNSW index with defaults
+             # HNSW index with defaults (no quantization)
              vdb = VectorDatabase()
              index = vdb.create("hnsw", dim=1536)

-             # HNSW index with custom parameters
-             index = vdb.create("hnsw", dim=768, m=16, ef_construction=200, space="cosine", expected_size=10000)
+             # HNSW index with Product Quantization (auto-calculated training size)
+             quantization_config = {
+                 'type': 'pq',
+                 'subvectors': 8,
+                 'bits': 8
+             }
+             index = vdb.create(
+                 index_type="hnsw",
+                 dim=1536,
+                 quantization_config=quantization_config
+             )

-             # Future IVF index
-             # index = vdb.create("ivf", dim=1536, nlist=100, nprobe=10)
+             # Memory-optimized configuration with manual training size
+             memory_optimized_config = {
+                 'type': 'pq',
+                 'subvectors': 16,        # More subvectors = better compression
+                 'bits': 6,               # Fewer bits = less memory per centroid
+                 'training_size': 75000   # Override auto-calculation
+             }
+             index = vdb.create(
+                 index_type="hnsw",
+                 dim=1536,
+                 quantization_config=memory_optimized_config,
+                 expected_size=1000000    # Large dataset
+             )

          Raises:
-             ValueError: If index_type is not supported.
+             ValueError: If index_type is not supported or quantization config is invalid.
              RuntimeError: If index creation fails due to backend validation.
          """
          index_type = (index_type or "").strip().lower()
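The automatic-training note in the docstring can be made concrete. A sketch of the documented lifecycle, assuming the `add`/monitoring behavior shown in the README examples further down in this diff (`get_training_progress`, `is_quantized`); treat it as illustrative, not verified output:

```python
import numpy as np
from zeusdb_vector_database import VectorDatabase

# 12k random documents; the auto-calculated training threshold is 10000
docs = [{"id": f"doc_{i}", "values": np.random.rand(128).tolist(),
         "metadata": {"category": "tech"}} for i in range(12000)]

vdb = VectorDatabase()
index = vdb.create("hnsw", dim=128,
                   quantization_config={"type": "pq", "subvectors": 8, "bits": 8})

index.add(docs[:5000])                 # below the threshold: vectors stay in raw storage
print(index.get_training_progress())   # partial progress (< 100)

index.add(docs[5000:])                 # threshold crossed: PQ training runs during this call
print(index.is_quantized())            # True once centroids are trained
```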
@@ -65,9 +107,16 @@ class VectorDatabase:
              available = ', '.join(sorted(self._index_constructors.keys()))
              raise ValueError(f"Unknown index type '{index_type}'. Available: {available}")

+         # Centralize dim early to ensure consistency
+         dim = kwargs.get('dim', 1536)
+
+         # Validate and process quantization config
+         if quantization_config is not None:
+             quantization_config = self._validate_quantization_config(quantization_config, dim)
+
          # Apply index-specific defaults
          if index_type == "hnsw":
-             kwargs.setdefault("dim", 1536)
+             kwargs.setdefault("dim", dim)
              kwargs.setdefault("space", "cosine")
              kwargs.setdefault("m", 16)
              kwargs.setdefault("ef_construction", 200)
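A quick illustration of the divisor rule enforced by `_validate_quantization_config` (next hunk): 1536 is not divisible by 7, so a `subvectors: 7` config is rejected before anything reaches the Rust backend. A minimal sketch using only the code shown in this diff:

```python
from zeusdb_vector_database import VectorDatabase

vdb = VectorDatabase()
try:
    # 1536 % 7 != 0, so validation raises before the index is built
    vdb.create("hnsw", dim=1536,
               quantization_config={"type": "pq", "subvectors": 7})
except ValueError as e:
    print(e)  # suggests valid divisors: 1, 2, 3, 4, 6, 8, 12, 16
```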
@@ -76,11 +125,188 @@ class VectorDatabase:
          constructor = self._index_constructors[index_type]

          try:
-             return constructor(**kwargs)
+             # Always pass quantization_config parameter
+             clean_config = None
+             if quantization_config is not None:
+                 # Clean quantization_config before passing to Rust (remove internal keys)
+                 clean_config = {k: v for k, v in quantization_config.items() if not k.startswith('_')}
+
+             return constructor(quantization_config=clean_config, **kwargs)
          except Exception as e:
              raise RuntimeError(f"Failed to create {index_type.upper()} index: {e}") from e
+
+
+
+     def _validate_quantization_config(self, config: Dict[str, Any], dim: int) -> Dict[str, Any]:
+         """
+         Validate and normalize quantization configuration.
+
+         Args:
+             config: Raw quantization configuration
+             dim: Vector dimension for validation
+
+         Returns:
+             Validated and normalized configuration
+
+         Raises:
+             ValueError: If configuration is invalid
+         """
+         if not isinstance(config, dict):
+             raise ValueError("quantization_config must be a dictionary")
+
+         # Create a copy to avoid modifying the original
+         validated_config = config.copy()
+
+         # Validate quantization type
+         qtype = validated_config.get('type', '').lower()
+         if qtype != 'pq':
+             raise ValueError(f"Unsupported quantization type: '{qtype}'. Currently only 'pq' is supported.")
+
+         validated_config['type'] = 'pq'
+
+         # Validate subvectors
+         subvectors = validated_config.get('subvectors', 8)
+         if not isinstance(subvectors, int) or subvectors <= 0:
+             raise ValueError(f"subvectors must be a positive integer, got {subvectors}")
+
+         if dim % subvectors != 0:
+             raise ValueError(
+                 f"subvectors ({subvectors}) must divide dimension ({dim}) evenly. "
+                 f"Consider using subvectors: {self._suggest_subvector_divisors(dim)}"
+             )
+
+         if subvectors > dim:
+             raise ValueError(f"subvectors ({subvectors}) cannot exceed dimension ({dim})")
+
+         validated_config['subvectors'] = subvectors
+
+         # Validate bits per subvector
+         bits = validated_config.get('bits', 8)
+         if not isinstance(bits, int) or bits < 1 or bits > 8:
+             raise ValueError(f"bits must be an integer between 1 and 8, got {bits}")
+
+         validated_config['bits'] = bits
+
+         # Calculate smart training size if not provided
+         training_size = validated_config.get('training_size')
+         if training_size is None:
+             training_size = self._calculate_smart_training_size(subvectors, bits)
+         else:
+             if not isinstance(training_size, int) or training_size < 1000:
+                 raise ValueError(f"training_size must be at least 1000 for stable k-means clustering, got {training_size}")
+
+         validated_config['training_size'] = training_size
+
+         # Validate max training vectors if provided
+         max_training_vectors = validated_config.get('max_training_vectors')
+         if max_training_vectors is not None:
+             if not isinstance(max_training_vectors, int) or max_training_vectors < training_size:
+                 raise ValueError(
+                     f"max_training_vectors ({max_training_vectors}) must be >= training_size ({training_size})"
+                 )
+             validated_config['max_training_vectors'] = max_training_vectors
+
+         # Calculate and warn about memory usage
+         self._check_memory_usage(validated_config, dim)
+
+         return validated_config
+
+     def _calculate_smart_training_size(self, subvectors: int, bits: int) -> int:
+         """
+         Calculate optimal training size based on quantization parameters.
+
+         Args:
+             subvectors: Number of subvectors
+             bits: Bits per subvector

+         Returns:
+             Recommended training size for stable k-means clustering
+         """
+         # Statistical requirement: need enough samples per centroid for stable clustering
+         # Training is done per subvector, so we need (2^bits * min_samples) total
+         centroids_per_subvector = 2 ** bits
+         min_samples_per_centroid = 20  # Statistical guideline for k-means stability
+
+         # Calculate minimum samples needed for stable clustering across all subvectors
+         statistical_minimum = centroids_per_subvector * min_samples_per_centroid
+
+         # Practical bounds
+         reasonable_minimum = 10000   # Always need at least this for diversity
+         reasonable_maximum = 200000  # Diminishing returns beyond this point
+
+         return min(max(statistical_minimum, reasonable_minimum), reasonable_maximum)
+
+     def _suggest_subvector_divisors(self, dim: int) -> str:
+         """Suggest valid subvector counts that divide the dimension evenly."""
+         divisors = []
+         for i in range(1, min(33, dim + 1)):  # Common subvector counts up to 32
+             if dim % i == 0:
+                 divisors.append(str(i))
+         return ', '.join(divisors[:8])  # Show first 8 suggestions
+
+     def _check_memory_usage(self, config: Dict[str, Any], dim: int) -> None:
+         """
+         Calculate and warn about memory usage for the quantization configuration.
+
+         Args:
+             config: Validated quantization configuration
+             dim: Vector dimension
+         """
+         subvectors = config['subvectors']
+         bits = config['bits']
+         sub_dim = dim // subvectors
+
+         # Calculate centroid storage requirements
+         num_centroids_per_subvector = 2 ** bits
+         total_centroids = subvectors * num_centroids_per_subvector
+         centroid_memory_mb = (total_centroids * sub_dim * 4) / (1024 * 1024)  # 4 bytes per float32
+
+         # Calculate compression ratio
+         original_bytes_per_vector = dim * 4  # float32
+         compressed_bytes_per_vector = subvectors  # 1 byte per subvector code
+         compression_ratio = original_bytes_per_vector / compressed_bytes_per_vector
+
+         # Add memory info to config for user reference (internal)
+         memory_info: MemoryInfo = {
+             'centroid_storage_mb': round(centroid_memory_mb, 2),
+             'compression_ratio': round(compression_ratio, 1),
+             'centroids_per_subvector': num_centroids_per_subvector,
+             'total_centroids': total_centroids,
+             'calculated_training_size': config['training_size']
+         }
+         config['__memory_info__'] = memory_info
+         # Warn about large memory usage
+         if centroid_memory_mb > 100:
+             import warnings
+             warnings.warn(
+                 f"Large centroid storage required: {centroid_memory_mb:.1f}MB. "
+                 f"Consider reducing bits ({bits}) or subvectors ({subvectors}) for memory efficiency.",
+                 UserWarning,
+                 stacklevel=2
+             )
+
+         # Warn about low compression
+         if compression_ratio < 4:
+             import warnings
+             warnings.warn(
+                 f"Low compression ratio: {compression_ratio:.1f}x. "
+                 f"Consider increasing subvectors ({subvectors}) or reducing bits ({bits}) for better compression.",
+                 UserWarning,
+                 stacklevel=2
+             )
+
+         # Warn about extremely high compression
+         if compression_ratio > 50:
+             import warnings
+             warnings.warn(
+                 f"Very high compression ratio: {compression_ratio:.1f}x may significantly impact recall quality. "
+                 f"Consider reducing subvectors ({subvectors}) or increasing bits ({bits}) for better accuracy.",
+                 UserWarning,
+                 stacklevel=2
+             )
+
+
      @classmethod
      def available_index_types(cls) -> list[str]:
          """Return list of all supported index types."""
          return sorted(cls._index_constructors.keys())
+
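The auto-sizing arithmetic in `_calculate_smart_training_size` is worth tracing once. With the 20-samples-per-centroid guideline, even the largest allowed codebook (bits=8, 256 centroids) only demands 5,120 samples, which sits below the 10,000-vector floor, so any config with bits in 1..8 that omits `training_size` gets 10,000. Note also that `subvectors` never enters the formula, since training samples are shared across subvectors. A condensed restatement of that logic:

```python
def smart_training_size(bits: int) -> int:
    # Mirrors _calculate_smart_training_size: clamp the statistical
    # minimum (2^bits centroids * 20 samples each) to [10_000, 200_000]
    statistical_minimum = (2 ** bits) * 20
    return min(max(statistical_minimum, 10_000), 200_000)

for bits in (4, 6, 8):
    print(bits, smart_training_size(bits))
# 4 10000   (16 centroids  -> 320 samples, floor applies)
# 6 10000   (64 centroids  -> 1280 samples, floor applies)
# 8 10000   (256 centroids -> 5120 samples, floor applies)
```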
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: zeusdb-vector-database
- Version: 0.1.2
+ Version: 0.2.0
  Classifier: Programming Language :: Rust
  Classifier: Programming Language :: Python :: Implementation :: CPython
  Requires-Dist: numpy>=2.2.6,<3.0.0
@@ -11,7 +11,7 @@ License-File: LICENSE
  License-File: NOTICE
  Summary: Blazing-fast vector DB with real-time similarity search and metadata filtering.
  Author-email: ZeusDB <contact@zeusdb.com>
- License: Apache-2.0
+ License-Expression: Apache-2.0
  Requires-Python: >=3.10
  Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
  Project-URL: Repository, https://github.com/zeusdb/zeusdb-vector-database
@@ -56,22 +56,30 @@ ZeusDB leverages the HNSW (Hierarchical Navigable Small World) algorithm for spe

  ## ⭐ Features

- 🔍 Approximate Nearest Neighbor (ANN) search with HNSW
+ 🐍 User-friendly Python API for adding vectors and running similarity searches

- 📋 Supports multiple distance metrics: `cosine`, `L1`, `L2`
+ 🔥 High-performance Rust backend optimized for speed and concurrency

- 🔥 High-performance Rust backend
+ 🔍 Approximate Nearest Neighbor (ANN) search using HNSW for fast, accurate results
+
+ 📦 Product Quantization (PQ) for compact storage, faster distance computations, and scalability for Big Data
+
+ 📥 Flexible input formats, including native Python types and zero-copy NumPy arrays
+
+ 🗂️ Metadata-aware filtering for precise and contextual querying

- 📥 Supports multiple input formats using a single, easy-to-use Python method

- 🗂️ Metadata-aware filtering at query time

- 🐍 Simple and intuitive Python API
+
+ <!--
+ 📋 Supports multiple distance metrics: `cosine`, `L1`, `L2`
+
+ 📥 Supports multiple input formats using a single, easy-to-use Python method

  ⚡ Smart multi-threaded inserts that automatically speed up large batch uploads

  🚀 Fast, concurrent searches so you can run multiple queries at the same time
-
+ -->

  <br/>

@@ -215,10 +223,11 @@ index = vdb.create(
  |------------------|--------|-----------|-----------------------------------------------------------------------------|
  | `index_type` | `str` | `"hnsw"` | The type of vector index to create. Currently supports `"hnsw"`. Future options include `"ivf"`, `"flat"`, etc. Case-insensitive. |
  | `dim` | `int` | `1536` | Dimensionality of the vectors to be indexed. Each vector must have this length. The default dim=1536 is chosen to match the output dimensionality of OpenAI’s text-embedding-ada-002 model. |
- | `space` | `str` | `"cosine"`| Distance metric used for similarity search. Options include `"cosine"`. Additional metrics such as `"l2"`, and `"dot"` will be added in future versions. |
+ | `space` | `str` | `"cosine"`| Distance metric used for similarity search. Options include `"cosine"`, `"L1"`, and `"L2"`. |
  | `m` | `int` | `16` | Number of bi-directional connections created for each new node. Higher `m` improves recall but increases index size and build time. |
  | `ef_construction`| `int` | `200` | Size of the dynamic list used during index construction. Larger values increase indexing time and memory, but improve quality. |
  | `expected_size` | `int` | `10000` | Estimated number of elements to be inserted. Used for preallocating internal data structures. Not a hard limit. |
+ | `quantization_config` | `dict` | `None` | Product Quantization configuration for memory-efficient vector compression. |

  <br/>

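Taken together, a call that exercises every parameter in this table might look like the following sketch, composed from the defaults above (the quantization settings are covered in the Product Quantization section of the new README):

```python
from zeusdb_vector_database import VectorDatabase

vdb = VectorDatabase()
index = vdb.create(
    index_type="hnsw",        # case-insensitive
    dim=1536,                 # vector dimensionality
    space="cosine",           # distance metric
    m=16,                     # bi-directional connections per node
    ef_construction=200,      # construction-time candidate list size
    expected_size=10000,      # preallocation hint, not a hard limit
    quantization_config={"type": "pq", "subvectors": 8, "bits": 8},
)
```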
@@ -411,7 +420,7 @@ results = index.search(vector=query_vector, top_k=3)
  print(results)
  ```

- #### 🔍 Search Example 6 - Batch Search with with metadata filter
+ #### 🔍 Search Example 6 - Batch Search with metadata filter

  Performs similarity search on multiple query vectors with metadata filtering, returning filtered results for each query.

@@ -555,6 +564,156 @@ print(partial)

  ⚠️ `get_records()` only returns results for IDs that exist in the index. Missing IDs are silently skipped.

+ <br />
+
+
+ ## 🗜️ Product Quantization
+
+ Product Quantization (PQ) is a vector compression technique that significantly reduces memory usage while preserving high search accuracy. Commonly used in HNSW-based vector databases, PQ works by dividing each vector into subvectors and quantizing them independently. This enables compression ratios of 4× to 256×, making it ideal for large-scale, high-dimensional datasets.
+
+ ZeusDB Vector Database’s PQ implementation features:
+
+ ✅ Intelligent Training – PQ model trains automatically at defined thresholds
+
+ ✅ Efficient Memory Use – Store 4× to 256× more vectors in the same RAM footprint
+
+ ✅ Fast Approximate Search – Uses Asymmetric Distance Computation (ADC) for high-speed distance calculations
+
+ ✅ Seamless Operation – Index automatically switches from raw to quantized storage modes
+
+ <br />
+
+ ### 📘 Quantization Configuration Parameters
+
+ To enable PQ, pass a `quantization_config` dictionary to the `.create()` index method:
+
+ | Parameter | Type | Description | Valid Range | Default |
+ |-----------|------|-------------|-------------|---------|
+ | `type` | `str` | Quantization algorithm type | `"pq"` | *required* |
+ | `subvectors` | `int` | Number of vector subspaces (must divide dimension evenly) | 1 to dimension | `8` |
+ | `bits` | `int` | Bits per quantized code (controls centroids per subvector) | 1-8 | `8` |
+ | `training_size` | `int` | Minimum vectors needed for stable k-means clustering | ≥ 1000 | auto-calculated |
+ | `max_training_vectors` | `int` | Maximum vectors used during training (optional limit) | ≥ training_size | `None` |
+
+
+ <br/>
+
+
+ ### 🔧 Usage Example
+
+ ```python
+ from zeusdb_vector_database import VectorDatabase
+ import numpy as np
+
+ # Create index with product quantization
+ vdb = VectorDatabase()
+
+ # Configure quantization for memory efficiency
+ quantization_config = {
+     'type': 'pq',                  # `pq` for Product Quantization
+     'subvectors': 8,               # Divide 1536-dim vectors into 8 subvectors of 192 dims each
+     'bits': 8,                     # 256 centroids per subvector (2^8)
+     'training_size': 10000,        # Train when 10k vectors are collected
+     'max_training_vectors': 50000  # Use max 50k vectors for training
+ }
+
+ # Create index with quantization
+ # This will automatically handle training when enough vectors are added
+ index = vdb.create(
+     index_type="hnsw",
+     dim=1536,                                # OpenAI `text-embedding-3-small` dimension
+     quantization_config=quantization_config  # Add the compression configuration
+ )
+
+ # Add vectors - training triggers automatically at threshold
+ documents = [
+     {
+         "id": f"doc_{i}",
+         "values": np.random.rand(1536).astype(float).tolist(),
+         "metadata": {"category": "tech", "year": 2026}
+     }
+     for i in range(15000)
+ ]
+
+ # Training will trigger automatically when 10k vectors are added
+ result = index.add(documents)
+ print(f"Added {result.total_inserted} vectors")
+
+ # Check quantization status
+ print(f"Training progress: {index.get_training_progress():.1f}%")
+ print(f"Storage mode: {index.get_storage_mode()}")
+ print(f"Is quantized: {index.is_quantized()}")
+
+ # Get compression statistics
+ quant_info = index.get_quantization_info()
+ if quant_info:
+     print(f"Compression ratio: {quant_info['compression_ratio']:.1f}x")
+     print(f"Memory usage: {quant_info['memory_mb']:.1f} MB")
+
+ # Search works seamlessly with quantized storage
+ query_vector = np.random.rand(1536).astype(float).tolist()
+ results = index.search(vector=query_vector, top_k=3)
+
+ # Simply print raw results
+ print(results)
+ ```
+
+ Results:
+ ```python
+ [
+     {'id': 'doc_9719', 'score': 0.5133496522903442, 'metadata': {'category': 'tech', 'year': 2026}},
+     {'id': 'doc_8148', 'score': 0.5139288306236267, 'metadata': {'category': 'tech', 'year': 2026}},
+     {'id': 'doc_7822', 'score': 0.5151920914649963, 'metadata': {'category': 'tech', 'year': 2026}},
+ ]
+ ```
+
+ <br />
+
+ ### ⚙️ Configuration Guidelines
+
+ For Balanced Memory & Accuracy (recommended starting point):
+ ```python
+ quantization_config = {
+     'type': 'pq',
+     'subvectors': 8,         # Balanced: moderate compression, good accuracy
+     'bits': 8,               # 256 centroids per subvector (high precision)
+     'training_size': 10000   # Or higher for large datasets
+ }
+ # Achieves ~16x–32x compression with strong recall for most applications
+ ```
+
+
+ For Memory Optimization:
+ ```python
+ quantization_config = {
+     'type': 'pq',
+     'subvectors': 16,        # More subvectors = better compression
+     'bits': 6,               # Fewer bits = less memory per centroid
+     'training_size': 20000
+ }
+ # Achieves ~32x compression ratio
+ ```
+
+ For Accuracy Optimization:
+ ```python
+ quantization_config = {
+     'type': 'pq',
+     'subvectors': 4,         # Fewer subvectors = better accuracy
+     'bits': 8,               # More bits = more precise quantization
+     'training_size': 50000   # More training data = better centroids
+ }
+ # Achieves ~4x compression ratio with minimal accuracy loss
+ ```
+
+ ### 📊 Performance Characteristics
+
+ - Training: Occurs once when the threshold is reached (typically 1-5 minutes for 50k vectors)
+ - Memory Reduction: 4x-256x depending on configuration
+ - Search Speed: Comparable to or faster than raw vectors due to ADC optimization
+ - Accuracy Impact: Typically 1-5% recall reduction with proper tuning
+
+ Quantization is ideal for production deployments with large vector datasets (100k+ vectors) where memory efficiency is critical.
+


  <br/>

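The ADC step mentioned in the new README is what keeps quantized search fast: the query stays full-precision while each stored vector is just `subvectors` one-byte centroid codes, so a distance reduces to table lookups. A minimal NumPy sketch of the general idea (illustrative only, not ZeusDB's internal implementation):

```python
import numpy as np

# Toy setup: dim=8 split into 4 subvectors of 2 dims, 2^2 = 4 centroids each
subvectors, sub_dim, k = 4, 2, 4
codebooks = np.random.rand(subvectors, k, sub_dim)  # trained k-means centroids
codes = np.random.randint(0, k, size=(1000, subvectors), dtype=np.uint8)  # encoded vectors

query = np.random.rand(subvectors * sub_dim)

# Build lookup tables: squared L2 distance from each query subvector to every centroid
q_subs = query.reshape(subvectors, sub_dim)
tables = ((codebooks - q_subs[:, None, :]) ** 2).sum(axis=2)  # shape (subvectors, k)

# Asymmetric distance: sum the table entries selected by each vector's codes
dists = tables[np.arange(subvectors), codes].sum(axis=1)      # shape (1000,)
print(dists.shape, dists[:3])
```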
@@ -0,0 +1,9 @@
+ zeusdb_vector_database-0.2.0.dist-info/METADATA,sha256=lWikG9QZDEEMRYY1zh45XBiTL7A8Mh4FTJ_cVN7TNfE,29736
+ zeusdb_vector_database-0.2.0.dist-info/WHEEL,sha256=ZfdBJytWHFi2WZ1f_KKHwROhhQEId-N1oZJ4ZBt_MTs,104
+ zeusdb_vector_database-0.2.0.dist-info/licenses/LICENSE,sha256=82Hi3E_KqpDOBk00HrY6fGiErqL3QJquGQ6dUu9wJzE,11336
+ zeusdb_vector_database-0.2.0.dist-info/licenses/NOTICE,sha256=GDGZ9V3p4Uvaj-1RT9Pbeczps-rSeZz8q8wSxb_Q13o,971
+ zeusdb_vector_database/__init__.py,sha256=w2XfHqGQVShv1zDZfvLY-sSw4zzV1kdAFLjSCdGWlkU,202
+ zeusdb_vector_database/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ zeusdb_vector_database/vector_database.py,sha256=n1fiDvzpyyvLhOOaLfQSge9p4uhm8YWjX6i8m-rYoRI,13581
+ zeusdb_vector_database/zeusdb_vector_database.cpython-311-darwin.so,sha256=qpthvkSYoXklnMg6VnYFNtX8cb-gCxxb7n09WhH9jjM,4116096
+ zeusdb_vector_database-0.2.0.dist-info/RECORD,,
@@ -1,4 +1,4 @@
  Wheel-Version: 1.0
- Generator: maturin (1.9.1)
+ Generator: maturin (1.9.2)
  Root-Is-Purelib: false
  Tag: cp311-cp311-macosx_11_0_arm64
@@ -1,9 +0,0 @@
- zeusdb_vector_database-0.1.2.dist-info/METADATA,sha256=0bduX_0CGyHp8S6Yl0k1Cd6ELU9XlnoZR6zg-OKTMq4,23893
- zeusdb_vector_database-0.1.2.dist-info/WHEEL,sha256=4POUqOUvk-fNEqEa1NBlmMsgWQGl6FnEg9vsbsvEmNM,104
- zeusdb_vector_database-0.1.2.dist-info/licenses/LICENSE,sha256=82Hi3E_KqpDOBk00HrY6fGiErqL3QJquGQ6dUu9wJzE,11336
- zeusdb_vector_database-0.1.2.dist-info/licenses/NOTICE,sha256=GDGZ9V3p4Uvaj-1RT9Pbeczps-rSeZz8q8wSxb_Q13o,971
- zeusdb_vector_database/__init__.py,sha256=k6Jt99gmCjTCce4CfzFmZcFt1JbICHK4ya5KDJwN3js,202
- zeusdb_vector_database/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- zeusdb_vector_database/vector_database.py,sha256=7Gs9FiasO5suqc3E8mxxtW1ORIXBh-8AXvp0pQo1VYc,3463
- zeusdb_vector_database/zeusdb_vector_database.cpython-311-darwin.so,sha256=WxjyirwYhfOWZnt6WySKvkkYUjgnv2UGpUZyvlT-17I,3985776
- zeusdb_vector_database-0.1.2.dist-info/RECORD,,