zeusdb-vector-database 0.2.0__tar.gz → 0.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {zeusdb_vector_database-0.2.0 → zeusdb_vector_database-0.2.1}/PKG-INFO +42 -4
- {zeusdb_vector_database-0.2.0 → zeusdb_vector_database-0.2.1}/README.md +41 -3
- {zeusdb_vector_database-0.2.0 → zeusdb_vector_database-0.2.1}/pyproject.toml +1 -1
- {zeusdb_vector_database-0.2.0 → zeusdb_vector_database-0.2.1}/src/zeusdb_vector_database/__init__.py +1 -1
- {zeusdb_vector_database-0.2.0 → zeusdb_vector_database-0.2.1}/src/zeusdb_vector_database/vector_database.py +47 -14
- {zeusdb_vector_database-0.2.0 → zeusdb_vector_database-0.2.1}/vdb-core/Cargo.lock +37 -4
- {zeusdb_vector_database-0.2.0 → zeusdb_vector_database-0.2.1}/vdb-core/Cargo.toml +2 -1
- {zeusdb_vector_database-0.2.0 → zeusdb_vector_database-0.2.1}/vdb-core/src/hnsw_index.rs +375 -23
- {zeusdb_vector_database-0.2.0 → zeusdb_vector_database-0.2.1}/LICENSE +0 -0
- {zeusdb_vector_database-0.2.0 → zeusdb_vector_database-0.2.1}/NOTICE +0 -0
- {zeusdb_vector_database-0.2.0 → zeusdb_vector_database-0.2.1}/src/zeusdb_vector_database/py.typed +0 -0
- {zeusdb_vector_database-0.2.0 → zeusdb_vector_database-0.2.1}/vdb-core/src/lib.rs +0 -0
- {zeusdb_vector_database-0.2.0 → zeusdb_vector_database-0.2.1}/vdb-core/src/pq.rs +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: zeusdb-vector-database
|
3
|
-
Version: 0.2.0
|
3
|
+
Version: 0.2.1
|
4
4
|
Classifier: Programming Language :: Rust
|
5
5
|
Classifier: Programming Language :: Python :: Implementation :: CPython
|
6
6
|
Requires-Dist: numpy>=2.2.6,<3.0.0
|
@@ -594,12 +594,13 @@ To enable PQ, pass a `quantization_config` dictionary to the `.create()` index m
|
|
594
594
|
| `bits` | `int` | Bits per quantized code (controls centroids per subvector) | 1-8 | `8` |
|
595
595
|
| `training_size` | `int` | Minimum vectors needed for stable k-means clustering | ≥ 1000 | 1000 |
|
596
596
|
| `max_training_vectors` | `int` | Maximum vectors used during training (optional limit) | ≥ training_size | `None` |
|
597
|
+
| `storage_mode` | `str` | Storage strategy: "quantized_only" (memory optimized) or "quantized_with_raw" (keep raw vectors for exact reconstruction) | "quantized_only", "quantized_with_raw" | `"quantized_only"` |
|
597
598
|
|
598
599
|
|
599
600
|
<br/>
|
600
601
|
|
601
602
|
|
602
|
-
### 🔧 Usage Example
|
603
|
+
### 🔧 Usage Example 1
|
603
604
|
|
604
605
|
```python
|
605
606
|
from zeusdb_vector_database import VectorDatabase
|
@@ -665,6 +666,36 @@ Results
|
|
665
666
|
{'id': 'doc_8148', 'score': 0.5139288306236267, 'metadata': {'category': 'tech', 'year': 2026}},
|
666
667
|
{'id': 'doc_7822', 'score': 0.5151920914649963, 'metadata': {'category': 'tech', 'year': 2026}},
|
667
668
|
]
|
669
|
+
```
|
670
|
+
<br />
|
671
|
+
|
672
|
+
### 🔧 Usage Example 2 - with explicit storage mode
|
673
|
+
|
674
|
+
```python
|
675
|
+
from zeusdb_vector_database import VectorDatabase
|
676
|
+
import numpy as np
|
677
|
+
|
678
|
+
# Create index with product quantization
|
679
|
+
vdb = VectorDatabase()
|
680
|
+
|
681
|
+
# Configure quantization for memory efficiency
|
682
|
+
quantization_config = {
|
683
|
+
'type': 'pq', # `pq` for Product Quantization
|
684
|
+
'subvectors': 8, # Divide 3072-dim vectors into 8 subvectors of 384 dims each
|
685
|
+
'bits': 8, # 256 centroids per subvector (2^8)
|
686
|
+
'training_size': 10000, # Train when 10k vectors are collected
|
687
|
+
'max_training_vectors': 50000, # Use max 50k vectors for training
|
688
|
+
'storage_mode': 'quantized_only' # Explicitly set storage mode to only keep quantized values
|
689
|
+
}
|
690
|
+
|
691
|
+
# Create index with quantization
|
692
|
+
# This will automatically handle training when enough vectors are added
|
693
|
+
index = vdb.create(
|
694
|
+
index_type="hnsw",
|
695
|
+
dim=3072, # OpenAI `text-embedding-3-large` dimension
|
696
|
+
quantization_config=quantization_config # Add the compression configuration
|
697
|
+
)
|
698
|
+
|
668
699
|
```
|
669
700
|
|
670
701
|
<br />
|
@@ -677,7 +708,8 @@ quantization_config = {
|
|
677
708
|
'type': 'pq',
|
678
709
|
'subvectors': 8, # Balanced: moderate compression, good accuracy
|
679
710
|
'bits': 8, # 256 centroids per subvector (high precision)
|
680
|
-
'training_size': 10000 # Or higher for large datasets
|
711
|
+
'training_size': 10000, # Or higher for large datasets
|
712
|
+
'storage_mode': 'quantized_only' # Default, memory efficient
|
681
713
|
}
|
682
714
|
# Achieves ~16x–32x compression with strong recall for most applications
|
683
715
|
```
|
@@ -689,7 +721,8 @@ quantization_config = {
|
|
689
721
|
'type': 'pq',
|
690
722
|
'subvectors': 16, # More subvectors = better compression
|
691
723
|
'bits': 6, # Fewer bits = less memory per centroid
|
692
|
-
'training_size': 20000
|
724
|
+
'training_size': 20000,
|
725
|
+
'storage_mode': 'quantized_only'
|
693
726
|
}
|
694
727
|
# Achieves ~32x compression ratio
|
695
728
|
```
|
@@ -701,6 +734,7 @@ quantization_config = {
|
|
701
734
|
'subvectors': 4, # Fewer subvectors = better accuracy
|
702
735
|
'bits': 8, # More bits = more precise quantization
|
703
736
|
'training_size': 50000, # More training data = better centroids
|
737
|
+
'storage_mode': 'quantized_with_raw' # Keep raw vectors for exact recall
|
704
738
|
}
|
705
739
|
# Achieves ~4x compression ratio with minimal accuracy loss
|
706
740
|
```
|
@@ -714,6 +748,10 @@ quantization_config = {
|
|
714
748
|
|
715
749
|
Quantization is ideal for production deployments with large vector datasets (100k+ vectors) where memory efficiency is critical.
|
716
750
|
|
751
|
+
`"quantized_only"` is recommended for most use cases and maximizes memory savings.
|
752
|
+
|
753
|
+
`"quantized_with_raw"` keeps both quantized and raw vectors for exact reconstruction, but uses more memory.
|
754
|
+
|
717
755
|
|
718
756
|
<br/>
|
719
757
|
|
@@ -575,12 +575,13 @@ To enable PQ, pass a `quantization_config` dictionary to the `.create()` index m
|
|
575
575
|
| `bits` | `int` | Bits per quantized code (controls centroids per subvector) | 1-8 | `8` |
|
576
576
|
| `training_size` | `int` | Minimum vectors needed for stable k-means clustering | ≥ 1000 | 1000 |
|
577
577
|
| `max_training_vectors` | `int` | Maximum vectors used during training (optional limit) | ≥ training_size | `None` |
|
578
|
+
| `storage_mode` | `str` | Storage strategy: "quantized_only" (memory optimized) or "quantized_with_raw" (keep raw vectors for exact reconstruction) | "quantized_only", "quantized_with_raw" | `"quantized_only"` |
|
578
579
|
|
579
580
|
|
580
581
|
<br/>
|
581
582
|
|
582
583
|
|
583
|
-
### 🔧 Usage Example
|
584
|
+
### 🔧 Usage Example 1
|
584
585
|
|
585
586
|
```python
|
586
587
|
from zeusdb_vector_database import VectorDatabase
|
@@ -646,6 +647,36 @@ Results
|
|
646
647
|
{'id': 'doc_8148', 'score': 0.5139288306236267, 'metadata': {'category': 'tech', 'year': 2026}},
|
647
648
|
{'id': 'doc_7822', 'score': 0.5151920914649963, 'metadata': {'category': 'tech', 'year': 2026}},
|
648
649
|
]
|
650
|
+
```
|
651
|
+
<br />
|
652
|
+
|
653
|
+
### 🔧 Usage Example 2 - with explicit storage mode
|
654
|
+
|
655
|
+
```python
|
656
|
+
from zeusdb_vector_database import VectorDatabase
|
657
|
+
import numpy as np
|
658
|
+
|
659
|
+
# Create index with product quantization
|
660
|
+
vdb = VectorDatabase()
|
661
|
+
|
662
|
+
# Configure quantization for memory efficiency
|
663
|
+
quantization_config = {
|
664
|
+
'type': 'pq', # `pq` for Product Quantization
|
665
|
+
'subvectors': 8, # Divide 3072-dim vectors into 8 subvectors of 384 dims each
|
666
|
+
'bits': 8, # 256 centroids per subvector (2^8)
|
667
|
+
'training_size': 10000, # Train when 10k vectors are collected
|
668
|
+
'max_training_vectors': 50000, # Use max 50k vectors for training
|
669
|
+
'storage_mode': 'quantized_only' # Explicitly set storage mode to only keep quantized values
|
670
|
+
}
|
671
|
+
|
672
|
+
# Create index with quantization
|
673
|
+
# This will automatically handle training when enough vectors are added
|
674
|
+
index = vdb.create(
|
675
|
+
index_type="hnsw",
|
676
|
+
dim=3072, # OpenAI `text-embedding-3-large` dimension
|
677
|
+
quantization_config=quantization_config # Add the compression configuration
|
678
|
+
)
|
679
|
+
|
649
680
|
```
|
650
681
|
|
651
682
|
<br />
|
@@ -658,7 +689,8 @@ quantization_config = {
|
|
658
689
|
'type': 'pq',
|
659
690
|
'subvectors': 8, # Balanced: moderate compression, good accuracy
|
660
691
|
'bits': 8, # 256 centroids per subvector (high precision)
|
661
|
-
'training_size': 10000 # Or higher for large datasets
|
692
|
+
'training_size': 10000, # Or higher for large datasets
|
693
|
+
'storage_mode': 'quantized_only' # Default, memory efficient
|
662
694
|
}
|
663
695
|
# Achieves ~16x–32x compression with strong recall for most applications
|
664
696
|
```
|
@@ -670,7 +702,8 @@ quantization_config = {
|
|
670
702
|
'type': 'pq',
|
671
703
|
'subvectors': 16, # More subvectors = better compression
|
672
704
|
'bits': 6, # Fewer bits = less memory per centroid
|
673
|
-
'training_size': 20000
|
705
|
+
'training_size': 20000,
|
706
|
+
'storage_mode': 'quantized_only'
|
674
707
|
}
|
675
708
|
# Achieves ~32x compression ratio
|
676
709
|
```
|
@@ -682,6 +715,7 @@ quantization_config = {
|
|
682
715
|
'subvectors': 4, # Fewer subvectors = better accuracy
|
683
716
|
'bits': 8, # More bits = more precise quantization
|
684
717
|
'training_size': 50000, # More training data = better centroids
|
718
|
+
'storage_mode': 'quantized_with_raw' # Keep raw vectors for exact recall
|
685
719
|
}
|
686
720
|
# Achieves ~4x compression ratio with minimal accuracy loss
|
687
721
|
```
|
@@ -695,6 +729,10 @@ quantization_config = {
|
|
695
729
|
|
696
730
|
Quantization is ideal for production deployments with large vector datasets (100k+ vectors) where memory efficiency is critical.
|
697
731
|
|
732
|
+
`"quantized_only"` is recommended for most use cases and maximizes memory savings.
|
733
|
+
|
734
|
+
`"quantized_with_raw"` keeps both quantized and raw vectors for exact reconstruction, but uses more memory.
|
735
|
+
|
698
736
|
|
699
737
|
<br/>
|
700
738
|
|
@@ -56,7 +56,8 @@ class VectorDatabase:
|
|
56
56
|
'subvectors': 8, # Number of subvectors (must divide dim evenly, default: 8)
|
57
57
|
'bits': 8, # Bits per subvector (1-8, controls centroids, default: 8)
|
58
58
|
'training_size': None, # Auto-calculated based on subvectors & bits (or specify manually)
|
59
|
-
'max_training_vectors': None # Optional limit on training vectors used
|
59
|
+
'max_training_vectors': None, # Optional limit on training vectors used
|
60
|
+
'storage_mode': 'quantized_only' # Storage mode for quantized vectors (or 'quantized_with_raw')
|
60
61
|
}
|
61
62
|
|
62
63
|
Note: Quantization reduces memory usage (typically 4-32x compression) but may
|
@@ -88,7 +89,8 @@ class VectorDatabase:
|
|
88
89
|
'type': 'pq',
|
89
90
|
'subvectors': 16, # More subvectors = better compression
|
90
91
|
'bits': 6, # Fewer bits = less memory per centroid
|
91
|
-
'training_size': 75000 # Override auto-calculation
|
92
|
+
'training_size': 75000, # Override auto-calculation
|
93
|
+
'storage_mode': 'quantized_only' # Only store quantized vectors
|
92
94
|
}
|
93
95
|
index = vdb.create(
|
94
96
|
index_type="hnsw",
|
@@ -126,11 +128,12 @@ class VectorDatabase:
|
|
126
128
|
|
127
129
|
try:
|
128
130
|
# Always pass quantization_config parameter
|
129
|
-
clean_config = None
|
130
131
|
if quantization_config is not None:
|
131
|
-
#
|
132
|
-
clean_config = {k: v for k, v in quantization_config.items() if not k.startswith('_')}
|
133
|
-
|
132
|
+
# Remove keys with None values and internal keys
|
133
|
+
clean_config = {k: v for k, v in quantization_config.items() if not k.startswith('_') and v is not None}
|
134
|
+
else:
|
135
|
+
clean_config = None
|
136
|
+
|
134
137
|
return constructor(quantization_config=clean_config, **kwargs)
|
135
138
|
except Exception as e:
|
136
139
|
raise RuntimeError(f"Failed to create {index_type.upper()} index: {e}") from e
|
@@ -172,7 +175,7 @@ class VectorDatabase:
|
|
172
175
|
if dim % subvectors != 0:
|
173
176
|
raise ValueError(
|
174
177
|
f"subvectors ({subvectors}) must divide dimension ({dim}) evenly. "
|
175
|
-
f"Consider using subvectors: {self._suggest_subvector_divisors(dim)}"
|
178
|
+
f"Consider using subvectors: {', '.join(map(str, self._suggest_subvector_divisors(dim)))}"
|
176
179
|
)
|
177
180
|
|
178
181
|
if subvectors > dim:
|
@@ -206,9 +209,38 @@ class VectorDatabase:
|
|
206
209
|
)
|
207
210
|
validated_config['max_training_vectors'] = max_training_vectors
|
208
211
|
|
212
|
+
# Validate storage mode
|
213
|
+
storage_mode = str(validated_config.get('storage_mode', 'quantized_only')).lower()
|
214
|
+
valid_modes = {'quantized_only', 'quantized_with_raw'}
|
215
|
+
if storage_mode not in valid_modes:
|
216
|
+
raise ValueError(
|
217
|
+
f"Invalid storage_mode: '{storage_mode}'. Supported modes: {', '.join(sorted(valid_modes))}"
|
218
|
+
)
|
219
|
+
|
220
|
+
validated_config['storage_mode'] = storage_mode
|
221
|
+
|
209
222
|
# Calculate and warn about memory usage
|
210
223
|
self._check_memory_usage(validated_config, dim)
|
224
|
+
|
225
|
+
# Add helpful warnings about storage mode
|
226
|
+
if storage_mode == 'quantized_with_raw':
|
227
|
+
import warnings
|
228
|
+
compression_ratio = validated_config.get('__memory_info__', {}).get('compression_ratio', 1.0)
|
229
|
+
warnings.warn(
|
230
|
+
f"storage_mode='quantized_with_raw' will use ~{compression_ratio:.1f}x more memory "
|
231
|
+
f"than 'quantized_only' but enables exact vector reconstruction.",
|
232
|
+
UserWarning,
|
233
|
+
stacklevel=2
|
234
|
+
)
|
211
235
|
|
236
|
+
# Final safety check: ensure all expected keys are present
|
237
|
+
# This is purely defensive: all of these keys should already be set above, but defaults are applied here just in case
|
238
|
+
validated_config.setdefault('type', 'pq')
|
239
|
+
validated_config.setdefault('subvectors', 8)
|
240
|
+
validated_config.setdefault('bits', 8)
|
241
|
+
validated_config.setdefault('max_training_vectors', None)
|
242
|
+
validated_config.setdefault('storage_mode', 'quantized_only')
|
243
|
+
|
212
244
|
return validated_config
|
213
245
|
|
214
246
|
def _calculate_smart_training_size(self, subvectors: int, bits: int) -> int:
|
@@ -236,13 +268,14 @@ class VectorDatabase:
|
|
236
268
|
|
237
269
|
return min(max(statistical_minimum, reasonable_minimum), reasonable_maximum)
|
238
270
|
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
for i in range(1, min(33, dim + 1))
|
243
|
-
|
244
|
-
|
245
|
-
|
271
|
+
|
272
|
+
def _suggest_subvector_divisors(self, dim: int) -> list[int]:
|
273
|
+
"""Return valid subvector counts that divide the dimension evenly (up to 32)."""
|
274
|
+
return [i for i in range(1, min(33, dim + 1)) if dim % i == 0]
|
275
|
+
|
276
|
+
|
277
|
+
|
278
|
+
|
246
279
|
|
247
280
|
def _check_memory_usage(self, config: Dict[str, Any], dim: int) -> None:
|
248
281
|
"""
|
@@ -105,6 +105,26 @@ dependencies = [
|
|
105
105
|
"serde",
|
106
106
|
]
|
107
107
|
|
108
|
+
[[package]]
|
109
|
+
name = "bincode"
|
110
|
+
version = "2.0.1"
|
111
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
112
|
+
checksum = "36eaf5d7b090263e8150820482d5d93cd964a81e4019913c972f4edcc6edb740"
|
113
|
+
dependencies = [
|
114
|
+
"bincode_derive",
|
115
|
+
"serde",
|
116
|
+
"unty",
|
117
|
+
]
|
118
|
+
|
119
|
+
[[package]]
|
120
|
+
name = "bincode_derive"
|
121
|
+
version = "2.0.1"
|
122
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
123
|
+
checksum = "bf95709a440f45e986983918d0e8a1f30a9b1df04918fc828670606804ac3c09"
|
124
|
+
dependencies = [
|
125
|
+
"virtue",
|
126
|
+
]
|
127
|
+
|
108
128
|
[[package]]
|
109
129
|
name = "bitflags"
|
110
130
|
version = "1.3.2"
|
@@ -282,7 +302,7 @@ checksum = "b53dc5b9b07424143d016ba843c9b510f424e239118697f5d5d582f2d437df41"
|
|
282
302
|
dependencies = [
|
283
303
|
"anndists",
|
284
304
|
"anyhow",
|
285
|
-
"bincode",
|
305
|
+
"bincode 1.3.3",
|
286
306
|
"cfg-if",
|
287
307
|
"cpu-time",
|
288
308
|
"env_logger",
|
@@ -728,9 +748,9 @@ dependencies = [
|
|
728
748
|
|
729
749
|
[[package]]
|
730
750
|
name = "redox_syscall"
|
731
|
-
version = "0.5.
|
751
|
+
version = "0.5.17"
|
732
752
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
733
|
-
checksum = "
|
753
|
+
checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77"
|
734
754
|
dependencies = [
|
735
755
|
"bitflags 2.9.1",
|
736
756
|
]
|
@@ -892,12 +912,24 @@ version = "0.2.4"
|
|
892
912
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
893
913
|
checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3"
|
894
914
|
|
915
|
+
[[package]]
|
916
|
+
name = "unty"
|
917
|
+
version = "0.0.4"
|
918
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
919
|
+
checksum = "6d49784317cd0d1ee7ec5c716dd598ec5b4483ea832a2dced265471cc0f690ae"
|
920
|
+
|
895
921
|
[[package]]
|
896
922
|
name = "utf8parse"
|
897
923
|
version = "0.2.2"
|
898
924
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
899
925
|
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
|
900
926
|
|
927
|
+
[[package]]
|
928
|
+
name = "virtue"
|
929
|
+
version = "0.0.18"
|
930
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
931
|
+
checksum = "051eb1abcf10076295e815102942cc58f9d5e3b4560e46e53c21e8ff6f3af7b1"
|
932
|
+
|
901
933
|
[[package]]
|
902
934
|
name = "walkdir"
|
903
935
|
version = "2.5.0"
|
@@ -1124,8 +1156,9 @@ dependencies = [
|
|
1124
1156
|
|
1125
1157
|
[[package]]
|
1126
1158
|
name = "zeusdb-vector-database"
|
1127
|
-
version = "0.2.0"
|
1159
|
+
version = "0.2.1"
|
1128
1160
|
dependencies = [
|
1161
|
+
"bincode 2.0.1",
|
1129
1162
|
"hnsw_rs",
|
1130
1163
|
"numpy",
|
1131
1164
|
"pyo3",
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[package]
|
2
2
|
name = "zeusdb-vector-database"
|
3
|
-
version = "0.2.0"
|
3
|
+
version = "0.2.1"
|
4
4
|
edition = "2021"
|
5
5
|
resolver = "2" # <-- Avoid compiling unnecessary features from dependencies.
|
6
6
|
|
@@ -17,6 +17,7 @@ serde_json = "1.0"
|
|
17
17
|
serde = { version = "1.0", features = ["derive"] }
|
18
18
|
rayon = "1.10"
|
19
19
|
rand = "0.9.1"
|
20
|
+
bincode = "2.0.1"
|
20
21
|
|
21
22
|
[profile.release]
|
22
23
|
lto = true # <-- Enable Link-Time Optimization
|
@@ -6,6 +6,7 @@ use std::sync::{Mutex, RwLock, Arc};
|
|
6
6
|
use hnsw_rs::prelude::{Hnsw, DistCosine, DistL2, DistL1, Distance};
|
7
7
|
use serde_json::Value;
|
8
8
|
use rayon::prelude::*;
|
9
|
+
use serde::{Serialize, Deserialize};
|
9
10
|
|
10
11
|
// Import PQ module
|
11
12
|
use crate::pq::PQ;
|
@@ -24,15 +25,54 @@ macro_rules! debug_log {
|
|
24
25
|
}
|
25
26
|
|
26
27
|
|
27
|
-
|
28
|
-
|
28
|
+
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
29
|
+
pub enum StorageMode {
|
30
|
+
#[serde(rename = "quantized_only")]
|
31
|
+
QuantizedOnly,
|
32
|
+
|
33
|
+
#[serde(rename = "quantized_with_raw")]
|
34
|
+
QuantizedWithRaw,
|
35
|
+
}
|
36
|
+
|
37
|
+
impl StorageMode {
|
38
|
+
pub fn from_string(s: &str) -> Result<Self, String> {
|
39
|
+
match s {
|
40
|
+
"quantized_only" => Ok(StorageMode::QuantizedOnly),
|
41
|
+
"quantized_with_raw" => Ok(StorageMode::QuantizedWithRaw),
|
42
|
+
_ => Err(format!(
|
43
|
+
"Invalid storage_mode: '{}'. Supported: quantized_only, quantized_with_raw",
|
44
|
+
s
|
45
|
+
))
|
46
|
+
}
|
47
|
+
}
|
48
|
+
|
49
|
+
pub fn to_string(&self) -> &'static str {
|
50
|
+
match self {
|
51
|
+
StorageMode::QuantizedOnly => "quantized_only",
|
52
|
+
StorageMode::QuantizedWithRaw => "quantized_with_raw",
|
53
|
+
}
|
54
|
+
}
|
55
|
+
}
|
56
|
+
|
57
|
+
impl Default for StorageMode {
|
58
|
+
fn default() -> Self {
|
59
|
+
StorageMode::QuantizedOnly
|
60
|
+
}
|
61
|
+
}
|
62
|
+
|
63
|
+
|
64
|
+
// Updated QuantizationConfig structure
|
65
|
+
#[derive(Debug, Clone, Serialize, Deserialize)]
|
29
66
|
pub struct QuantizationConfig {
|
30
67
|
pub subvectors: usize,
|
31
68
|
pub bits: usize,
|
32
69
|
pub training_size: usize,
|
33
70
|
pub max_training_vectors: Option<usize>,
|
71
|
+
pub storage_mode: StorageMode,
|
34
72
|
}
|
35
73
|
|
74
|
+
|
75
|
+
|
36
76
|
/// Custom distance function for Product Quantization using ADC
|
37
77
|
#[derive(Clone)]
|
38
78
|
pub struct DistPQ {
|
@@ -435,6 +475,16 @@ impl HNSWIndex {
|
|
435
475
|
let max_training_vectors = config.get_item("max_training_vectors")?
|
436
476
|
.map(|v| v.extract::<usize>())
|
437
477
|
.transpose()?;
|
478
|
+
|
479
|
+
// Extract storage_mode
|
480
|
+
let storage_mode_str = config.get_item("storage_mode")?
|
481
|
+
.map(|v| v.extract::<String>())
|
482
|
+
.transpose()?
|
483
|
+
.unwrap_or_else(|| "quantized_only".to_string());
|
484
|
+
|
485
|
+
let storage_mode = StorageMode::from_string(&storage_mode_str)
|
486
|
+
.map_err(|e| PyErr::new::<pyo3::exceptions::PyValueError, _>(e))?;
|
487
|
+
|
438
488
|
|
439
489
|
// Validate PQ parameters
|
440
490
|
if dim % subvectors != 0 {
|
@@ -460,6 +510,7 @@ impl HNSWIndex {
|
|
460
510
|
bits,
|
461
511
|
training_size,
|
462
512
|
max_training_vectors,
|
513
|
+
storage_mode,
|
463
514
|
};
|
464
515
|
|
465
516
|
// Create PQ instance
|
@@ -941,7 +992,57 @@ impl HNSWIndex {
|
|
941
992
|
|
942
993
|
|
943
994
|
|
944
|
-
/// Get records by ID(s) with PQ reconstruction support
|
995
|
+
// /// Get records by ID(s) with PQ reconstruction support
|
996
|
+
// #[pyo3(signature = (input, return_vector = true))]
|
997
|
+
// pub fn get_records(&self, py: Python<'_>, input: &Bound<PyAny>, return_vector: bool) -> PyResult<Vec<Py<PyDict>>> {
|
998
|
+
// let ids: Vec<String> = if let Ok(id_str) = input.extract::<String>() {
|
999
|
+
// vec![id_str]
|
1000
|
+
// } else if let Ok(id_list) = input.extract::<Vec<String>>() {
|
1001
|
+
// id_list
|
1002
|
+
// } else {
|
1003
|
+
// return Err(PyErr::new::<pyo3::exceptions::PyTypeError, _>(
|
1004
|
+
// "Expected a string or a list of strings for ID(s)",
|
1005
|
+
// ));
|
1006
|
+
// };
|
1007
|
+
|
1008
|
+
// let mut records = Vec::with_capacity(ids.len());
|
1009
|
+
|
1010
|
+
// // Use read locks for concurrent access
|
1011
|
+
// let vectors = self.vectors.read().unwrap();
|
1012
|
+
// let pq_codes = self.pq_codes.read().unwrap();
|
1013
|
+
// let vector_metadata = self.vector_metadata.read().unwrap();
|
1014
|
+
|
1015
|
+
// for id in ids {
|
1016
|
+
// if let Some(vector) = vectors.get(&id) {
|
1017
|
+
// let metadata = vector_metadata.get(&id).cloned().unwrap_or_default();
|
1018
|
+
|
1019
|
+
// let dict = PyDict::new(py);
|
1020
|
+
// dict.set_item("id", id.clone())?;
|
1021
|
+
// dict.set_item("metadata", self.value_map_to_python(&metadata, py)?)?;
|
1022
|
+
|
1023
|
+
// if return_vector {
|
1024
|
+
// // Try raw vector first, then PQ reconstruction
|
1025
|
+
// let vector_data = if !vector.is_empty() {
|
1026
|
+
// vector.clone()
|
1027
|
+
// } else if let (Some(pq), Some(codes)) = (&self.pq, pq_codes.get(&id)) {
|
1028
|
+
// pq.reconstruct(codes).unwrap_or_else(|_| vector.clone())
|
1029
|
+
// } else {
|
1030
|
+
// vector.clone()
|
1031
|
+
// };
|
1032
|
+
|
1033
|
+
// dict.set_item("vector", vector_data)?;
|
1034
|
+
// }
|
1035
|
+
|
1036
|
+
// records.push(dict.into());
|
1037
|
+
// }
|
1038
|
+
// }
|
1039
|
+
|
1040
|
+
// Ok(records)
|
1041
|
+
// }
|
1042
|
+
|
1043
|
+
|
1044
|
+
|
1045
|
+
/// Get records by ID(s) with PQ reconstruction support and storage mode awareness
|
945
1046
|
#[pyo3(signature = (input, return_vector = true))]
|
946
1047
|
pub fn get_records(&self, py: Python<'_>, input: &Bound<PyAny>, return_vector: bool) -> PyResult<Vec<Py<PyDict>>> {
|
947
1048
|
let ids: Vec<String> = if let Ok(id_str) = input.extract::<String>() {
|
@@ -955,14 +1056,17 @@ impl HNSWIndex {
|
|
955
1056
|
};
|
956
1057
|
|
957
1058
|
let mut records = Vec::with_capacity(ids.len());
|
958
|
-
|
1059
|
+
|
959
1060
|
// Use read locks for concurrent access
|
960
1061
|
let vectors = self.vectors.read().unwrap();
|
961
1062
|
let pq_codes = self.pq_codes.read().unwrap();
|
962
1063
|
let vector_metadata = self.vector_metadata.read().unwrap();
|
963
1064
|
|
964
1065
|
for id in ids {
|
965
|
-
if let Some(vector) = vectors.get(&id) {
|
1066
|
+
// Check if this ID exists in either storage
|
1067
|
+
let exists = vectors.contains_key(&id) || pq_codes.contains_key(&id);
|
1068
|
+
|
1069
|
+
if exists {
|
966
1070
|
let metadata = vector_metadata.get(&id).cloned().unwrap_or_default();
|
967
1071
|
|
968
1072
|
let dict = PyDict::new(py);
|
@@ -970,16 +1074,27 @@ impl HNSWIndex {
|
|
970
1074
|
dict.set_item("metadata", self.value_map_to_python(&metadata, py)?)?;
|
971
1075
|
|
972
1076
|
if return_vector {
|
973
|
-
// Try raw vector first, then PQ reconstruction
|
974
|
-
let vector_data = if !vector.is_empty() {
|
975
|
-
vector.clone()
|
1077
|
+
// Priority: raw vector > PQ reconstruction
|
1078
|
+
let vector_data = if let Some(raw_vector) = vectors.get(&id) {
|
1079
|
+
// Case 1: Raw vector available (QuantizedWithRaw mode or non-quantized)
|
1080
|
+
Some(raw_vector.clone())
|
976
1081
|
} else if let (Some(pq), Some(codes)) = (&self.pq, pq_codes.get(&id)) {
|
977
|
-
|
1082
|
+
// Case 2: Only quantized codes available (QuantizedOnly mode)
|
1083
|
+
match pq.reconstruct(codes) {
|
1084
|
+
Ok(reconstructed) => Some(reconstructed),
|
1085
|
+
Err(e) => {
|
1086
|
+
eprintln!("Warning: Failed to reconstruct vector for ID {}: {}", id, e);
|
1087
|
+
None
|
1088
|
+
}
|
1089
|
+
}
|
978
1090
|
} else {
|
979
|
-
vector.clone()
|
1091
|
+
// Case 3: No vector data available
|
1092
|
+
None
|
980
1093
|
};
|
981
|
-
|
982
|
-
|
1094
|
+
|
1095
|
+
if let Some(vec) = vector_data {
|
1096
|
+
dict.set_item("vector", vec)?;
|
1097
|
+
}
|
983
1098
|
}
|
984
1099
|
|
985
1100
|
records.push(dict.into());
|
@@ -992,7 +1107,75 @@ impl HNSWIndex {
|
|
992
1107
|
|
993
1108
|
|
994
1109
|
|
995
|
-
|
1110
|
+
|
1111
|
+
|
1112
|
+
|
1113
|
+
|
1114
|
+
|
1115
|
+
|
1116
|
+
|
1117
|
+
|
1118
|
+
// /// Enhanced get_stats with training info
|
1119
|
+
// pub fn get_stats(&self) -> HashMap<String, String> {
|
1120
|
+
// let mut stats = HashMap::new();
|
1121
|
+
|
1122
|
+
// let vectors = self.vectors.read().unwrap();
|
1123
|
+
// let pq_codes = self.pq_codes.read().unwrap();
|
1124
|
+
// let vector_count = *self.vector_count.lock().unwrap();
|
1125
|
+
// let training_ids = self.training_ids.read().unwrap();
|
1126
|
+
|
1127
|
+
// // Basic stats
|
1128
|
+
// stats.insert("total_vectors".to_string(), vector_count.to_string());
|
1129
|
+
// stats.insert("dimension".to_string(), self.dim.to_string());
|
1130
|
+
// stats.insert("expected_size".to_string(), self.expected_size.to_string());
|
1131
|
+
// stats.insert("space".to_string(), self.space.clone());
|
1132
|
+
// stats.insert("index_type".to_string(), "HNSW".to_string());
|
1133
|
+
|
1134
|
+
// stats.insert("m".to_string(), self.m.to_string());
|
1135
|
+
// stats.insert("ef_construction".to_string(), self.ef_construction.to_string());
|
1136
|
+
// stats.insert("thread_safety".to_string(), "RwLock+Mutex".to_string());
|
1137
|
+
|
1138
|
+
// // Storage breakdown
|
1139
|
+
// stats.insert("raw_vectors_stored".to_string(), vectors.len().to_string());
|
1140
|
+
// stats.insert("quantized_codes_stored".to_string(), pq_codes.len().to_string());
|
1141
|
+
|
1142
|
+
// // Training info
|
1143
|
+
// if let Some(config) = &self.quantization_config {
|
1144
|
+
// stats.insert("quantization_type".to_string(), "pq".to_string());
|
1145
|
+
// stats.insert("quantization_training_size".to_string(), config.training_size.to_string());
|
1146
|
+
|
1147
|
+
// let collected_count = training_ids.len();
|
1148
|
+
// let progress = self.get_training_progress();
|
1149
|
+
// stats.insert("training_progress".to_string(),
|
1150
|
+
// format!("{}/{} ({:.1}%)", collected_count, config.training_size, progress));
|
1151
|
+
|
1152
|
+
// let vectors_needed = self.training_vectors_needed();
|
1153
|
+
// stats.insert("training_vectors_needed".to_string(), vectors_needed.to_string());
|
1154
|
+
// stats.insert("training_threshold_reached".to_string(),
|
1155
|
+
// self.training_threshold_reached.load(Ordering::Acquire).to_string());
|
1156
|
+
|
1157
|
+
// if let Some(pq) = &self.pq {
|
1158
|
+
// let is_trained = pq.is_trained();
|
1159
|
+
// stats.insert("quantization_trained".to_string(), is_trained.to_string());
|
1160
|
+
// stats.insert("quantization_active".to_string(), self.is_quantized().to_string());
|
1161
|
+
|
1162
|
+
// if is_trained {
|
1163
|
+
// let compression_ratio = (pq.dim * 4) as f64 / pq.subvectors as f64;
|
1164
|
+
// stats.insert("quantization_compression_ratio".to_string(), format!("{:.1}x", compression_ratio));
|
1165
|
+
// }
|
1166
|
+
// }
|
1167
|
+
// } else {
|
1168
|
+
// stats.insert("quantization_type".to_string(), "none".to_string());
|
1169
|
+
// }
|
1170
|
+
|
1171
|
+
// stats.insert("storage_mode".to_string(), self.get_storage_mode());
|
1172
|
+
|
1173
|
+
// stats
|
1174
|
+
// }
|
1175
|
+
|
1176
|
+
|
1177
|
+
|
1178
|
+
/// Enhanced get_stats with storage mode information
|
996
1179
|
pub fn get_stats(&self) -> HashMap<String, String> {
|
997
1180
|
let mut stats = HashMap::new();
|
998
1181
|
|
@@ -1021,16 +1204,37 @@ impl HNSWIndex {
|
|
1021
1204
|
stats.insert("quantization_type".to_string(), "pq".to_string());
|
1022
1205
|
stats.insert("quantization_training_size".to_string(), config.training_size.to_string());
|
1023
1206
|
|
1207
|
+
// Storage mode information
|
1208
|
+
stats.insert("storage_mode".to_string(), config.storage_mode.to_string().to_string());
|
1209
|
+
|
1210
|
+
// Calculate actual memory usage based on storage mode
|
1211
|
+
let raw_memory_mb = (vectors.len() * self.dim * 4) as f64 / (1024.0 * 1024.0);
|
1212
|
+
let quantized_memory_mb = (pq_codes.len() * config.subvectors) as f64 / (1024.0 * 1024.0);
|
1213
|
+
|
1214
|
+
stats.insert("raw_vectors_memory_mb".to_string(), format!("{:.2}", raw_memory_mb));
|
1215
|
+
stats.insert("quantized_codes_memory_mb".to_string(), format!("{:.2}", quantized_memory_mb));
|
1216
|
+
|
1217
|
+
match config.storage_mode {
|
1218
|
+
StorageMode::QuantizedOnly => {
|
1219
|
+
stats.insert("storage_strategy".to_string(), "memory_optimized".to_string());
|
1220
|
+
stats.insert("memory_savings".to_string(), "maximum".to_string());
|
1221
|
+
}
|
1222
|
+
StorageMode::QuantizedWithRaw => {
|
1223
|
+
stats.insert("storage_strategy".to_string(), "quality_optimized".to_string());
|
1224
|
+
stats.insert("memory_savings".to_string(), "raw_vectors_kept".to_string());
|
1225
|
+
}
|
1226
|
+
}
|
1227
|
+
|
1024
1228
|
let collected_count = training_ids.len();
|
1025
1229
|
let progress = self.get_training_progress();
|
1026
|
-
stats.insert("training_progress".to_string(),
|
1230
|
+
stats.insert("training_progress".to_string(),
|
1027
1231
|
format!("{}/{} ({:.1}%)", collected_count, config.training_size, progress));
|
1028
1232
|
|
1029
1233
|
let vectors_needed = self.training_vectors_needed();
|
1030
1234
|
stats.insert("training_vectors_needed".to_string(), vectors_needed.to_string());
|
1031
|
-
stats.insert("training_threshold_reached".to_string(),
|
1235
|
+
stats.insert("training_threshold_reached".to_string(),
|
1032
1236
|
self.training_threshold_reached.load(Ordering::Acquire).to_string());
|
1033
|
-
|
1237
|
+
|
1034
1238
|
if let Some(pq) = &self.pq {
|
1035
1239
|
let is_trained = pq.is_trained();
|
1036
1240
|
stats.insert("quantization_trained".to_string(), is_trained.to_string());
|
@@ -1043,12 +1247,68 @@ impl HNSWIndex {
|
|
1043
1247
|
}
|
1044
1248
|
} else {
|
1045
1249
|
stats.insert("quantization_type".to_string(), "none".to_string());
|
1250
|
+
stats.insert("storage_mode".to_string(), "raw_only".to_string());
|
1046
1251
|
}
|
1047
1252
|
|
1048
|
-
stats.insert("
|
1253
|
+
stats.insert("storage_mode_description".to_string(), self.get_storage_mode());
|
1049
1254
|
|
1050
1255
|
stats
|
1051
1256
|
}
|
1257
|
+
|
1258
|
+
|
1259
|
+
|
1260
|
+
|
1261
|
+
|
1262
|
+
|
1263
|
+
|
1264
|
+
|
1265
|
+
|
1266
|
+
|
1267
|
+
|
1268
|
+
|
1269
|
+
|
1270
|
+
|
1271
|
+
|
1272
|
+
|
1273
|
+
|
1274
|
+
|
1275
|
+
|
1276
|
+
|
1277
|
+
|
1278
|
+
|
1279
|
+
|
1280
|
+
|
1281
|
+
|
1282
|
+
|
1283
|
+
|
1284
|
+
|
1285
|
+
|
1286
|
+
|
1287
|
+
|
1288
|
+
|
1289
|
+
|
1290
|
+
|
1291
|
+
|
1292
|
+
|
1293
|
+
|
1294
|
+
|
1295
|
+
|
1296
|
+
|
1297
|
+
|
1298
|
+
|
1299
|
+
|
1300
|
+
|
1301
|
+
|
1302
|
+
|
1303
|
+
|
1304
|
+
|
1305
|
+
|
1306
|
+
|
1307
|
+
|
1308
|
+
|
1309
|
+
|
1310
|
+
|
1311
|
+
|
1052
1312
|
|
1053
1313
|
|
1054
1314
|
|
@@ -1413,7 +1673,61 @@ impl HNSWIndex {
|
|
1413
1673
|
Ok(())
|
1414
1674
|
}
|
1415
1675
|
|
1416
|
-
/// Path C: Quantized storage (trained and active)
|
1676
|
+
// /// Path C: Quantized storage (trained and active)
|
1677
|
+
// fn add_quantized_vector(
|
1678
|
+
// &mut self,
|
1679
|
+
// id: String,
|
1680
|
+
// vector: Vec<f32>, // Already processed
|
1681
|
+
// metadata: HashMap<String, Value>
|
1682
|
+
// ) -> PyResult<()> {
|
1683
|
+
// let internal_id = self.get_next_id();
|
1684
|
+
|
1685
|
+
// // Store metadata
|
1686
|
+
// {
|
1687
|
+
// let mut vector_metadata = self.vector_metadata.write().unwrap();
|
1688
|
+
// vector_metadata.insert(id.clone(), metadata);
|
1689
|
+
// }
|
1690
|
+
|
1691
|
+
// // Update ID mappings
|
1692
|
+
// {
|
1693
|
+
// let mut id_map = self.id_map.write().unwrap();
|
1694
|
+
// let mut rev_map = self.rev_map.write().unwrap();
|
1695
|
+
|
1696
|
+
// id_map.insert(id.clone(), internal_id);
|
1697
|
+
// rev_map.insert(internal_id, id.clone());
|
1698
|
+
// }
|
1699
|
+
|
1700
|
+
// // Quantize the vector
|
1701
|
+
// let pq = self.pq.as_ref().unwrap();
|
1702
|
+
// let codes = pq.quantize(&vector).map_err(|e| {
|
1703
|
+
// PyErr::new::<pyo3::exceptions::PyRuntimeError, _>(
|
1704
|
+
// format!("Failed to quantize vector: {}", e)
|
1705
|
+
// )
|
1706
|
+
// })?;
|
1707
|
+
|
1708
|
+
// // Store quantized codes
|
1709
|
+
// {
|
1710
|
+
// let mut pq_codes = self.pq_codes.write().unwrap();
|
1711
|
+
// pq_codes.insert(id.clone(), codes.clone());
|
1712
|
+
// }
|
1713
|
+
|
1714
|
+
// // Store raw vector for exact reconstruction (persistence-ready)
|
1715
|
+
// {
|
1716
|
+
// let mut vectors = self.vectors.write().unwrap();
|
1717
|
+
// vectors.insert(id, vector.clone());
|
1718
|
+
// }
|
1719
|
+
|
1720
|
+
// // Insert codes into quantized HNSW
|
1721
|
+
// {
|
1722
|
+
// let mut hnsw_guard = self.hnsw.lock().unwrap();
|
1723
|
+
// hnsw_guard.insert_pq_codes(&codes, internal_id);
|
1724
|
+
// }
|
1725
|
+
|
1726
|
+
// Ok(())
|
1727
|
+
// }
|
1728
|
+
|
1729
|
+
|
1730
|
+
/// Path C: Quantized storage with configurable raw vector retention
|
1417
1731
|
fn add_quantized_vector(
|
1418
1732
|
&mut self,
|
1419
1733
|
id: String,
|
@@ -1445,16 +1759,19 @@ impl HNSWIndex {
|
|
1445
1759
|
)
|
1446
1760
|
})?;
|
1447
1761
|
|
1448
|
-
// Store quantized codes
|
1762
|
+
// Store quantized codes (always)
|
1449
1763
|
{
|
1450
1764
|
let mut pq_codes = self.pq_codes.write().unwrap();
|
1451
1765
|
pq_codes.insert(id.clone(), codes.clone());
|
1452
1766
|
}
|
1453
1767
|
|
1454
|
-
// Store raw vector
|
1455
|
-
{
|
1456
|
-
|
1457
|
-
|
1768
|
+
// Store raw vector only if configured to keep them
|
1769
|
+
if let Some(config) = &self.quantization_config {
|
1770
|
+
if config.storage_mode == StorageMode::QuantizedWithRaw {
|
1771
|
+
let mut vectors = self.vectors.write().unwrap();
|
1772
|
+
vectors.insert(id.clone(), vector.clone());
|
1773
|
+
}
|
1774
|
+
// If QuantizedOnly mode, we don't store raw vectors (saves memory)
|
1458
1775
|
}
|
1459
1776
|
|
1460
1777
|
// Insert codes into quantized HNSW
|
@@ -1466,6 +1783,41 @@ impl HNSWIndex {
|
|
1466
1783
|
Ok(())
|
1467
1784
|
}
|
1468
1785
|
|
1786
|
+
|
1787
|
+
|
1788
|
+
|
1789
|
+
|
1790
|
+
|
1791
|
+
|
1792
|
+
|
1793
|
+
|
1794
|
+
|
1795
|
+
|
1796
|
+
|
1797
|
+
|
1798
|
+
|
1799
|
+
|
1800
|
+
|
1801
|
+
|
1802
|
+
|
1803
|
+
|
1804
|
+
|
1805
|
+
|
1806
|
+
|
1807
|
+
|
1808
|
+
|
1809
|
+
|
1810
|
+
|
1811
|
+
|
1812
|
+
|
1813
|
+
|
1814
|
+
|
1815
|
+
|
1816
|
+
|
1817
|
+
|
1818
|
+
|
1819
|
+
|
1820
|
+
|
1469
1821
|
/// TRAINING TRIGGER: Uses threshold flag for race condition safety
|
1470
1822
|
fn maybe_trigger_training(&mut self) -> Result<(), String> {
|
1471
1823
|
// Check atomic flag first (fast path)
|
File without changes
|
File without changes
|
{zeusdb_vector_database-0.2.0 → zeusdb_vector_database-0.2.1}/src/zeusdb_vector_database/py.typed
RENAMED
File without changes
|
File without changes
|
File without changes
|