zeusdb-vector-database 0.2.0__tar.gz → 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: zeusdb-vector-database
3
- Version: 0.2.0
3
+ Version: 0.2.1
4
4
  Classifier: Programming Language :: Rust
5
5
  Classifier: Programming Language :: Python :: Implementation :: CPython
6
6
  Requires-Dist: numpy>=2.2.6,<3.0.0
@@ -594,12 +594,13 @@ To enable PQ, pass a `quantization_config` dictionary to the `.create()` index m
594
594
  | `bits` | `int` | Bits per quantized code (controls centroids per subvector) | 1-8 | `8` |
595
595
  | `training_size` | `int` | Minimum vectors needed for stable k-means clustering | ≥ 1000 | 1000 |
596
596
  | `max_training_vectors` | `int` | Maximum vectors used during training (optional limit) | ≥ training_size | `None` |
597
+ | `storage_mode` | `str` | Storage strategy: "quantized_only" (memory optimized) or "quantized_with_raw" (keep raw vectors for exact reconstruction) | "quantized_only", "quantized_with_raw" | `"quantized_only"` |
597
598
 
598
599
 
599
600
  <br/>
600
601
 
601
602
 
602
- ### 🔧 Usage Example
603
+ ### 🔧 Usage Example 1
603
604
 
604
605
  ```python
605
606
  from zeusdb_vector_database import VectorDatabase
@@ -665,6 +666,36 @@ Results
665
666
  {'id': 'doc_8148', 'score': 0.5139288306236267, 'metadata': {'category': 'tech', 'year': 2026}},
666
667
  {'id': 'doc_7822', 'score': 0.5151920914649963, 'metadata': {'category': 'tech', 'year': 2026}},
667
668
  ]
669
+ ```
670
+ <br />
671
+
672
+ ### 🔧 Usage Example 2 - with explicit storage mode
673
+
674
+ ```python
675
+ from zeusdb_vector_database import VectorDatabase
676
+ import numpy as np
677
+
678
+ # Create index with product quantization
679
+ vdb = VectorDatabase()
680
+
681
+ # Configure quantization for memory efficiency
682
+ quantization_config = {
683
+ 'type': 'pq', # `pq` for Product Quantization
684
+ 'subvectors': 8, # Divide 3072-dim vectors into 8 subvectors of 384 dims each
685
+ 'bits': 8, # 256 centroids per subvector (2^8)
686
+ 'training_size': 10000, # Train when 10k vectors are collected
687
+ 'max_training_vectors': 50000, # Use max 50k vectors for training
688
+ 'storage_mode': 'quantized_only' # Explicitly set storage mode to only keep quantized values
689
+ }
690
+
691
+ # Create index with quantization
692
+ # This will automatically handle training when enough vectors are added
693
+ index = vdb.create(
694
+ index_type="hnsw",
695
+ dim=3072, # OpenAI `text-embedding-3-large` dimension
696
+ quantization_config=quantization_config # Add the compression configuration
697
+ )
698
+
668
699
  ```
669
700
 
670
701
  <br />
@@ -677,7 +708,8 @@ quantization_config = {
677
708
  'type': 'pq',
678
709
  'subvectors': 8, # Balanced: moderate compression, good accuracy
679
710
  'bits': 8, # 256 centroids per subvector (high precision)
680
- 'training_size': 10000 # Or higher for large datasets
711
+ 'training_size': 10000, # Or higher for large datasets
712
+ 'storage_mode': 'quantized_only' # Default, memory efficient
681
713
  }
682
714
  # Achieves ~16x–32x compression with strong recall for most applications
683
715
  ```
@@ -689,7 +721,8 @@ quantization_config = {
689
721
  'type': 'pq',
690
722
  'subvectors': 16, # More subvectors = better compression
691
723
  'bits': 6, # Fewer bits = less memory per centroid
692
- 'training_size': 20000
724
+ 'training_size': 20000,
725
+ 'storage_mode': 'quantized_only'
693
726
  }
694
727
  # Achieves ~32x compression ratio
695
728
  ```
@@ -701,6 +734,7 @@ quantization_config = {
701
734
  'subvectors': 4, # Fewer subvectors = better accuracy
702
735
  'bits': 8, # More bits = more precise quantization
703
736
  'training_size': 50000, # More training data = better centroids
737
+ 'storage_mode': 'quantized_with_raw' # Keep raw vectors for exact recall
704
738
  }
705
739
  # Achieves ~4x compression ratio with minimal accuracy loss
706
740
  ```
@@ -714,6 +748,10 @@ quantization_config = {
714
748
 
715
749
  Quantization is ideal for production deployments with large vector datasets (100k+ vectors) where memory efficiency is critical.
716
750
 
751
+ `"quantized_only"` is recommended for most use cases and maximizes memory savings.
752
+
753
+ `"quantized_with_raw"` keeps both quantized and raw vectors for exact reconstruction, but uses more memory.
754
+
717
755
 
718
756
  <br/>
719
757
 
@@ -575,12 +575,13 @@ To enable PQ, pass a `quantization_config` dictionary to the `.create()` index m
575
575
  | `bits` | `int` | Bits per quantized code (controls centroids per subvector) | 1-8 | `8` |
576
576
  | `training_size` | `int` | Minimum vectors needed for stable k-means clustering | ≥ 1000 | 1000 |
577
577
  | `max_training_vectors` | `int` | Maximum vectors used during training (optional limit) | ≥ training_size | `None` |
578
+ | `storage_mode` | `str` | Storage strategy: "quantized_only" (memory optimized) or "quantized_with_raw" (keep raw vectors for exact reconstruction) | "quantized_only", "quantized_with_raw" | `"quantized_only"` |
578
579
 
579
580
 
580
581
  <br/>
581
582
 
582
583
 
583
- ### 🔧 Usage Example
584
+ ### 🔧 Usage Example 1
584
585
 
585
586
  ```python
586
587
  from zeusdb_vector_database import VectorDatabase
@@ -646,6 +647,36 @@ Results
646
647
  {'id': 'doc_8148', 'score': 0.5139288306236267, 'metadata': {'category': 'tech', 'year': 2026}},
647
648
  {'id': 'doc_7822', 'score': 0.5151920914649963, 'metadata': {'category': 'tech', 'year': 2026}},
648
649
  ]
650
+ ```
651
+ <br />
652
+
653
+ ### 🔧 Usage Example 2 - with explicit storage mode
654
+
655
+ ```python
656
+ from zeusdb_vector_database import VectorDatabase
657
+ import numpy as np
658
+
659
+ # Create index with product quantization
660
+ vdb = VectorDatabase()
661
+
662
+ # Configure quantization for memory efficiency
663
+ quantization_config = {
664
+ 'type': 'pq', # `pq` for Product Quantization
665
+ 'subvectors': 8, # Divide 3072-dim vectors into 8 subvectors of 384 dims each
666
+ 'bits': 8, # 256 centroids per subvector (2^8)
667
+ 'training_size': 10000, # Train when 10k vectors are collected
668
+ 'max_training_vectors': 50000, # Use max 50k vectors for training
669
+ 'storage_mode': 'quantized_only' # Explicitly set storage mode to only keep quantized values
670
+ }
671
+
672
+ # Create index with quantization
673
+ # This will automatically handle training when enough vectors are added
674
+ index = vdb.create(
675
+ index_type="hnsw",
676
+ dim=3072, # OpenAI `text-embedding-3-large` dimension
677
+ quantization_config=quantization_config # Add the compression configuration
678
+ )
679
+
649
680
  ```
650
681
 
651
682
  <br />
@@ -658,7 +689,8 @@ quantization_config = {
658
689
  'type': 'pq',
659
690
  'subvectors': 8, # Balanced: moderate compression, good accuracy
660
691
  'bits': 8, # 256 centroids per subvector (high precision)
661
- 'training_size': 10000 # Or higher for large datasets
692
+ 'training_size': 10000, # Or higher for large datasets
693
+ 'storage_mode': 'quantized_only' # Default, memory efficient
662
694
  }
663
695
  # Achieves ~16x–32x compression with strong recall for most applications
664
696
  ```
@@ -670,7 +702,8 @@ quantization_config = {
670
702
  'type': 'pq',
671
703
  'subvectors': 16, # More subvectors = better compression
672
704
  'bits': 6, # Fewer bits = less memory per centroid
673
- 'training_size': 20000
705
+ 'training_size': 20000,
706
+ 'storage_mode': 'quantized_only'
674
707
  }
675
708
  # Achieves ~32x compression ratio
676
709
  ```
@@ -682,6 +715,7 @@ quantization_config = {
682
715
  'subvectors': 4, # Fewer subvectors = better accuracy
683
716
  'bits': 8, # More bits = more precise quantization
684
717
  'training_size': 50000, # More training data = better centroids
718
+ 'storage_mode': 'quantized_with_raw' # Keep raw vectors for exact recall
685
719
  }
686
720
  # Achieves ~4x compression ratio with minimal accuracy loss
687
721
  ```
@@ -695,6 +729,10 @@ quantization_config = {
695
729
 
696
730
  Quantization is ideal for production deployments with large vector datasets (100k+ vectors) where memory efficiency is critical.
697
731
 
732
+ `"quantized_only"` is recommended for most use cases and maximizes memory savings.
733
+
734
+ `"quantized_with_raw"` keeps both quantized and raw vectors for exact reconstruction, but uses more memory.
735
+
698
736
 
699
737
  <br/>
700
738
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "zeusdb-vector-database"
3
- version = "0.2.0"
3
+ version = "0.2.1"
4
4
  description = "Blazing-fast vector DB with real-time similarity search and metadata filtering."
5
5
  readme = "README.md"
6
6
  authors = [
@@ -1,7 +1,7 @@
1
1
  """
2
2
  ZeusDB Vector Database Module
3
3
  """
4
- __version__ = "0.2.0"
4
+ __version__ = "0.2.1"
5
5
 
6
6
  from .vector_database import VectorDatabase # imports the VectorDatabase class from the vector_database.py file
7
7
 
@@ -56,7 +56,8 @@ class VectorDatabase:
56
56
  'subvectors': 8, # Number of subvectors (must divide dim evenly, default: 8)
57
57
  'bits': 8, # Bits per subvector (1-8, controls centroids, default: 8)
58
58
  'training_size': None, # Auto-calculated based on subvectors & bits (or specify manually)
59
- 'max_training_vectors': None # Optional limit on training vectors used
59
+ 'max_training_vectors': None, # Optional limit on training vectors used
60
+ 'storage_mode': 'quantized_only' # Storage mode for quantized vectors (or 'quantized_with_raw')
60
61
  }
61
62
 
62
63
  Note: Quantization reduces memory usage (typically 4-32x compression) but may
@@ -88,7 +89,8 @@ class VectorDatabase:
88
89
  'type': 'pq',
89
90
  'subvectors': 16, # More subvectors = better compression
90
91
  'bits': 6, # Fewer bits = less memory per centroid
91
- 'training_size': 75000 # Override auto-calculation
92
+ 'training_size': 75000, # Override auto-calculation
93
+ 'storage_mode': 'quantized_only' # Only store quantized vectors
92
94
  }
93
95
  index = vdb.create(
94
96
  index_type="hnsw",
@@ -126,11 +128,12 @@ class VectorDatabase:
126
128
 
127
129
  try:
128
130
  # Always pass quantization_config parameter
129
- clean_config = None
130
131
  if quantization_config is not None:
131
- # Clean quantization_config before passing to Rust (remove internal keys)
132
- clean_config = {k: v for k, v in quantization_config.items() if not k.startswith('_')}
133
-
132
+ # Remove keys with None values and internal keys
133
+ clean_config = {k: v for k, v in quantization_config.items() if not k.startswith('_') and v is not None}
134
+ else:
135
+ clean_config = None
136
+
134
137
  return constructor(quantization_config=clean_config, **kwargs)
135
138
  except Exception as e:
136
139
  raise RuntimeError(f"Failed to create {index_type.upper()} index: {e}") from e
@@ -172,7 +175,7 @@ class VectorDatabase:
172
175
  if dim % subvectors != 0:
173
176
  raise ValueError(
174
177
  f"subvectors ({subvectors}) must divide dimension ({dim}) evenly. "
175
- f"Consider using subvectors: {self._suggest_subvector_divisors(dim)}"
178
+ f"Consider using subvectors: {', '.join(map(str, self._suggest_subvector_divisors(dim)))}"
176
179
  )
177
180
 
178
181
  if subvectors > dim:
@@ -206,9 +209,38 @@ class VectorDatabase:
206
209
  )
207
210
  validated_config['max_training_vectors'] = max_training_vectors
208
211
 
212
+ # Validate storage mode
213
+ storage_mode = str(validated_config.get('storage_mode', 'quantized_only')).lower()
214
+ valid_modes = {'quantized_only', 'quantized_with_raw'}
215
+ if storage_mode not in valid_modes:
216
+ raise ValueError(
217
+ f"Invalid storage_mode: '{storage_mode}'. Supported modes: {', '.join(sorted(valid_modes))}"
218
+ )
219
+
220
+ validated_config['storage_mode'] = storage_mode
221
+
209
222
  # Calculate and warn about memory usage
210
223
  self._check_memory_usage(validated_config, dim)
224
+
225
+ # Add helpful warnings about storage mode
226
+ if storage_mode == 'quantized_with_raw':
227
+ import warnings
228
+ compression_ratio = validated_config.get('__memory_info__', {}).get('compression_ratio', 1.0)
229
+ warnings.warn(
230
+ f"storage_mode='quantized_with_raw' will use ~{compression_ratio:.1f}x more memory "
231
+ f"than 'quantized_only' but enables exact vector reconstruction.",
232
+ UserWarning,
233
+ stacklevel=2
234
+ )
211
235
 
236
+ # Final safety check: ensure all expected keys are present
237
+ # This is a final defensive measure: all of these keys should already be set above, but defaults are applied here just in case
238
+ validated_config.setdefault('type', 'pq')
239
+ validated_config.setdefault('subvectors', 8)
240
+ validated_config.setdefault('bits', 8)
241
+ validated_config.setdefault('max_training_vectors', None)
242
+ validated_config.setdefault('storage_mode', 'quantized_only')
243
+
212
244
  return validated_config
213
245
 
214
246
  def _calculate_smart_training_size(self, subvectors: int, bits: int) -> int:
@@ -236,13 +268,14 @@ class VectorDatabase:
236
268
 
237
269
  return min(max(statistical_minimum, reasonable_minimum), reasonable_maximum)
238
270
 
239
- def _suggest_subvector_divisors(self, dim: int) -> str:
240
- """Suggest valid subvector counts that divide the dimension evenly."""
241
- divisors = []
242
- for i in range(1, min(33, dim + 1)): # Common subvector counts up to 32
243
- if dim % i == 0:
244
- divisors.append(str(i))
245
- return ', '.join(divisors[:8]) # Show first 8 suggestions
271
+
272
+ def _suggest_subvector_divisors(self, dim: int) -> list[int]:
273
+ """Return valid subvector counts that divide the dimension evenly (up to 32)."""
274
+ return [i for i in range(1, min(33, dim + 1)) if dim % i == 0]
275
+
276
+
277
+
278
+
246
279
 
247
280
  def _check_memory_usage(self, config: Dict[str, Any], dim: int) -> None:
248
281
  """
@@ -105,6 +105,26 @@ dependencies = [
105
105
  "serde",
106
106
  ]
107
107
 
108
+ [[package]]
109
+ name = "bincode"
110
+ version = "2.0.1"
111
+ source = "registry+https://github.com/rust-lang/crates.io-index"
112
+ checksum = "36eaf5d7b090263e8150820482d5d93cd964a81e4019913c972f4edcc6edb740"
113
+ dependencies = [
114
+ "bincode_derive",
115
+ "serde",
116
+ "unty",
117
+ ]
118
+
119
+ [[package]]
120
+ name = "bincode_derive"
121
+ version = "2.0.1"
122
+ source = "registry+https://github.com/rust-lang/crates.io-index"
123
+ checksum = "bf95709a440f45e986983918d0e8a1f30a9b1df04918fc828670606804ac3c09"
124
+ dependencies = [
125
+ "virtue",
126
+ ]
127
+
108
128
  [[package]]
109
129
  name = "bitflags"
110
130
  version = "1.3.2"
@@ -282,7 +302,7 @@ checksum = "b53dc5b9b07424143d016ba843c9b510f424e239118697f5d5d582f2d437df41"
282
302
  dependencies = [
283
303
  "anndists",
284
304
  "anyhow",
285
- "bincode",
305
+ "bincode 1.3.3",
286
306
  "cfg-if",
287
307
  "cpu-time",
288
308
  "env_logger",
@@ -728,9 +748,9 @@ dependencies = [
728
748
 
729
749
  [[package]]
730
750
  name = "redox_syscall"
731
- version = "0.5.16"
751
+ version = "0.5.17"
732
752
  source = "registry+https://github.com/rust-lang/crates.io-index"
733
- checksum = "7251471db004e509f4e75a62cca9435365b5ec7bcdff530d612ac7c87c44a792"
753
+ checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77"
734
754
  dependencies = [
735
755
  "bitflags 2.9.1",
736
756
  ]
@@ -892,12 +912,24 @@ version = "0.2.4"
892
912
  source = "registry+https://github.com/rust-lang/crates.io-index"
893
913
  checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3"
894
914
 
915
+ [[package]]
916
+ name = "unty"
917
+ version = "0.0.4"
918
+ source = "registry+https://github.com/rust-lang/crates.io-index"
919
+ checksum = "6d49784317cd0d1ee7ec5c716dd598ec5b4483ea832a2dced265471cc0f690ae"
920
+
895
921
  [[package]]
896
922
  name = "utf8parse"
897
923
  version = "0.2.2"
898
924
  source = "registry+https://github.com/rust-lang/crates.io-index"
899
925
  checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
900
926
 
927
+ [[package]]
928
+ name = "virtue"
929
+ version = "0.0.18"
930
+ source = "registry+https://github.com/rust-lang/crates.io-index"
931
+ checksum = "051eb1abcf10076295e815102942cc58f9d5e3b4560e46e53c21e8ff6f3af7b1"
932
+
901
933
  [[package]]
902
934
  name = "walkdir"
903
935
  version = "2.5.0"
@@ -1124,8 +1156,9 @@ dependencies = [
1124
1156
 
1125
1157
  [[package]]
1126
1158
  name = "zeusdb-vector-database"
1127
- version = "0.2.0"
1159
+ version = "0.2.1"
1128
1160
  dependencies = [
1161
+ "bincode 2.0.1",
1129
1162
  "hnsw_rs",
1130
1163
  "numpy",
1131
1164
  "pyo3",
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "zeusdb-vector-database"
3
- version = "0.2.0"
3
+ version = "0.2.1"
4
4
  edition = "2021"
5
5
  resolver = "2" # <-- Avoid compiling unnecessary features from dependencies.
6
6
 
@@ -17,6 +17,7 @@ serde_json = "1.0"
17
17
  serde = { version = "1.0", features = ["derive"] }
18
18
  rayon = "1.10"
19
19
  rand = "0.9.1"
20
+ bincode = "2.0.1"
20
21
 
21
22
  [profile.release]
22
23
  lto = true # <-- Enable Link-Time Optimization
@@ -6,6 +6,7 @@ use std::sync::{Mutex, RwLock, Arc};
6
6
  use hnsw_rs::prelude::{Hnsw, DistCosine, DistL2, DistL1, Distance};
7
7
  use serde_json::Value;
8
8
  use rayon::prelude::*;
9
+ use serde::{Serialize, Deserialize};
9
10
 
10
11
  // Import PQ module
11
12
  use crate::pq::PQ;
@@ -24,15 +25,54 @@ macro_rules! debug_log {
24
25
  }
25
26
 
26
27
 
27
- // Quantization configuration structure
28
- #[derive(Debug, Clone)]
28
+ #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
29
+ pub enum StorageMode {
30
+ #[serde(rename = "quantized_only")]
31
+ QuantizedOnly,
32
+
33
+ #[serde(rename = "quantized_with_raw")]
34
+ QuantizedWithRaw,
35
+ }
36
+
37
+ impl StorageMode {
38
+ pub fn from_string(s: &str) -> Result<Self, String> {
39
+ match s {
40
+ "quantized_only" => Ok(StorageMode::QuantizedOnly),
41
+ "quantized_with_raw" => Ok(StorageMode::QuantizedWithRaw),
42
+ _ => Err(format!(
43
+ "Invalid storage_mode: '{}'. Supported: quantized_only, quantized_with_raw",
44
+ s
45
+ ))
46
+ }
47
+ }
48
+
49
+ pub fn to_string(&self) -> &'static str {
50
+ match self {
51
+ StorageMode::QuantizedOnly => "quantized_only",
52
+ StorageMode::QuantizedWithRaw => "quantized_with_raw",
53
+ }
54
+ }
55
+ }
56
+
57
+ impl Default for StorageMode {
58
+ fn default() -> Self {
59
+ StorageMode::QuantizedOnly
60
+ }
61
+ }
62
+
63
+
64
+ // Updated QuantizationConfig structure
65
+ #[derive(Debug, Clone, Serialize, Deserialize)]
29
66
  pub struct QuantizationConfig {
30
67
  pub subvectors: usize,
31
68
  pub bits: usize,
32
69
  pub training_size: usize,
33
70
  pub max_training_vectors: Option<usize>,
71
+ pub storage_mode: StorageMode,
34
72
  }
35
73
 
74
+
75
+
36
76
  /// Custom distance function for Product Quantization using ADC
37
77
  #[derive(Clone)]
38
78
  pub struct DistPQ {
@@ -435,6 +475,16 @@ impl HNSWIndex {
435
475
  let max_training_vectors = config.get_item("max_training_vectors")?
436
476
  .map(|v| v.extract::<usize>())
437
477
  .transpose()?;
478
+
479
+ // Extract storage_mode
480
+ let storage_mode_str = config.get_item("storage_mode")?
481
+ .map(|v| v.extract::<String>())
482
+ .transpose()?
483
+ .unwrap_or_else(|| "quantized_only".to_string());
484
+
485
+ let storage_mode = StorageMode::from_string(&storage_mode_str)
486
+ .map_err(|e| PyErr::new::<pyo3::exceptions::PyValueError, _>(e))?;
487
+
438
488
 
439
489
  // Validate PQ parameters
440
490
  if dim % subvectors != 0 {
@@ -460,6 +510,7 @@ impl HNSWIndex {
460
510
  bits,
461
511
  training_size,
462
512
  max_training_vectors,
513
+ storage_mode,
463
514
  };
464
515
 
465
516
  // Create PQ instance
@@ -941,7 +992,57 @@ impl HNSWIndex {
941
992
 
942
993
 
943
994
 
944
- /// Get records by ID(s) with PQ reconstruction support
995
+ // /// Get records by ID(s) with PQ reconstruction support
996
+ // #[pyo3(signature = (input, return_vector = true))]
997
+ // pub fn get_records(&self, py: Python<'_>, input: &Bound<PyAny>, return_vector: bool) -> PyResult<Vec<Py<PyDict>>> {
998
+ // let ids: Vec<String> = if let Ok(id_str) = input.extract::<String>() {
999
+ // vec![id_str]
1000
+ // } else if let Ok(id_list) = input.extract::<Vec<String>>() {
1001
+ // id_list
1002
+ // } else {
1003
+ // return Err(PyErr::new::<pyo3::exceptions::PyTypeError, _>(
1004
+ // "Expected a string or a list of strings for ID(s)",
1005
+ // ));
1006
+ // };
1007
+
1008
+ // let mut records = Vec::with_capacity(ids.len());
1009
+
1010
+ // // Use read locks for concurrent access
1011
+ // let vectors = self.vectors.read().unwrap();
1012
+ // let pq_codes = self.pq_codes.read().unwrap();
1013
+ // let vector_metadata = self.vector_metadata.read().unwrap();
1014
+
1015
+ // for id in ids {
1016
+ // if let Some(vector) = vectors.get(&id) {
1017
+ // let metadata = vector_metadata.get(&id).cloned().unwrap_or_default();
1018
+
1019
+ // let dict = PyDict::new(py);
1020
+ // dict.set_item("id", id.clone())?;
1021
+ // dict.set_item("metadata", self.value_map_to_python(&metadata, py)?)?;
1022
+
1023
+ // if return_vector {
1024
+ // // Try raw vector first, then PQ reconstruction
1025
+ // let vector_data = if !vector.is_empty() {
1026
+ // vector.clone()
1027
+ // } else if let (Some(pq), Some(codes)) = (&self.pq, pq_codes.get(&id)) {
1028
+ // pq.reconstruct(codes).unwrap_or_else(|_| vector.clone())
1029
+ // } else {
1030
+ // vector.clone()
1031
+ // };
1032
+
1033
+ // dict.set_item("vector", vector_data)?;
1034
+ // }
1035
+
1036
+ // records.push(dict.into());
1037
+ // }
1038
+ // }
1039
+
1040
+ // Ok(records)
1041
+ // }
1042
+
1043
+
1044
+
1045
+ /// Get records by ID(s) with PQ reconstruction support and storage mode awareness
945
1046
  #[pyo3(signature = (input, return_vector = true))]
946
1047
  pub fn get_records(&self, py: Python<'_>, input: &Bound<PyAny>, return_vector: bool) -> PyResult<Vec<Py<PyDict>>> {
947
1048
  let ids: Vec<String> = if let Ok(id_str) = input.extract::<String>() {
@@ -955,14 +1056,17 @@ impl HNSWIndex {
955
1056
  };
956
1057
 
957
1058
  let mut records = Vec::with_capacity(ids.len());
958
-
1059
+
959
1060
  // Use read locks for concurrent access
960
1061
  let vectors = self.vectors.read().unwrap();
961
1062
  let pq_codes = self.pq_codes.read().unwrap();
962
1063
  let vector_metadata = self.vector_metadata.read().unwrap();
963
1064
 
964
1065
  for id in ids {
965
- if let Some(vector) = vectors.get(&id) {
1066
+ // Check if this ID exists in either storage
1067
+ let exists = vectors.contains_key(&id) || pq_codes.contains_key(&id);
1068
+
1069
+ if exists {
966
1070
  let metadata = vector_metadata.get(&id).cloned().unwrap_or_default();
967
1071
 
968
1072
  let dict = PyDict::new(py);
@@ -970,16 +1074,27 @@ impl HNSWIndex {
970
1074
  dict.set_item("metadata", self.value_map_to_python(&metadata, py)?)?;
971
1075
 
972
1076
  if return_vector {
973
- // Try raw vector first, then PQ reconstruction
974
- let vector_data = if !vector.is_empty() {
975
- vector.clone()
1077
+ // Priority: raw vector > PQ reconstruction
1078
+ let vector_data = if let Some(raw_vector) = vectors.get(&id) {
1079
+ // Case 1: Raw vector available (QuantizedWithRaw mode or non-quantized)
1080
+ Some(raw_vector.clone())
976
1081
  } else if let (Some(pq), Some(codes)) = (&self.pq, pq_codes.get(&id)) {
977
- pq.reconstruct(codes).unwrap_or_else(|_| vector.clone())
1082
+ // Case 2: Only quantized codes available (QuantizedOnly mode)
1083
+ match pq.reconstruct(codes) {
1084
+ Ok(reconstructed) => Some(reconstructed),
1085
+ Err(e) => {
1086
+ eprintln!("Warning: Failed to reconstruct vector for ID {}: {}", id, e);
1087
+ None
1088
+ }
1089
+ }
978
1090
  } else {
979
- vector.clone()
1091
+ // Case 3: No vector data available
1092
+ None
980
1093
  };
981
-
982
- dict.set_item("vector", vector_data)?;
1094
+
1095
+ if let Some(vec) = vector_data {
1096
+ dict.set_item("vector", vec)?;
1097
+ }
983
1098
  }
984
1099
 
985
1100
  records.push(dict.into());
@@ -992,7 +1107,75 @@ impl HNSWIndex {
992
1107
 
993
1108
 
994
1109
 
995
- /// Enhanced get_stats with training info
1110
+
1111
+
1112
+
1113
+
1114
+
1115
+
1116
+
1117
+
1118
+ // /// Enhanced get_stats with training info
1119
+ // pub fn get_stats(&self) -> HashMap<String, String> {
1120
+ // let mut stats = HashMap::new();
1121
+
1122
+ // let vectors = self.vectors.read().unwrap();
1123
+ // let pq_codes = self.pq_codes.read().unwrap();
1124
+ // let vector_count = *self.vector_count.lock().unwrap();
1125
+ // let training_ids = self.training_ids.read().unwrap();
1126
+
1127
+ // // Basic stats
1128
+ // stats.insert("total_vectors".to_string(), vector_count.to_string());
1129
+ // stats.insert("dimension".to_string(), self.dim.to_string());
1130
+ // stats.insert("expected_size".to_string(), self.expected_size.to_string());
1131
+ // stats.insert("space".to_string(), self.space.clone());
1132
+ // stats.insert("index_type".to_string(), "HNSW".to_string());
1133
+
1134
+ // stats.insert("m".to_string(), self.m.to_string());
1135
+ // stats.insert("ef_construction".to_string(), self.ef_construction.to_string());
1136
+ // stats.insert("thread_safety".to_string(), "RwLock+Mutex".to_string());
1137
+
1138
+ // // Storage breakdown
1139
+ // stats.insert("raw_vectors_stored".to_string(), vectors.len().to_string());
1140
+ // stats.insert("quantized_codes_stored".to_string(), pq_codes.len().to_string());
1141
+
1142
+ // // Training info
1143
+ // if let Some(config) = &self.quantization_config {
1144
+ // stats.insert("quantization_type".to_string(), "pq".to_string());
1145
+ // stats.insert("quantization_training_size".to_string(), config.training_size.to_string());
1146
+
1147
+ // let collected_count = training_ids.len();
1148
+ // let progress = self.get_training_progress();
1149
+ // stats.insert("training_progress".to_string(),
1150
+ // format!("{}/{} ({:.1}%)", collected_count, config.training_size, progress));
1151
+
1152
+ // let vectors_needed = self.training_vectors_needed();
1153
+ // stats.insert("training_vectors_needed".to_string(), vectors_needed.to_string());
1154
+ // stats.insert("training_threshold_reached".to_string(),
1155
+ // self.training_threshold_reached.load(Ordering::Acquire).to_string());
1156
+
1157
+ // if let Some(pq) = &self.pq {
1158
+ // let is_trained = pq.is_trained();
1159
+ // stats.insert("quantization_trained".to_string(), is_trained.to_string());
1160
+ // stats.insert("quantization_active".to_string(), self.is_quantized().to_string());
1161
+
1162
+ // if is_trained {
1163
+ // let compression_ratio = (pq.dim * 4) as f64 / pq.subvectors as f64;
1164
+ // stats.insert("quantization_compression_ratio".to_string(), format!("{:.1}x", compression_ratio));
1165
+ // }
1166
+ // }
1167
+ // } else {
1168
+ // stats.insert("quantization_type".to_string(), "none".to_string());
1169
+ // }
1170
+
1171
+ // stats.insert("storage_mode".to_string(), self.get_storage_mode());
1172
+
1173
+ // stats
1174
+ // }
1175
+
1176
+
1177
+
1178
+ /// Enhanced get_stats with storage mode information
996
1179
  pub fn get_stats(&self) -> HashMap<String, String> {
997
1180
  let mut stats = HashMap::new();
998
1181
 
@@ -1021,16 +1204,37 @@ impl HNSWIndex {
1021
1204
  stats.insert("quantization_type".to_string(), "pq".to_string());
1022
1205
  stats.insert("quantization_training_size".to_string(), config.training_size.to_string());
1023
1206
 
1207
+ // Storage mode information
1208
+ stats.insert("storage_mode".to_string(), config.storage_mode.to_string().to_string());
1209
+
1210
+ // Calculate actual memory usage based on storage mode
1211
+ let raw_memory_mb = (vectors.len() * self.dim * 4) as f64 / (1024.0 * 1024.0);
1212
+ let quantized_memory_mb = (pq_codes.len() * config.subvectors) as f64 / (1024.0 * 1024.0);
1213
+
1214
+ stats.insert("raw_vectors_memory_mb".to_string(), format!("{:.2}", raw_memory_mb));
1215
+ stats.insert("quantized_codes_memory_mb".to_string(), format!("{:.2}", quantized_memory_mb));
1216
+
1217
+ match config.storage_mode {
1218
+ StorageMode::QuantizedOnly => {
1219
+ stats.insert("storage_strategy".to_string(), "memory_optimized".to_string());
1220
+ stats.insert("memory_savings".to_string(), "maximum".to_string());
1221
+ }
1222
+ StorageMode::QuantizedWithRaw => {
1223
+ stats.insert("storage_strategy".to_string(), "quality_optimized".to_string());
1224
+ stats.insert("memory_savings".to_string(), "raw_vectors_kept".to_string());
1225
+ }
1226
+ }
1227
+
1024
1228
  let collected_count = training_ids.len();
1025
1229
  let progress = self.get_training_progress();
1026
- stats.insert("training_progress".to_string(),
1230
+ stats.insert("training_progress".to_string(),
1027
1231
  format!("{}/{} ({:.1}%)", collected_count, config.training_size, progress));
1028
1232
 
1029
1233
  let vectors_needed = self.training_vectors_needed();
1030
1234
  stats.insert("training_vectors_needed".to_string(), vectors_needed.to_string());
1031
- stats.insert("training_threshold_reached".to_string(),
1235
+ stats.insert("training_threshold_reached".to_string(),
1032
1236
  self.training_threshold_reached.load(Ordering::Acquire).to_string());
1033
-
1237
+
1034
1238
  if let Some(pq) = &self.pq {
1035
1239
  let is_trained = pq.is_trained();
1036
1240
  stats.insert("quantization_trained".to_string(), is_trained.to_string());
@@ -1043,12 +1247,68 @@ impl HNSWIndex {
1043
1247
  }
1044
1248
  } else {
1045
1249
  stats.insert("quantization_type".to_string(), "none".to_string());
1250
+ stats.insert("storage_mode".to_string(), "raw_only".to_string());
1046
1251
  }
1047
1252
 
1048
- stats.insert("storage_mode".to_string(), self.get_storage_mode());
1253
+ stats.insert("storage_mode_description".to_string(), self.get_storage_mode());
1049
1254
 
1050
1255
  stats
1051
1256
  }
1257
+
1258
+
1259
+
1260
+
1261
+
1262
+
1263
+
1264
+
1265
+
1266
+
1267
+
1268
+
1269
+
1270
+
1271
+
1272
+
1273
+
1274
+
1275
+
1276
+
1277
+
1278
+
1279
+
1280
+
1281
+
1282
+
1283
+
1284
+
1285
+
1286
+
1287
+
1288
+
1289
+
1290
+
1291
+
1292
+
1293
+
1294
+
1295
+
1296
+
1297
+
1298
+
1299
+
1300
+
1301
+
1302
+
1303
+
1304
+
1305
+
1306
+
1307
+
1308
+
1309
+
1310
+
1311
+
1052
1312
 
1053
1313
 
1054
1314
 
@@ -1413,7 +1673,61 @@ impl HNSWIndex {
1413
1673
  Ok(())
1414
1674
  }
1415
1675
 
1416
- /// Path C: Quantized storage (trained and active)
1676
+ // /// Path C: Quantized storage (trained and active)
1677
+ // fn add_quantized_vector(
1678
+ // &mut self,
1679
+ // id: String,
1680
+ // vector: Vec<f32>, // Already processed
1681
+ // metadata: HashMap<String, Value>
1682
+ // ) -> PyResult<()> {
1683
+ // let internal_id = self.get_next_id();
1684
+
1685
+ // // Store metadata
1686
+ // {
1687
+ // let mut vector_metadata = self.vector_metadata.write().unwrap();
1688
+ // vector_metadata.insert(id.clone(), metadata);
1689
+ // }
1690
+
1691
+ // // Update ID mappings
1692
+ // {
1693
+ // let mut id_map = self.id_map.write().unwrap();
1694
+ // let mut rev_map = self.rev_map.write().unwrap();
1695
+
1696
+ // id_map.insert(id.clone(), internal_id);
1697
+ // rev_map.insert(internal_id, id.clone());
1698
+ // }
1699
+
1700
+ // // Quantize the vector
1701
+ // let pq = self.pq.as_ref().unwrap();
1702
+ // let codes = pq.quantize(&vector).map_err(|e| {
1703
+ // PyErr::new::<pyo3::exceptions::PyRuntimeError, _>(
1704
+ // format!("Failed to quantize vector: {}", e)
1705
+ // )
1706
+ // })?;
1707
+
1708
+ // // Store quantized codes
1709
+ // {
1710
+ // let mut pq_codes = self.pq_codes.write().unwrap();
1711
+ // pq_codes.insert(id.clone(), codes.clone());
1712
+ // }
1713
+
1714
+ // // Store raw vector for exact reconstruction (persistence-ready)
1715
+ // {
1716
+ // let mut vectors = self.vectors.write().unwrap();
1717
+ // vectors.insert(id, vector.clone());
1718
+ // }
1719
+
1720
+ // // Insert codes into quantized HNSW
1721
+ // {
1722
+ // let mut hnsw_guard = self.hnsw.lock().unwrap();
1723
+ // hnsw_guard.insert_pq_codes(&codes, internal_id);
1724
+ // }
1725
+
1726
+ // Ok(())
1727
+ // }
1728
+
1729
+
1730
+ /// Path C: Quantized storage with configurable raw vector retention
1417
1731
  fn add_quantized_vector(
1418
1732
  &mut self,
1419
1733
  id: String,
@@ -1445,16 +1759,19 @@ impl HNSWIndex {
1445
1759
  )
1446
1760
  })?;
1447
1761
 
1448
- // Store quantized codes
1762
+ // Store quantized codes (always)
1449
1763
  {
1450
1764
  let mut pq_codes = self.pq_codes.write().unwrap();
1451
1765
  pq_codes.insert(id.clone(), codes.clone());
1452
1766
  }
1453
1767
 
1454
- // Store raw vector for exact reconstruction (persistence-ready)
1455
- {
1456
- let mut vectors = self.vectors.write().unwrap();
1457
- vectors.insert(id, vector.clone());
1768
+ // Store raw vector only if configured to keep them
1769
+ if let Some(config) = &self.quantization_config {
1770
+ if config.storage_mode == StorageMode::QuantizedWithRaw {
1771
+ let mut vectors = self.vectors.write().unwrap();
1772
+ vectors.insert(id.clone(), vector.clone());
1773
+ }
1774
+ // If QuantizedOnly mode, we don't store raw vectors (saves memory)
1458
1775
  }
1459
1776
 
1460
1777
  // Insert codes into quantized HNSW
@@ -1466,6 +1783,41 @@ impl HNSWIndex {
1466
1783
  Ok(())
1467
1784
  }
1468
1785
 
1786
+
1787
+
1788
+
1789
+
1790
+
1791
+
1792
+
1793
+
1794
+
1795
+
1796
+
1797
+
1798
+
1799
+
1800
+
1801
+
1802
+
1803
+
1804
+
1805
+
1806
+
1807
+
1808
+
1809
+
1810
+
1811
+
1812
+
1813
+
1814
+
1815
+
1816
+
1817
+
1818
+
1819
+
1820
+
1469
1821
  /// TRAINING TRIGGER: Uses threshold flag for race condition safety
1470
1822
  fn maybe_trigger_training(&mut self) -> Result<(), String> {
1471
1823
  // Check atomic flag first (fast path)