vecforge 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vecforge/core/vault.py ADDED
@@ -0,0 +1,760 @@
1
+ # VecForge — Universal Local-First Vector Database
2
+ # Copyright (c) 2026 Suneel Bose K · ArcGX TechLabs Private Limited
3
+ # Built by Suneel Bose K (Founder & CEO, ArcGX TechLabs)
4
+ #
5
+ # Licensed under the Business Source License 1.1 (BSL 1.1)
6
+ # Free for personal, research, open-source, and non-commercial use.
7
+ # Commercial use requires a separate license from ArcGX TechLabs.
8
+ # See LICENSE file in the project root or contact: suneelbose@arcgx.in
9
+
10
+ """
11
+ Main VecForge class — the 5-line API surface.
12
+
13
+ This is the primary entry point for all VecForge operations. Designed
14
+ to make any core feature usable in 5 lines of Python or fewer.
15
+
16
+ Built by Suneel Bose K · ArcGX TechLabs Private Limited.
17
+
18
+ Example::
19
+
20
+ from vecforge import VecForge
21
+
22
+ db = VecForge("my_vault")
23
+ db.add("Patient admitted with type 2 diabetes")
24
+ results = db.search("diabetic patient")
25
+ print(results[0].text)
26
+ """
27
+
28
+ from __future__ import annotations
29
+
30
+ import logging
31
+ import time
32
+ from dataclasses import dataclass, field
33
+ from typing import Any
34
+
35
+ import numpy as np
36
+
37
+ from vecforge.core.bm25 import BM25Engine
38
+ from vecforge.core.embedder import Embedder
39
+ from vecforge.core.indexer import FaissIndexer
40
+ from vecforge.core.reranker import Reranker
41
+ from vecforge.core.storage import StorageBackend
42
+ from vecforge.exceptions import (
43
+ DeletionProtectedError,
44
+ InvalidAlphaError,
45
+ VaultEmptyError,
46
+ )
47
+ from vecforge.search.cascade import CascadeSearcher
48
+ from vecforge.search.filters import MetadataFilter
49
+ from vecforge.security.audit import AuditLogger
50
+ from vecforge.security.encryption import validate_encryption_key
51
+ from vecforge.security.namespaces import NamespaceManager
52
+ from vecforge.security.rbac import RBACManager
53
+
54
+ logger = logging.getLogger(__name__)
55
+
56
+
57
@dataclass
class SearchResult:
    """One hit returned by a VecForge search.

    Built by Suneel Bose K · ArcGX TechLabs Private Limited.

    Attributes:
        text: Text content of the matched document.
        score: Relevance score; larger values rank higher.
        metadata: Metadata dictionary supplied by the user.
        namespace: Namespace the document is stored in.
        doc_id: Unique identifier of the document.
        modality: Content modality (text, image, audio, etc.).
        timestamp: Creation timestamp of the document.
    """

    text: str
    score: float
    metadata: dict[str, Any] = field(default_factory=dict)
    namespace: str = "default"
    doc_id: str = ""
    modality: str = "text"
    timestamp: float = 0.0

    def __repr__(self) -> str:
        # why: Keep the representation short — at most 80 characters of
        # text and the first 8 characters of the document ID are shown.
        if len(self.text) > 80:
            shown_text = f"{self.text[:80]}..."
        else:
            shown_text = self.text
        id_prefix = self.doc_id[:8]
        pieces = (
            f"SearchResult(score={self.score:.4f}, ",
            f"doc_id='{id_prefix}...', ",
            f"text='{shown_text}')",
        )
        return "".join(pieces)
88
+
89
+
90
class VecForge:
    """Universal local-first vector database.

    The 5-line API: create, ingest, search, done.
    Every feature is usable in 5 lines of Python or fewer.

    Built by Suneel Bose K, Founder & CEO, ArcGX TechLabs Private Limited.

    Args:
        path: Vault path. Use ':memory:' for in-memory storage.
        encryption_key: Encryption key for the storage backend. It is
            checked by ``validate_encryption_key`` before use. Read it
            from ``os.environ['VECFORGE_KEY']`` — never hardcode.
        audit_log: Path to JSONL audit log file. If None, auditing off.
        quantum: Quantum-inspired acceleration flag. This class only
            stores the flag and reports it from ``stats()``.
        deletion_protection: When True, ``delete()`` always raises
            ``DeletionProtectedError``. Defaults to False.
        api_key: API key for RBAC. None = local admin.
        model_name: Embedding model name. Defaults to 'all-MiniLM-L6-v2'.

    Example:
        >>> from vecforge import VecForge
        >>> db = VecForge("my_vault")
        >>> db.add("Patient admitted with type 2 diabetes", metadata={"ward": "7"})
        >>> results = db.search("diabetic patient")
        >>> print(results[0].text)
        'Patient admitted with type 2 diabetes'
    """

    def __init__(
        self,
        path: str = ":memory:",
        encryption_key: str | None = None,
        audit_log: str | None = None,
        quantum: bool = False,
        deletion_protection: bool = False,
        api_key: str | None = None,
        model_name: str = "all-MiniLM-L6-v2",
    ) -> None:
        self._path = path
        self._quantum = quantum
        self._deletion_protection = deletion_protection
        # why: Lets close() be called twice (e.g. explicitly inside a
        # ``with`` block) without touching a closed storage backend.
        self._closed = False

        # security: Validate encryption key
        validated_key = validate_encryption_key(encryption_key)

        # ─── Initialize subsystems ───
        self._storage = StorageBackend(path=path, encryption_key=validated_key)
        self._embedder = Embedder(model_name=model_name)
        self._indexer: FaissIndexer | None = None  # why: Lazy — needs dimension
        self._bm25 = BM25Engine()
        self._reranker = Reranker()
        self._rbac = RBACManager(api_key=api_key)
        self._audit = AuditLogger(log_path=audit_log)
        self._namespace_mgr = NamespaceManager(self._storage)

        # why: Position i in the FAISS index belongs to _index_to_doc_id[i]
        self._index_to_doc_id: list[str] = []

        # why: Try to restore from persisted state
        self._restore_state()

        logger.info(
            "VecForge vault opened: %s (encrypted=%s, quantum=%s)",
            path,
            self._storage.is_encrypted,
            quantum,
        )

    def _index_documents(self, docs: list[Any]) -> None:
        """Replace all in-memory indexes with ones built from ``docs``.

        Args:
            docs: Non-empty list of stored documents exposing
                ``embedding``, ``text`` and ``doc_id``.

        Performance:
            Time: O(N * d) where N = docs, d = dimension
        """
        # why: Build everything first and swap at the end, so a failure
        # while building never leaves the three structures out of step.
        indexer = FaissIndexer(dimension=len(docs[0].embedding))
        # perf: Batch add all embeddings at once
        indexer.add(np.stack([d.embedding for d in docs]))

        bm25 = BM25Engine()
        bm25.add_documents([d.text for d in docs])

        self._indexer = indexer
        self._bm25 = bm25
        self._index_to_doc_id = [d.doc_id for d in docs]

    def _restore_state(self) -> None:
        """Rebuild the FAISS index, BM25 corpus and position map from storage.

        Does nothing when the storage holds no documents.

        Performance:
            Time: O(N * d) where N = docs, d = dimension
        """
        docs = self._storage.get_all_docs()
        if not docs:
            return

        self._index_documents(docs)
        logger.info("Restored %d documents from storage", len(docs))

    def _ensure_indexer(self) -> FaissIndexer:
        """Return the FAISS indexer, creating it on first use.

        The dimension of a newly created indexer is taken from the
        embedder.

        Returns:
            FaissIndexer instance.
        """
        if self._indexer is None:
            self._indexer = FaissIndexer(dimension=self._embedder.dimension)
        return self._indexer

    def _ensure_namespace(self, namespace: str) -> None:
        """Create ``namespace`` if the namespace manager does not know it."""
        if not self._namespace_mgr.exists(namespace):
            self._namespace_mgr.create(namespace)

    def _resolve_doc_id(self, doc_index: int) -> str | None:
        """Map a FAISS position to a document ID.

        Args:
            doc_index: Position reported by the search pipeline.

        Returns:
            The document ID, or None when the position is outside the
            position map.
        """
        # why: A negative position (FAISS pads missing hits with -1) must
        # be rejected explicitly — Python would otherwise wrap it around
        # to a document at the end of the list.
        if 0 <= doc_index < len(self._index_to_doc_id):
            return self._index_to_doc_id[doc_index]
        return None

    def add(
        self,
        text: str,
        metadata: dict[str, Any] | None = None,
        namespace: str = "default",
        doc_id: str | None = None,
    ) -> str:
        """Add a text document to the vault.

        Args:
            text: Document text content.
            metadata: Optional metadata dictionary.
            namespace: Target namespace; created if it does not exist.
                Defaults to 'default'.
            doc_id: Optional custom document ID.

        Returns:
            ID of the stored document.

        Raises:
            VecForgePermissionError: If API key lacks write permission.

        Example:
            >>> doc_id = db.add(
            ...     "Patient P4821 — Type 2 diabetes",
            ...     metadata={"ward": "7", "year": 2024},
            ...     namespace="ward_7"
            ... )
        """
        # security: Check write permission
        self._rbac.require("write")

        if metadata is None:
            metadata = {}

        self._ensure_namespace(namespace)

        # ─── Embed the text ───
        embedding_vec = self._embedder.encode(text)[0]  # shape: (dimension,)

        # ─── Store in the storage backend ───
        stored = self._storage.insert_doc(
            text=text,
            embedding=embedding_vec,
            metadata=metadata,
            namespace=namespace,
            doc_id=doc_id,
        )

        # ─── Update in-memory indexes ───
        self._ensure_indexer().add(embedding_vec.reshape(1, -1))
        self._bm25.add_document(text)
        self._index_to_doc_id.append(stored.doc_id)

        # security: Emit audit event
        self._audit.log(
            actor=self._rbac.key_id,
            operation="add",
            doc_id=stored.doc_id,
            namespace=namespace,
            metadata={"chars": len(text)},
        )

        logger.debug("Added doc %s to namespace '%s'", stored.doc_id, namespace)
        return stored.doc_id

    def add_batch(
        self,
        texts: list[str],
        metadata_list: list[dict[str, Any]] | None = None,
        namespace: str = "default",
    ) -> list[str]:
        """Add multiple documents in one batch operation.

        All texts are embedded in a single embedder call and added to
        FAISS in a single call. One audit event is written per batch.

        Args:
            texts: List of document text strings.
            metadata_list: Optional list of metadata dicts (one per
                text). If None, all documents get empty metadata.
            namespace: Target namespace; created if it does not exist.
                Defaults to 'default'.

        Returns:
            List of document IDs, in the order of ``texts``. Empty when
            ``texts`` is empty.

        Raises:
            VecForgePermissionError: If API key lacks write access.
            ValueError: If metadata_list length != texts length.

        Example:
            >>> ids = db.add_batch(
            ...     ["First doc", "Second doc", "Third doc"],
            ...     namespace="bulk",
            ... )
            >>> len(ids)
            3
        """
        # security: Check write permission
        self._rbac.require("write")

        if not texts:
            return []

        if metadata_list is None:
            metadata_list = [{} for _ in texts]

        if len(metadata_list) != len(texts):
            raise ValueError(
                f"metadata_list length ({len(metadata_list)}) "
                f"must match texts length ({len(texts)})"
            )

        self._ensure_namespace(namespace)

        # perf: Batch embed all texts in one model call
        embeddings = self._embedder.encode(texts)
        indexer = self._ensure_indexer()

        # ─── Persist every document first ───
        doc_ids: list[str] = []
        for i, (text, meta) in enumerate(zip(texts, metadata_list, strict=True)):
            stored = self._storage.insert_doc(
                text=text,
                embedding=embeddings[i],
                metadata=meta,
                namespace=namespace,
            )
            doc_ids.append(stored.doc_id)

        # why: Touch the in-memory structures only after all inserts
        # succeeded, so a storage failure mid-batch cannot leave the
        # position map and BM25 corpus ahead of the FAISS index.
        # perf: Batch add all embeddings to FAISS
        indexer.add(embeddings)
        for text in texts:
            self._bm25.add_document(text)
        self._index_to_doc_id.extend(doc_ids)

        # security: Audit batch operation
        self._audit.log(
            actor=self._rbac.key_id,
            operation="add_batch",
            namespace=namespace,
            metadata={
                "count": len(texts),
                "chars": sum(len(t) for t in texts),
            },
        )

        logger.info(
            "Batch added %d docs to '%s'",
            len(texts),
            namespace,
        )
        return doc_ids

    def search(
        self,
        query: str,
        top_k: int = 10,
        alpha: float = 0.5,
        rerank: bool = False,
        namespace: str | None = None,
        filters: dict[str, Any] | None = None,
        recency_weight: float = 0.0,
    ) -> list[SearchResult]:
        """Perform hybrid cascading search across the vault.

        Pipeline: cascade search (FAISS + BM25) for ``top_k * 4``
        candidates → hydration from storage with namespace filtering →
        metadata filter → optional recency weighting → optional
        cross-encoder reranking → trim to ``top_k``.

        Args:
            query: Natural language search query.
            top_k: Number of results to return; must be >= 1.
                Defaults to 10.
            alpha: Semantic weight (0.0 = keyword only, 1.0 = semantic only).
                Defaults to 0.5 (balanced hybrid).
            rerank: If True, applies cross-encoder reranking as the
                final pass. Defaults to False.
            namespace: Restrict to this namespace. None = no restriction.
            filters: Metadata key-value filters.
                E.g. {"type": "NDA", "year": {"gte": 2023}}.
            recency_weight: Weight for document recency (0.0–1.0).
                Values <= 0 disable recency weighting.

        Returns:
            List of SearchResult sorted by descending relevance.

        Raises:
            VaultEmptyError: If vault contains no documents.
            InvalidAlphaError: If alpha outside [0.0, 1.0].
            ValueError: If top_k is smaller than 1.
            VecForgePermissionError: If API key lacks read permission.

            Namespace validation is delegated to the namespace manager,
            which may raise its own error for an unknown namespace.

        Example:
            >>> results = db.search(
            ...     "elderly diabetic hip fracture",
            ...     namespace="ward_7",
            ...     rerank=True,
            ...     top_k=5,
            ... )
            >>> print(results[0].text)
        """
        # security: Check read permission
        self._rbac.require("read")

        # why: Validate alpha range
        if not 0.0 <= alpha <= 1.0:
            raise InvalidAlphaError(alpha)

        # why: A non-positive top_k would request zero candidates, and a
        # negative one would make the final slice drop trailing results.
        if top_k < 1:
            raise ValueError(f"top_k must be a positive integer, got {top_k}")

        # why: Validate namespace if specified
        if namespace is not None:
            self._namespace_mgr.validate(namespace)

        # why: Check vault has data without creating an index just to
        # find out that it is empty.
        indexer = self._indexer
        if indexer is None or indexer.count == 0:
            raise VaultEmptyError(self._path)

        # ─── Embed query ───
        query_vector = self._embedder.encode(query)[0]

        # ─── Run cascade search ───
        cascade = CascadeSearcher(
            indexer=indexer,
            bm25=self._bm25,
            reranker=self._reranker if rerank else None,
        )

        # why: Get more candidates for namespace/metadata filtering
        fetch_k = top_k * 4
        candidates = cascade.search(
            query_vector=query_vector,
            query_text=query,
            top_k=fetch_k,
            alpha=alpha,
            rerank=False,  # why: We'll rerank after hydration
        )

        # ─── Hydrate candidates with full doc data ───
        results: list[SearchResult] = []
        for candidate in candidates:
            doc_id = self._resolve_doc_id(candidate.doc_index)
            if doc_id is None:
                continue

            doc = self._storage.get_doc(doc_id)
            if doc is None:
                continue

            # security: Namespace filtering
            if namespace is not None and doc.namespace != namespace:
                continue

            results.append(
                SearchResult(
                    text=doc.text,
                    score=candidate.score,
                    metadata=doc.metadata,
                    namespace=doc.namespace,
                    doc_id=doc.doc_id,
                    modality=doc.modality,
                    timestamp=doc.created_at,
                )
            )

        # ─── Metadata filtering ───
        if filters:
            meta_filter = MetadataFilter(filters)
            results = [r for r in results if meta_filter.matches(r.metadata)]

        # ─── Recency weighting ───
        if recency_weight > 0.0 and results:
            now = time.time()
            # why: Fall back to 1.0 so a zero maximum age cannot divide by zero
            max_age = max(now - r.timestamp for r in results) or 1.0

            for r in results:
                # why: The oldest hit gets factor 0; newer hits approach 1
                age_factor = 1.0 - ((now - r.timestamp) / max_age)
                r.score = (1.0 - recency_weight) * r.score + recency_weight * age_factor

            results.sort(key=lambda r: r.score, reverse=True)

        # ─── Cross-encoder reranking (final pass) ───
        if rerank and results:
            reranked = self._reranker.rerank(
                query,
                [r.text for r in results],
                top_k=top_k,
            )
            reranked_results = []
            for orig_idx, rerank_score in reranked:
                if 0 <= orig_idx < len(results):
                    hit = results[orig_idx]
                    hit.score = rerank_score
                    reranked_results.append(hit)
            results = reranked_results

        # ─── Trim to top_k ───
        results = results[:top_k]

        # security: Audit the search
        self._audit.log(
            actor=self._rbac.key_id,
            operation="search",
            namespace=namespace,
            metadata={"query": query, "top_k": top_k, "results": len(results)},
        )

        return results

    def ingest(
        self,
        path: str,
        namespace: str = "default",
        chunk_size: int = 1000,
        chunk_overlap: int = 200,
    ) -> int:
        """Ingest documents from a file or directory.

        Chunking and format handling are delegated to
        ``IngestDispatcher``; every chunk it yields is stored through
        ``add()``, including its audit event.

        Args:
            path: File or directory path to ingest.
            namespace: Target namespace. Defaults to 'default'.
            chunk_size: Chunk size passed to the dispatcher.
                Defaults to 1000.
            chunk_overlap: Chunk overlap passed to the dispatcher.
                Defaults to 200.

        Returns:
            Number of chunks ingested.

        Raises:
            VecForgePermissionError: If API key lacks write permission.

        Example:
            >>> count = db.ingest("my_documents/", namespace="legal")
            >>> print(f"Ingested {count} chunks")
        """
        # security: Check write permission
        self._rbac.require("write")

        # why: Imported here rather than at module level, as in the
        # original — presumably to keep ingest dependencies optional.
        from vecforge.ingest.dispatcher import IngestDispatcher

        dispatcher = IngestDispatcher(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
        )

        count = 0
        for chunk in dispatcher.ingest(path):
            self.add(
                text=chunk.text,
                metadata=chunk.metadata,
                namespace=namespace,
            )
            count += 1

        logger.info("Ingested %d chunks from %s", count, path)
        return count

    def delete(self, doc_id: str) -> bool:
        """Delete a document by ID.

        Args:
            doc_id: Document identifier.

        Returns:
            True if document was deleted.

        Raises:
            DeletionProtectedError: If vault has deletion protection.
            VecForgePermissionError: If API key lacks delete permission.

        Performance:
            Time: O(N * d) — a successful delete rebuilds the in-memory
            indexes from storage.

        Example:
            >>> db.delete("a1b2c3d4-...")
            True
        """
        # security: Check delete permission
        self._rbac.require("delete")

        if self._deletion_protection:
            raise DeletionProtectedError(doc_id)

        deleted = self._storage.delete_doc(doc_id)

        if deleted:
            # security: Audit the deletion
            self._audit.log(
                actor=self._rbac.key_id,
                operation="delete",
                doc_id=doc_id,
            )

            # why: Rebuild in-memory indexes for consistency
            self._rebuild_indexes()

        return deleted

    def _rebuild_indexes(self) -> None:
        """Rebuild FAISS and BM25 indexes from storage.

        Called after deletions to maintain consistency. With no
        documents left, the existing indexes are reset instead.

        Performance:
            Time: O(N * d)
        """
        docs = self._storage.get_all_docs()
        if docs:
            self._index_documents(docs)
            return

        if self._indexer is not None:
            self._indexer.reset()
        self._bm25.reset()
        self._index_to_doc_id = []

    def create_namespace(self, name: str) -> None:
        """Create a new namespace for tenant isolation.

        Args:
            name: Namespace name.

        Raises:
            VecForgePermissionError: If API key lacks create_namespace.

        Example:
            >>> db.create_namespace("ward_7")
        """
        self._rbac.require("create_namespace")
        self._namespace_mgr.create(name)

        self._audit.log(
            actor=self._rbac.key_id,
            operation="create_namespace",
            namespace=name,
        )

    def list_namespaces(self) -> list[str]:
        """List all namespaces in the vault.

        Returns:
            Namespace names as reported by the namespace manager.
        """
        return self._namespace_mgr.list_all()

    def stats(self) -> dict[str, Any]:
        """Get vault statistics.

        Returns:
            Dictionary with vault metadata and statistics.

        Performance:
            One storage count query per namespace, plus one overall.

        Example:
            >>> db.stats()
            {'documents': 1500, 'namespaces': ['default', 'ward_7'], ...}
        """
        namespaces = self.list_namespaces()
        ns_counts = {ns: self._storage.count_docs(namespace=ns) for ns in namespaces}

        return {
            "path": self._path,
            "documents": self._storage.count_docs(),
            "namespaces": namespaces,
            "namespace_counts": ns_counts,
            "encrypted": self._storage.is_encrypted,
            "quantum": self._quantum,
            "deletion_protection": self._deletion_protection,
            "index_vectors": self._indexer.count if self._indexer is not None else 0,
            "bm25_documents": self._bm25.count,
            "built_by": "Suneel Bose K · ArcGX TechLabs Private Limited",
        }

    def save(self) -> None:
        """Persist the FAISS index to storage.

        Does nothing when no index exists or the index is empty.
        """
        indexer = self._indexer
        if indexer is None or indexer.count <= 0:
            return

        self._storage.save_faiss_index(
            index_data=indexer.to_bytes(),
            dimension=indexer.dimension,
            count=indexer.count,
        )
        logger.info("FAISS index saved (%d vectors)", indexer.count)

    def close(self) -> None:
        """Save state and close the vault.

        Calling it again on an already closed vault does nothing. The
        storage backend is closed even when saving the index fails; the
        save error is still propagated.
        """
        # why: getattr keeps this safe for instances on which __init__
        # did not run to completion.
        if getattr(self, "_closed", False):
            return

        try:
            self.save()
        finally:
            self._storage.close()
            self._closed = True

        logger.info("VecForge vault closed: %s", self._path)

    def __enter__(self) -> VecForge:
        """Context manager entry."""
        return self

    def __exit__(self, *args: Any) -> None:
        """Context manager exit — auto-save and close."""
        self.close()

    def __repr__(self) -> str:
        count = self._storage.count_docs()
        return (
            f"VecForge(path='{self._path}', docs={count}, "
            f"encrypted={self._storage.is_encrypted})"
        )