vectrixdb-1.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. vectrixdb/__init__.py +366 -0
  2. vectrixdb/api/__init__.py +9 -0
  3. vectrixdb/api/server.py +1983 -0
  4. vectrixdb/benchmarks/__init__.py +22 -0
  5. vectrixdb/benchmarks/datasets.py +329 -0
  6. vectrixdb/benchmarks/metrics.py +271 -0
  7. vectrixdb/benchmarks/reports.py +356 -0
  8. vectrixdb/benchmarks/runner.py +390 -0
  9. vectrixdb/cli.py +329 -0
  10. vectrixdb/core/__init__.py +114 -0
  11. vectrixdb/core/advanced_search.py +923 -0
  12. vectrixdb/core/batch/__init__.py +22 -0
  13. vectrixdb/core/batch/memory.py +393 -0
  14. vectrixdb/core/batch/parallel.py +301 -0
  15. vectrixdb/core/batch/streaming.py +445 -0
  16. vectrixdb/core/cache.py +620 -0
  17. vectrixdb/core/collection.py +2164 -0
  18. vectrixdb/core/database.py +1105 -0
  19. vectrixdb/core/graphrag/__init__.py +120 -0
  20. vectrixdb/core/graphrag/chunker.py +384 -0
  21. vectrixdb/core/graphrag/config.py +303 -0
  22. vectrixdb/core/graphrag/extractor/__init__.py +239 -0
  23. vectrixdb/core/graphrag/extractor/base.py +424 -0
  24. vectrixdb/core/graphrag/extractor/hybrid_extractor.py +342 -0
  25. vectrixdb/core/graphrag/extractor/llm_extractor.py +508 -0
  26. vectrixdb/core/graphrag/extractor/nlp_extractor.py +461 -0
  27. vectrixdb/core/graphrag/extractor/rebel_extractor.py +316 -0
  28. vectrixdb/core/graphrag/graph/__init__.py +31 -0
  29. vectrixdb/core/graphrag/graph/community.py +334 -0
  30. vectrixdb/core/graphrag/graph/knowledge_graph.py +593 -0
  31. vectrixdb/core/graphrag/graph/storage.py +490 -0
  32. vectrixdb/core/graphrag/pipeline.py +492 -0
  33. vectrixdb/core/graphrag/retriever/__init__.py +27 -0
  34. vectrixdb/core/graphrag/retriever/global_search.py +331 -0
  35. vectrixdb/core/graphrag/retriever/hybrid_search.py +482 -0
  36. vectrixdb/core/graphrag/retriever/local_search.py +343 -0
  37. vectrixdb/core/graphrag/summarizer.py +353 -0
  38. vectrixdb/core/hnsw/__init__.py +22 -0
  39. vectrixdb/core/hnsw/distance.py +216 -0
  40. vectrixdb/core/hnsw/index.py +773 -0
  41. vectrixdb/core/neural_search.py +576 -0
  42. vectrixdb/core/payload_index/__init__.py +26 -0
  43. vectrixdb/core/payload_index/base.py +125 -0
  44. vectrixdb/core/payload_index/geo.py +393 -0
  45. vectrixdb/core/payload_index/manager.py +345 -0
  46. vectrixdb/core/payload_index/numeric.py +294 -0
  47. vectrixdb/core/payload_index/string.py +315 -0
  48. vectrixdb/core/payload_index/tag.py +235 -0
  49. vectrixdb/core/quantization/__init__.py +22 -0
  50. vectrixdb/core/quantization/base.py +220 -0
  51. vectrixdb/core/quantization/binary.py +370 -0
  52. vectrixdb/core/quantization/product.py +470 -0
  53. vectrixdb/core/quantization/scalar.py +332 -0
  54. vectrixdb/core/scaling.py +570 -0
  55. vectrixdb/core/search/__init__.py +59 -0
  56. vectrixdb/core/search/colbert.py +484 -0
  57. vectrixdb/core/search/dense.py +498 -0
  58. vectrixdb/core/search/embeddings.py +671 -0
  59. vectrixdb/core/search/fusion.py +672 -0
  60. vectrixdb/core/search/sparse.py +528 -0
  61. vectrixdb/core/search/sparse_v2.py +510 -0
  62. vectrixdb/core/sparse_index.py +482 -0
  63. vectrixdb/core/storage.py +736 -0
  64. vectrixdb/core/types.py +953 -0
  65. vectrixdb/dashboard/index.html +4462 -0
  66. vectrixdb/easy.py +1166 -0
  67. vectrixdb/integrations/__init__.py +30 -0
  68. vectrixdb/models/__init__.py +61 -0
  69. vectrixdb/models/data/dense_en/model.onnx +0 -0
  70. vectrixdb/models/data/dense_en/special_tokens_map.json +37 -0
  71. vectrixdb/models/data/dense_en/tokenizer.json +30686 -0
  72. vectrixdb/models/data/dense_en/tokenizer_config.json +56 -0
  73. vectrixdb/models/data/dense_en/vectrix_config.json +5 -0
  74. vectrixdb/models/data/dense_en/vocab.txt +30522 -0
  75. vectrixdb/models/data/sparse/config.json +6 -0
  76. vectrixdb/models/data/sparse/idf.json +1 -0
  77. vectrixdb/models/data/sparse/vocab.json +1 -0
  78. vectrixdb/models/downloader.py +1147 -0
  79. vectrixdb/models/embedded.py +1960 -0
  80. vectrixdb-1.0.0.dist-info/METADATA +276 -0
  81. vectrixdb-1.0.0.dist-info/RECORD +84 -0
  82. vectrixdb-1.0.0.dist-info/WHEEL +4 -0
  83. vectrixdb-1.0.0.dist-info/entry_points.txt +2 -0
  84. vectrixdb-1.0.0.dist-info/licenses/LICENSE +21 -0
vectrixdb/__init__.py ADDED
@@ -0,0 +1,366 @@
1
+ """
2
+ VectrixDB - Where vectors come alive.
3
+
4
+ The simplest, most powerful vector database. Zero config. Text in, results out.
5
+
6
+ EASY API (Recommended):
7
+ >>> from vectrixdb import Vectrix
8
+ >>>
9
+ >>> # Create and add - ONE LINE
10
+ >>> db = Vectrix("my_docs").add(["Python is great", "ML is fun", "AI is the future"])
11
+ >>>
12
+ >>> # Search - ONE LINE
13
+ >>> results = db.search("programming")
14
+ >>> print(results.top.text)
15
+ >>>
16
+ >>> # Full power - STILL ONE LINE
17
+ >>> results = db.search("artificial intelligence", mode="ultimate")
18
+
19
+ COMPARISON WITH COMPETITORS:
20
+
21
+ # Chroma (4 lines)
22
+ client = chromadb.Client()
23
+ collection = client.create_collection("docs")
24
+ collection.add(documents=["text"], ids=["1"])
25
+ results = collection.query(query_texts=["query"])
26
+
27
+ # Pinecone (5+ lines + API key + manual embedding)
28
+ pinecone.init(api_key="...")
29
+ index = pinecone.Index("docs")
30
+ embedding = model.encode("text")
31
+ index.upsert(vectors=[...])
32
+ results = index.query(vector=embedding)
33
+
34
+ # VectrixDB (1 line each!)
35
+ db = Vectrix("docs").add(["text"])
36
+ results = db.search("query")
37
+
38
+ ADVANCED API (Full Control):
39
+ >>> from vectrixdb import VectrixDB
40
+ >>> db = VectrixDB("./my_vectors")
41
+ >>> collection = db.create_collection("documents", dimension=384)
42
+ >>> collection.add(ids=["doc1"], vectors=[[0.1, 0.2, ...]])
43
+ >>> results = collection.search(query=[0.1, 0.2, ...], limit=10)
44
+
45
+ Author: VectrixDB Team
46
+ License: Apache 2.0
47
+ """
48
+
49
+ __version__ = "0.1.0"
50
+ __author__ = "VectrixDB Team"
51
+ __tagline__ = "Where vectors come alive"
52
+
53
+ # =============================================================================
54
+ # EASY API (Recommended for most users)
55
+ # =============================================================================
56
+ from .easy import Vectrix, Result, Results, create, open, quick_search
57
+
58
+ # Backwards compatibility alias: ``V`` is bound to the same class object as ``Vectrix``
59
+ V = Vectrix
60
+
61
+ # =============================================================================
62
+ # ADVANCED API (Full control)
63
+ # =============================================================================
64
+ from .core.database import VectrixDB
65
+ from .core.collection import Collection
66
+ from .core.types import (
67
+ DistanceMetric,
68
+ SearchResult,
69
+ SearchResults,
70
+ SearchMode,
71
+ SearchQuery,
72
+ Point,
73
+ CollectionInfo,
74
+ DatabaseInfo,
75
+ IndexConfig,
76
+ IndexType,
77
+ Filter,
78
+ FilterCondition,
79
+ BatchResult,
80
+ SparseVector,
81
+ )
82
+
83
+ # Sparse vector index
84
+ from .core.sparse_index import SparseIndex
85
+
86
+ # Advanced search (Enterprise features)
87
+ from .core.advanced_search import (
88
+ Reranker,
89
+ RerankConfig,
90
+ RerankMethod,
91
+ FacetAggregator,
92
+ FacetConfig,
93
+ FacetResult,
94
+ ACLFilter,
95
+ ACLPrincipal,
96
+ TextAnalyzer,
97
+ EnhancedSearchResults,
98
+ )
99
+
100
+ # Storage backends
101
+ from .core.storage import (
102
+ StorageBackend,
103
+ StorageConfig,
104
+ BaseStorage,
105
+ InMemoryStorage,
106
+ SQLiteStorage,
107
+ create_storage,
108
+ )
109
+
110
+ # Caching layer
111
+ from .core.cache import (
112
+ CacheBackend,
113
+ CacheConfig,
114
+ BaseCache,
115
+ MemoryCache,
116
+ VectorCache,
117
+ create_cache,
118
+ )
119
+
120
+ # Auto-scaling
121
+ from .core.scaling import (
122
+ ScalingStrategy,
123
+ ScalingConfig,
124
+ AutoScaler,
125
+ ResourceMonitor,
126
+ MemoryManager,
127
+ )
128
+
129
+ # Quantization
130
+ from .core.quantization import (
131
+ BaseQuantizer,
132
+ ScalarQuantizer,
133
+ BinaryQuantizer,
134
+ ProductQuantizer,
135
+ QuantizationConfig,
136
+ QuantizationType,
137
+ )
138
+
139
+ # Native HNSW
140
+ from .core.hnsw import (
141
+ NativeHNSWIndex,
142
+ DistanceFunctions,
143
+ )
144
+
145
+ # Payload Indexing
146
+ from .core.payload_index import (
147
+ PayloadIndexManager,
148
+ NumericRangeIndex,
149
+ StringIndex,
150
+ TagIndex,
151
+ GeoIndex,
152
+ )
153
+
154
+ # Batch Operations
155
+ from .core.batch import (
156
+ ParallelBatchProcessor,
157
+ ParallelVectorInserter,
158
+ StreamingBatchProcessor,
159
+ StreamingReader,
160
+ MemoryEfficientBatcher,
161
+ LargeDatasetProcessor,
162
+ )
163
+
164
+ # Enhanced Search
165
+ from .core.search import (
166
+ DenseSearch,
167
+ MultiQuerySearch,
168
+ PrefetchRescore,
169
+ SparseSearch,
170
+ BM25Scorer,
171
+ QueryExpander,
172
+ ColBERTSearch,
173
+ MaxSimScorer,
174
+ TokenEmbeddings,
175
+ EmbeddingManager,
176
+ EmbeddingConfig,
177
+ FusionStrategy,
178
+ RRFFusion,
179
+ LinearFusion,
180
+ CondorcetFusion,
181
+ HybridSearcher,
182
+ # Embedded models (no network calls)
183
+ EmbeddedDenseProvider,
184
+ EmbeddedSparseProvider,
185
+ EmbeddedRerankerProvider,
186
+ get_embedded_provider,
187
+ )
188
+
189
+ # Embedded Models (no network calls after setup)
190
+ from .models import (
191
+ DenseEmbedder,
192
+ SparseEmbedder,
193
+ RerankerEmbedder,
194
+ LateInteractionEmbedder,
195
+ GraphExtractor,
196
+ Triplet,
197
+ download_models,
198
+ is_models_installed,
199
+ get_models_dir,
200
+ )
201
+
202
+ # Benchmarking
203
+ from .benchmarks import (
204
+ BenchmarkRunner,
205
+ BenchmarkResult,
206
+ BenchmarkDatasets,
207
+ MetricsCollector,
208
+ BenchmarkReport,
209
+ )
210
+
211
+ # GraphRAG (optional - requires graphrag dependencies)
212
+ try:
213
+ from .core.graphrag import (
214
+ GraphRAGConfig,
215
+ GraphRAGPipeline,
216
+ GraphRAGStats,
217
+ LLMProvider,
218
+ ExtractorType,
219
+ GraphSearchType,
220
+ create_openai_config,
221
+ create_ollama_config,
222
+ create_nlp_only_config,
223
+ create_pipeline,
224
+ )
225
+ GRAPHRAG_AVAILABLE = True
226
+ except ImportError:
227
+ GRAPHRAG_AVAILABLE = False
228
+ GraphRAGConfig = None
229
+ GraphRAGPipeline = None
230
+
231
+ __all__ = [  # names exported by ``from vectrixdb import *``
232
+ # Easy API (Recommended)
233
+ "Vectrix",
234
+ "V", # Backwards compatibility alias
235
+ "Result",
236
+ "Results",
237
+ "create",
238
+ "open",  # NOTE: a star import rebinds the builtin open() to the one imported from .easy
239
+ "quick_search",
240
+ # Advanced API
241
+ "VectrixDB",
242
+ "Collection",
243
+ # Types
244
+ "DistanceMetric",
245
+ "SearchResult",
246
+ "SearchResults",
247
+ "SearchMode",
248
+ "SearchQuery",
249
+ "Point",
250
+ "CollectionInfo",
251
+ "DatabaseInfo",
252
+ "IndexConfig",
253
+ "IndexType",
254
+ "Filter",
255
+ "FilterCondition",
256
+ "BatchResult",
257
+ # Sparse Vectors
258
+ "SparseVector",
259
+ "SparseIndex",
260
+ # Advanced Search (Enterprise)
261
+ "Reranker",
262
+ "RerankConfig",
263
+ "RerankMethod",
264
+ "FacetAggregator",
265
+ "FacetConfig",
266
+ "FacetResult",
267
+ "ACLFilter",
268
+ "ACLPrincipal",
269
+ "TextAnalyzer",
270
+ "EnhancedSearchResults",
271
+ # Storage
272
+ "StorageBackend",
273
+ "StorageConfig",
274
+ "BaseStorage",
275
+ "InMemoryStorage",
276
+ "SQLiteStorage",
277
+ "create_storage",
278
+ # Cache
279
+ "CacheBackend",
280
+ "CacheConfig",
281
+ "BaseCache",
282
+ "MemoryCache",
283
+ "VectorCache",
284
+ "create_cache",
285
+ # Scaling
286
+ "ScalingStrategy",
287
+ "ScalingConfig",
288
+ "AutoScaler",
289
+ "ResourceMonitor",
290
+ "MemoryManager",
291
+ # Quantization
292
+ "BaseQuantizer",
293
+ "ScalarQuantizer",
294
+ "BinaryQuantizer",
295
+ "ProductQuantizer",
296
+ "QuantizationConfig",
297
+ "QuantizationType",
298
+ # Native HNSW
299
+ "NativeHNSWIndex",
300
+ "DistanceFunctions",
301
+ # Payload Indexing
302
+ "PayloadIndexManager",
303
+ "NumericRangeIndex",
304
+ "StringIndex",
305
+ "TagIndex",
306
+ "GeoIndex",
307
+ # Batch Operations
308
+ "ParallelBatchProcessor",
309
+ "ParallelVectorInserter",
310
+ "StreamingBatchProcessor",
311
+ "StreamingReader",
312
+ "MemoryEfficientBatcher",
313
+ "LargeDatasetProcessor",
314
+ # Enhanced Search
315
+ "DenseSearch",
316
+ "MultiQuerySearch",
317
+ "PrefetchRescore",
318
+ "SparseSearch",
319
+ "BM25Scorer",
320
+ "QueryExpander",
321
+ "ColBERTSearch",
322
+ "MaxSimScorer",
323
+ "TokenEmbeddings",
324
+ "EmbeddingManager",
325
+ "EmbeddingConfig",
326
+ "FusionStrategy",
327
+ "RRFFusion",
328
+ "LinearFusion",
329
+ "CondorcetFusion",
330
+ "HybridSearcher",
331
+ # Embedded Models (no network calls)
332
+ "EmbeddedDenseProvider",
333
+ "EmbeddedSparseProvider",
334
+ "EmbeddedRerankerProvider",
335
+ "get_embedded_provider",
336
+ "DenseEmbedder",
337
+ "SparseEmbedder",
338
+ "RerankerEmbedder",
339
+ "LateInteractionEmbedder",
340
+ "GraphExtractor",
341
+ "Triplet",
342
+ "download_models",
343
+ "is_models_installed",
344
+ "get_models_dir",
345
+ # Benchmarking
346
+ "BenchmarkRunner",
347
+ "BenchmarkResult",
348
+ "BenchmarkDatasets",
349
+ "MetricsCollector",
350
+ "BenchmarkReport",
351
+ # GraphRAG (optional; check GRAPHRAG_AVAILABLE before relying on these names)
352
+ "GraphRAGConfig",
353
+ "GraphRAGPipeline",
354
+ "GraphRAGStats",
355
+ "LLMProvider",
356
+ "ExtractorType",
357
+ "GraphSearchType",
358
+ "create_openai_config",
359
+ "create_ollama_config",
360
+ "create_nlp_only_config",
361
+ "create_pipeline",
362
+ "GRAPHRAG_AVAILABLE",
363
+ # Meta
364
+ "__version__",
365
+ ]
366
+
vectrixdb/api/__init__.py ADDED
@@ -0,0 +1,9 @@
1
+ """
2
+ VectrixDB API - REST API server (re-exports ``create_app`` and ``app`` from ``.server``).
3
+
4
+ Author: Daddy Nyame Owusu - Boakye
5
+ """
6
+
7
+ from .server import create_app, app
8
+
9
+ __all__ = ["create_app", "app"]  # the two names re-exported from .server