vectrixdb-1.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vectrixdb/__init__.py +366 -0
- vectrixdb/api/__init__.py +9 -0
- vectrixdb/api/server.py +1983 -0
- vectrixdb/benchmarks/__init__.py +22 -0
- vectrixdb/benchmarks/datasets.py +329 -0
- vectrixdb/benchmarks/metrics.py +271 -0
- vectrixdb/benchmarks/reports.py +356 -0
- vectrixdb/benchmarks/runner.py +390 -0
- vectrixdb/cli.py +329 -0
- vectrixdb/core/__init__.py +114 -0
- vectrixdb/core/advanced_search.py +923 -0
- vectrixdb/core/batch/__init__.py +22 -0
- vectrixdb/core/batch/memory.py +393 -0
- vectrixdb/core/batch/parallel.py +301 -0
- vectrixdb/core/batch/streaming.py +445 -0
- vectrixdb/core/cache.py +620 -0
- vectrixdb/core/collection.py +2164 -0
- vectrixdb/core/database.py +1105 -0
- vectrixdb/core/graphrag/__init__.py +120 -0
- vectrixdb/core/graphrag/chunker.py +384 -0
- vectrixdb/core/graphrag/config.py +303 -0
- vectrixdb/core/graphrag/extractor/__init__.py +239 -0
- vectrixdb/core/graphrag/extractor/base.py +424 -0
- vectrixdb/core/graphrag/extractor/hybrid_extractor.py +342 -0
- vectrixdb/core/graphrag/extractor/llm_extractor.py +508 -0
- vectrixdb/core/graphrag/extractor/nlp_extractor.py +461 -0
- vectrixdb/core/graphrag/extractor/rebel_extractor.py +316 -0
- vectrixdb/core/graphrag/graph/__init__.py +31 -0
- vectrixdb/core/graphrag/graph/community.py +334 -0
- vectrixdb/core/graphrag/graph/knowledge_graph.py +593 -0
- vectrixdb/core/graphrag/graph/storage.py +490 -0
- vectrixdb/core/graphrag/pipeline.py +492 -0
- vectrixdb/core/graphrag/retriever/__init__.py +27 -0
- vectrixdb/core/graphrag/retriever/global_search.py +331 -0
- vectrixdb/core/graphrag/retriever/hybrid_search.py +482 -0
- vectrixdb/core/graphrag/retriever/local_search.py +343 -0
- vectrixdb/core/graphrag/summarizer.py +353 -0
- vectrixdb/core/hnsw/__init__.py +22 -0
- vectrixdb/core/hnsw/distance.py +216 -0
- vectrixdb/core/hnsw/index.py +773 -0
- vectrixdb/core/neural_search.py +576 -0
- vectrixdb/core/payload_index/__init__.py +26 -0
- vectrixdb/core/payload_index/base.py +125 -0
- vectrixdb/core/payload_index/geo.py +393 -0
- vectrixdb/core/payload_index/manager.py +345 -0
- vectrixdb/core/payload_index/numeric.py +294 -0
- vectrixdb/core/payload_index/string.py +315 -0
- vectrixdb/core/payload_index/tag.py +235 -0
- vectrixdb/core/quantization/__init__.py +22 -0
- vectrixdb/core/quantization/base.py +220 -0
- vectrixdb/core/quantization/binary.py +370 -0
- vectrixdb/core/quantization/product.py +470 -0
- vectrixdb/core/quantization/scalar.py +332 -0
- vectrixdb/core/scaling.py +570 -0
- vectrixdb/core/search/__init__.py +59 -0
- vectrixdb/core/search/colbert.py +484 -0
- vectrixdb/core/search/dense.py +498 -0
- vectrixdb/core/search/embeddings.py +671 -0
- vectrixdb/core/search/fusion.py +672 -0
- vectrixdb/core/search/sparse.py +528 -0
- vectrixdb/core/search/sparse_v2.py +510 -0
- vectrixdb/core/sparse_index.py +482 -0
- vectrixdb/core/storage.py +736 -0
- vectrixdb/core/types.py +953 -0
- vectrixdb/dashboard/index.html +4462 -0
- vectrixdb/easy.py +1166 -0
- vectrixdb/integrations/__init__.py +30 -0
- vectrixdb/models/__init__.py +61 -0
- vectrixdb/models/data/dense_en/model.onnx +0 -0
- vectrixdb/models/data/dense_en/special_tokens_map.json +37 -0
- vectrixdb/models/data/dense_en/tokenizer.json +30686 -0
- vectrixdb/models/data/dense_en/tokenizer_config.json +56 -0
- vectrixdb/models/data/dense_en/vectrix_config.json +5 -0
- vectrixdb/models/data/dense_en/vocab.txt +30522 -0
- vectrixdb/models/data/sparse/config.json +6 -0
- vectrixdb/models/data/sparse/idf.json +1 -0
- vectrixdb/models/data/sparse/vocab.json +1 -0
- vectrixdb/models/downloader.py +1147 -0
- vectrixdb/models/embedded.py +1960 -0
- vectrixdb-1.0.0.dist-info/METADATA +276 -0
- vectrixdb-1.0.0.dist-info/RECORD +84 -0
- vectrixdb-1.0.0.dist-info/WHEEL +4 -0
- vectrixdb-1.0.0.dist-info/entry_points.txt +2 -0
- vectrixdb-1.0.0.dist-info/licenses/LICENSE +21 -0
vectrixdb/__init__.py
ADDED
|
@@ -0,0 +1,366 @@
|
|
|
1
|
+
"""
|
|
2
|
+
VectrixDB - Where vectors come alive.
|
|
3
|
+
|
|
4
|
+
The simplest, most powerful vector database. Zero config. Text in, results out.
|
|
5
|
+
|
|
6
|
+
EASY API (Recommended):
|
|
7
|
+
>>> from vectrixdb import Vectrix
|
|
8
|
+
>>>
|
|
9
|
+
>>> # Create and add - ONE LINE
|
|
10
|
+
>>> db = Vectrix("my_docs").add(["Python is great", "ML is fun", "AI is the future"])
|
|
11
|
+
>>>
|
|
12
|
+
>>> # Search - ONE LINE
|
|
13
|
+
>>> results = db.search("programming")
|
|
14
|
+
>>> print(results.top.text)
|
|
15
|
+
>>>
|
|
16
|
+
>>> # Full power - STILL ONE LINE
|
|
17
|
+
>>> results = db.search("artificial intelligence", mode="ultimate")
|
|
18
|
+
|
|
19
|
+
COMPARISON WITH COMPETITORS:
|
|
20
|
+
|
|
21
|
+
# Chroma (4 lines)
|
|
22
|
+
client = chromadb.Client()
|
|
23
|
+
collection = client.create_collection("docs")
|
|
24
|
+
collection.add(documents=["text"], ids=["1"])
|
|
25
|
+
results = collection.query(query_texts=["query"])
|
|
26
|
+
|
|
27
|
+
# Pinecone (5+ lines + API key + manual embedding)
|
|
28
|
+
pinecone.init(api_key="...")
|
|
29
|
+
index = pinecone.Index("docs")
|
|
30
|
+
embedding = model.encode("text")
|
|
31
|
+
index.upsert(vectors=[...])
|
|
32
|
+
results = index.query(vector=embedding)
|
|
33
|
+
|
|
34
|
+
# VectrixDB (1 line each!)
|
|
35
|
+
db = Vectrix("docs").add(["text"])
|
|
36
|
+
results = db.search("query")
|
|
37
|
+
|
|
38
|
+
ADVANCED API (Full Control):
|
|
39
|
+
>>> from vectrixdb import VectrixDB
|
|
40
|
+
>>> db = VectrixDB("./my_vectors")
|
|
41
|
+
>>> collection = db.create_collection("documents", dimension=384)
|
|
42
|
+
>>> collection.add(ids=["doc1"], vectors=[[0.1, 0.2, ...]])
|
|
43
|
+
>>> results = collection.search(query=[0.1, 0.2, ...], limit=10)
|
|
44
|
+
|
|
45
|
+
Author: VectrixDB Team
|
|
46
|
+
License: Apache 2.0
|
|
47
|
+
"""
|
|
48
|
+
|
|
49
|
+
# Must match the distribution version in the wheel metadata (published as 1.0.0).
__version__ = "1.0.0"
|
|
50
|
+
__author__ = "VectrixDB Team"
|
|
51
|
+
__tagline__ = "Where vectors come alive"
|
|
52
|
+
|
|
53
|
+
# =============================================================================
|
|
54
|
+
# EASY API (Recommended for most users)
|
|
55
|
+
# =============================================================================
|
|
56
|
+
from .easy import Vectrix, Result, Results, create, open, quick_search
|
|
57
|
+
|
|
58
|
+
# Backwards compatibility alias
|
|
59
|
+
V = Vectrix
|
|
60
|
+
|
|
61
|
+
# =============================================================================
|
|
62
|
+
# ADVANCED API (Full control)
|
|
63
|
+
# =============================================================================
|
|
64
|
+
from .core.database import VectrixDB
|
|
65
|
+
from .core.collection import Collection
|
|
66
|
+
from .core.types import (
|
|
67
|
+
DistanceMetric,
|
|
68
|
+
SearchResult,
|
|
69
|
+
SearchResults,
|
|
70
|
+
SearchMode,
|
|
71
|
+
SearchQuery,
|
|
72
|
+
Point,
|
|
73
|
+
CollectionInfo,
|
|
74
|
+
DatabaseInfo,
|
|
75
|
+
IndexConfig,
|
|
76
|
+
IndexType,
|
|
77
|
+
Filter,
|
|
78
|
+
FilterCondition,
|
|
79
|
+
BatchResult,
|
|
80
|
+
SparseVector,
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
# Sparse vector index
|
|
84
|
+
from .core.sparse_index import SparseIndex
|
|
85
|
+
|
|
86
|
+
# Advanced search (Enterprise features)
|
|
87
|
+
from .core.advanced_search import (
|
|
88
|
+
Reranker,
|
|
89
|
+
RerankConfig,
|
|
90
|
+
RerankMethod,
|
|
91
|
+
FacetAggregator,
|
|
92
|
+
FacetConfig,
|
|
93
|
+
FacetResult,
|
|
94
|
+
ACLFilter,
|
|
95
|
+
ACLPrincipal,
|
|
96
|
+
TextAnalyzer,
|
|
97
|
+
EnhancedSearchResults,
|
|
98
|
+
)
|
|
99
|
+
|
|
100
|
+
# Storage backends
|
|
101
|
+
from .core.storage import (
|
|
102
|
+
StorageBackend,
|
|
103
|
+
StorageConfig,
|
|
104
|
+
BaseStorage,
|
|
105
|
+
InMemoryStorage,
|
|
106
|
+
SQLiteStorage,
|
|
107
|
+
create_storage,
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
# Caching layer
|
|
111
|
+
from .core.cache import (
|
|
112
|
+
CacheBackend,
|
|
113
|
+
CacheConfig,
|
|
114
|
+
BaseCache,
|
|
115
|
+
MemoryCache,
|
|
116
|
+
VectorCache,
|
|
117
|
+
create_cache,
|
|
118
|
+
)
|
|
119
|
+
|
|
120
|
+
# Auto-scaling
|
|
121
|
+
from .core.scaling import (
|
|
122
|
+
ScalingStrategy,
|
|
123
|
+
ScalingConfig,
|
|
124
|
+
AutoScaler,
|
|
125
|
+
ResourceMonitor,
|
|
126
|
+
MemoryManager,
|
|
127
|
+
)
|
|
128
|
+
|
|
129
|
+
# Quantization
|
|
130
|
+
from .core.quantization import (
|
|
131
|
+
BaseQuantizer,
|
|
132
|
+
ScalarQuantizer,
|
|
133
|
+
BinaryQuantizer,
|
|
134
|
+
ProductQuantizer,
|
|
135
|
+
QuantizationConfig,
|
|
136
|
+
QuantizationType,
|
|
137
|
+
)
|
|
138
|
+
|
|
139
|
+
# Native HNSW
|
|
140
|
+
from .core.hnsw import (
|
|
141
|
+
NativeHNSWIndex,
|
|
142
|
+
DistanceFunctions,
|
|
143
|
+
)
|
|
144
|
+
|
|
145
|
+
# Payload Indexing
|
|
146
|
+
from .core.payload_index import (
|
|
147
|
+
PayloadIndexManager,
|
|
148
|
+
NumericRangeIndex,
|
|
149
|
+
StringIndex,
|
|
150
|
+
TagIndex,
|
|
151
|
+
GeoIndex,
|
|
152
|
+
)
|
|
153
|
+
|
|
154
|
+
# Batch Operations
|
|
155
|
+
from .core.batch import (
|
|
156
|
+
ParallelBatchProcessor,
|
|
157
|
+
ParallelVectorInserter,
|
|
158
|
+
StreamingBatchProcessor,
|
|
159
|
+
StreamingReader,
|
|
160
|
+
MemoryEfficientBatcher,
|
|
161
|
+
LargeDatasetProcessor,
|
|
162
|
+
)
|
|
163
|
+
|
|
164
|
+
# Enhanced Search
|
|
165
|
+
from .core.search import (
|
|
166
|
+
DenseSearch,
|
|
167
|
+
MultiQuerySearch,
|
|
168
|
+
PrefetchRescore,
|
|
169
|
+
SparseSearch,
|
|
170
|
+
BM25Scorer,
|
|
171
|
+
QueryExpander,
|
|
172
|
+
ColBERTSearch,
|
|
173
|
+
MaxSimScorer,
|
|
174
|
+
TokenEmbeddings,
|
|
175
|
+
EmbeddingManager,
|
|
176
|
+
EmbeddingConfig,
|
|
177
|
+
FusionStrategy,
|
|
178
|
+
RRFFusion,
|
|
179
|
+
LinearFusion,
|
|
180
|
+
CondorcetFusion,
|
|
181
|
+
HybridSearcher,
|
|
182
|
+
# Embedded models (no network calls)
|
|
183
|
+
EmbeddedDenseProvider,
|
|
184
|
+
EmbeddedSparseProvider,
|
|
185
|
+
EmbeddedRerankerProvider,
|
|
186
|
+
get_embedded_provider,
|
|
187
|
+
)
|
|
188
|
+
|
|
189
|
+
# Embedded Models (no network calls after setup)
|
|
190
|
+
from .models import (
|
|
191
|
+
DenseEmbedder,
|
|
192
|
+
SparseEmbedder,
|
|
193
|
+
RerankerEmbedder,
|
|
194
|
+
LateInteractionEmbedder,
|
|
195
|
+
GraphExtractor,
|
|
196
|
+
Triplet,
|
|
197
|
+
download_models,
|
|
198
|
+
is_models_installed,
|
|
199
|
+
get_models_dir,
|
|
200
|
+
)
|
|
201
|
+
|
|
202
|
+
# Benchmarking
|
|
203
|
+
from .benchmarks import (
|
|
204
|
+
BenchmarkRunner,
|
|
205
|
+
BenchmarkResult,
|
|
206
|
+
BenchmarkDatasets,
|
|
207
|
+
MetricsCollector,
|
|
208
|
+
BenchmarkReport,
|
|
209
|
+
)
|
|
210
|
+
|
|
211
|
+
# GraphRAG (optional - requires graphrag dependencies)
|
|
212
|
+
try:
|
|
213
|
+
from .core.graphrag import (
|
|
214
|
+
GraphRAGConfig,
|
|
215
|
+
GraphRAGPipeline,
|
|
216
|
+
GraphRAGStats,
|
|
217
|
+
LLMProvider,
|
|
218
|
+
ExtractorType,
|
|
219
|
+
GraphSearchType,
|
|
220
|
+
create_openai_config,
|
|
221
|
+
create_ollama_config,
|
|
222
|
+
create_nlp_only_config,
|
|
223
|
+
create_pipeline,
|
|
224
|
+
)
|
|
225
|
+
GRAPHRAG_AVAILABLE = True
|
|
226
|
+
except ImportError:
|
|
227
|
+
GRAPHRAG_AVAILABLE = False
|
|
228
|
+
GraphRAGConfig = None
|
|
229
|
+
GraphRAGPipeline = None
# Every GraphRAG name exported through __all__ must still exist when the
# optional dependencies are missing; otherwise `from vectrixdb import *`
# raises AttributeError on the first undefined name.
GraphRAGStats = LLMProvider = ExtractorType = GraphSearchType = None
create_openai_config = create_ollama_config = None
create_nlp_only_config = create_pipeline = None
|
|
230
|
+
|
|
231
|
+
__all__ = [
|
|
232
|
+
# Easy API (Recommended)
|
|
233
|
+
"Vectrix",
|
|
234
|
+
"V", # Backwards compatibility alias
|
|
235
|
+
"Result",
|
|
236
|
+
"Results",
|
|
237
|
+
"create",
|
|
238
|
+
"open",
|
|
239
|
+
"quick_search",
|
|
240
|
+
# Advanced API
|
|
241
|
+
"VectrixDB",
|
|
242
|
+
"Collection",
|
|
243
|
+
# Types
|
|
244
|
+
"DistanceMetric",
|
|
245
|
+
"SearchResult",
|
|
246
|
+
"SearchResults",
|
|
247
|
+
"SearchMode",
|
|
248
|
+
"SearchQuery",
|
|
249
|
+
"Point",
|
|
250
|
+
"CollectionInfo",
|
|
251
|
+
"DatabaseInfo",
|
|
252
|
+
"IndexConfig",
|
|
253
|
+
"IndexType",
|
|
254
|
+
"Filter",
|
|
255
|
+
"FilterCondition",
|
|
256
|
+
"BatchResult",
|
|
257
|
+
# Sparse Vectors
|
|
258
|
+
"SparseVector",
|
|
259
|
+
"SparseIndex",
|
|
260
|
+
# Advanced Search (Enterprise)
|
|
261
|
+
"Reranker",
|
|
262
|
+
"RerankConfig",
|
|
263
|
+
"RerankMethod",
|
|
264
|
+
"FacetAggregator",
|
|
265
|
+
"FacetConfig",
|
|
266
|
+
"FacetResult",
|
|
267
|
+
"ACLFilter",
|
|
268
|
+
"ACLPrincipal",
|
|
269
|
+
"TextAnalyzer",
|
|
270
|
+
"EnhancedSearchResults",
|
|
271
|
+
# Storage
|
|
272
|
+
"StorageBackend",
|
|
273
|
+
"StorageConfig",
|
|
274
|
+
"BaseStorage",
|
|
275
|
+
"InMemoryStorage",
|
|
276
|
+
"SQLiteStorage",
|
|
277
|
+
"create_storage",
|
|
278
|
+
# Cache
|
|
279
|
+
"CacheBackend",
|
|
280
|
+
"CacheConfig",
|
|
281
|
+
"BaseCache",
|
|
282
|
+
"MemoryCache",
|
|
283
|
+
"VectorCache",
|
|
284
|
+
"create_cache",
|
|
285
|
+
# Scaling
|
|
286
|
+
"ScalingStrategy",
|
|
287
|
+
"ScalingConfig",
|
|
288
|
+
"AutoScaler",
|
|
289
|
+
"ResourceMonitor",
|
|
290
|
+
"MemoryManager",
|
|
291
|
+
# Quantization
|
|
292
|
+
"BaseQuantizer",
|
|
293
|
+
"ScalarQuantizer",
|
|
294
|
+
"BinaryQuantizer",
|
|
295
|
+
"ProductQuantizer",
|
|
296
|
+
"QuantizationConfig",
|
|
297
|
+
"QuantizationType",
|
|
298
|
+
# Native HNSW
|
|
299
|
+
"NativeHNSWIndex",
|
|
300
|
+
"DistanceFunctions",
|
|
301
|
+
# Payload Indexing
|
|
302
|
+
"PayloadIndexManager",
|
|
303
|
+
"NumericRangeIndex",
|
|
304
|
+
"StringIndex",
|
|
305
|
+
"TagIndex",
|
|
306
|
+
"GeoIndex",
|
|
307
|
+
# Batch Operations
|
|
308
|
+
"ParallelBatchProcessor",
|
|
309
|
+
"ParallelVectorInserter",
|
|
310
|
+
"StreamingBatchProcessor",
|
|
311
|
+
"StreamingReader",
|
|
312
|
+
"MemoryEfficientBatcher",
|
|
313
|
+
"LargeDatasetProcessor",
|
|
314
|
+
# Enhanced Search
|
|
315
|
+
"DenseSearch",
|
|
316
|
+
"MultiQuerySearch",
|
|
317
|
+
"PrefetchRescore",
|
|
318
|
+
"SparseSearch",
|
|
319
|
+
"BM25Scorer",
|
|
320
|
+
"QueryExpander",
|
|
321
|
+
"ColBERTSearch",
|
|
322
|
+
"MaxSimScorer",
|
|
323
|
+
"TokenEmbeddings",
|
|
324
|
+
"EmbeddingManager",
|
|
325
|
+
"EmbeddingConfig",
|
|
326
|
+
"FusionStrategy",
|
|
327
|
+
"RRFFusion",
|
|
328
|
+
"LinearFusion",
|
|
329
|
+
"CondorcetFusion",
|
|
330
|
+
"HybridSearcher",
|
|
331
|
+
# Embedded Models (no network calls)
|
|
332
|
+
"EmbeddedDenseProvider",
|
|
333
|
+
"EmbeddedSparseProvider",
|
|
334
|
+
"EmbeddedRerankerProvider",
|
|
335
|
+
"get_embedded_provider",
|
|
336
|
+
"DenseEmbedder",
|
|
337
|
+
"SparseEmbedder",
|
|
338
|
+
"RerankerEmbedder",
|
|
339
|
+
"LateInteractionEmbedder",
|
|
340
|
+
"GraphExtractor",
|
|
341
|
+
"Triplet",
|
|
342
|
+
"download_models",
|
|
343
|
+
"is_models_installed",
|
|
344
|
+
"get_models_dir",
|
|
345
|
+
# Benchmarking
|
|
346
|
+
"BenchmarkRunner",
|
|
347
|
+
"BenchmarkResult",
|
|
348
|
+
"BenchmarkDatasets",
|
|
349
|
+
"MetricsCollector",
|
|
350
|
+
"BenchmarkReport",
|
|
351
|
+
# GraphRAG
|
|
352
|
+
"GraphRAGConfig",
|
|
353
|
+
"GraphRAGPipeline",
|
|
354
|
+
"GraphRAGStats",
|
|
355
|
+
"LLMProvider",
|
|
356
|
+
"ExtractorType",
|
|
357
|
+
"GraphSearchType",
|
|
358
|
+
"create_openai_config",
|
|
359
|
+
"create_ollama_config",
|
|
360
|
+
"create_nlp_only_config",
|
|
361
|
+
"create_pipeline",
|
|
362
|
+
"GRAPHRAG_AVAILABLE",
|
|
363
|
+
# Meta
|
|
364
|
+
"__version__",
|
|
365
|
+
]
|
|
366
|
+
|