vectrixdb 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. vectrixdb-1.0.0/.gitattributes +1 -0
  2. vectrixdb-1.0.0/.gitignore +72 -0
  3. vectrixdb-1.0.0/LICENSE +21 -0
  4. vectrixdb-1.0.0/PKG-INFO +276 -0
  5. vectrixdb-1.0.0/README.md +206 -0
  6. vectrixdb-1.0.0/pyproject.toml +133 -0
  7. vectrixdb-1.0.0/requirements.txt +19 -0
  8. vectrixdb-1.0.0/scripts/quantize_models.py +296 -0
  9. vectrixdb-1.0.0/setup.py +198 -0
  10. vectrixdb-1.0.0/tests/.gitignore +33 -0
  11. vectrixdb-1.0.0/tests/Custom Models.ipynb +8126 -0
  12. vectrixdb-1.0.0/tests/Direct Embedders.ipynb +374 -0
  13. vectrixdb-1.0.0/tests/LangChain.ipynb +372 -0
  14. vectrixdb-1.0.0/tests/Loading Data.ipynb +382 -0
  15. vectrixdb-1.0.0/tests/Quick Start.ipynb +180 -0
  16. vectrixdb-1.0.0/tests/Tiers & Modes.ipynb +364 -0
  17. vectrixdb-1.0.0/vectrixdb/__init__.py +366 -0
  18. vectrixdb-1.0.0/vectrixdb/api/__init__.py +9 -0
  19. vectrixdb-1.0.0/vectrixdb/api/server.py +1983 -0
  20. vectrixdb-1.0.0/vectrixdb/benchmarks/__init__.py +22 -0
  21. vectrixdb-1.0.0/vectrixdb/benchmarks/datasets.py +329 -0
  22. vectrixdb-1.0.0/vectrixdb/benchmarks/metrics.py +271 -0
  23. vectrixdb-1.0.0/vectrixdb/benchmarks/reports.py +356 -0
  24. vectrixdb-1.0.0/vectrixdb/benchmarks/runner.py +390 -0
  25. vectrixdb-1.0.0/vectrixdb/cli.py +329 -0
  26. vectrixdb-1.0.0/vectrixdb/core/__init__.py +114 -0
  27. vectrixdb-1.0.0/vectrixdb/core/advanced_search.py +923 -0
  28. vectrixdb-1.0.0/vectrixdb/core/batch/__init__.py +22 -0
  29. vectrixdb-1.0.0/vectrixdb/core/batch/memory.py +393 -0
  30. vectrixdb-1.0.0/vectrixdb/core/batch/parallel.py +301 -0
  31. vectrixdb-1.0.0/vectrixdb/core/batch/streaming.py +445 -0
  32. vectrixdb-1.0.0/vectrixdb/core/cache.py +620 -0
  33. vectrixdb-1.0.0/vectrixdb/core/collection.py +2164 -0
  34. vectrixdb-1.0.0/vectrixdb/core/database.py +1105 -0
  35. vectrixdb-1.0.0/vectrixdb/core/graphrag/__init__.py +120 -0
  36. vectrixdb-1.0.0/vectrixdb/core/graphrag/chunker.py +384 -0
  37. vectrixdb-1.0.0/vectrixdb/core/graphrag/config.py +303 -0
  38. vectrixdb-1.0.0/vectrixdb/core/graphrag/extractor/__init__.py +239 -0
  39. vectrixdb-1.0.0/vectrixdb/core/graphrag/extractor/base.py +424 -0
  40. vectrixdb-1.0.0/vectrixdb/core/graphrag/extractor/hybrid_extractor.py +342 -0
  41. vectrixdb-1.0.0/vectrixdb/core/graphrag/extractor/llm_extractor.py +508 -0
  42. vectrixdb-1.0.0/vectrixdb/core/graphrag/extractor/nlp_extractor.py +461 -0
  43. vectrixdb-1.0.0/vectrixdb/core/graphrag/extractor/rebel_extractor.py +316 -0
  44. vectrixdb-1.0.0/vectrixdb/core/graphrag/graph/__init__.py +31 -0
  45. vectrixdb-1.0.0/vectrixdb/core/graphrag/graph/community.py +334 -0
  46. vectrixdb-1.0.0/vectrixdb/core/graphrag/graph/knowledge_graph.py +593 -0
  47. vectrixdb-1.0.0/vectrixdb/core/graphrag/graph/storage.py +490 -0
  48. vectrixdb-1.0.0/vectrixdb/core/graphrag/pipeline.py +492 -0
  49. vectrixdb-1.0.0/vectrixdb/core/graphrag/retriever/__init__.py +27 -0
  50. vectrixdb-1.0.0/vectrixdb/core/graphrag/retriever/global_search.py +331 -0
  51. vectrixdb-1.0.0/vectrixdb/core/graphrag/retriever/hybrid_search.py +482 -0
  52. vectrixdb-1.0.0/vectrixdb/core/graphrag/retriever/local_search.py +343 -0
  53. vectrixdb-1.0.0/vectrixdb/core/graphrag/summarizer.py +353 -0
  54. vectrixdb-1.0.0/vectrixdb/core/hnsw/__init__.py +22 -0
  55. vectrixdb-1.0.0/vectrixdb/core/hnsw/distance.py +216 -0
  56. vectrixdb-1.0.0/vectrixdb/core/hnsw/index.py +773 -0
  57. vectrixdb-1.0.0/vectrixdb/core/neural_search.py +576 -0
  58. vectrixdb-1.0.0/vectrixdb/core/payload_index/__init__.py +26 -0
  59. vectrixdb-1.0.0/vectrixdb/core/payload_index/base.py +125 -0
  60. vectrixdb-1.0.0/vectrixdb/core/payload_index/geo.py +393 -0
  61. vectrixdb-1.0.0/vectrixdb/core/payload_index/manager.py +345 -0
  62. vectrixdb-1.0.0/vectrixdb/core/payload_index/numeric.py +294 -0
  63. vectrixdb-1.0.0/vectrixdb/core/payload_index/string.py +315 -0
  64. vectrixdb-1.0.0/vectrixdb/core/payload_index/tag.py +235 -0
  65. vectrixdb-1.0.0/vectrixdb/core/quantization/__init__.py +22 -0
  66. vectrixdb-1.0.0/vectrixdb/core/quantization/base.py +220 -0
  67. vectrixdb-1.0.0/vectrixdb/core/quantization/binary.py +370 -0
  68. vectrixdb-1.0.0/vectrixdb/core/quantization/product.py +470 -0
  69. vectrixdb-1.0.0/vectrixdb/core/quantization/scalar.py +332 -0
  70. vectrixdb-1.0.0/vectrixdb/core/scaling.py +570 -0
  71. vectrixdb-1.0.0/vectrixdb/core/search/__init__.py +59 -0
  72. vectrixdb-1.0.0/vectrixdb/core/search/colbert.py +484 -0
  73. vectrixdb-1.0.0/vectrixdb/core/search/dense.py +498 -0
  74. vectrixdb-1.0.0/vectrixdb/core/search/embeddings.py +671 -0
  75. vectrixdb-1.0.0/vectrixdb/core/search/fusion.py +672 -0
  76. vectrixdb-1.0.0/vectrixdb/core/search/sparse.py +528 -0
  77. vectrixdb-1.0.0/vectrixdb/core/search/sparse_v2.py +510 -0
  78. vectrixdb-1.0.0/vectrixdb/core/sparse_index.py +482 -0
  79. vectrixdb-1.0.0/vectrixdb/core/storage.py +736 -0
  80. vectrixdb-1.0.0/vectrixdb/core/types.py +953 -0
  81. vectrixdb-1.0.0/vectrixdb/dashboard/index.html +4462 -0
  82. vectrixdb-1.0.0/vectrixdb/easy.py +1166 -0
  83. vectrixdb-1.0.0/vectrixdb/integrations/__init__.py +30 -0
  84. vectrixdb-1.0.0/vectrixdb/models/__init__.py +61 -0
  85. vectrixdb-1.0.0/vectrixdb/models/data/dense_en/model.onnx +0 -0
  86. vectrixdb-1.0.0/vectrixdb/models/data/dense_en/special_tokens_map.json +37 -0
  87. vectrixdb-1.0.0/vectrixdb/models/data/dense_en/tokenizer.json +30686 -0
  88. vectrixdb-1.0.0/vectrixdb/models/data/dense_en/tokenizer_config.json +56 -0
  89. vectrixdb-1.0.0/vectrixdb/models/data/dense_en/vectrix_config.json +5 -0
  90. vectrixdb-1.0.0/vectrixdb/models/data/dense_en/vocab.txt +30522 -0
  91. vectrixdb-1.0.0/vectrixdb/models/data/sparse/config.json +6 -0
  92. vectrixdb-1.0.0/vectrixdb/models/data/sparse/idf.json +1 -0
  93. vectrixdb-1.0.0/vectrixdb/models/data/sparse/vocab.json +1 -0
  94. vectrixdb-1.0.0/vectrixdb/models/downloader.py +1147 -0
  95. vectrixdb-1.0.0/vectrixdb/models/embedded.py +1960 -0
@@ -0,0 +1 @@
1
+ *.onnx filter=lfs diff=lfs merge=lfs -text
@@ -0,0 +1,72 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ venv/
8
+ .venv/
9
+ env/
10
+ .env
11
+ *.egg-info/
12
+ dist/
13
+ build/
14
+ .eggs/
15
+ *.egg
16
+
17
+ # IDE
18
+ .idea/
19
+ .vscode/
20
+ *.swp
21
+ *.swo
22
+ *~
23
+
24
+ # Testing
25
+ .pytest_cache/
26
+ .coverage
27
+ htmlcov/
28
+ .tox/
29
+ .nox/
30
+
31
+ # Database files
32
+ *.db
33
+ *.db-shm
34
+ *.db-wal
35
+ *.usearch
36
+ *.hnsw
37
+ vectrixdb_data/
38
+ example_data/
39
+ test_data/
40
+
41
+ # Jupyter checkpoints
42
+ .ipynb_checkpoints/
43
+
44
+ # Downloaded multilingual models (exclude from repo, download from GitHub releases)
45
+ vectrixdb/models/data/dense/
46
+ vectrixdb/models/data/reranker/
47
+ vectrixdb/models/data/bge-m3/
48
+ vectrixdb/models/data/rebel/
49
+ releases/
50
+
51
+ # English models are BUNDLED (do NOT exclude):
52
+ # - vectrixdb/models/data/dense_en/
53
+ # - vectrixdb/models/data/reranker_en/
54
+ # - vectrixdb/models/data/colbert/
55
+ # - vectrixdb/models/data/sparse/
56
+
57
+ # Node
58
+ node_modules/
59
+ dashboard/dist/
60
+ dashboard/.cache/
61
+ *.log
62
+ npm-debug.log*
63
+ yarn-debug.log*
64
+ yarn-error.log*
65
+
66
+ # OS
67
+ .DS_Store
68
+ Thumbs.db
69
+
70
+ # Misc
71
+ *.bak
72
+ *.tmp
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 knowusuboaky
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,276 @@
1
+ Metadata-Version: 2.4
2
+ Name: vectrixdb
3
+ Version: 1.0.0
4
+ Summary: Where vectors come alive - A lightweight, visual-first vector database with embedded ML models
5
+ Project-URL: Homepage, https://github.com/knowusuboaky/VectrixDB
6
+ Project-URL: Documentation, https://github.com/knowusuboaky/VectrixDB#readme
7
+ Project-URL: Repository, https://github.com/knowusuboaky/VectrixDB
8
+ Author-email: Kwadwo Daddy Nyame Owusu - Boakye <kwadwo.owusuboakye@outlook.com>
9
+ License-Expression: Apache-2.0
10
+ License-File: LICENSE
11
+ Keywords: ai,approximate-nearest-neighbors,embeddings,hnsw,machine-learning,similarity-search,vector-database
12
+ Classifier: Development Status :: 5 - Production/Stable
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: Apache Software License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.9
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Classifier: Topic :: Database
22
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
23
+ Requires-Python: >=3.9
24
+ Requires-Dist: aiosqlite>=0.19.0
25
+ Requires-Dist: fastapi>=0.109.0
26
+ Requires-Dist: httpx>=0.26.0
27
+ Requires-Dist: numpy>=1.24.0
28
+ Requires-Dist: onnxruntime>=1.15.0
29
+ Requires-Dist: orjson>=3.9.0
30
+ Requires-Dist: pydantic>=2.0.0
31
+ Requires-Dist: rich>=13.0.0
32
+ Requires-Dist: tokenizers>=0.15.0
33
+ Requires-Dist: typer>=0.9.0
34
+ Requires-Dist: usearch>=2.0.0
35
+ Requires-Dist: uvicorn[standard]>=0.27.0
36
+ Requires-Dist: websockets>=12.0
37
+ Provides-Extra: all
38
+ Requires-Dist: black>=23.0.0; extra == 'all'
39
+ Requires-Dist: fastembed>=0.2.0; extra == 'all'
40
+ Requires-Dist: mypy>=1.0.0; extra == 'all'
41
+ Requires-Dist: pytest-asyncio>=0.23.0; extra == 'all'
42
+ Requires-Dist: pytest-cov>=4.0.0; extra == 'all'
43
+ Requires-Dist: pytest>=7.0.0; extra == 'all'
44
+ Requires-Dist: ruff>=0.1.0; extra == 'all'
45
+ Requires-Dist: scikit-learn>=1.3.0; extra == 'all'
46
+ Requires-Dist: sentence-transformers>=2.2.0; extra == 'all'
47
+ Requires-Dist: umap-learn>=0.5.0; extra == 'all'
48
+ Provides-Extra: dev
49
+ Requires-Dist: black>=23.0.0; extra == 'dev'
50
+ Requires-Dist: mypy>=1.0.0; extra == 'dev'
51
+ Requires-Dist: pytest-asyncio>=0.23.0; extra == 'dev'
52
+ Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
53
+ Requires-Dist: pytest>=7.0.0; extra == 'dev'
54
+ Requires-Dist: ruff>=0.1.0; extra == 'dev'
55
+ Provides-Extra: embeddings
56
+ Requires-Dist: fastembed>=0.2.0; extra == 'embeddings'
57
+ Requires-Dist: sentence-transformers>=2.2.0; extra == 'embeddings'
58
+ Provides-Extra: fastembed
59
+ Requires-Dist: fastembed>=0.2.0; extra == 'fastembed'
60
+ Provides-Extra: hf
61
+ Requires-Dist: sentence-transformers>=2.2.0; extra == 'hf'
62
+ Provides-Extra: setup-models
63
+ Requires-Dist: optimum[onnxruntime]>=1.12.0; extra == 'setup-models'
64
+ Requires-Dist: torch>=2.0.0; extra == 'setup-models'
65
+ Requires-Dist: transformers>=4.30.0; extra == 'setup-models'
66
+ Provides-Extra: viz
67
+ Requires-Dist: scikit-learn>=1.3.0; extra == 'viz'
68
+ Requires-Dist: umap-learn>=0.5.0; extra == 'viz'
69
+ Description-Content-Type: text/markdown
70
+
71
+ # VectrixDB
72
+
73
+ [![License: Apache 2.0](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
74
+ [![Python Versions](https://img.shields.io/pypi/pyversions/vectrixdb.svg)](https://pypi.org/project/vectrixdb/)
75
+ [![VectrixDB Version](https://img.shields.io/pypi/v/vectrixdb.svg)](https://pypi.org/project/vectrixdb/)
76
+ [![Downloads](https://pepy.tech/badge/vectrixdb)](https://pepy.tech/project/vectrixdb)
77
+ [![Build Status](https://img.shields.io/github/actions/workflow/status/knowusuboaky/VectrixDB/main.yml)](https://github.com/knowusuboaky/VectrixDB/actions)
78
+ [![Issues](https://img.shields.io/github/issues/knowusuboaky/VectrixDB)](https://github.com/knowusuboaky/VectrixDB/issues)
79
+ [![Contact](https://img.shields.io/badge/Email-Contact-green.svg)](mailto:kwadwo.owusuboakye@outlook.com)
80
+
81
+ **Where vectors come alive.**
82
+
83
+ A lightweight, visual-first vector database with embedded ML models - no external API keys required.
84
+
85
+ ## Why VectrixDB?
86
+
87
+ | Feature | VectrixDB | Qdrant | Chroma | Pinecone |
88
+ |---------|-----------|--------|--------|----------|
89
+ | Beautiful Dashboard | Yes | Basic | No | No |
90
+ | Embedded ML Models | Yes | No | No | No |
91
+ | 4 Search Tiers | Yes | No | No | No |
92
+ | GraphRAG Built-in | Yes | No | No | No |
93
+ | Zero Config | Yes | No | Yes | Yes |
94
+ | No API Keys Needed | Yes | Yes | No | No |
95
+ | Open Source | Yes | Yes | Yes | No |
96
+
97
+ ## Quick Start
98
+
99
+ ```bash
100
+ pip install vectrixdb
101
+ ```
102
+
103
+ ```python
104
+ from vectrixdb import Vectrix
105
+
106
+ # Create database with hybrid search (uses bundled English models)
107
+ db = Vectrix("my_docs", tier="hybrid", language="en")
108
+
109
+ # Add documents
110
+ db.add([
111
+ "Python is great for data science",
112
+ "JavaScript powers the web",
113
+ "Rust is known for memory safety"
114
+ ])
115
+
116
+ # Search
117
+ results = db.search("programming languages")
118
+ print(results.top.text) # Best match
119
+ ```
120
+
121
+ ## 4-Tier System
122
+
123
+ | Tier | Features | Use Case |
124
+ |------|----------|----------|
125
+ | **dense** | Vector similarity | Fast semantic search |
126
+ | **hybrid** | + BM25 sparse | Better keyword matching |
127
+ | **ultimate** | + ColBERT late interaction | Maximum accuracy |
128
+ | **graph** | + Knowledge graph | Complex reasoning (GraphRAG) |
129
+
130
+ ```python
131
+ # Dense tier (fastest)
132
+ db = Vectrix("docs", tier="dense")
133
+
134
+ # Hybrid tier (balanced)
135
+ db = Vectrix("docs", tier="hybrid")
136
+
137
+ # Ultimate tier (best quality)
138
+ db = Vectrix("docs", tier="ultimate")
139
+
140
+ # Graph tier (GraphRAG)
141
+ db = Vectrix("docs", tier="graph")
142
+ ```
143
+
144
+ ## Search Modes
145
+
146
+ ```python
147
+ # Dense - vector similarity
148
+ results = db.search("AI", mode="dense")
149
+
150
+ # Sparse - BM25 keyword
151
+ results = db.search("machine learning", mode="sparse")
152
+
153
+ # Hybrid - combined
154
+ results = db.search("neural networks", mode="hybrid")
155
+
156
+ # Rerank - with cross-encoder
157
+ results = db.search("deep learning", mode="rerank")
158
+ ```
159
+
160
+ ## With Metadata
161
+
162
+ ```python
163
+ db.add(
164
+ texts=["iPhone 15", "Galaxy S24", "Pixel 8"],
165
+ metadata=[
166
+ {"brand": "Apple", "price": 999},
167
+ {"brand": "Samsung", "price": 899},
168
+ {"brand": "Google", "price": 699}
169
+ ]
170
+ )
171
+
172
+ # Filter by metadata
173
+ results = db.search("smartphone", filter={"brand": "Apple"})
174
+ ```
175
+
176
+ ## Embedded Models
177
+
178
+ VectrixDB bundles English models (~386MB) - no downloads needed:
179
+
180
+ | Model | Purpose | Size |
181
+ |-------|---------|------|
182
+ | e5-small-v2 | Dense embeddings | 129MB |
183
+ | ms-marco-MiniLM | Reranking | 129MB |
184
+ | answerai-colbert-small | Late interaction | 129MB |
185
+ | BM25 vocab | Sparse search | 17KB |
186
+
187
+ ### Multilingual Models (auto-download)
188
+
189
+ For 100+ languages, models download from GitHub on first use:
190
+
191
+ ```python
192
+ # Multilingual (downloads ~450MB on first use)
193
+ db = Vectrix("docs", tier="hybrid") # or language="multi"
194
+
195
+ # English only (bundled, no download)
196
+ db = Vectrix("docs", tier="hybrid", language="en")
197
+ ```
198
+
199
+ | Model | Purpose | Languages |
200
+ |-------|---------|-----------|
201
+ | multilingual-e5-small | Dense | 100+ |
202
+ | mmarco-mMiniLMv2 | Reranking | 15+ |
203
+ | BGE-M3 | Late interaction | 100+ |
204
+ | mREBEL | GraphRAG extraction | 18 |
205
+
206
+ ## REST API & Dashboard
207
+
208
+ ```bash
209
+ # Start server
210
+ VECTRIXDB_API_KEY=your_key vectrixdb serve --port 7337
211
+
212
+ # Open dashboard
213
+ # http://localhost:7337/dashboard
214
+ ```
215
+
216
+ ```bash
217
+ # Create collection
218
+ curl -X POST http://localhost:7337/api/v1/collections \
219
+ -H "api-key: your_key" -H "Content-Type: application/json" \
220
+ -d '{"name": "docs", "dimension": 384}'
221
+
222
+ # Add with auto-embedding
223
+ curl -X POST http://localhost:7337/api/v1/collections/docs/text-upsert \
224
+ -H "api-key: your_key" -H "Content-Type: application/json" \
225
+ -d '{"points": [{"id": "1", "text": "Hello world"}]}'
226
+
227
+ # Search
228
+ curl -X POST http://localhost:7337/api/v1/collections/docs/text-search \
229
+ -H "api-key: your_key" -H "Content-Type: application/json" \
230
+ -d '{"query_text": "greeting", "limit": 10}'
231
+ ```
232
+
233
+ ## Project Structure
234
+
235
+ ```
236
+ VectrixDB/
237
+ ├── vectrixdb/
238
+ │ ├── core/ # Vector index, storage, search
239
+ │ │ ├── graphrag/ # Knowledge graph
240
+ │ │ └── search/ # Search algorithms
241
+ │ ├── api/ # FastAPI server
242
+ │ ├── models/ # Embedded ONNX models
243
+ │ │ └── data/ # Bundled English models
244
+ │ ├── dashboard/ # Web UI
245
+ │ └── cli.py # Command line
246
+ ├── tests/ # Jupyter notebooks
247
+ └── requirements.txt
248
+ ```
249
+
250
+ ## Installation from Source
251
+
252
+ ```bash
253
+ git clone https://github.com/knowusuboaky/VectrixDB.git
254
+ cd VectrixDB
255
+ pip install -e .
256
+ ```
257
+
258
+ ## Requirements
259
+
260
+ - Python 3.9+
261
+ - No external API keys
262
+ - Models bundled or auto-downloaded
263
+
264
+ ## License
265
+
266
+ Apache 2.0
267
+
268
+ ## Author
269
+
270
+ **Kwadwo Daddy Nyame Owusu - Boakye**
271
+
272
+ GitHub: [@knowusuboaky](https://github.com/knowusuboaky)
273
+
274
+ ---
275
+
276
+ *Where vectors come alive.*
@@ -0,0 +1,206 @@
1
+ # VectrixDB
2
+
3
+ [![License: Apache 2.0](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
4
+ [![Python Versions](https://img.shields.io/pypi/pyversions/vectrixdb.svg)](https://pypi.org/project/vectrixdb/)
5
+ [![VectrixDB Version](https://img.shields.io/pypi/v/vectrixdb.svg)](https://pypi.org/project/vectrixdb/)
6
+ [![Downloads](https://pepy.tech/badge/vectrixdb)](https://pepy.tech/project/vectrixdb)
7
+ [![Build Status](https://img.shields.io/github/actions/workflow/status/knowusuboaky/VectrixDB/main.yml)](https://github.com/knowusuboaky/VectrixDB/actions)
8
+ [![Issues](https://img.shields.io/github/issues/knowusuboaky/VectrixDB)](https://github.com/knowusuboaky/VectrixDB/issues)
9
+ [![Contact](https://img.shields.io/badge/Email-Contact-green.svg)](mailto:kwadwo.owusuboakye@outlook.com)
10
+
11
+ **Where vectors come alive.**
12
+
13
+ A lightweight, visual-first vector database with embedded ML models - no external API keys required.
14
+
15
+ ## Why VectrixDB?
16
+
17
+ | Feature | VectrixDB | Qdrant | Chroma | Pinecone |
18
+ |---------|-----------|--------|--------|----------|
19
+ | Beautiful Dashboard | Yes | Basic | No | No |
20
+ | Embedded ML Models | Yes | No | No | No |
21
+ | 4 Search Tiers | Yes | No | No | No |
22
+ | GraphRAG Built-in | Yes | No | No | No |
23
+ | Zero Config | Yes | No | Yes | Yes |
24
+ | No API Keys Needed | Yes | Yes | No | No |
25
+ | Open Source | Yes | Yes | Yes | No |
26
+
27
+ ## Quick Start
28
+
29
+ ```bash
30
+ pip install vectrixdb
31
+ ```
32
+
33
+ ```python
34
+ from vectrixdb import Vectrix
35
+
36
+ # Create database with hybrid search (uses bundled English models)
37
+ db = Vectrix("my_docs", tier="hybrid", language="en")
38
+
39
+ # Add documents
40
+ db.add([
41
+ "Python is great for data science",
42
+ "JavaScript powers the web",
43
+ "Rust is known for memory safety"
44
+ ])
45
+
46
+ # Search
47
+ results = db.search("programming languages")
48
+ print(results.top.text) # Best match
49
+ ```
50
+
51
+ ## 4-Tier System
52
+
53
+ | Tier | Features | Use Case |
54
+ |------|----------|----------|
55
+ | **dense** | Vector similarity | Fast semantic search |
56
+ | **hybrid** | + BM25 sparse | Better keyword matching |
57
+ | **ultimate** | + ColBERT late interaction | Maximum accuracy |
58
+ | **graph** | + Knowledge graph | Complex reasoning (GraphRAG) |
59
+
60
+ ```python
61
+ # Dense tier (fastest)
62
+ db = Vectrix("docs", tier="dense")
63
+
64
+ # Hybrid tier (balanced)
65
+ db = Vectrix("docs", tier="hybrid")
66
+
67
+ # Ultimate tier (best quality)
68
+ db = Vectrix("docs", tier="ultimate")
69
+
70
+ # Graph tier (GraphRAG)
71
+ db = Vectrix("docs", tier="graph")
72
+ ```
73
+
74
+ ## Search Modes
75
+
76
+ ```python
77
+ # Dense - vector similarity
78
+ results = db.search("AI", mode="dense")
79
+
80
+ # Sparse - BM25 keyword
81
+ results = db.search("machine learning", mode="sparse")
82
+
83
+ # Hybrid - combined
84
+ results = db.search("neural networks", mode="hybrid")
85
+
86
+ # Rerank - with cross-encoder
87
+ results = db.search("deep learning", mode="rerank")
88
+ ```
89
+
90
+ ## With Metadata
91
+
92
+ ```python
93
+ db.add(
94
+ texts=["iPhone 15", "Galaxy S24", "Pixel 8"],
95
+ metadata=[
96
+ {"brand": "Apple", "price": 999},
97
+ {"brand": "Samsung", "price": 899},
98
+ {"brand": "Google", "price": 699}
99
+ ]
100
+ )
101
+
102
+ # Filter by metadata
103
+ results = db.search("smartphone", filter={"brand": "Apple"})
104
+ ```
105
+
106
+ ## Embedded Models
107
+
108
+ VectrixDB bundles English models (~386MB) - no downloads needed:
109
+
110
+ | Model | Purpose | Size |
111
+ |-------|---------|------|
112
+ | e5-small-v2 | Dense embeddings | 129MB |
113
+ | ms-marco-MiniLM | Reranking | 129MB |
114
+ | answerai-colbert-small | Late interaction | 129MB |
115
+ | BM25 vocab | Sparse search | 17KB |
116
+
117
+ ### Multilingual Models (auto-download)
118
+
119
+ For 100+ languages, models download from GitHub on first use:
120
+
121
+ ```python
122
+ # Multilingual (downloads ~450MB on first use)
123
+ db = Vectrix("docs", tier="hybrid") # or language="multi"
124
+
125
+ # English only (bundled, no download)
126
+ db = Vectrix("docs", tier="hybrid", language="en")
127
+ ```
128
+
129
+ | Model | Purpose | Languages |
130
+ |-------|---------|-----------|
131
+ | multilingual-e5-small | Dense | 100+ |
132
+ | mmarco-mMiniLMv2 | Reranking | 15+ |
133
+ | BGE-M3 | Late interaction | 100+ |
134
+ | mREBEL | GraphRAG extraction | 18 |
135
+
136
+ ## REST API & Dashboard
137
+
138
+ ```bash
139
+ # Start server
140
+ VECTRIXDB_API_KEY=your_key vectrixdb serve --port 7337
141
+
142
+ # Open dashboard
143
+ # http://localhost:7337/dashboard
144
+ ```
145
+
146
+ ```bash
147
+ # Create collection
148
+ curl -X POST http://localhost:7337/api/v1/collections \
149
+ -H "api-key: your_key" -H "Content-Type: application/json" \
150
+ -d '{"name": "docs", "dimension": 384}'
151
+
152
+ # Add with auto-embedding
153
+ curl -X POST http://localhost:7337/api/v1/collections/docs/text-upsert \
154
+ -H "api-key: your_key" -H "Content-Type: application/json" \
155
+ -d '{"points": [{"id": "1", "text": "Hello world"}]}'
156
+
157
+ # Search
158
+ curl -X POST http://localhost:7337/api/v1/collections/docs/text-search \
159
+ -H "api-key: your_key" -H "Content-Type: application/json" \
160
+ -d '{"query_text": "greeting", "limit": 10}'
161
+ ```
162
+
163
+ ## Project Structure
164
+
165
+ ```
166
+ VectrixDB/
167
+ ├── vectrixdb/
168
+ │ ├── core/ # Vector index, storage, search
169
+ │ │ ├── graphrag/ # Knowledge graph
170
+ │ │ └── search/ # Search algorithms
171
+ │ ├── api/ # FastAPI server
172
+ │ ├── models/ # Embedded ONNX models
173
+ │ │ └── data/ # Bundled English models
174
+ │ ├── dashboard/ # Web UI
175
+ │ └── cli.py # Command line
176
+ ├── tests/ # Jupyter notebooks
177
+ └── requirements.txt
178
+ ```
179
+
180
+ ## Installation from Source
181
+
182
+ ```bash
183
+ git clone https://github.com/knowusuboaky/VectrixDB.git
184
+ cd VectrixDB
185
+ pip install -e .
186
+ ```
187
+
188
+ ## Requirements
189
+
190
+ - Python 3.9+
191
+ - No external API keys
192
+ - Models bundled or auto-downloaded
193
+
194
+ ## License
195
+
196
+ Apache 2.0
197
+
198
+ ## Author
199
+
200
+ **Kwadwo Daddy Nyame Owusu - Boakye**
201
+
202
+ GitHub: [@knowusuboaky](https://github.com/knowusuboaky)
203
+
204
+ ---
205
+
206
+ *Where vectors come alive.*