vecforge 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vecforge-0.2.0/LICENSE +45 -0
- vecforge-0.2.0/NOTICE +14 -0
- vecforge-0.2.0/PKG-INFO +302 -0
- vecforge-0.2.0/README.md +256 -0
- vecforge-0.2.0/pyproject.toml +88 -0
- vecforge-0.2.0/setup.cfg +4 -0
- vecforge-0.2.0/vecforge/__init__.py +59 -0
- vecforge-0.2.0/vecforge/cli/__init__.py +3 -0
- vecforge-0.2.0/vecforge/cli/main.py +197 -0
- vecforge-0.2.0/vecforge/core/__init__.py +3 -0
- vecforge-0.2.0/vecforge/core/bm25.py +187 -0
- vecforge-0.2.0/vecforge/core/embedder.py +152 -0
- vecforge-0.2.0/vecforge/core/indexer.py +196 -0
- vecforge-0.2.0/vecforge/core/reranker.py +120 -0
- vecforge-0.2.0/vecforge/core/storage.py +493 -0
- vecforge-0.2.0/vecforge/core/vault.py +760 -0
- vecforge-0.2.0/vecforge/exceptions.py +164 -0
- vecforge-0.2.0/vecforge/ingest/__init__.py +3 -0
- vecforge-0.2.0/vecforge/ingest/dispatcher.py +181 -0
- vecforge-0.2.0/vecforge/ingest/document.py +237 -0
- vecforge-0.2.0/vecforge/search/__init__.py +3 -0
- vecforge-0.2.0/vecforge/search/cascade.py +186 -0
- vecforge-0.2.0/vecforge/search/filters.py +146 -0
- vecforge-0.2.0/vecforge/search/hybrid.py +146 -0
- vecforge-0.2.0/vecforge/security/__init__.py +3 -0
- vecforge-0.2.0/vecforge/security/audit.py +169 -0
- vecforge-0.2.0/vecforge/security/encryption.py +84 -0
- vecforge-0.2.0/vecforge/security/namespaces.py +127 -0
- vecforge-0.2.0/vecforge/security/rbac.py +172 -0
- vecforge-0.2.0/vecforge/security/snapshots.py +135 -0
- vecforge-0.2.0/vecforge/server/__init__.py +3 -0
- vecforge-0.2.0/vecforge/server/app.py +54 -0
- vecforge-0.2.0/vecforge/server/routes.py +215 -0
- vecforge-0.2.0/vecforge.egg-info/PKG-INFO +302 -0
- vecforge-0.2.0/vecforge.egg-info/SOURCES.txt +37 -0
- vecforge-0.2.0/vecforge.egg-info/dependency_links.txt +1 -0
- vecforge-0.2.0/vecforge.egg-info/entry_points.txt +2 -0
- vecforge-0.2.0/vecforge.egg-info/requires.txt +26 -0
- vecforge-0.2.0/vecforge.egg-info/top_level.txt +1 -0
vecforge-0.2.0/LICENSE
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
Business Source License 1.1
|
|
2
|
+
|
|
3
|
+
Licensor: ArcGX TechLabs Private Limited
|
|
4
|
+
Founded by Suneel Bose K
|
|
5
|
+
|
|
6
|
+
Licensed Work: VecForge
|
|
7
|
+
Copyright (c) 2026 Suneel Bose K · ArcGX TechLabs Private Limited
|
|
8
|
+
|
|
9
|
+
Change Date: Four years from the date the Licensed Work is published.
|
|
10
|
+
|
|
11
|
+
Change License: Apache License, Version 2.0
|
|
12
|
+
|
|
13
|
+
Terms:
|
|
14
|
+
|
|
15
|
+
The Licensor hereby grants you the right to copy, modify, create derivative
|
|
16
|
+
works, redistribute, and make non-production use of the Licensed Work.
|
|
17
|
+
|
|
18
|
+
The Licensor may make an Additional Use Grant, permitting limited production
|
|
19
|
+
use. You may use the Licensed Work for personal, research, open-source, and
|
|
20
|
+
non-commercial purposes without restriction.
|
|
21
|
+
|
|
22
|
+
For commercial production use, you must obtain a separate commercial license
|
|
23
|
+
from ArcGX TechLabs Private Limited.
|
|
24
|
+
|
|
25
|
+
If your use of the Licensed Work does not comply with the requirements
|
|
26
|
+
currently in effect as described in this License, you must purchase a
|
|
27
|
+
commercial license from the Licensor, its affiliated entities, or authorized
|
|
28
|
+
resellers, or you must refrain from using the Licensed Work.
|
|
29
|
+
|
|
30
|
+
All copies of the original and modified Licensed Work, and derivative works of
|
|
31
|
+
the Licensed Work, are subject to this License.
|
|
32
|
+
|
|
33
|
+
This License does not grant you any right in any trademark or logo of Licensor
|
|
34
|
+
or its affiliates.
|
|
35
|
+
|
|
36
|
+
TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON AN
|
|
37
|
+
"AS IS" BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, EXPRESS
|
|
38
|
+
OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF MERCHANTABILITY,
|
|
39
|
+
FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND TITLE.
|
|
40
|
+
|
|
41
|
+
Contact for commercial licensing:
|
|
42
|
+
Email: suneelbose@arcgx.in
|
|
43
|
+
Web: www.arcgx.in
|
|
44
|
+
|
|
45
|
+
Built by Suneel Bose K · ArcGX TechLabs Private Limited
|
vecforge-0.2.0/NOTICE
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
VecForge — Universal Local-First Vector Database
|
|
2
|
+
Copyright (c) 2026 Suneel Bose K · ArcGX TechLabs Private Limited
|
|
3
|
+
|
|
4
|
+
Built by Suneel Bose K
|
|
5
|
+
Founder & CEO, ArcGX TechLabs Private Limited
|
|
6
|
+
|
|
7
|
+
Licensed under the Business Source License 1.1 (BSL 1.1).
|
|
8
|
+
Free for personal, research, open-source, and non-commercial use.
|
|
9
|
+
Commercial use requires a separate license from ArcGX TechLabs.
|
|
10
|
+
|
|
11
|
+
Contact:
|
|
12
|
+
Commercial Licensing: suneelbose@arcgx.in
|
|
13
|
+
General: suneelbose@arcgx.in
|
|
14
|
+
Website: https://bosekarmegam.github.io/vecforge/
|
vecforge-0.2.0/PKG-INFO
ADDED
|
@@ -0,0 +1,302 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: vecforge
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Forge your vector database. Own it forever. Local-first, encrypted, quantum-inspired.
|
|
5
|
+
Author-email: Suneel Bose K <suneelbose@arcgx.in>
|
|
6
|
+
License: Business Source License 1.1
|
|
7
|
+
Project-URL: Homepage, https://vecforge.arcgx.in
|
|
8
|
+
Project-URL: Repository, https://github.com/bosekarmegam/vecforge
|
|
9
|
+
Project-URL: Issues, https://github.com/bosekarmegam/vecforge/issues
|
|
10
|
+
Keywords: vector-database,faiss,embeddings,search,local-first,encrypted
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Topic :: Database
|
|
14
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Requires-Python: >=3.10
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
License-File: LICENSE
|
|
21
|
+
License-File: NOTICE
|
|
22
|
+
Requires-Dist: faiss-cpu>=1.7.4
|
|
23
|
+
Requires-Dist: sentence-transformers>=2.2.0
|
|
24
|
+
Requires-Dist: numpy>=1.24.0
|
|
25
|
+
Requires-Dist: rank-bm25>=0.2.2
|
|
26
|
+
Requires-Dist: fastapi>=0.100.0
|
|
27
|
+
Requires-Dist: uvicorn[standard]>=0.23.0
|
|
28
|
+
Requires-Dist: pymupdf>=1.23.0
|
|
29
|
+
Requires-Dist: numba>=0.58.0
|
|
30
|
+
Requires-Dist: joblib>=1.3.0
|
|
31
|
+
Requires-Dist: click>=8.1.0
|
|
32
|
+
Requires-Dist: python-docx>=1.0.0
|
|
33
|
+
Requires-Dist: beautifulsoup4>=4.12.0
|
|
34
|
+
Provides-Extra: dev
|
|
35
|
+
Requires-Dist: pytest>=7.4.0; extra == "dev"
|
|
36
|
+
Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
|
|
37
|
+
Requires-Dist: mypy>=1.5.0; extra == "dev"
|
|
38
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
39
|
+
Requires-Dist: black>=23.7.0; extra == "dev"
|
|
40
|
+
Requires-Dist: types-beautifulsoup4; extra == "dev"
|
|
41
|
+
Provides-Extra: gpu
|
|
42
|
+
Requires-Dist: faiss-gpu>=1.7.4; extra == "gpu"
|
|
43
|
+
Requires-Dist: cupy>=12.0.0; extra == "gpu"
|
|
44
|
+
Provides-Extra: quantum
|
|
45
|
+
Dynamic: license-file
|
|
46
|
+
|
|
47
|
+
<p align="center">
|
|
48
|
+
<h1 align="center">⚡ VecForge</h1>
|
|
49
|
+
<p align="center"><strong>Forge your vector database. Own it forever.</strong></p>
|
|
50
|
+
<p align="center">
|
|
51
|
+
Local-first · Encrypted · Hybrid Search · Zero Cloud Dependency
|
|
52
|
+
</p>
|
|
53
|
+
</p>
|
|
54
|
+
|
|
55
|
+
---
|
|
56
|
+
|
|
57
|
+
**VecForge** is a universal, local-first Python vector database with enterprise security, multimodal ingestion, and optional quantum-inspired acceleration.
|
|
58
|
+
|
|
59
|
+
Built by **Suneel Bose K** — Founder & CEO, [ArcGX TechLabs Private Limited](https://arcgx.in)
|
|
60
|
+
|
|
61
|
+
[](https://pypi.org/project/vecforge/)
|
|
62
|
+
[](https://pypi.org/project/vecforge/)
|
|
63
|
+
[](LICENSE)
|
|
64
|
+
[](https://python.org)
|
|
65
|
+
[](https://github.com/bosekarmegam/vecforge/actions/workflows/tests.yml)
|
|
66
|
+
[](#-benchmarks)
|
|
67
|
+
[](https://github.com/astral-sh/ruff)
|
|
68
|
+
[](https://mypy-lang.org/)
|
|
69
|
+
[](#-benchmarks)
|
|
70
|
+
|
|
71
|
+
---
|
|
72
|
+
|
|
73
|
+
## ⚡ 5-Line Quickstart
|
|
74
|
+
|
|
75
|
+
```python
|
|
76
|
+
from vecforge import VecForge
|
|
77
|
+
|
|
78
|
+
db = VecForge("my_vault")
|
|
79
|
+
db.add("Patient admitted with type 2 diabetes", metadata={"ward": "7"})
|
|
80
|
+
results = db.search("diabetic patient")
|
|
81
|
+
print(results[0].text)
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
That's it. No API keys. No cloud. No config files. **Your data stays on your machine.**
|
|
85
|
+
|
|
86
|
+
---
|
|
87
|
+
|
|
88
|
+
## 🔥 Why VecForge?
|
|
89
|
+
|
|
90
|
+
| Feature | Pinecone | ChromaDB | **VecForge** |
|
|
91
|
+
|---|---|---|---|
|
|
92
|
+
| Local-first | ❌ Cloud-only | ✅ | ✅ **Always** |
|
|
93
|
+
| Encryption at rest | ❌ | ❌ | ✅ **AES-256** |
|
|
94
|
+
| Hybrid search | ✅ | ❌ | ✅ **Dense + BM25** |
|
|
95
|
+
| Namespace isolation | ✅ Cloud | ❌ | ✅ **Local** |
|
|
96
|
+
| RBAC | ✅ Cloud | ❌ | ✅ **Built-in** |
|
|
97
|
+
| Audit logging | ❌ | ❌ | ✅ **JSONL** |
|
|
98
|
+
| Price | $$$$ | Free | ✅ **Free** |
|
|
99
|
+
|
|
100
|
+
---
|
|
101
|
+
|
|
102
|
+
## 📦 Install
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
pip install vecforge
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
### From source (development)
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
git clone https://github.com/bosekarmegam/vecforge.git
|
|
112
|
+
cd vecforge
|
|
113
|
+
pip install -e ".[dev]"
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
### System Requirements
|
|
117
|
+
|
|
118
|
+
> **Windows users:** VecForge uses PyTorch under the hood, which requires the
|
|
119
|
+
> [Microsoft Visual C++ Redistributable](https://aka.ms/vs/17/release/vc_redist.x64.exe).
|
|
120
|
+
> Install it before running VecForge.
|
|
121
|
+
|
|
122
|
+
> ๐ See the full [Installation Guide](docs/installation.md) for GPU, encryption, and platform-specific options.
|
|
123
|
+
|
|
124
|
+
---
|
|
125
|
+
|
|
126
|
+
## ๐ Encrypted Vault
|
|
127
|
+
|
|
128
|
+
```python
|
|
129
|
+
import os
|
|
130
|
+
from vecforge import VecForge
|
|
131
|
+
|
|
132
|
+
db = VecForge(
|
|
133
|
+
"secure_vault",
|
|
134
|
+
encryption_key=os.environ["VECFORGE_KEY"],
|
|
135
|
+
audit_log="audit.jsonl",
|
|
136
|
+
deletion_protection=True,
|
|
137
|
+
)
|
|
138
|
+
db.add("Top secret patient data", namespace="ward_7")
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
---
|
|
142
|
+
|
|
143
|
+
## ๐ Hybrid Search
|
|
144
|
+
|
|
145
|
+
```python
|
|
146
|
+
results = db.search(
|
|
147
|
+
"elderly diabetic hip fracture",
|
|
148
|
+
top_k=5,
|
|
149
|
+
alpha=0.7, # 70% semantic, 30% keyword
|
|
150
|
+
rerank=True, # cross-encoder precision boost
|
|
151
|
+
namespace="ward_7",
|
|
152
|
+
filters={"year": {"gte": 2023}},
|
|
153
|
+
)
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
> ๐ See the [Search Guide](docs/search.md) for alpha tuning, metadata operators, and reranking strategies.
|
|
157
|
+
|
|
158
|
+
---
|
|
159
|
+
|
|
160
|
+
## ๐ Auto-Ingest Documents
|
|
161
|
+
|
|
162
|
+
```python
|
|
163
|
+
# Ingest entire directories — auto-detects format
|
|
164
|
+
db.ingest("medical_records/") # PDF, DOCX, TXT, MD, HTML
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
> ๐ See the [Ingestion Guide](docs/ingestion.md) for chunking configuration and supported formats.
|
|
168
|
+
|
|
169
|
+
---
|
|
170
|
+
|
|
171
|
+
## 🛡️ Multi-Tenant Namespaces
|
|
172
|
+
|
|
173
|
+
```python
|
|
174
|
+
db.create_namespace("hospital_a")
|
|
175
|
+
db.create_namespace("hospital_b")
|
|
176
|
+
|
|
177
|
+
db.add("Patient data A", namespace="hospital_a")
|
|
178
|
+
db.add("Patient data B", namespace="hospital_b")
|
|
179
|
+
|
|
180
|
+
# Tenant isolation — hospital_a never sees hospital_b's data
|
|
181
|
+
results = db.search("patient", namespace="hospital_a")
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
---
|
|
185
|
+
|
|
186
|
+
## 🖥️ CLI
|
|
187
|
+
|
|
188
|
+
```bash
|
|
189
|
+
vecforge ingest my_docs/ --vault my.db
|
|
190
|
+
vecforge search "diabetes" --vault my.db --top-k 5
|
|
191
|
+
vecforge stats my.db
|
|
192
|
+
vecforge export my.db -o data.json
|
|
193
|
+
vecforge serve --vault my.db --port 8080
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
> ๐ See the [CLI Reference](docs/cli_reference.md) for all commands and options.
|
|
197
|
+
|
|
198
|
+
---
|
|
199
|
+
|
|
200
|
+
## ๐ REST API
|
|
201
|
+
|
|
202
|
+
```bash
|
|
203
|
+
vecforge serve --vault my.db --port 8080
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
```bash
|
|
207
|
+
# Add document
|
|
208
|
+
curl -X POST http://localhost:8080/api/v1/add \
|
|
209
|
+
-H "Content-Type: application/json" \
|
|
210
|
+
-d '{"text": "Patient record", "namespace": "default"}'
|
|
211
|
+
|
|
212
|
+
# Search
|
|
213
|
+
curl -X POST http://localhost:8080/api/v1/search \
|
|
214
|
+
-H "Content-Type: application/json" \
|
|
215
|
+
-d '{"query": "diabetes", "top_k": 5}'
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
> ๐ See the [REST API Reference](docs/rest_api.md) for all endpoints with request/response schemas.
|
|
219
|
+
|
|
220
|
+
---
|
|
221
|
+
|
|
222
|
+
## 🧪 Examples
|
|
223
|
+
|
|
224
|
+
Ready-to-run example scripts demonstrating real-world use cases:
|
|
225
|
+
|
|
226
|
+
| Example | Description |
|
|
227
|
+
|---|---|
|
|
228
|
+
| [๐ฅ Hospital Search](examples/hospital_search.py) | Medical record search with namespace isolation per ward |
|
|
229
|
+
| [โ๏ธ Legal Documents](examples/legal_document_search.py) | NDA and contract search with type/year filtering |
|
|
230
|
+
| [๐ GIS Data Search](examples/gis_data_search.py) | Geospatial dataset discovery with USGS, Sentinel, OSM |
|
|
231
|
+
| [๐ค RAG Pipeline](examples/rag_pipeline.py) | Retrieval-Augmented Generation with VecForge as backend |
|
|
232
|
+
| [๐ข Multi-Tenant SaaS](examples/multi_tenant_saas.py) | Namespace isolation, RBAC, and audit logging demo |
|
|
233
|
+
| [๐ป Codebase Assistant](examples/codebase_assistant.py) | Code documentation semantic search |
|
|
234
|
+
|
|
235
|
+
```bash
|
|
236
|
+
# Run any example
|
|
237
|
+
python examples/hospital_search.py
|
|
238
|
+
python examples/gis_data_search.py
|
|
239
|
+
python examples/rag_pipeline.py
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
---
|
|
243
|
+
|
|
244
|
+
## ๐ Documentation
|
|
245
|
+
|
|
246
|
+
### Getting Started
|
|
247
|
+
- [⚡ Quickstart](docs/quickstart.md) — Get running in 5 minutes
|
|
248
|
+
- [📦 Installation](docs/installation.md) — All install options & system requirements
|
|
249
|
+
|
|
250
|
+
### User Guides
|
|
251
|
+
- [๐ง Core Concepts](docs/core_concepts.md) โ Vaults, namespaces, hybrid search explained
|
|
252
|
+
- [๐ Search Guide](docs/search.md) โ Alpha tuning, filters, reranking
|
|
253
|
+
- [๐ Security Guide](docs/security.md) โ Encryption, RBAC, audit logging
|
|
254
|
+
- [๐ Ingestion Guide](docs/ingestion.md) โ PDF, DOCX, HTML, TXT ingestion & chunking
|
|
255
|
+
|
|
256
|
+
### Reference
|
|
257
|
+
- [๐ API Reference](docs/api_reference.md) โ Full Python API documentation
|
|
258
|
+
- [๐ฅ๏ธ CLI Reference](docs/cli_reference.md) โ All CLI commands & options
|
|
259
|
+
- [๐ REST API](docs/rest_api.md) โ FastAPI server endpoints
|
|
260
|
+
- [โ๏ธ Configuration](docs/configuration.md) โ All config options in one place
|
|
261
|
+
|
|
262
|
+
---
|
|
263
|
+
|
|
264
|
+
## ๐ Benchmarks
|
|
265
|
+
|
|
266
|
+
> Verified on Phase 2 benchmark suite (`benchmarks/bench_search.py`)
|
|
267
|
+
|
|
268
|
+
| Operation | VecForge (Actual) | North Star Target | Pinecone | ChromaDB |
|
|
269
|
+
|---|---|---|---|---|
|
|
270
|
+
| Search 1k docs | **0.04ms** p50 | — | ~80ms | ~200ms |
|
|
271
|
+
| Search 10k docs | **1.63ms** p50 | — | ~80ms | ~200ms |
|
|
272
|
+
| **Search 100k docs** | **11.31ms** p50 ✅ | <15ms | ~80ms | ~200ms |
|
|
273
|
+
| Ingest 100k docs | **2.9M docs/sec** | — | Manual | Manual |
|
|
274
|
+
| BM25 Search 10k | **9.40ms** p50 | — | N/A | N/A |
|
|
275
|
+
| Encrypted search | **<20ms overhead** | <20ms | N/A | N/A |
|
|
276
|
+
|
|
277
|
+
### Quality Gates
|
|
278
|
+
|
|
279
|
+
| Check | Result |
|
|
280
|
+
|---|---|
|
|
281
|
+
| Ruff lint | ✅ All checks passed |
|
|
282
|
+
| Mypy type check | ✅ 0 errors (27 files) |
|
|
283
|
+
| Pytest | ✅ 128/128 tests pass |
|
|
284
|
+
| Coverage | 89% (core modules 85-100%) |
|
|
285
|
+
|
|
286
|
+
---
|
|
287
|
+
|
|
288
|
+
## ⚖️ License
|
|
289
|
+
|
|
290
|
+
**Business Source License 1.1 (BSL)**
|
|
291
|
+
|
|
292
|
+
- ✅ Free for personal, research, open-source, and non-commercial use
|
|
293
|
+
- ✅ Read, modify, and share freely
|
|
294
|
+
- ๐ Commercial use requires a license from ArcGX TechLabs
|
|
295
|
+
|
|
296
|
+
Contact: [suneelbose@arcgx.in](mailto:suneelbose@arcgx.in)
|
|
297
|
+
|
|
298
|
+
---
|
|
299
|
+
|
|
300
|
+
<p align="center">
|
|
301
|
+
Built with ❤️ by <strong>Suneel Bose K</strong> · <strong>ArcGX TechLabs Private Limited</strong>
|
|
302
|
+
</p>
|
vecforge-0.2.0/README.md
ADDED
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
<p align="center">
|
|
2
|
+
<h1 align="center">⚡ VecForge</h1>
|
|
3
|
+
<p align="center"><strong>Forge your vector database. Own it forever.</strong></p>
|
|
4
|
+
<p align="center">
|
|
5
|
+
Local-first · Encrypted · Hybrid Search · Zero Cloud Dependency
|
|
6
|
+
</p>
|
|
7
|
+
</p>
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
**VecForge** is a universal, local-first Python vector database with enterprise security, multimodal ingestion, and optional quantum-inspired acceleration.
|
|
12
|
+
|
|
13
|
+
Built by **Suneel Bose K** — Founder & CEO, [ArcGX TechLabs Private Limited](https://arcgx.in)
|
|
14
|
+
|
|
15
|
+
[](https://pypi.org/project/vecforge/)
|
|
16
|
+
[](https://pypi.org/project/vecforge/)
|
|
17
|
+
[](LICENSE)
|
|
18
|
+
[](https://python.org)
|
|
19
|
+
[](https://github.com/bosekarmegam/vecforge/actions/workflows/tests.yml)
|
|
20
|
+
[](#-benchmarks)
|
|
21
|
+
[](https://github.com/astral-sh/ruff)
|
|
22
|
+
[](https://mypy-lang.org/)
|
|
23
|
+
[](#-benchmarks)
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
## ⚡ 5-Line Quickstart
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
from vecforge import VecForge
|
|
31
|
+
|
|
32
|
+
db = VecForge("my_vault")
|
|
33
|
+
db.add("Patient admitted with type 2 diabetes", metadata={"ward": "7"})
|
|
34
|
+
results = db.search("diabetic patient")
|
|
35
|
+
print(results[0].text)
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
That's it. No API keys. No cloud. No config files. **Your data stays on your machine.**
|
|
39
|
+
|
|
40
|
+
---
|
|
41
|
+
|
|
42
|
+
## 🔥 Why VecForge?
|
|
43
|
+
|
|
44
|
+
| Feature | Pinecone | ChromaDB | **VecForge** |
|
|
45
|
+
|---|---|---|---|
|
|
46
|
+
| Local-first | ❌ Cloud-only | ✅ | ✅ **Always** |
|
|
47
|
+
| Encryption at rest | ❌ | ❌ | ✅ **AES-256** |
|
|
48
|
+
| Hybrid search | ✅ | ❌ | ✅ **Dense + BM25** |
|
|
49
|
+
| Namespace isolation | ✅ Cloud | ❌ | ✅ **Local** |
|
|
50
|
+
| RBAC | ✅ Cloud | ❌ | ✅ **Built-in** |
|
|
51
|
+
| Audit logging | ❌ | ❌ | ✅ **JSONL** |
|
|
52
|
+
| Price | $$$$ | Free | ✅ **Free** |
|
|
53
|
+
|
|
54
|
+
---
|
|
55
|
+
|
|
56
|
+
## 📦 Install
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
pip install vecforge
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
### From source (development)
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
git clone https://github.com/bosekarmegam/vecforge.git
|
|
66
|
+
cd vecforge
|
|
67
|
+
pip install -e ".[dev]"
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
### System Requirements
|
|
71
|
+
|
|
72
|
+
> **Windows users:** VecForge uses PyTorch under the hood, which requires the
|
|
73
|
+
> [Microsoft Visual C++ Redistributable](https://aka.ms/vs/17/release/vc_redist.x64.exe).
|
|
74
|
+
> Install it before running VecForge.
|
|
75
|
+
|
|
76
|
+
> ๐ See the full [Installation Guide](docs/installation.md) for GPU, encryption, and platform-specific options.
|
|
77
|
+
|
|
78
|
+
---
|
|
79
|
+
|
|
80
|
+
## ๐ Encrypted Vault
|
|
81
|
+
|
|
82
|
+
```python
|
|
83
|
+
import os
|
|
84
|
+
from vecforge import VecForge
|
|
85
|
+
|
|
86
|
+
db = VecForge(
|
|
87
|
+
"secure_vault",
|
|
88
|
+
encryption_key=os.environ["VECFORGE_KEY"],
|
|
89
|
+
audit_log="audit.jsonl",
|
|
90
|
+
deletion_protection=True,
|
|
91
|
+
)
|
|
92
|
+
db.add("Top secret patient data", namespace="ward_7")
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
---
|
|
96
|
+
|
|
97
|
+
## ๐ Hybrid Search
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
results = db.search(
|
|
101
|
+
"elderly diabetic hip fracture",
|
|
102
|
+
top_k=5,
|
|
103
|
+
alpha=0.7, # 70% semantic, 30% keyword
|
|
104
|
+
rerank=True, # cross-encoder precision boost
|
|
105
|
+
namespace="ward_7",
|
|
106
|
+
filters={"year": {"gte": 2023}},
|
|
107
|
+
)
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
> ๐ See the [Search Guide](docs/search.md) for alpha tuning, metadata operators, and reranking strategies.
|
|
111
|
+
|
|
112
|
+
---
|
|
113
|
+
|
|
114
|
+
## ๐ Auto-Ingest Documents
|
|
115
|
+
|
|
116
|
+
```python
|
|
117
|
+
# Ingest entire directories — auto-detects format
|
|
118
|
+
db.ingest("medical_records/") # PDF, DOCX, TXT, MD, HTML
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
> ๐ See the [Ingestion Guide](docs/ingestion.md) for chunking configuration and supported formats.
|
|
122
|
+
|
|
123
|
+
---
|
|
124
|
+
|
|
125
|
+
## 🛡️ Multi-Tenant Namespaces
|
|
126
|
+
|
|
127
|
+
```python
|
|
128
|
+
db.create_namespace("hospital_a")
|
|
129
|
+
db.create_namespace("hospital_b")
|
|
130
|
+
|
|
131
|
+
db.add("Patient data A", namespace="hospital_a")
|
|
132
|
+
db.add("Patient data B", namespace="hospital_b")
|
|
133
|
+
|
|
134
|
+
# Tenant isolation — hospital_a never sees hospital_b's data
|
|
135
|
+
results = db.search("patient", namespace="hospital_a")
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
---
|
|
139
|
+
|
|
140
|
+
## 🖥️ CLI
|
|
141
|
+
|
|
142
|
+
```bash
|
|
143
|
+
vecforge ingest my_docs/ --vault my.db
|
|
144
|
+
vecforge search "diabetes" --vault my.db --top-k 5
|
|
145
|
+
vecforge stats my.db
|
|
146
|
+
vecforge export my.db -o data.json
|
|
147
|
+
vecforge serve --vault my.db --port 8080
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
> ๐ See the [CLI Reference](docs/cli_reference.md) for all commands and options.
|
|
151
|
+
|
|
152
|
+
---
|
|
153
|
+
|
|
154
|
+
## ๐ REST API
|
|
155
|
+
|
|
156
|
+
```bash
|
|
157
|
+
vecforge serve --vault my.db --port 8080
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
```bash
|
|
161
|
+
# Add document
|
|
162
|
+
curl -X POST http://localhost:8080/api/v1/add \
|
|
163
|
+
-H "Content-Type: application/json" \
|
|
164
|
+
-d '{"text": "Patient record", "namespace": "default"}'
|
|
165
|
+
|
|
166
|
+
# Search
|
|
167
|
+
curl -X POST http://localhost:8080/api/v1/search \
|
|
168
|
+
-H "Content-Type: application/json" \
|
|
169
|
+
-d '{"query": "diabetes", "top_k": 5}'
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
> ๐ See the [REST API Reference](docs/rest_api.md) for all endpoints with request/response schemas.
|
|
173
|
+
|
|
174
|
+
---
|
|
175
|
+
|
|
176
|
+
## 🧪 Examples
|
|
177
|
+
|
|
178
|
+
Ready-to-run example scripts demonstrating real-world use cases:
|
|
179
|
+
|
|
180
|
+
| Example | Description |
|
|
181
|
+
|---|---|
|
|
182
|
+
| [๐ฅ Hospital Search](examples/hospital_search.py) | Medical record search with namespace isolation per ward |
|
|
183
|
+
| [โ๏ธ Legal Documents](examples/legal_document_search.py) | NDA and contract search with type/year filtering |
|
|
184
|
+
| [๐ GIS Data Search](examples/gis_data_search.py) | Geospatial dataset discovery with USGS, Sentinel, OSM |
|
|
185
|
+
| [๐ค RAG Pipeline](examples/rag_pipeline.py) | Retrieval-Augmented Generation with VecForge as backend |
|
|
186
|
+
| [๐ข Multi-Tenant SaaS](examples/multi_tenant_saas.py) | Namespace isolation, RBAC, and audit logging demo |
|
|
187
|
+
| [๐ป Codebase Assistant](examples/codebase_assistant.py) | Code documentation semantic search |
|
|
188
|
+
|
|
189
|
+
```bash
|
|
190
|
+
# Run any example
|
|
191
|
+
python examples/hospital_search.py
|
|
192
|
+
python examples/gis_data_search.py
|
|
193
|
+
python examples/rag_pipeline.py
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
---
|
|
197
|
+
|
|
198
|
+
## ๐ Documentation
|
|
199
|
+
|
|
200
|
+
### Getting Started
|
|
201
|
+
- [⚡ Quickstart](docs/quickstart.md) — Get running in 5 minutes
|
|
202
|
+
- [📦 Installation](docs/installation.md) — All install options & system requirements
|
|
203
|
+
|
|
204
|
+
### User Guides
|
|
205
|
+
- [๐ง Core Concepts](docs/core_concepts.md) โ Vaults, namespaces, hybrid search explained
|
|
206
|
+
- [๐ Search Guide](docs/search.md) โ Alpha tuning, filters, reranking
|
|
207
|
+
- [๐ Security Guide](docs/security.md) โ Encryption, RBAC, audit logging
|
|
208
|
+
- [๐ Ingestion Guide](docs/ingestion.md) โ PDF, DOCX, HTML, TXT ingestion & chunking
|
|
209
|
+
|
|
210
|
+
### Reference
|
|
211
|
+
- [๐ API Reference](docs/api_reference.md) โ Full Python API documentation
|
|
212
|
+
- [๐ฅ๏ธ CLI Reference](docs/cli_reference.md) โ All CLI commands & options
|
|
213
|
+
- [๐ REST API](docs/rest_api.md) โ FastAPI server endpoints
|
|
214
|
+
- [โ๏ธ Configuration](docs/configuration.md) โ All config options in one place
|
|
215
|
+
|
|
216
|
+
---
|
|
217
|
+
|
|
218
|
+
## ๐ Benchmarks
|
|
219
|
+
|
|
220
|
+
> Verified on Phase 2 benchmark suite (`benchmarks/bench_search.py`)
|
|
221
|
+
|
|
222
|
+
| Operation | VecForge (Actual) | North Star Target | Pinecone | ChromaDB |
|
|
223
|
+
|---|---|---|---|---|
|
|
224
|
+
| Search 1k docs | **0.04ms** p50 | — | ~80ms | ~200ms |
|
|
225
|
+
| Search 10k docs | **1.63ms** p50 | — | ~80ms | ~200ms |
|
|
226
|
+
| **Search 100k docs** | **11.31ms** p50 ✅ | <15ms | ~80ms | ~200ms |
|
|
227
|
+
| Ingest 100k docs | **2.9M docs/sec** | — | Manual | Manual |
|
|
228
|
+
| BM25 Search 10k | **9.40ms** p50 | — | N/A | N/A |
|
|
229
|
+
| Encrypted search | **<20ms overhead** | <20ms | N/A | N/A |
|
|
230
|
+
|
|
231
|
+
### Quality Gates
|
|
232
|
+
|
|
233
|
+
| Check | Result |
|
|
234
|
+
|---|---|
|
|
235
|
+
| Ruff lint | ✅ All checks passed |
|
|
236
|
+
| Mypy type check | ✅ 0 errors (27 files) |
|
|
237
|
+
| Pytest | ✅ 128/128 tests pass |
|
|
238
|
+
| Coverage | 89% (core modules 85-100%) |
|
|
239
|
+
|
|
240
|
+
---
|
|
241
|
+
|
|
242
|
+
## ⚖️ License
|
|
243
|
+
|
|
244
|
+
**Business Source License 1.1 (BSL)**
|
|
245
|
+
|
|
246
|
+
- ✅ Free for personal, research, open-source, and non-commercial use
|
|
247
|
+
- ✅ Read, modify, and share freely
|
|
248
|
+
- ๐ Commercial use requires a license from ArcGX TechLabs
|
|
249
|
+
|
|
250
|
+
Contact: [suneelbose@arcgx.in](mailto:suneelbose@arcgx.in)
|
|
251
|
+
|
|
252
|
+
---
|
|
253
|
+
|
|
254
|
+
<p align="center">
|
|
255
|
+
Built with ❤️ by <strong>Suneel Bose K</strong> · <strong>ArcGX TechLabs Private Limited</strong>
|
|
256
|
+
</p>
|
vecforge-0.2.0/pyproject.toml
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# VecForge — Universal Local-First Vector Database
|
|
2
|
+
# Copyright (c) 2026 Suneel Bose K · ArcGX TechLabs Private Limited
|
|
3
|
+
# Licensed under BSL 1.1
|
|
4
|
+
|
|
5
|
+
[build-system]
|
|
6
|
+
requires = ["setuptools>=68.0", "wheel"]
|
|
7
|
+
build-backend = "setuptools.build_meta"
|
|
8
|
+
|
|
9
|
+
[project]
|
|
10
|
+
name = "vecforge"
|
|
11
|
+
version = "0.2.0"
|
|
12
|
+
description = "Forge your vector database. Own it forever. Local-first, encrypted, quantum-inspired."
|
|
13
|
+
readme = "README.md"
|
|
14
|
+
license = {text = "Business Source License 1.1"}
|
|
15
|
+
requires-python = ">=3.10"
|
|
16
|
+
authors = [{name = "Suneel Bose K", email = "suneelbose@arcgx.in"}]
|
|
17
|
+
keywords = ["vector-database", "faiss", "embeddings", "search", "local-first", "encrypted"]
|
|
18
|
+
classifiers = [
|
|
19
|
+
"Development Status :: 3 - Alpha",
|
|
20
|
+
"Intended Audience :: Developers",
|
|
21
|
+
"Topic :: Database",
|
|
22
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
23
|
+
"Programming Language :: Python :: 3.10",
|
|
24
|
+
"Programming Language :: Python :: 3.11",
|
|
25
|
+
"Programming Language :: Python :: 3.12",
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
dependencies = [
|
|
29
|
+
"faiss-cpu>=1.7.4",
|
|
30
|
+
"sentence-transformers>=2.2.0",
|
|
31
|
+
"numpy>=1.24.0",
|
|
32
|
+
"rank-bm25>=0.2.2",
|
|
33
|
+
"fastapi>=0.100.0",
|
|
34
|
+
"uvicorn[standard]>=0.23.0",
|
|
35
|
+
"pymupdf>=1.23.0",
|
|
36
|
+
"numba>=0.58.0",
|
|
37
|
+
"joblib>=1.3.0",
|
|
38
|
+
"click>=8.1.0",
|
|
39
|
+
"python-docx>=1.0.0",
|
|
40
|
+
"beautifulsoup4>=4.12.0",
|
|
41
|
+
]
|
|
42
|
+
|
|
43
|
+
[project.optional-dependencies]
|
|
44
|
+
dev = [
|
|
45
|
+
"pytest>=7.4.0",
|
|
46
|
+
"pytest-cov>=4.1.0",
|
|
47
|
+
"mypy>=1.5.0",
|
|
48
|
+
"ruff>=0.1.0",
|
|
49
|
+
"black>=23.7.0",
|
|
50
|
+
"types-beautifulsoup4",
|
|
51
|
+
]
|
|
52
|
+
gpu = [
|
|
53
|
+
"faiss-gpu>=1.7.4",
|
|
54
|
+
"cupy>=12.0.0",
|
|
55
|
+
]
|
|
56
|
+
quantum = []
|
|
57
|
+
|
|
58
|
+
[project.scripts]
|
|
59
|
+
vecforge = "vecforge.cli.main:cli"
|
|
60
|
+
|
|
61
|
+
[project.urls]
|
|
62
|
+
Homepage = "https://vecforge.arcgx.in"
|
|
63
|
+
Repository = "https://github.com/bosekarmegam/vecforge"
|
|
64
|
+
Issues = "https://github.com/bosekarmegam/vecforge/issues"
|
|
65
|
+
|
|
66
|
+
[tool.setuptools.packages.find]
|
|
67
|
+
include = ["vecforge*"]
|
|
68
|
+
|
|
69
|
+
[tool.ruff]
|
|
70
|
+
line-length = 88
|
|
71
|
+
target-version = "py310"
|
|
72
|
+
|
|
73
|
+
[tool.ruff.lint]
|
|
74
|
+
select = ["E", "F", "I", "N", "W", "UP", "B", "SIM"]
|
|
75
|
+
|
|
76
|
+
[tool.black]
|
|
77
|
+
line-length = 88
|
|
78
|
+
target-version = ["py310"]
|
|
79
|
+
|
|
80
|
+
[tool.mypy]
|
|
81
|
+
python_version = "3.10"
|
|
82
|
+
strict = true
|
|
83
|
+
warn_return_any = true
|
|
84
|
+
warn_unused_configs = true
|
|
85
|
+
|
|
86
|
+
[tool.pytest.ini_options]
|
|
87
|
+
testpaths = ["tests"]
|
|
88
|
+
addopts = "-v --tb=short"
|