vecforge 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vecforge/__init__.py +59 -0
- vecforge/cli/__init__.py +3 -0
- vecforge/cli/main.py +197 -0
- vecforge/core/__init__.py +3 -0
- vecforge/core/bm25.py +187 -0
- vecforge/core/embedder.py +152 -0
- vecforge/core/indexer.py +196 -0
- vecforge/core/reranker.py +120 -0
- vecforge/core/storage.py +493 -0
- vecforge/core/vault.py +760 -0
- vecforge/exceptions.py +164 -0
- vecforge/ingest/__init__.py +3 -0
- vecforge/ingest/dispatcher.py +181 -0
- vecforge/ingest/document.py +237 -0
- vecforge/search/__init__.py +3 -0
- vecforge/search/cascade.py +186 -0
- vecforge/search/filters.py +146 -0
- vecforge/search/hybrid.py +146 -0
- vecforge/security/__init__.py +3 -0
- vecforge/security/audit.py +169 -0
- vecforge/security/encryption.py +84 -0
- vecforge/security/namespaces.py +127 -0
- vecforge/security/rbac.py +172 -0
- vecforge/security/snapshots.py +135 -0
- vecforge/server/__init__.py +3 -0
- vecforge/server/app.py +54 -0
- vecforge/server/routes.py +215 -0
- vecforge-0.2.0.dist-info/METADATA +302 -0
- vecforge-0.2.0.dist-info/RECORD +34 -0
- vecforge-0.2.0.dist-info/WHEEL +5 -0
- vecforge-0.2.0.dist-info/entry_points.txt +2 -0
- vecforge-0.2.0.dist-info/licenses/LICENSE +45 -0
- vecforge-0.2.0.dist-info/licenses/NOTICE +14 -0
- vecforge-0.2.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
# VecForge — Universal Local-First Vector Database
|
|
2
|
+
# Copyright (c) 2026 Suneel Bose K · ArcGX TechLabs Private Limited
|
|
3
|
+
# Built by Suneel Bose K (Founder & CEO, ArcGX TechLabs)
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Business Source License 1.1 (BSL 1.1)
|
|
6
|
+
# Free for personal, research, open-source, and non-commercial use.
|
|
7
|
+
# Commercial use requires a separate license from ArcGX TechLabs.
|
|
8
|
+
# See LICENSE file in the project root or contact: suneelbose@arcgx.in
|
|
9
|
+
|
|
10
|
+
"""
|
|
11
|
+
FastAPI route definitions for VecForge REST API.
|
|
12
|
+
|
|
13
|
+
Provides endpoints for add, search, delete, stats, and namespace
|
|
14
|
+
management, plus a health check.
|
|
15
|
+
|
|
16
|
+
Built by Suneel Bose K · ArcGX TechLabs Private Limited.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
from typing import Any
|
|
22
|
+
|
|
23
|
+
from fastapi import APIRouter, HTTPException
|
|
24
|
+
from pydantic import BaseModel, Field
|
|
25
|
+
|
|
26
|
+
from vecforge.core.vault import VecForge
|
|
27
|
+
from vecforge.exceptions import (
|
|
28
|
+
VaultEmptyError,
|
|
29
|
+
VecForgeError,
|
|
30
|
+
VecForgePermissionError,
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
# ─── Request/Response Models ───
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class AddRequest(BaseModel):
    """Request body for adding a document."""

    # Required: the document text handed to the vault's add() call.
    text: str = Field(
        ...,
        description="Document text content",
    )
    # Optional; default_factory gives every request its own empty dict.
    metadata: dict[str, Any] = Field(
        default_factory=dict,
        description="Metadata",
    )
    # Namespace the document is written to; "default" when omitted.
    namespace: str = Field(
        default="default",
        description="Target namespace",
    )
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class AddResponse(BaseModel):
    """Response for a successful add operation."""

    # Identifier the vault returned for the newly stored document.
    doc_id: str
    # Fixed confirmation text unless the caller overrides it.
    message: str = Field(default="Document added successfully")
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class SearchRequest(BaseModel):
    """Request body for searching the vault."""

    # Required query text.
    query: str = Field(
        ...,
        description="Search query",
    )
    # Result count, validated to the inclusive range 1..100.
    top_k: int = Field(
        default=10,
        ge=1,
        le=100,
        description="Number of results",
    )
    # Semantic weight, validated to the inclusive range 0.0..1.0.
    alpha: float = Field(
        default=0.5,
        ge=0.0,
        le=1.0,
        description="Semantic weight",
    )
    # None is forwarded to the vault as "no namespace restriction given".
    namespace: str | None = Field(
        default=None,
        description="Restrict to namespace",
    )
    # Passed through to the vault unchanged; None means no filters supplied.
    filters: dict[str, Any] | None = Field(
        default=None,
        description="Metadata filters",
    )
    # Reranking is opt-in.
    rerank: bool = Field(
        default=False,
        description="Enable reranking",
    )
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class SearchResultItem(BaseModel):
    """A single search result."""

    # Every field is required. The search endpoint fills each one from the
    # same-named attribute of a result object returned by the vault.
    text: str
    score: float
    metadata: dict[str, Any]
    namespace: str
    doc_id: str
    modality: str
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class SearchResponse(BaseModel):
    """Response for a search operation."""

    # Result items as produced by the search endpoint.
    results: list[SearchResultItem]
    # The search endpoint sets this to len(results).
    count: int
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class StatsResponse(BaseModel):
    """Response for vault statistics."""

    # Each field is copied from the same-named key of the dict returned by
    # the vault's stats() call; all five keys are required.
    path: str
    documents: int
    namespaces: list[str]
    encrypted: bool
    # NOTE(review): presumably reports whether the quantum-inspired mode is
    # enabled -- confirm against VecForge.stats().
    quantum: bool
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class NamespaceRequest(BaseModel):
    """Request body for creating a namespace."""

    # Required: forwarded verbatim to the vault's create_namespace() call.
    name: str = Field(
        ...,
        description="Namespace name",
    )
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
# ─── Router ───
|
|
97
|
+
|
|
98
|
+
# Module-level vault handle; stays None until create_router() assigns it.
_vault_instance: VecForge | None = None
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def create_router(vault_path: str) -> APIRouter:
    """Create API router with vault instance.

    Instantiates ``VecForge(vault_path)``, stores it in the module-level
    ``_vault_instance`` and registers the add, search, delete, stats,
    namespace and health endpoints under the ``/api/v1`` prefix.

    Vault errors are translated uniformly on every endpoint:
    ``VaultEmptyError`` -> 404, ``VecForgePermissionError`` -> 403 and any
    other ``VecForgeError`` -> 400.

    Args:
        vault_path: Path to the vault database.

    Returns:
        Configured APIRouter.
    """
    global _vault_instance
    _vault_instance = VecForge(vault_path)

    router = APIRouter(prefix="/api/v1", tags=["VecForge"])

    # Listed most-specific first, mirroring the order used for translation.
    vault_errors = (VaultEmptyError, VecForgePermissionError, VecForgeError)

    def _require_vault() -> VecForge:
        """Return the active vault, or fail the request with 503 if unset."""
        # Explicit check instead of ``assert``: asserts are stripped under
        # ``python -O``, which would let a missing vault surface as an
        # AttributeError deep inside a handler.
        if _vault_instance is None:
            raise HTTPException(
                status_code=503,
                detail="Vault is not initialised",
            )
        return _vault_instance

    def _as_http_error(exc: Exception) -> HTTPException:
        """Translate a vault exception into the matching HTTP error."""
        if isinstance(exc, VaultEmptyError):
            status_code = 404
        elif isinstance(exc, VecForgePermissionError):
            status_code = 403
        else:
            status_code = 400
        return HTTPException(status_code=status_code, detail=str(exc))

    @router.post("/add", response_model=AddResponse)
    async def add_document(request: AddRequest) -> AddResponse:
        """Add a document to the vault."""
        vault = _require_vault()
        try:
            doc_id = vault.add(
                text=request.text,
                metadata=request.metadata,
                namespace=request.namespace,
            )
        except vault_errors as e:
            raise _as_http_error(e) from e
        return AddResponse(doc_id=doc_id)

    @router.post("/search", response_model=SearchResponse)
    async def search_vault(request: SearchRequest) -> SearchResponse:
        """Search the vault with a natural language query."""
        vault = _require_vault()
        try:
            results = vault.search(
                query=request.query,
                top_k=request.top_k,
                alpha=request.alpha,
                namespace=request.namespace,
                filters=request.filters,
                rerank=request.rerank,
            )
            # Built inside the try block so that errors raised while
            # consuming ``results`` are translated as well.
            items = [
                SearchResultItem(
                    text=r.text,
                    score=r.score,
                    metadata=r.metadata,
                    namespace=r.namespace,
                    doc_id=r.doc_id,
                    modality=r.modality,
                )
                for r in results
            ]
        except vault_errors as e:
            raise _as_http_error(e) from e
        return SearchResponse(results=items, count=len(items))

    @router.delete("/docs/{doc_id}")
    async def delete_document(doc_id: str) -> dict[str, Any]:
        """Delete a document by ID."""
        vault = _require_vault()
        try:
            deleted = vault.delete(doc_id)
        except vault_errors as e:
            raise _as_http_error(e) from e
        # A falsy return from delete() is reported as "not found".
        if not deleted:
            raise HTTPException(
                status_code=404,
                detail=f"Document '{doc_id}' not found",
            )
        return {"deleted": True, "doc_id": doc_id}

    @router.get("/stats", response_model=StatsResponse)
    async def get_stats() -> StatsResponse:
        """Get vault statistics."""
        vault = _require_vault()
        try:
            info = vault.stats()
        except vault_errors as e:
            raise _as_http_error(e) from e
        return StatsResponse(
            path=info["path"],
            documents=info["documents"],
            namespaces=info["namespaces"],
            encrypted=info["encrypted"],
            quantum=info["quantum"],
        )

    @router.post("/namespaces")
    async def create_namespace(request: NamespaceRequest) -> dict[str, str]:
        """Create a new namespace."""
        vault = _require_vault()
        try:
            vault.create_namespace(request.name)
        except vault_errors as e:
            raise _as_http_error(e) from e
        return {"created": request.name}

    @router.get("/namespaces")
    async def list_namespaces() -> dict[str, list[str]]:
        """List all namespaces."""
        vault = _require_vault()
        try:
            names = vault.list_namespaces()
        except vault_errors as e:
            raise _as_http_error(e) from e
        return {"namespaces": names}

    @router.get("/health")
    async def health_check() -> dict[str, str]:
        """Health check endpoint."""
        # Static payload; does not touch the vault.
        return {
            "status": "healthy",
            "service": "VecForge",
            "version": "0.2.0",
            "built_by": "Suneel Bose K · ArcGX TechLabs",
        }

    return router
|
|
@@ -0,0 +1,302 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: vecforge
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Forge your vector database. Own it forever. Local-first, encrypted, quantum-inspired.
|
|
5
|
+
Author-email: Suneel Bose K <suneelbose@arcgx.in>
|
|
6
|
+
License: Business Source License 1.1
|
|
7
|
+
Project-URL: Homepage, https://vecforge.arcgx.in
|
|
8
|
+
Project-URL: Repository, https://github.com/bosekarmegam/vecforge
|
|
9
|
+
Project-URL: Issues, https://github.com/bosekarmegam/vecforge/issues
|
|
10
|
+
Keywords: vector-database,faiss,embeddings,search,local-first,encrypted
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Topic :: Database
|
|
14
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Requires-Python: >=3.10
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
License-File: LICENSE
|
|
21
|
+
License-File: NOTICE
|
|
22
|
+
Requires-Dist: faiss-cpu>=1.7.4
|
|
23
|
+
Requires-Dist: sentence-transformers>=2.2.0
|
|
24
|
+
Requires-Dist: numpy>=1.24.0
|
|
25
|
+
Requires-Dist: rank-bm25>=0.2.2
|
|
26
|
+
Requires-Dist: fastapi>=0.100.0
|
|
27
|
+
Requires-Dist: uvicorn[standard]>=0.23.0
|
|
28
|
+
Requires-Dist: pymupdf>=1.23.0
|
|
29
|
+
Requires-Dist: numba>=0.58.0
|
|
30
|
+
Requires-Dist: joblib>=1.3.0
|
|
31
|
+
Requires-Dist: click>=8.1.0
|
|
32
|
+
Requires-Dist: python-docx>=1.0.0
|
|
33
|
+
Requires-Dist: beautifulsoup4>=4.12.0
|
|
34
|
+
Provides-Extra: dev
|
|
35
|
+
Requires-Dist: pytest>=7.4.0; extra == "dev"
|
|
36
|
+
Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
|
|
37
|
+
Requires-Dist: mypy>=1.5.0; extra == "dev"
|
|
38
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
39
|
+
Requires-Dist: black>=23.7.0; extra == "dev"
|
|
40
|
+
Requires-Dist: types-beautifulsoup4; extra == "dev"
|
|
41
|
+
Provides-Extra: gpu
|
|
42
|
+
Requires-Dist: faiss-gpu>=1.7.4; extra == "gpu"
|
|
43
|
+
Requires-Dist: cupy>=12.0.0; extra == "gpu"
|
|
44
|
+
Provides-Extra: quantum
|
|
45
|
+
Dynamic: license-file
|
|
46
|
+
|
|
47
|
+
<p align="center">
|
|
48
|
+
<h1 align="center">⚡ VecForge</h1>
|
|
49
|
+
<p align="center"><strong>Forge your vector database. Own it forever.</strong></p>
|
|
50
|
+
<p align="center">
|
|
51
|
+
Local-first · Encrypted · Hybrid Search · Zero Cloud Dependency
|
|
52
|
+
</p>
|
|
53
|
+
</p>
|
|
54
|
+
|
|
55
|
+
---
|
|
56
|
+
|
|
57
|
+
**VecForge** is a universal, local-first Python vector database with enterprise security, multimodal ingestion, and optional quantum-inspired acceleration.
|
|
58
|
+
|
|
59
|
+
Built by **Suneel Bose K** — Founder & CEO, [ArcGX TechLabs Private Limited](https://arcgx.in)
|
|
60
|
+
|
|
61
|
+
[PyPI](https://pypi.org/project/vecforge/)
|
|
62
|
+
[](https://pypi.org/project/vecforge/)
|
|
63
|
+
[License: BSL 1.1](LICENSE)
|
|
64
|
+
[Python 3.10+](https://python.org)
|
|
65
|
+
[Tests](https://github.com/bosekarmegam/vecforge/actions/workflows/tests.yml)
|
|
66
|
+
[](#-benchmarks)
|
|
67
|
+
[Ruff](https://github.com/astral-sh/ruff)
|
|
68
|
+
[Mypy](https://mypy-lang.org/)
|
|
69
|
+
[](#-benchmarks)
|
|
70
|
+
|
|
71
|
+
---
|
|
72
|
+
|
|
73
|
+
## ⚡ 5-Line Quickstart
|
|
74
|
+
|
|
75
|
+
```python
|
|
76
|
+
from vecforge import VecForge
|
|
77
|
+
|
|
78
|
+
db = VecForge("my_vault")
|
|
79
|
+
db.add("Patient admitted with type 2 diabetes", metadata={"ward": "7"})
|
|
80
|
+
results = db.search("diabetic patient")
|
|
81
|
+
print(results[0].text)
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
That's it. No API keys. No cloud. No config files. **Your data stays on your machine.**
|
|
85
|
+
|
|
86
|
+
---
|
|
87
|
+
|
|
88
|
+
## 🔥 Why VecForge?
|
|
89
|
+
|
|
90
|
+
| Feature | Pinecone | ChromaDB | **VecForge** |
|
|
91
|
+
|---|---|---|---|
|
|
92
|
+
| Local-first | ❌ Cloud-only | ✅ | ✅ **Always** |
|
|
93
|
+
| Encryption at rest | ❌ | ❌ | ✅ **AES-256** |
|
|
94
|
+
| Hybrid search | ✅ | ❌ | ✅ **Dense + BM25** |
|
|
95
|
+
| Namespace isolation | ✅ Cloud | ❌ | ✅ **Local** |
|
|
96
|
+
| RBAC | ✅ Cloud | ❌ | ✅ **Built-in** |
|
|
97
|
+
| Audit logging | ❌ | ❌ | ✅ **JSONL** |
|
|
98
|
+
| Price | $$$$ | Free | ✅ **Free** |
|
|
99
|
+
|
|
100
|
+
---
|
|
101
|
+
|
|
102
|
+
## 📦 Install
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
pip install vecforge
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
### From source (development)
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
git clone https://github.com/bosekarmegam/vecforge.git
|
|
112
|
+
cd vecforge
|
|
113
|
+
pip install -e ".[dev]"
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
### System Requirements
|
|
117
|
+
|
|
118
|
+
> **Windows users:** VecForge uses PyTorch under the hood, which requires the
|
|
119
|
+
> [Microsoft Visual C++ Redistributable](https://aka.ms/vs/17/release/vc_redist.x64.exe).
|
|
120
|
+
> Install it before running VecForge.
|
|
121
|
+
|
|
122
|
+
> 📖 See the full [Installation Guide](docs/installation.md) for GPU, encryption, and platform-specific options.
|
|
123
|
+
|
|
124
|
+
---
|
|
125
|
+
|
|
126
|
+
## 🔐 Encrypted Vault
|
|
127
|
+
|
|
128
|
+
```python
|
|
129
|
+
import os
|
|
130
|
+
from vecforge import VecForge
|
|
131
|
+
|
|
132
|
+
db = VecForge(
|
|
133
|
+
"secure_vault",
|
|
134
|
+
encryption_key=os.environ["VECFORGE_KEY"],
|
|
135
|
+
audit_log="audit.jsonl",
|
|
136
|
+
deletion_protection=True,
|
|
137
|
+
)
|
|
138
|
+
db.add("Top secret patient data", namespace="ward_7")
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
---
|
|
142
|
+
|
|
143
|
+
## 🔍 Hybrid Search
|
|
144
|
+
|
|
145
|
+
```python
|
|
146
|
+
results = db.search(
|
|
147
|
+
"elderly diabetic hip fracture",
|
|
148
|
+
top_k=5,
|
|
149
|
+
alpha=0.7, # 70% semantic, 30% keyword
|
|
150
|
+
rerank=True, # cross-encoder precision boost
|
|
151
|
+
namespace="ward_7",
|
|
152
|
+
filters={"year": {"gte": 2023}},
|
|
153
|
+
)
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
> 📖 See the [Search Guide](docs/search.md) for alpha tuning, metadata operators, and reranking strategies.
|
|
157
|
+
|
|
158
|
+
---
|
|
159
|
+
|
|
160
|
+
## 📄 Auto-Ingest Documents
|
|
161
|
+
|
|
162
|
+
```python
|
|
163
|
+
# Ingest entire directories — auto-detects format
|
|
164
|
+
db.ingest("medical_records/") # PDF, DOCX, TXT, MD, HTML
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
> 📖 See the [Ingestion Guide](docs/ingestion.md) for chunking configuration and supported formats.
|
|
168
|
+
|
|
169
|
+
---
|
|
170
|
+
|
|
171
|
+
## 🛡️ Multi-Tenant Namespaces
|
|
172
|
+
|
|
173
|
+
```python
|
|
174
|
+
db.create_namespace("hospital_a")
|
|
175
|
+
db.create_namespace("hospital_b")
|
|
176
|
+
|
|
177
|
+
db.add("Patient data A", namespace="hospital_a")
|
|
178
|
+
db.add("Patient data B", namespace="hospital_b")
|
|
179
|
+
|
|
180
|
+
# Tenant isolation — hospital_a never sees hospital_b's data
|
|
181
|
+
results = db.search("patient", namespace="hospital_a")
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
---
|
|
185
|
+
|
|
186
|
+
## 🖥️ CLI
|
|
187
|
+
|
|
188
|
+
```bash
|
|
189
|
+
vecforge ingest my_docs/ --vault my.db
|
|
190
|
+
vecforge search "diabetes" --vault my.db --top-k 5
|
|
191
|
+
vecforge stats my.db
|
|
192
|
+
vecforge export my.db -o data.json
|
|
193
|
+
vecforge serve --vault my.db --port 8080
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
> 📖 See the [CLI Reference](docs/cli_reference.md) for all commands and options.
|
|
197
|
+
|
|
198
|
+
---
|
|
199
|
+
|
|
200
|
+
## 🌐 REST API
|
|
201
|
+
|
|
202
|
+
```bash
|
|
203
|
+
vecforge serve --vault my.db --port 8080
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
```bash
|
|
207
|
+
# Add document
|
|
208
|
+
curl -X POST http://localhost:8080/api/v1/add \
|
|
209
|
+
-H "Content-Type: application/json" \
|
|
210
|
+
-d '{"text": "Patient record", "namespace": "default"}'
|
|
211
|
+
|
|
212
|
+
# Search
|
|
213
|
+
curl -X POST http://localhost:8080/api/v1/search \
|
|
214
|
+
-H "Content-Type: application/json" \
|
|
215
|
+
-d '{"query": "diabetes", "top_k": 5}'
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
> 📖 See the [REST API Reference](docs/rest_api.md) for all endpoints with request/response schemas.
|
|
219
|
+
|
|
220
|
+
---
|
|
221
|
+
|
|
222
|
+
## 🧪 Examples
|
|
223
|
+
|
|
224
|
+
Ready-to-run example scripts demonstrating real-world use cases:
|
|
225
|
+
|
|
226
|
+
| Example | Description |
|
|
227
|
+
|---|---|
|
|
228
|
+
| [🏥 Hospital Search](examples/hospital_search.py) | Medical record search with namespace isolation per ward |
|
|
229
|
+
| [⚖️ Legal Documents](examples/legal_document_search.py) | NDA and contract search with type/year filtering |
|
|
230
|
+
| [🌍 GIS Data Search](examples/gis_data_search.py) | Geospatial dataset discovery with USGS, Sentinel, OSM |
|
|
231
|
+
| [🤖 RAG Pipeline](examples/rag_pipeline.py) | Retrieval-Augmented Generation with VecForge as backend |
|
|
232
|
+
| [🏢 Multi-Tenant SaaS](examples/multi_tenant_saas.py) | Namespace isolation, RBAC, and audit logging demo |
|
|
233
|
+
| [💻 Codebase Assistant](examples/codebase_assistant.py) | Code documentation semantic search |
|
|
234
|
+
|
|
235
|
+
```bash
|
|
236
|
+
# Run any example
|
|
237
|
+
python examples/hospital_search.py
|
|
238
|
+
python examples/gis_data_search.py
|
|
239
|
+
python examples/rag_pipeline.py
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
---
|
|
243
|
+
|
|
244
|
+
## 📚 Documentation
|
|
245
|
+
|
|
246
|
+
### Getting Started
|
|
247
|
+
- [⚡ Quickstart](docs/quickstart.md) — Get running in 5 minutes
|
|
248
|
+
- [📦 Installation](docs/installation.md) — All install options & system requirements
|
|
249
|
+
|
|
250
|
+
### User Guides
|
|
251
|
+
- [🧠 Core Concepts](docs/core_concepts.md) — Vaults, namespaces, hybrid search explained
|
|
252
|
+
- [🔍 Search Guide](docs/search.md) — Alpha tuning, filters, reranking
|
|
253
|
+
- [🔐 Security Guide](docs/security.md) — Encryption, RBAC, audit logging
|
|
254
|
+
- [📄 Ingestion Guide](docs/ingestion.md) — PDF, DOCX, HTML, TXT ingestion & chunking
|
|
255
|
+
|
|
256
|
+
### Reference
|
|
257
|
+
- [📖 API Reference](docs/api_reference.md) — Full Python API documentation
|
|
258
|
+
- [🖥️ CLI Reference](docs/cli_reference.md) — All CLI commands & options
|
|
259
|
+
- [🌐 REST API](docs/rest_api.md) — FastAPI server endpoints
|
|
260
|
+
- [⚙️ Configuration](docs/configuration.md) — All config options in one place
|
|
261
|
+
|
|
262
|
+
---
|
|
263
|
+
|
|
264
|
+
## 📊 Benchmarks
|
|
265
|
+
|
|
266
|
+
> Verified on Phase 2 benchmark suite (`benchmarks/bench_search.py`)
|
|
267
|
+
|
|
268
|
+
| Operation | VecForge (Actual) | North Star Target | Pinecone | ChromaDB |
|
|
269
|
+
|---|---|---|---|---|
|
|
270
|
+
| Search 1k docs | **0.04ms** p50 | — | ~80ms | ~200ms |
|
|
271
|
+
| Search 10k docs | **1.63ms** p50 | — | ~80ms | ~200ms |
|
|
272
|
+
| **Search 100k docs** | **11.31ms** p50 ✅ | <15ms | ~80ms | ~200ms |
|
|
273
|
+
| Ingest 100k docs | **2.9M docs/sec** | — | Manual | Manual |
|
|
274
|
+
| BM25 Search 10k | **9.40ms** p50 | — | N/A | N/A |
|
|
275
|
+
| Encrypted search | **<20ms overhead** | <20ms | N/A | N/A |
|
|
276
|
+
|
|
277
|
+
### Quality Gates
|
|
278
|
+
|
|
279
|
+
| Check | Result |
|
|
280
|
+
|---|---|
|
|
281
|
+
| Ruff lint | ✅ All checks passed |
|
|
282
|
+
| Mypy type check | ✅ 0 errors (27 files) |
|
|
283
|
+
| Pytest | ✅ 128/128 tests pass |
|
|
284
|
+
| Coverage | 89% (core modules 85-100%) |
|
|
285
|
+
|
|
286
|
+
---
|
|
287
|
+
|
|
288
|
+
## ⚖️ License
|
|
289
|
+
|
|
290
|
+
**Business Source License 1.1 (BSL)**
|
|
291
|
+
|
|
292
|
+
- ✅ Free for personal, research, open-source, and non-commercial use
|
|
293
|
+
- ✅ Read, modify, and share freely
|
|
294
|
+
- 📋 Commercial use requires a license from ArcGX TechLabs
|
|
295
|
+
|
|
296
|
+
Contact: [suneelbose@arcgx.in](mailto:suneelbose@arcgx.in)
|
|
297
|
+
|
|
298
|
+
---
|
|
299
|
+
|
|
300
|
+
<p align="center">
|
|
301
|
+
Built with ❤️ by <strong>Suneel Bose K</strong> · <strong>ArcGX TechLabs Private Limited</strong>
|
|
302
|
+
</p>
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
vecforge/__init__.py,sha256=ef-MMBHFZBH-gie2ENs5H23XrowB9k7_uOCEeG_6yN0,1704
|
|
2
|
+
vecforge/exceptions.py,sha256=UWmoixYPk6KNYMXkg1X4PNEpFvSRBMM16DwXuI-e5nA,6106
|
|
3
|
+
vecforge/cli/__init__.py,sha256=Vrlfj54iZgrgIGgaEo9Bu34AwbCT2y5aOoQGXZcoXf4,176
|
|
4
|
+
vecforge/cli/main.py,sha256=vebEX52N0o4cO2LaGgMmv1nMEnVnyIH_TusMxSo-Nso,6092
|
|
5
|
+
vecforge/core/__init__.py,sha256=Vrlfj54iZgrgIGgaEo9Bu34AwbCT2y5aOoQGXZcoXf4,176
|
|
6
|
+
vecforge/core/bm25.py,sha256=2qiulX40mPEMP2DHfSWvaQ9xz7b1P3c_OOg5Ynm4oJQ,5604
|
|
7
|
+
vecforge/core/embedder.py,sha256=z2lzbAgNl8DdPc4NBveOfqwgsl3H_CfpNDFYRhWLVQc,4909
|
|
8
|
+
vecforge/core/indexer.py,sha256=YtWPwOdLLo3QsuWbkGIf4ZC4BzZys67_56draXPcLd8,6135
|
|
9
|
+
vecforge/core/reranker.py,sha256=vrfMBYhHr7dsmhZOjSWfJhafRdQ4SFz-cw7-dOL5Huk,4010
|
|
10
|
+
vecforge/core/storage.py,sha256=W8RM0Jkt-rKI1azZUmJhYPmuVk7pc_DwZgkDPOt9klI,14772
|
|
11
|
+
vecforge/core/vault.py,sha256=ZltJmfb2QFTJnmyvrUUfBQkb2bL6uRdf5xMctZNw20E,24278
|
|
12
|
+
vecforge/ingest/__init__.py,sha256=Vrlfj54iZgrgIGgaEo9Bu34AwbCT2y5aOoQGXZcoXf4,176
|
|
13
|
+
vecforge/ingest/dispatcher.py,sha256=fyIwYeUzAsS6PHh1kpGFHH0t5Eu1B02EzznMN2VWgbQ,5366
|
|
14
|
+
vecforge/ingest/document.py,sha256=0pNWkLsqjwd6nL_4uwejQoH1JhRRGz1WHJZu_6nBI-Y,7527
|
|
15
|
+
vecforge/search/__init__.py,sha256=Vrlfj54iZgrgIGgaEo9Bu34AwbCT2y5aOoQGXZcoXf4,176
|
|
16
|
+
vecforge/search/cascade.py,sha256=gc0VNZ8y-fxqBvhv_oZa03rzE0PTK8OVYzf67Ak3xDc,6400
|
|
17
|
+
vecforge/search/filters.py,sha256=XdwA_OLZT_MnNQtW8xyxVd5s3W62W61YK1QaFWUz-Ic,4601
|
|
18
|
+
vecforge/search/hybrid.py,sha256=wD6fFBuSi96WxE4hH_EmKdlO54szXtk8Gshc9YYx58g,5246
|
|
19
|
+
vecforge/security/__init__.py,sha256=Vrlfj54iZgrgIGgaEo9Bu34AwbCT2y5aOoQGXZcoXf4,176
|
|
20
|
+
vecforge/security/audit.py,sha256=ZGERKS_a0fuRjq9Pn2LVu7N_7jdVHUCvPdcrMTXLmws,5374
|
|
21
|
+
vecforge/security/encryption.py,sha256=hycNQbEdDRnKLBqpKtiLcOgRYHZff7hwScHY6d98dv0,2364
|
|
22
|
+
vecforge/security/namespaces.py,sha256=T5IpZVMiH7HHFbiUAPwldvCQF-FAlDGIabSLd77tmq0,3625
|
|
23
|
+
vecforge/security/rbac.py,sha256=NpRblbH9i1v-UlCeBMk6_rYqspleCXHIzjtp44-ejf8,5291
|
|
24
|
+
vecforge/security/snapshots.py,sha256=Qv9ztwY5gdczHxeLn8iiG9DUOfXuAovqALhpwW3yS5U,4120
|
|
25
|
+
vecforge/server/__init__.py,sha256=Vrlfj54iZgrgIGgaEo9Bu34AwbCT2y5aOoQGXZcoXf4,176
|
|
26
|
+
vecforge/server/app.py,sha256=JQBcBdVhkecqAzjiSCjKqcHJ5AkeOQWe3sHWtiqwqXQ,1551
|
|
27
|
+
vecforge/server/routes.py,sha256=jHoImsqLWC1fdqoy9ZV1XaA22SaJynREKUghEVSJNgQ,6871
|
|
28
|
+
vecforge-0.2.0.dist-info/licenses/LICENSE,sha256=M-l_pL24v0E5ffiR1JzojnC9fgVTQEB6qfu_RQDqAyw,1752
|
|
29
|
+
vecforge-0.2.0.dist-info/licenses/NOTICE,sha256=K3LuruUTdiIjYKysz5DGCMcH9iG4eM3a_zsrKuU0Nt4,540
|
|
30
|
+
vecforge-0.2.0.dist-info/METADATA,sha256=ldXj8zdSEmgsJHXqa081oygzzFpXpHI3vvLpaqnqvOI,9649
|
|
31
|
+
vecforge-0.2.0.dist-info/WHEEL,sha256=YCfwYGOYMi5Jhw2fU4yNgwErybb2IX5PEwBKV4ZbdBo,91
|
|
32
|
+
vecforge-0.2.0.dist-info/entry_points.txt,sha256=mkwbxBlMQQycQxhVVUPGeLfRm0UjJ6x9-wXXyaFGvOI,51
|
|
33
|
+
vecforge-0.2.0.dist-info/top_level.txt,sha256=Ym7EFFK6U7IAMrsugKVAnmxRrLbpEfcMz0xHjQRao5E,9
|
|
34
|
+
vecforge-0.2.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
Business Source License 1.1
|
|
2
|
+
|
|
3
|
+
Licensor: ArcGX TechLabs Private Limited
|
|
4
|
+
Founded by Suneel Bose K
|
|
5
|
+
|
|
6
|
+
Licensed Work: VecForge
|
|
7
|
+
Copyright (c) 2026 Suneel Bose K · ArcGX TechLabs Private Limited
|
|
8
|
+
|
|
9
|
+
Change Date: Four years from the date the Licensed Work is published.
|
|
10
|
+
|
|
11
|
+
Change License: Apache License, Version 2.0
|
|
12
|
+
|
|
13
|
+
Terms:
|
|
14
|
+
|
|
15
|
+
The Licensor hereby grants you the right to copy, modify, create derivative
|
|
16
|
+
works, redistribute, and make non-production use of the Licensed Work.
|
|
17
|
+
|
|
18
|
+
The Licensor may make an Additional Use Grant, permitting limited production
|
|
19
|
+
use. You may use the Licensed Work for personal, research, open-source, and
|
|
20
|
+
non-commercial purposes without restriction.
|
|
21
|
+
|
|
22
|
+
For commercial production use, you must obtain a separate commercial license
|
|
23
|
+
from ArcGX TechLabs Private Limited.
|
|
24
|
+
|
|
25
|
+
If your use of the Licensed Work does not comply with the requirements
|
|
26
|
+
currently in effect as described in this License, you must purchase a
|
|
27
|
+
commercial license from the Licensor, its affiliated entities, or authorized
|
|
28
|
+
resellers, or you must refrain from using the Licensed Work.
|
|
29
|
+
|
|
30
|
+
All copies of the original and modified Licensed Work, and derivative works of
|
|
31
|
+
the Licensed Work, are subject to this License.
|
|
32
|
+
|
|
33
|
+
This License does not grant you any right in any trademark or logo of Licensor
|
|
34
|
+
or its affiliates.
|
|
35
|
+
|
|
36
|
+
TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON AN
|
|
37
|
+
"AS IS" BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, EXPRESS
|
|
38
|
+
OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF MERCHANTABILITY,
|
|
39
|
+
FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND TITLE.
|
|
40
|
+
|
|
41
|
+
Contact for commercial licensing:
|
|
42
|
+
Email: suneelbose@arcgx.in
|
|
43
|
+
Web: www.arcgx.in
|
|
44
|
+
|
|
45
|
+
Built by Suneel Bose K · ArcGX TechLabs Private Limited
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
VecForge — Universal Local-First Vector Database
|
|
2
|
+
Copyright (c) 2026 Suneel Bose K · ArcGX TechLabs Private Limited
|
|
3
|
+
|
|
4
|
+
Built by Suneel Bose K
|
|
5
|
+
Founder & CEO, ArcGX TechLabs Private Limited
|
|
6
|
+
|
|
7
|
+
Licensed under the Business Source License 1.1 (BSL 1.1).
|
|
8
|
+
Free for personal, research, open-source, and non-commercial use.
|
|
9
|
+
Commercial use requires a separate license from ArcGX TechLabs.
|
|
10
|
+
|
|
11
|
+
Contact:
|
|
12
|
+
Commercial Licensing: suneelbose@arcgx.in
|
|
13
|
+
General: suneelbose@arcgx.in
|
|
14
|
+
Website: https://bosekarmegam.github.io/vecforge/
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
vecforge
|