vecforge 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. vecforge-0.2.0/LICENSE +45 -0
  2. vecforge-0.2.0/NOTICE +14 -0
  3. vecforge-0.2.0/PKG-INFO +302 -0
  4. vecforge-0.2.0/README.md +256 -0
  5. vecforge-0.2.0/pyproject.toml +88 -0
  6. vecforge-0.2.0/setup.cfg +4 -0
  7. vecforge-0.2.0/vecforge/__init__.py +59 -0
  8. vecforge-0.2.0/vecforge/cli/__init__.py +3 -0
  9. vecforge-0.2.0/vecforge/cli/main.py +197 -0
  10. vecforge-0.2.0/vecforge/core/__init__.py +3 -0
  11. vecforge-0.2.0/vecforge/core/bm25.py +187 -0
  12. vecforge-0.2.0/vecforge/core/embedder.py +152 -0
  13. vecforge-0.2.0/vecforge/core/indexer.py +196 -0
  14. vecforge-0.2.0/vecforge/core/reranker.py +120 -0
  15. vecforge-0.2.0/vecforge/core/storage.py +493 -0
  16. vecforge-0.2.0/vecforge/core/vault.py +760 -0
  17. vecforge-0.2.0/vecforge/exceptions.py +164 -0
  18. vecforge-0.2.0/vecforge/ingest/__init__.py +3 -0
  19. vecforge-0.2.0/vecforge/ingest/dispatcher.py +181 -0
  20. vecforge-0.2.0/vecforge/ingest/document.py +237 -0
  21. vecforge-0.2.0/vecforge/search/__init__.py +3 -0
  22. vecforge-0.2.0/vecforge/search/cascade.py +186 -0
  23. vecforge-0.2.0/vecforge/search/filters.py +146 -0
  24. vecforge-0.2.0/vecforge/search/hybrid.py +146 -0
  25. vecforge-0.2.0/vecforge/security/__init__.py +3 -0
  26. vecforge-0.2.0/vecforge/security/audit.py +169 -0
  27. vecforge-0.2.0/vecforge/security/encryption.py +84 -0
  28. vecforge-0.2.0/vecforge/security/namespaces.py +127 -0
  29. vecforge-0.2.0/vecforge/security/rbac.py +172 -0
  30. vecforge-0.2.0/vecforge/security/snapshots.py +135 -0
  31. vecforge-0.2.0/vecforge/server/__init__.py +3 -0
  32. vecforge-0.2.0/vecforge/server/app.py +54 -0
  33. vecforge-0.2.0/vecforge/server/routes.py +215 -0
  34. vecforge-0.2.0/vecforge.egg-info/PKG-INFO +302 -0
  35. vecforge-0.2.0/vecforge.egg-info/SOURCES.txt +37 -0
  36. vecforge-0.2.0/vecforge.egg-info/dependency_links.txt +1 -0
  37. vecforge-0.2.0/vecforge.egg-info/entry_points.txt +2 -0
  38. vecforge-0.2.0/vecforge.egg-info/requires.txt +26 -0
  39. vecforge-0.2.0/vecforge.egg-info/top_level.txt +1 -0
vecforge-0.2.0/LICENSE ADDED
@@ -0,0 +1,45 @@
1
+ Business Source License 1.1
2
+
3
+ Licensor: ArcGX TechLabs Private Limited
4
+ Founded by Suneel Bose K
5
+
6
+ Licensed Work: VecForge
7
+ Copyright (c) 2026 Suneel Bose K · ArcGX TechLabs Private Limited
8
+
9
+ Change Date: Four years from the date the Licensed Work is published.
10
+
11
+ Change License: Apache License, Version 2.0
12
+
13
+ Terms:
14
+
15
+ The Licensor hereby grants you the right to copy, modify, create derivative
16
+ works, redistribute, and make non-production use of the Licensed Work.
17
+
18
+ The Licensor may make an Additional Use Grant, permitting limited production
19
+ use. You may use the Licensed Work for personal, research, open-source, and
20
+ non-commercial purposes without restriction.
21
+
22
+ For commercial production use, you must obtain a separate commercial license
23
+ from ArcGX TechLabs Private Limited.
24
+
25
+ If your use of the Licensed Work does not comply with the requirements
26
+ currently in effect as described in this License, you must purchase a
27
+ commercial license from the Licensor, its affiliated entities, or authorized
28
+ resellers, or you must refrain from using the Licensed Work.
29
+
30
+ All copies of the original and modified Licensed Work, and derivative works of
31
+ the Licensed Work, are subject to this License.
32
+
33
+ This License does not grant you any right in any trademark or logo of Licensor
34
+ or its affiliates.
35
+
36
+ TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON AN
37
+ "AS IS" BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, EXPRESS
38
+ OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF MERCHANTABILITY,
39
+ FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND TITLE.
40
+
41
+ Contact for commercial licensing:
42
+ Email: suneelbose@arcgx.in
43
+ Web: www.arcgx.in
44
+
45
+ Built by Suneel Bose K · ArcGX TechLabs Private Limited
vecforge-0.2.0/NOTICE ADDED
@@ -0,0 +1,14 @@
1
+ VecForge — Universal Local-First Vector Database
2
+ Copyright (c) 2026 Suneel Bose K · ArcGX TechLabs Private Limited
3
+
4
+ Built by Suneel Bose K
5
+ Founder & CEO, ArcGX TechLabs Private Limited
6
+
7
+ Licensed under the Business Source License 1.1 (BSL 1.1).
8
+ Free for personal, research, open-source, and non-commercial use.
9
+ Commercial use requires a separate license from ArcGX TechLabs.
10
+
11
+ Contact:
12
+ Commercial Licensing: suneelbose@arcgx.in
13
+ General: suneelbose@arcgx.in
14
+ Website: https://bosekarmegam.github.io/vecforge/
vecforge-0.2.0/PKG-INFO ADDED
@@ -0,0 +1,302 @@
1
+ Metadata-Version: 2.4
2
+ Name: vecforge
3
+ Version: 0.2.0
4
+ Summary: Forge your vector database. Own it forever. Local-first, encrypted, quantum-inspired.
5
+ Author-email: Suneel Bose K <suneelbose@arcgx.in>
6
+ License: Business Source License 1.1
7
+ Project-URL: Homepage, https://vecforge.arcgx.in
8
+ Project-URL: Repository, https://github.com/bosekarmegam/vecforge
9
+ Project-URL: Issues, https://github.com/bosekarmegam/vecforge/issues
10
+ Keywords: vector-database,faiss,embeddings,search,local-first,encrypted
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Topic :: Database
14
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Requires-Python: >=3.10
19
+ Description-Content-Type: text/markdown
20
+ License-File: LICENSE
21
+ License-File: NOTICE
22
+ Requires-Dist: faiss-cpu>=1.7.4
23
+ Requires-Dist: sentence-transformers>=2.2.0
24
+ Requires-Dist: numpy>=1.24.0
25
+ Requires-Dist: rank-bm25>=0.2.2
26
+ Requires-Dist: fastapi>=0.100.0
27
+ Requires-Dist: uvicorn[standard]>=0.23.0
28
+ Requires-Dist: pymupdf>=1.23.0
29
+ Requires-Dist: numba>=0.58.0
30
+ Requires-Dist: joblib>=1.3.0
31
+ Requires-Dist: click>=8.1.0
32
+ Requires-Dist: python-docx>=1.0.0
33
+ Requires-Dist: beautifulsoup4>=4.12.0
34
+ Provides-Extra: dev
35
+ Requires-Dist: pytest>=7.4.0; extra == "dev"
36
+ Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
37
+ Requires-Dist: mypy>=1.5.0; extra == "dev"
38
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
39
+ Requires-Dist: black>=23.7.0; extra == "dev"
40
+ Requires-Dist: types-beautifulsoup4; extra == "dev"
41
+ Provides-Extra: gpu
42
+ Requires-Dist: faiss-gpu>=1.7.4; extra == "gpu"
43
+ Requires-Dist: cupy>=12.0.0; extra == "gpu"
44
+ Provides-Extra: quantum
45
+ Dynamic: license-file
46
+
47
+ <p align="center">
48
+ <h1 align="center">⚡ VecForge</h1>
49
+ <p align="center"><strong>Forge your vector database. Own it forever.</strong></p>
50
+ <p align="center">
51
+ Local-first · Encrypted · Hybrid Search · Zero Cloud Dependency
52
+ </p>
53
+ </p>
54
+
55
+ ---
56
+
57
+ **VecForge** is a universal, local-first Python vector database with enterprise security, multimodal ingestion, and optional quantum-inspired acceleration.
58
+
59
+ Built by **Suneel Bose K** — Founder & CEO, [ArcGX TechLabs Private Limited](https://arcgx.in)
60
+
61
+ [![PyPI version](https://img.shields.io/pypi/v/vecforge.svg)](https://pypi.org/project/vecforge/)
62
+ [![PyPI downloads](https://img.shields.io/pypi/dm/vecforge.svg)](https://pypi.org/project/vecforge/)
63
+ [![License: BSL 1.1](https://img.shields.io/badge/License-BSL%201.1-blue.svg)](LICENSE)
64
+ [![Python 3.10+](https://img.shields.io/badge/Python-3.10+-green.svg)](https://python.org)
65
+ [![Tests](https://github.com/bosekarmegam/vecforge/actions/workflows/tests.yml/badge.svg)](https://github.com/bosekarmegam/vecforge/actions/workflows/tests.yml)
66
+ [![Coverage](https://img.shields.io/badge/Coverage-89%25-brightgreen.svg)](#-benchmarks)
67
+ [![Ruff](https://img.shields.io/badge/Linting-Ruff%20✅-brightgreen.svg)](https://github.com/astral-sh/ruff)
68
+ [![Mypy](https://img.shields.io/badge/Typing-Mypy%20✅-brightgreen.svg)](https://mypy-lang.org/)
69
+ [![Benchmark](https://img.shields.io/badge/100k%20Search-11.31ms%20✅-brightgreen.svg)](#-benchmarks)
70
+
71
+ ---
72
+
73
+ ## ⚡ 5-Line Quickstart
74
+
75
+ ```python
76
+ from vecforge import VecForge
77
+
78
+ db = VecForge("my_vault")
79
+ db.add("Patient admitted with type 2 diabetes", metadata={"ward": "7"})
80
+ results = db.search("diabetic patient")
81
+ print(results[0].text)
82
+ ```
83
+
84
+ That's it. No API keys. No cloud. No config files. **Your data stays on your machine.**
85
+
86
+ ---
87
+
88
+ ## 🔥 Why VecForge?
89
+
90
+ | Feature | Pinecone | ChromaDB | **VecForge** |
91
+ |---|---|---|---|
92
+ | Local-first | ❌ Cloud-only | ✅ | ✅ **Always** |
93
+ | Encryption at rest | ❌ | ❌ | ✅ **AES-256** |
94
+ | Hybrid search | ✅ | ❌ | ✅ **Dense + BM25** |
95
+ | Namespace isolation | ✅ Cloud | ❌ | ✅ **Local** |
96
+ | RBAC | ✅ Cloud | ❌ | ✅ **Built-in** |
97
+ | Audit logging | ❌ | ❌ | ✅ **JSONL** |
98
+ | Price | $$$$ | Free | ✅ **Free** |
99
+
100
+ ---
101
+
102
+ ## 📦 Install
103
+
104
+ ```bash
105
+ pip install vecforge
106
+ ```
107
+
108
+ ### From source (development)
109
+
110
+ ```bash
111
+ git clone https://github.com/bosekarmegam/vecforge.git
112
+ cd vecforge
113
+ pip install -e ".[dev]"
114
+ ```
115
+
116
+ ### System Requirements
117
+
118
+ > **Windows users:** VecForge uses PyTorch under the hood, which requires the
119
+ > [Microsoft Visual C++ Redistributable](https://aka.ms/vs/17/release/vc_redist.x64.exe).
120
+ > Install it before running VecForge.
121
+
122
+ > 📖 See the full [Installation Guide](docs/installation.md) for GPU, encryption, and platform-specific options.
123
+
124
+ ---
125
+
126
+ ## 🔐 Encrypted Vault
127
+
128
+ ```python
129
+ import os
130
+ from vecforge import VecForge
131
+
132
+ db = VecForge(
133
+ "secure_vault",
134
+ encryption_key=os.environ["VECFORGE_KEY"],
135
+ audit_log="audit.jsonl",
136
+ deletion_protection=True,
137
+ )
138
+ db.add("Top secret patient data", namespace="ward_7")
139
+ ```
140
+
141
+ ---
142
+
143
+ ## 🔍 Hybrid Search
144
+
145
+ ```python
146
+ results = db.search(
147
+ "elderly diabetic hip fracture",
148
+ top_k=5,
149
+ alpha=0.7, # 70% semantic, 30% keyword
150
+ rerank=True, # cross-encoder precision boost
151
+ namespace="ward_7",
152
+ filters={"year": {"gte": 2023}},
153
+ )
154
+ ```
155
+
156
+ > 📖 See the [Search Guide](docs/search.md) for alpha tuning, metadata operators, and reranking strategies.
157
+
158
+ ---
159
+
160
+ ## 📄 Auto-Ingest Documents
161
+
162
+ ```python
163
+ # Ingest entire directories — auto-detects format
164
+ db.ingest("medical_records/") # PDF, DOCX, TXT, MD, HTML
165
+ ```
166
+
167
+ > 📖 See the [Ingestion Guide](docs/ingestion.md) for chunking configuration and supported formats.
168
+
169
+ ---
170
+
171
+ ## 🛡️ Multi-Tenant Namespaces
172
+
173
+ ```python
174
+ db.create_namespace("hospital_a")
175
+ db.create_namespace("hospital_b")
176
+
177
+ db.add("Patient data A", namespace="hospital_a")
178
+ db.add("Patient data B", namespace="hospital_b")
179
+
180
+ # Tenant isolation — hospital_a never sees hospital_b's data
181
+ results = db.search("patient", namespace="hospital_a")
182
+ ```
183
+
184
+ ---
185
+
186
+ ## 🖥️ CLI
187
+
188
+ ```bash
189
+ vecforge ingest my_docs/ --vault my.db
190
+ vecforge search "diabetes" --vault my.db --top-k 5
191
+ vecforge stats my.db
192
+ vecforge export my.db -o data.json
193
+ vecforge serve --vault my.db --port 8080
194
+ ```
195
+
196
+ > 📖 See the [CLI Reference](docs/cli_reference.md) for all commands and options.
197
+
198
+ ---
199
+
200
+ ## 🌐 REST API
201
+
202
+ ```bash
203
+ vecforge serve --vault my.db --port 8080
204
+ ```
205
+
206
+ ```bash
207
+ # Add document
208
+ curl -X POST http://localhost:8080/api/v1/add \
209
+ -H "Content-Type: application/json" \
210
+ -d '{"text": "Patient record", "namespace": "default"}'
211
+
212
+ # Search
213
+ curl -X POST http://localhost:8080/api/v1/search \
214
+ -H "Content-Type: application/json" \
215
+ -d '{"query": "diabetes", "top_k": 5}'
216
+ ```
217
+
218
+ > 📖 See the [REST API Reference](docs/rest_api.md) for all endpoints with request/response schemas.
219
+
220
+ ---
221
+
222
+ ## 🧪 Examples
223
+
224
+ Ready-to-run example scripts demonstrating real-world use cases:
225
+
226
+ | Example | Description |
227
+ |---|---|
228
+ | [🏥 Hospital Search](examples/hospital_search.py) | Medical record search with namespace isolation per ward |
229
+ | [⚖️ Legal Documents](examples/legal_document_search.py) | NDA and contract search with type/year filtering |
230
+ | [🌍 GIS Data Search](examples/gis_data_search.py) | Geospatial dataset discovery with USGS, Sentinel, OSM |
231
+ | [🤖 RAG Pipeline](examples/rag_pipeline.py) | Retrieval-Augmented Generation with VecForge as backend |
232
+ | [🏢 Multi-Tenant SaaS](examples/multi_tenant_saas.py) | Namespace isolation, RBAC, and audit logging demo |
233
+ | [💻 Codebase Assistant](examples/codebase_assistant.py) | Code documentation semantic search |
234
+
235
+ ```bash
236
+ # Run any example
237
+ python examples/hospital_search.py
238
+ python examples/gis_data_search.py
239
+ python examples/rag_pipeline.py
240
+ ```
241
+
242
+ ---
243
+
244
+ ## 📚 Documentation
245
+
246
+ ### Getting Started
247
+ - [⚡ Quickstart](docs/quickstart.md) — Get running in 5 minutes
248
+ - [📦 Installation](docs/installation.md) — All install options & system requirements
249
+
250
+ ### User Guides
251
+ - [🧠 Core Concepts](docs/core_concepts.md) — Vaults, namespaces, hybrid search explained
252
+ - [🔍 Search Guide](docs/search.md) — Alpha tuning, filters, reranking
253
+ - [🔐 Security Guide](docs/security.md) — Encryption, RBAC, audit logging
254
+ - [📄 Ingestion Guide](docs/ingestion.md) — PDF, DOCX, HTML, TXT ingestion & chunking
255
+
256
+ ### Reference
257
+ - [📖 API Reference](docs/api_reference.md) — Full Python API documentation
258
+ - [🖥️ CLI Reference](docs/cli_reference.md) — All CLI commands & options
259
+ - [🌐 REST API](docs/rest_api.md) — FastAPI server endpoints
260
+ - [⚙️ Configuration](docs/configuration.md) — All config options in one place
261
+
262
+ ---
263
+
264
+ ## 📊 Benchmarks
265
+
266
+ > Verified on Phase 2 benchmark suite (`benchmarks/bench_search.py`)
267
+
268
+ | Operation | VecForge (Actual) | North Star Target | Pinecone | ChromaDB |
269
+ |---|---|---|---|---|
270
+ | Search 1k docs | **0.04ms** p50 | — | ~80ms | ~200ms |
271
+ | Search 10k docs | **1.63ms** p50 | — | ~80ms | ~200ms |
272
+ | **Search 100k docs** | **11.31ms** p50 ✅ | <15ms | ~80ms | ~200ms |
273
+ | Ingest 100k docs | **2.9M docs/sec** | — | Manual | Manual |
274
+ | BM25 Search 10k | **9.40ms** p50 | — | N/A | N/A |
275
+ | Encrypted search | **<20ms overhead** | <20ms | N/A | N/A |
276
+
277
+ ### Quality Gates
278
+
279
+ | Check | Result |
280
+ |---|---|
281
+ | Ruff lint | ✅ All checks passed |
282
+ | Mypy type check | ✅ 0 errors (27 files) |
283
+ | Pytest | ✅ 128/128 tests pass |
284
+ | Coverage | 89% (core modules 85-100%) |
285
+
286
+ ---
287
+
288
+ ## ⚖️ License
289
+
290
+ **Business Source License 1.1 (BSL)**
291
+
292
+ - ✅ Free for personal, research, open-source, and non-commercial use
293
+ - ✅ Read, modify, and share freely
294
+ - 📋 Commercial use requires a license from ArcGX TechLabs
295
+
296
+ Contact: [suneelbose@arcgx.in](mailto:suneelbose@arcgx.in)
297
+
298
+ ---
299
+
300
+ <p align="center">
301
+ Built with ❤️ by <strong>Suneel Bose K</strong> · <strong>ArcGX TechLabs Private Limited</strong>
302
+ </p>
vecforge-0.2.0/README.md ADDED
@@ -0,0 +1,256 @@
1
+ <p align="center">
2
+ <h1 align="center">⚡ VecForge</h1>
3
+ <p align="center"><strong>Forge your vector database. Own it forever.</strong></p>
4
+ <p align="center">
5
+ Local-first · Encrypted · Hybrid Search · Zero Cloud Dependency
6
+ </p>
7
+ </p>
8
+
9
+ ---
10
+
11
+ **VecForge** is a universal, local-first Python vector database with enterprise security, multimodal ingestion, and optional quantum-inspired acceleration.
12
+
13
+ Built by **Suneel Bose K** — Founder & CEO, [ArcGX TechLabs Private Limited](https://arcgx.in)
14
+
15
+ [![PyPI version](https://img.shields.io/pypi/v/vecforge.svg)](https://pypi.org/project/vecforge/)
16
+ [![PyPI downloads](https://img.shields.io/pypi/dm/vecforge.svg)](https://pypi.org/project/vecforge/)
17
+ [![License: BSL 1.1](https://img.shields.io/badge/License-BSL%201.1-blue.svg)](LICENSE)
18
+ [![Python 3.10+](https://img.shields.io/badge/Python-3.10+-green.svg)](https://python.org)
19
+ [![Tests](https://github.com/bosekarmegam/vecforge/actions/workflows/tests.yml/badge.svg)](https://github.com/bosekarmegam/vecforge/actions/workflows/tests.yml)
20
+ [![Coverage](https://img.shields.io/badge/Coverage-89%25-brightgreen.svg)](#-benchmarks)
21
+ [![Ruff](https://img.shields.io/badge/Linting-Ruff%20✅-brightgreen.svg)](https://github.com/astral-sh/ruff)
22
+ [![Mypy](https://img.shields.io/badge/Typing-Mypy%20✅-brightgreen.svg)](https://mypy-lang.org/)
23
+ [![Benchmark](https://img.shields.io/badge/100k%20Search-11.31ms%20✅-brightgreen.svg)](#-benchmarks)
24
+
25
+ ---
26
+
27
+ ## ⚡ 5-Line Quickstart
28
+
29
+ ```python
30
+ from vecforge import VecForge
31
+
32
+ db = VecForge("my_vault")
33
+ db.add("Patient admitted with type 2 diabetes", metadata={"ward": "7"})
34
+ results = db.search("diabetic patient")
35
+ print(results[0].text)
36
+ ```
37
+
38
+ That's it. No API keys. No cloud. No config files. **Your data stays on your machine.**
39
+
40
+ ---
41
+
42
+ ## 🔥 Why VecForge?
43
+
44
+ | Feature | Pinecone | ChromaDB | **VecForge** |
45
+ |---|---|---|---|
46
+ | Local-first | ❌ Cloud-only | ✅ | ✅ **Always** |
47
+ | Encryption at rest | ❌ | ❌ | ✅ **AES-256** |
48
+ | Hybrid search | ✅ | ❌ | ✅ **Dense + BM25** |
49
+ | Namespace isolation | ✅ Cloud | ❌ | ✅ **Local** |
50
+ | RBAC | ✅ Cloud | ❌ | ✅ **Built-in** |
51
+ | Audit logging | ❌ | ❌ | ✅ **JSONL** |
52
+ | Price | $$$$ | Free | ✅ **Free** |
53
+
54
+ ---
55
+
56
+ ## 📦 Install
57
+
58
+ ```bash
59
+ pip install vecforge
60
+ ```
61
+
62
+ ### From source (development)
63
+
64
+ ```bash
65
+ git clone https://github.com/bosekarmegam/vecforge.git
66
+ cd vecforge
67
+ pip install -e ".[dev]"
68
+ ```
69
+
70
+ ### System Requirements
71
+
72
+ > **Windows users:** VecForge uses PyTorch under the hood, which requires the
73
+ > [Microsoft Visual C++ Redistributable](https://aka.ms/vs/17/release/vc_redist.x64.exe).
74
+ > Install it before running VecForge.
75
+
76
+ > 📖 See the full [Installation Guide](docs/installation.md) for GPU, encryption, and platform-specific options.
77
+
78
+ ---
79
+
80
+ ## 🔐 Encrypted Vault
81
+
82
+ ```python
83
+ import os
84
+ from vecforge import VecForge
85
+
86
+ db = VecForge(
87
+ "secure_vault",
88
+ encryption_key=os.environ["VECFORGE_KEY"],
89
+ audit_log="audit.jsonl",
90
+ deletion_protection=True,
91
+ )
92
+ db.add("Top secret patient data", namespace="ward_7")
93
+ ```
94
+
95
+ ---
96
+
97
+ ## 🔍 Hybrid Search
98
+
99
+ ```python
100
+ results = db.search(
101
+ "elderly diabetic hip fracture",
102
+ top_k=5,
103
+ alpha=0.7, # 70% semantic, 30% keyword
104
+ rerank=True, # cross-encoder precision boost
105
+ namespace="ward_7",
106
+ filters={"year": {"gte": 2023}},
107
+ )
108
+ ```
109
+
110
+ > 📖 See the [Search Guide](docs/search.md) for alpha tuning, metadata operators, and reranking strategies.
111
+
112
+ ---
113
+
114
+ ## 📄 Auto-Ingest Documents
115
+
116
+ ```python
117
+ # Ingest entire directories — auto-detects format
118
+ db.ingest("medical_records/") # PDF, DOCX, TXT, MD, HTML
119
+ ```
120
+
121
+ > 📖 See the [Ingestion Guide](docs/ingestion.md) for chunking configuration and supported formats.
122
+
123
+ ---
124
+
125
+ ## 🛡️ Multi-Tenant Namespaces
126
+
127
+ ```python
128
+ db.create_namespace("hospital_a")
129
+ db.create_namespace("hospital_b")
130
+
131
+ db.add("Patient data A", namespace="hospital_a")
132
+ db.add("Patient data B", namespace="hospital_b")
133
+
134
+ # Tenant isolation — hospital_a never sees hospital_b's data
135
+ results = db.search("patient", namespace="hospital_a")
136
+ ```
137
+
138
+ ---
139
+
140
+ ## 🖥️ CLI
141
+
142
+ ```bash
143
+ vecforge ingest my_docs/ --vault my.db
144
+ vecforge search "diabetes" --vault my.db --top-k 5
145
+ vecforge stats my.db
146
+ vecforge export my.db -o data.json
147
+ vecforge serve --vault my.db --port 8080
148
+ ```
149
+
150
+ > 📖 See the [CLI Reference](docs/cli_reference.md) for all commands and options.
151
+
152
+ ---
153
+
154
+ ## 🌐 REST API
155
+
156
+ ```bash
157
+ vecforge serve --vault my.db --port 8080
158
+ ```
159
+
160
+ ```bash
161
+ # Add document
162
+ curl -X POST http://localhost:8080/api/v1/add \
163
+ -H "Content-Type: application/json" \
164
+ -d '{"text": "Patient record", "namespace": "default"}'
165
+
166
+ # Search
167
+ curl -X POST http://localhost:8080/api/v1/search \
168
+ -H "Content-Type: application/json" \
169
+ -d '{"query": "diabetes", "top_k": 5}'
170
+ ```
171
+
172
+ > 📖 See the [REST API Reference](docs/rest_api.md) for all endpoints with request/response schemas.
173
+
174
+ ---
175
+
176
+ ## 🧪 Examples
177
+
178
+ Ready-to-run example scripts demonstrating real-world use cases:
179
+
180
+ | Example | Description |
181
+ |---|---|
182
+ | [🏥 Hospital Search](examples/hospital_search.py) | Medical record search with namespace isolation per ward |
183
+ | [⚖️ Legal Documents](examples/legal_document_search.py) | NDA and contract search with type/year filtering |
184
+ | [🌍 GIS Data Search](examples/gis_data_search.py) | Geospatial dataset discovery with USGS, Sentinel, OSM |
185
+ | [🤖 RAG Pipeline](examples/rag_pipeline.py) | Retrieval-Augmented Generation with VecForge as backend |
186
+ | [🏢 Multi-Tenant SaaS](examples/multi_tenant_saas.py) | Namespace isolation, RBAC, and audit logging demo |
187
+ | [💻 Codebase Assistant](examples/codebase_assistant.py) | Code documentation semantic search |
188
+
189
+ ```bash
190
+ # Run any example
191
+ python examples/hospital_search.py
192
+ python examples/gis_data_search.py
193
+ python examples/rag_pipeline.py
194
+ ```
195
+
196
+ ---
197
+
198
+ ## 📚 Documentation
199
+
200
+ ### Getting Started
201
+ - [⚡ Quickstart](docs/quickstart.md) — Get running in 5 minutes
202
+ - [📦 Installation](docs/installation.md) — All install options & system requirements
203
+
204
+ ### User Guides
205
+ - [🧠 Core Concepts](docs/core_concepts.md) — Vaults, namespaces, hybrid search explained
206
+ - [🔍 Search Guide](docs/search.md) — Alpha tuning, filters, reranking
207
+ - [🔐 Security Guide](docs/security.md) — Encryption, RBAC, audit logging
208
+ - [📄 Ingestion Guide](docs/ingestion.md) — PDF, DOCX, HTML, TXT ingestion & chunking
209
+
210
+ ### Reference
211
+ - [📖 API Reference](docs/api_reference.md) — Full Python API documentation
212
+ - [🖥️ CLI Reference](docs/cli_reference.md) — All CLI commands & options
213
+ - [🌐 REST API](docs/rest_api.md) — FastAPI server endpoints
214
+ - [⚙️ Configuration](docs/configuration.md) — All config options in one place
215
+
216
+ ---
217
+
218
+ ## 📊 Benchmarks
219
+
220
+ > Verified on Phase 2 benchmark suite (`benchmarks/bench_search.py`)
221
+
222
+ | Operation | VecForge (Actual) | North Star Target | Pinecone | ChromaDB |
223
+ |---|---|---|---|---|
224
+ | Search 1k docs | **0.04ms** p50 | — | ~80ms | ~200ms |
225
+ | Search 10k docs | **1.63ms** p50 | — | ~80ms | ~200ms |
226
+ | **Search 100k docs** | **11.31ms** p50 ✅ | <15ms | ~80ms | ~200ms |
227
+ | Ingest 100k docs | **2.9M docs/sec** | — | Manual | Manual |
228
+ | BM25 Search 10k | **9.40ms** p50 | — | N/A | N/A |
229
+ | Encrypted search | **<20ms overhead** | <20ms | N/A | N/A |
230
+
231
+ ### Quality Gates
232
+
233
+ | Check | Result |
234
+ |---|---|
235
+ | Ruff lint | ✅ All checks passed |
236
+ | Mypy type check | ✅ 0 errors (27 files) |
237
+ | Pytest | ✅ 128/128 tests pass |
238
+ | Coverage | 89% (core modules 85-100%) |
239
+
240
+ ---
241
+
242
+ ## ⚖️ License
243
+
244
+ **Business Source License 1.1 (BSL)**
245
+
246
+ - ✅ Free for personal, research, open-source, and non-commercial use
247
+ - ✅ Read, modify, and share freely
248
+ - 📋 Commercial use requires a license from ArcGX TechLabs
249
+
250
+ Contact: [suneelbose@arcgx.in](mailto:suneelbose@arcgx.in)
251
+
252
+ ---
253
+
254
+ <p align="center">
255
+ Built with ❤️ by <strong>Suneel Bose K</strong> · <strong>ArcGX TechLabs Private Limited</strong>
256
+ </p>
vecforge-0.2.0/pyproject.toml ADDED
@@ -0,0 +1,88 @@
1
+ # VecForge — Universal Local-First Vector Database
2
+ # Copyright (c) 2026 Suneel Bose K · ArcGX TechLabs Private Limited
3
+ # Licensed under BSL 1.1
4
+
5
+ [build-system]
6
+ requires = ["setuptools>=68.0", "wheel"]
7
+ build-backend = "setuptools.build_meta"
8
+
9
+ [project]
10
+ name = "vecforge"
11
+ version = "0.2.0"
12
+ description = "Forge your vector database. Own it forever. Local-first, encrypted, quantum-inspired."
13
+ readme = "README.md"
14
+ license = {text = "Business Source License 1.1"}
15
+ requires-python = ">=3.10"
16
+ authors = [{name = "Suneel Bose K", email = "suneelbose@arcgx.in"}]
17
+ keywords = ["vector-database", "faiss", "embeddings", "search", "local-first", "encrypted"]
18
+ classifiers = [
19
+ "Development Status :: 3 - Alpha",
20
+ "Intended Audience :: Developers",
21
+ "Topic :: Database",
22
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
23
+ "Programming Language :: Python :: 3.10",
24
+ "Programming Language :: Python :: 3.11",
25
+ "Programming Language :: Python :: 3.12",
26
+ ]
27
+
28
+ dependencies = [
29
+ "faiss-cpu>=1.7.4",
30
+ "sentence-transformers>=2.2.0",
31
+ "numpy>=1.24.0",
32
+ "rank-bm25>=0.2.2",
33
+ "fastapi>=0.100.0",
34
+ "uvicorn[standard]>=0.23.0",
35
+ "pymupdf>=1.23.0",
36
+ "numba>=0.58.0",
37
+ "joblib>=1.3.0",
38
+ "click>=8.1.0",
39
+ "python-docx>=1.0.0",
40
+ "beautifulsoup4>=4.12.0",
41
+ ]
42
+
43
+ [project.optional-dependencies]
44
+ dev = [
45
+ "pytest>=7.4.0",
46
+ "pytest-cov>=4.1.0",
47
+ "mypy>=1.5.0",
48
+ "ruff>=0.1.0",
49
+ "black>=23.7.0",
50
+ "types-beautifulsoup4",
51
+ ]
52
+ gpu = [
53
+ "faiss-gpu>=1.7.4",
54
+ "cupy>=12.0.0",
55
+ ]
56
+ quantum = []
57
+
58
+ [project.scripts]
59
+ vecforge = "vecforge.cli.main:cli"
60
+
61
+ [project.urls]
62
+ Homepage = "https://vecforge.arcgx.in"
63
+ Repository = "https://github.com/bosekarmegam/vecforge"
64
+ Issues = "https://github.com/bosekarmegam/vecforge/issues"
65
+
66
+ [tool.setuptools.packages.find]
67
+ include = ["vecforge*"]
68
+
69
+ [tool.ruff]
70
+ line-length = 88
71
+ target-version = "py310"
72
+
73
+ [tool.ruff.lint]
74
+ select = ["E", "F", "I", "N", "W", "UP", "B", "SIM"]
75
+
76
+ [tool.black]
77
+ line-length = 88
78
+ target-version = ["py310"]
79
+
80
+ [tool.mypy]
81
+ python_version = "3.10"
82
+ strict = true
83
+ warn_return_any = true
84
+ warn_unused_configs = true
85
+
86
+ [tool.pytest.ini_options]
87
+ testpaths = ["tests"]
88
+ addopts = "-v --tb=short"