swiftrag 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- swiftrag-0.1.0/.gitignore +25 -0
- swiftrag-0.1.0/LICENSE +21 -0
- swiftrag-0.1.0/PKG-INFO +229 -0
- swiftrag-0.1.0/README.md +180 -0
- swiftrag-0.1.0/pyproject.toml +88 -0
- swiftrag-0.1.0/src/swiftrag/__init__.py +58 -0
- swiftrag-0.1.0/src/swiftrag/chunking.py +213 -0
- swiftrag-0.1.0/src/swiftrag/core.py +508 -0
- swiftrag-0.1.0/src/swiftrag/embeddings.py +201 -0
- swiftrag-0.1.0/src/swiftrag/exceptions.py +27 -0
- swiftrag-0.1.0/src/swiftrag/llms.py +213 -0
- swiftrag-0.1.0/src/swiftrag/py.typed +0 -0
- swiftrag-0.1.0/src/swiftrag/store.py +189 -0
- swiftrag-0.1.0/src/swiftrag/types.py +69 -0
- swiftrag-0.1.0/tests/test_async_batch.py +49 -0
- swiftrag-0.1.0/tests/test_chunking.py +24 -0
- swiftrag-0.1.0/tests/test_context.py +45 -0
- swiftrag-0.1.0/tests/test_dx.py +57 -0
- swiftrag-0.1.0/tests/test_filtering.py +54 -0
- swiftrag-0.1.0/tests/test_rag.py +61 -0
- swiftrag-0.1.0/tests/test_store.py +35 -0
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.egg-info/
|
|
5
|
+
.eggs/
|
|
6
|
+
build/
|
|
7
|
+
dist/
|
|
8
|
+
*.pkl
|
|
9
|
+
|
|
10
|
+
# Envs
|
|
11
|
+
.venv/
|
|
12
|
+
venv/
|
|
13
|
+
env/
|
|
14
|
+
|
|
15
|
+
# Tooling
|
|
16
|
+
.pytest_cache/
|
|
17
|
+
.ruff_cache/
|
|
18
|
+
.mypy_cache/
|
|
19
|
+
.coverage
|
|
20
|
+
htmlcov/
|
|
21
|
+
|
|
22
|
+
# OS / editor
|
|
23
|
+
.DS_Store
|
|
24
|
+
.idea/
|
|
25
|
+
.vscode/
|
swiftrag-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 swiftrag contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
swiftrag-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: swiftrag
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Instant, optimized Retrieval-Augmented Generation. Pass your text + a model, get a RAG-powered LLM in one line.
|
|
5
|
+
Project-URL: Homepage, https://github.com/behradmoeini/swiftrag
|
|
6
|
+
Project-URL: Repository, https://github.com/behradmoeini/swiftrag
|
|
7
|
+
Project-URL: Issues, https://github.com/behradmoeini/swiftrag/issues
|
|
8
|
+
Author: Behrad Moeini
|
|
9
|
+
License: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: ai,embeddings,llm,nlp,openai,rag,retrieval-augmented-generation,vector-search
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
22
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
23
|
+
Classifier: Typing :: Typed
|
|
24
|
+
Requires-Python: >=3.9
|
|
25
|
+
Requires-Dist: numpy>=1.21
|
|
26
|
+
Provides-Extra: all
|
|
27
|
+
Requires-Dist: anthropic>=0.25; extra == 'all'
|
|
28
|
+
Requires-Dist: faiss-cpu>=1.7; extra == 'all'
|
|
29
|
+
Requires-Dist: openai>=1.0; extra == 'all'
|
|
30
|
+
Requires-Dist: sentence-transformers>=2.2; extra == 'all'
|
|
31
|
+
Requires-Dist: tiktoken>=0.5; extra == 'all'
|
|
32
|
+
Provides-Extra: anthropic
|
|
33
|
+
Requires-Dist: anthropic>=0.25; extra == 'anthropic'
|
|
34
|
+
Provides-Extra: dev
|
|
35
|
+
Requires-Dist: build>=1.0; extra == 'dev'
|
|
36
|
+
Requires-Dist: pytest-cov>=4.0; extra == 'dev'
|
|
37
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
38
|
+
Requires-Dist: ruff>=0.4; extra == 'dev'
|
|
39
|
+
Requires-Dist: twine>=5.0; extra == 'dev'
|
|
40
|
+
Provides-Extra: faiss
|
|
41
|
+
Requires-Dist: faiss-cpu>=1.7; extra == 'faiss'
|
|
42
|
+
Provides-Extra: local
|
|
43
|
+
Requires-Dist: sentence-transformers>=2.2; extra == 'local'
|
|
44
|
+
Provides-Extra: openai
|
|
45
|
+
Requires-Dist: openai>=1.0; extra == 'openai'
|
|
46
|
+
Provides-Extra: tokenize
|
|
47
|
+
Requires-Dist: tiktoken>=0.5; extra == 'tokenize'
|
|
48
|
+
Description-Content-Type: text/markdown
|
|
49
|
+
|
|
50
|
+
# swiftrag
|
|
51
|
+
|
|
52
|
+
[CI](https://github.com/behradmoeini/swiftrag/actions/workflows/ci.yml)
|
|
53
|
+
[PyPI version](https://pypi.org/project/swiftrag/)
|
|
54
|
+
[Python versions](https://pypi.org/project/swiftrag/)
|
|
55
|
+
[License: MIT](LICENSE)
|
|
56
|
+
|
|
57
|
+
**Instant, optimized Retrieval-Augmented Generation.** Pass your text and a model — get a RAG-powered LLM in one line.
|
|
58
|
+
|
|
59
|
+
```python
|
|
60
|
+
from swiftrag import RAG
|
|
61
|
+
|
|
62
|
+
rag = RAG(
|
|
63
|
+
documents="The Eiffel Tower is 330 metres tall and located in Paris.",
|
|
64
|
+
embedding_model="openai:text-embedding-3-small",
|
|
65
|
+
llm_model="openai:gpt-4o-mini",
|
|
66
|
+
)
|
|
67
|
+
|
|
68
|
+
print(rag.query("How tall is the Eiffel Tower?"))
|
|
69
|
+
# -> "The Eiffel Tower is 330 metres tall."
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
That's the whole API. You bring documents (a string, a list of strings, or dicts) and a model spec; swiftrag handles chunking, embedding, vector indexing, retrieval, and prompt construction.
|
|
73
|
+
|
|
74
|
+
---
|
|
75
|
+
|
|
76
|
+
## Why swiftrag
|
|
77
|
+
|
|
78
|
+
- **One line to a working RAG.** No glue code, no framework to learn.
|
|
79
|
+
- **Optimized core.** L2-normalized embeddings + a single BLAS matmul for search, `argpartition` top-k (no full sort), batched & concurrent embedding requests, token-aware chunking, and an optional FAISS backend for large corpora.
|
|
80
|
+
- **Tiny footprint.** The core depends only on `numpy`. It installs in seconds.
|
|
81
|
+
- **Runs offline out of the box.** With no API key it uses a built-in hashing embedder and an extractive answerer, so the full pipeline works in tests/CI/demos.
|
|
82
|
+
- **Provider-agnostic.** OpenAI, Anthropic, local sentence-transformers, or any custom callable/object you plug in.
|
|
83
|
+
- **MIT licensed.**
|
|
84
|
+
|
|
85
|
+
## Install
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
pip install swiftrag # core (numpy only) — works offline
|
|
89
|
+
pip install "swiftrag[openai]" # OpenAI embeddings + LLM
|
|
90
|
+
pip install "swiftrag[anthropic]" # Claude LLM
|
|
91
|
+
pip install "swiftrag[local]" # local sentence-transformers embeddings
|
|
92
|
+
pip install "swiftrag[faiss]" # FAISS backend for big corpora
|
|
93
|
+
pip install "swiftrag[all]" # everything
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
## Usage
|
|
97
|
+
|
|
98
|
+
### Pick your models with a simple `"provider:model"` string
|
|
99
|
+
|
|
100
|
+
```python
|
|
101
|
+
# OpenAI (needs OPENAI_API_KEY)
|
|
102
|
+
RAG(documents=text, embedding_model="openai:text-embedding-3-small", llm_model="openai:gpt-4o-mini")
|
|
103
|
+
|
|
104
|
+
# Anthropic for generation, local embeddings (no embedding API calls)
|
|
105
|
+
RAG(documents=text, embedding_model="local:all-MiniLM-L6-v2", llm_model="anthropic:claude-3-5-sonnet-latest")
|
|
106
|
+
|
|
107
|
+
# Fully offline (default) — no keys required
|
|
108
|
+
RAG(documents=text)
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
### Build straight from files or a folder
|
|
112
|
+
|
|
113
|
+
```python
|
|
114
|
+
rag = RAG.from_files("docs/", embedding_model="openai:text-embedding-3-small")
|
|
115
|
+
# each file becomes a document tagged with metadata={"source": <path>}
|
|
116
|
+
|
|
117
|
+
resp = rag.query("What's our deployment process?")
|
|
118
|
+
print(resp.answer)
|
|
119
|
+
print(resp.format_sources()) # numbered, human-readable citations
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
### Documents can be a string, list, or dicts with metadata
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
rag = RAG(documents=[
|
|
126
|
+
"Plain string document.",
|
|
127
|
+
{"text": "Document with metadata.", "metadata": {"source": "handbook", "page": 12}},
|
|
128
|
+
])
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
### Query, stream, or just retrieve
|
|
132
|
+
|
|
133
|
+
```python
|
|
134
|
+
resp = rag.query("What does the handbook say about refunds?")
|
|
135
|
+
print(resp.answer)
|
|
136
|
+
for s in resp.sources:
|
|
137
|
+
    print(s.score, s.metadata, s.text[:80])
|
|
138
|
+
|
|
139
|
+
# Token streaming
|
|
140
|
+
for token in rag.stream("Summarize the refund policy."):
|
|
141
|
+
    print(token, end="", flush=True)
|
|
142
|
+
|
|
143
|
+
# Retrieval only (no LLM call)
|
|
144
|
+
chunks = rag.retrieve("refunds", top_k=5)
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
### Filter by metadata and score
|
|
148
|
+
|
|
149
|
+
```python
|
|
150
|
+
# Only consider chunks whose metadata matches, and drop weak matches.
|
|
151
|
+
resp = rag.query(
|
|
152
|
+
"What is the refund window?",
|
|
153
|
+
where={"source": "handbook"}, # exact metadata match (or pass a Chunk -> bool callable)
|
|
154
|
+
min_score=0.25, # cosine threshold; weaker chunks are ignored
|
|
155
|
+
top_k=3,
|
|
156
|
+
)
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
Repeated queries reuse a cached query embedding (LRU, configurable via
|
|
160
|
+
`query_cache_size`), so re-asking the same question skips the embedding call.
|
|
161
|
+
|
|
162
|
+
### Add documents incrementally, save, and reload
|
|
163
|
+
|
|
164
|
+
```python
|
|
165
|
+
rag = RAG().add("first batch").add("second batch")
|
|
166
|
+
rag.save("index.pkl")
|
|
167
|
+
|
|
168
|
+
rag = RAG.load("index.pkl", embedding_model="openai:text-embedding-3-small",
|
|
169
|
+
llm_model="openai:gpt-4o-mini")
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
### Batch and async
|
|
173
|
+
|
|
174
|
+
```python
|
|
175
|
+
# Answer many questions at once — embeddings are batched, generation is parallelized.
|
|
176
|
+
responses = rag.query_many(["q1?", "q2?", "q3?"], max_workers=8)
|
|
177
|
+
|
|
178
|
+
# Async API (non-blocking, great for web servers):
|
|
179
|
+
resp = await rag.aquery("your question")
|
|
180
|
+
async for token in rag.astream("your question"):
|
|
181
|
+
    print(token, end="", flush=True)
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
### Bring your own provider
|
|
185
|
+
|
|
186
|
+
```python
|
|
187
|
+
# Any callable fn(prompt) -> str works as an LLM:
|
|
188
|
+
rag = RAG(documents=text, llm_model=lambda prompt: my_model.generate(prompt))
|
|
189
|
+
|
|
190
|
+
# Any object with embed_documents(list[str]) and embed_query(str) works as an embedder.
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
## Configuration
|
|
194
|
+
|
|
195
|
+
| Argument | Default | Description |
|
|
196
|
+
| --- | --- | --- |
|
|
197
|
+
| `documents` | `None` | str / list[str] / list[dict] / `Document`(s) to index. |
|
|
198
|
+
| `embedding_model` | `"hash"` | `"provider:model"` string or a custom provider. |
|
|
199
|
+
| `llm_model` | `None` (offline) | `"provider:model"` string, callable, or provider. |
|
|
200
|
+
| `chunk_size` | `512` | Target chunk size in tokens. |
|
|
201
|
+
| `chunk_overlap` | `64` | Token overlap between chunks. |
|
|
202
|
+
| `top_k` | `4` | Chunks retrieved per query. |
|
|
203
|
+
| `use_mmr` | `False` | Maximal Marginal Relevance re-ranking for diverse results. |
|
|
204
|
+
| `use_faiss` | `False` | Use FAISS index (install `swiftrag[faiss]`). |
|
|
205
|
+
| `min_score` | `None` | Default cosine threshold for dropping weak matches. |
|
|
206
|
+
| `max_context_tokens` | `None` | Cap the tokens of retrieved context packed into the prompt. |
|
|
207
|
+
| `dedup` | `True` | Skip exact-duplicate chunks on ingest. |
|
|
208
|
+
| `query_cache_size` | `128` | LRU size for cached query embeddings (`0` disables). |
|
|
209
|
+
| `system_prompt` | grounded default | System prompt for the LLM. |
|
|
210
|
+
|
|
211
|
+
## How it works
|
|
212
|
+
|
|
213
|
+
```
|
|
214
|
+
documents ─▶ chunk (token-aware) ─▶ embed (batched) ─▶ normalize ─▶ vector store
|
|
215
|
+
│
|
|
216
|
+
query ─▶ embed ─▶ cosine (BLAS matmul) ─▶ top-k (argpartition) ─▶ context ─▶ LLM ─▶ answer
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
## Development
|
|
220
|
+
|
|
221
|
+
```bash
|
|
222
|
+
pip install -e ".[dev]"
|
|
223
|
+
pytest
|
|
224
|
+
ruff check .
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
## License
|
|
228
|
+
|
|
229
|
+
MIT — see [LICENSE](LICENSE).
|
swiftrag-0.1.0/README.md
ADDED
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
# swiftrag
|
|
2
|
+
|
|
3
|
+
[CI](https://github.com/behradmoeini/swiftrag/actions/workflows/ci.yml)
|
|
4
|
+
[PyPI version](https://pypi.org/project/swiftrag/)
|
|
5
|
+
[Python versions](https://pypi.org/project/swiftrag/)
|
|
6
|
+
[License: MIT](LICENSE)
|
|
7
|
+
|
|
8
|
+
**Instant, optimized Retrieval-Augmented Generation.** Pass your text and a model — get a RAG-powered LLM in one line.
|
|
9
|
+
|
|
10
|
+
```python
|
|
11
|
+
from swiftrag import RAG
|
|
12
|
+
|
|
13
|
+
rag = RAG(
|
|
14
|
+
documents="The Eiffel Tower is 330 metres tall and located in Paris.",
|
|
15
|
+
embedding_model="openai:text-embedding-3-small",
|
|
16
|
+
llm_model="openai:gpt-4o-mini",
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
print(rag.query("How tall is the Eiffel Tower?"))
|
|
20
|
+
# -> "The Eiffel Tower is 330 metres tall."
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
That's the whole API. You bring documents (a string, a list of strings, or dicts) and a model spec; swiftrag handles chunking, embedding, vector indexing, retrieval, and prompt construction.
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
## Why swiftrag
|
|
28
|
+
|
|
29
|
+
- **One line to a working RAG.** No glue code, no framework to learn.
|
|
30
|
+
- **Optimized core.** L2-normalized embeddings + a single BLAS matmul for search, `argpartition` top-k (no full sort), batched & concurrent embedding requests, token-aware chunking, and an optional FAISS backend for large corpora.
|
|
31
|
+
- **Tiny footprint.** The core depends only on `numpy`. It installs in seconds.
|
|
32
|
+
- **Runs offline out of the box.** With no API key it uses a built-in hashing embedder and an extractive answerer, so the full pipeline works in tests/CI/demos.
|
|
33
|
+
- **Provider-agnostic.** OpenAI, Anthropic, local sentence-transformers, or any custom callable/object you plug in.
|
|
34
|
+
- **MIT licensed.**
|
|
35
|
+
|
|
36
|
+
## Install
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
pip install swiftrag # core (numpy only) — works offline
|
|
40
|
+
pip install "swiftrag[openai]" # OpenAI embeddings + LLM
|
|
41
|
+
pip install "swiftrag[anthropic]" # Claude LLM
|
|
42
|
+
pip install "swiftrag[local]" # local sentence-transformers embeddings
|
|
43
|
+
pip install "swiftrag[faiss]" # FAISS backend for big corpora
|
|
44
|
+
pip install "swiftrag[all]" # everything
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## Usage
|
|
48
|
+
|
|
49
|
+
### Pick your models with a simple `"provider:model"` string
|
|
50
|
+
|
|
51
|
+
```python
|
|
52
|
+
# OpenAI (needs OPENAI_API_KEY)
|
|
53
|
+
RAG(documents=text, embedding_model="openai:text-embedding-3-small", llm_model="openai:gpt-4o-mini")
|
|
54
|
+
|
|
55
|
+
# Anthropic for generation, local embeddings (no embedding API calls)
|
|
56
|
+
RAG(documents=text, embedding_model="local:all-MiniLM-L6-v2", llm_model="anthropic:claude-3-5-sonnet-latest")
|
|
57
|
+
|
|
58
|
+
# Fully offline (default) — no keys required
|
|
59
|
+
RAG(documents=text)
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
### Build straight from files or a folder
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
rag = RAG.from_files("docs/", embedding_model="openai:text-embedding-3-small")
|
|
66
|
+
# each file becomes a document tagged with metadata={"source": <path>}
|
|
67
|
+
|
|
68
|
+
resp = rag.query("What's our deployment process?")
|
|
69
|
+
print(resp.answer)
|
|
70
|
+
print(resp.format_sources()) # numbered, human-readable citations
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
### Documents can be a string, list, or dicts with metadata
|
|
74
|
+
|
|
75
|
+
```python
|
|
76
|
+
rag = RAG(documents=[
|
|
77
|
+
"Plain string document.",
|
|
78
|
+
{"text": "Document with metadata.", "metadata": {"source": "handbook", "page": 12}},
|
|
79
|
+
])
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
### Query, stream, or just retrieve
|
|
83
|
+
|
|
84
|
+
```python
|
|
85
|
+
resp = rag.query("What does the handbook say about refunds?")
|
|
86
|
+
print(resp.answer)
|
|
87
|
+
for s in resp.sources:
|
|
88
|
+
    print(s.score, s.metadata, s.text[:80])
|
|
89
|
+
|
|
90
|
+
# Token streaming
|
|
91
|
+
for token in rag.stream("Summarize the refund policy."):
|
|
92
|
+
    print(token, end="", flush=True)
|
|
93
|
+
|
|
94
|
+
# Retrieval only (no LLM call)
|
|
95
|
+
chunks = rag.retrieve("refunds", top_k=5)
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
### Filter by metadata and score
|
|
99
|
+
|
|
100
|
+
```python
|
|
101
|
+
# Only consider chunks whose metadata matches, and drop weak matches.
|
|
102
|
+
resp = rag.query(
|
|
103
|
+
"What is the refund window?",
|
|
104
|
+
where={"source": "handbook"}, # exact metadata match (or pass a Chunk -> bool callable)
|
|
105
|
+
min_score=0.25, # cosine threshold; weaker chunks are ignored
|
|
106
|
+
top_k=3,
|
|
107
|
+
)
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
Repeated queries reuse a cached query embedding (LRU, configurable via
|
|
111
|
+
`query_cache_size`), so re-asking the same question skips the embedding call.
|
|
112
|
+
|
|
113
|
+
### Add documents incrementally, save, and reload
|
|
114
|
+
|
|
115
|
+
```python
|
|
116
|
+
rag = RAG().add("first batch").add("second batch")
|
|
117
|
+
rag.save("index.pkl")
|
|
118
|
+
|
|
119
|
+
rag = RAG.load("index.pkl", embedding_model="openai:text-embedding-3-small",
|
|
120
|
+
llm_model="openai:gpt-4o-mini")
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
### Batch and async
|
|
124
|
+
|
|
125
|
+
```python
|
|
126
|
+
# Answer many questions at once — embeddings are batched, generation is parallelized.
|
|
127
|
+
responses = rag.query_many(["q1?", "q2?", "q3?"], max_workers=8)
|
|
128
|
+
|
|
129
|
+
# Async API (non-blocking, great for web servers):
|
|
130
|
+
resp = await rag.aquery("your question")
|
|
131
|
+
async for token in rag.astream("your question"):
|
|
132
|
+
    print(token, end="", flush=True)
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
### Bring your own provider
|
|
136
|
+
|
|
137
|
+
```python
|
|
138
|
+
# Any callable fn(prompt) -> str works as an LLM:
|
|
139
|
+
rag = RAG(documents=text, llm_model=lambda prompt: my_model.generate(prompt))
|
|
140
|
+
|
|
141
|
+
# Any object with embed_documents(list[str]) and embed_query(str) works as an embedder.
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
## Configuration
|
|
145
|
+
|
|
146
|
+
| Argument | Default | Description |
|
|
147
|
+
| --- | --- | --- |
|
|
148
|
+
| `documents` | `None` | str / list[str] / list[dict] / `Document`(s) to index. |
|
|
149
|
+
| `embedding_model` | `"hash"` | `"provider:model"` string or a custom provider. |
|
|
150
|
+
| `llm_model` | `None` (offline) | `"provider:model"` string, callable, or provider. |
|
|
151
|
+
| `chunk_size` | `512` | Target chunk size in tokens. |
|
|
152
|
+
| `chunk_overlap` | `64` | Token overlap between chunks. |
|
|
153
|
+
| `top_k` | `4` | Chunks retrieved per query. |
|
|
154
|
+
| `use_mmr` | `False` | Maximal Marginal Relevance re-ranking for diverse results. |
|
|
155
|
+
| `use_faiss` | `False` | Use FAISS index (install `swiftrag[faiss]`). |
|
|
156
|
+
| `min_score` | `None` | Default cosine threshold for dropping weak matches. |
|
|
157
|
+
| `max_context_tokens` | `None` | Cap the tokens of retrieved context packed into the prompt. |
|
|
158
|
+
| `dedup` | `True` | Skip exact-duplicate chunks on ingest. |
|
|
159
|
+
| `query_cache_size` | `128` | LRU size for cached query embeddings (`0` disables). |
|
|
160
|
+
| `system_prompt` | grounded default | System prompt for the LLM. |
|
|
161
|
+
|
|
162
|
+
## How it works
|
|
163
|
+
|
|
164
|
+
```
|
|
165
|
+
documents ─▶ chunk (token-aware) ─▶ embed (batched) ─▶ normalize ─▶ vector store
|
|
166
|
+
│
|
|
167
|
+
query ─▶ embed ─▶ cosine (BLAS matmul) ─▶ top-k (argpartition) ─▶ context ─▶ LLM ─▶ answer
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
## Development
|
|
171
|
+
|
|
172
|
+
```bash
|
|
173
|
+
pip install -e ".[dev]"
|
|
174
|
+
pytest
|
|
175
|
+
ruff check .
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
## License
|
|
179
|
+
|
|
180
|
+
MIT — see [LICENSE](LICENSE).
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling>=1.21"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "swiftrag"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Instant, optimized Retrieval-Augmented Generation. Pass your text + a model, get a RAG-powered LLM in one line."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [{ name = "Behrad Moeini" }]
|
|
13
|
+
keywords = [
|
|
14
|
+
"rag",
|
|
15
|
+
"retrieval-augmented-generation",
|
|
16
|
+
"llm",
|
|
17
|
+
"embeddings",
|
|
18
|
+
"vector-search",
|
|
19
|
+
"openai",
|
|
20
|
+
"ai",
|
|
21
|
+
"nlp",
|
|
22
|
+
]
|
|
23
|
+
classifiers = [
|
|
24
|
+
"Development Status :: 4 - Beta",
|
|
25
|
+
"Intended Audience :: Developers",
|
|
26
|
+
"License :: OSI Approved :: MIT License",
|
|
27
|
+
"Operating System :: OS Independent",
|
|
28
|
+
"Programming Language :: Python :: 3",
|
|
29
|
+
"Programming Language :: Python :: 3.9",
|
|
30
|
+
"Programming Language :: Python :: 3.10",
|
|
31
|
+
"Programming Language :: Python :: 3.11",
|
|
32
|
+
"Programming Language :: Python :: 3.12",
|
|
33
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
34
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
35
|
+
"Typing :: Typed",
|
|
36
|
+
]
|
|
37
|
+
|
|
38
|
+
# Core stays intentionally tiny. numpy is the only hard runtime dependency so
|
|
39
|
+
# the package installs in seconds and works fully offline (hash embeddings +
|
|
40
|
+
# echo LLM) with zero API keys.
|
|
41
|
+
dependencies = [
|
|
42
|
+
"numpy>=1.21",
|
|
43
|
+
]
|
|
44
|
+
|
|
45
|
+
[project.optional-dependencies]
|
|
46
|
+
openai = ["openai>=1.0"]
|
|
47
|
+
anthropic = ["anthropic>=0.25"]
|
|
48
|
+
local = ["sentence-transformers>=2.2"]
|
|
49
|
+
faiss = ["faiss-cpu>=1.7"]
|
|
50
|
+
tokenize = ["tiktoken>=0.5"]
|
|
51
|
+
# Everything you might want, in one shot.
|
|
52
|
+
all = [
|
|
53
|
+
"openai>=1.0",
|
|
54
|
+
"anthropic>=0.25",
|
|
55
|
+
"sentence-transformers>=2.2",
|
|
56
|
+
"faiss-cpu>=1.7",
|
|
57
|
+
"tiktoken>=0.5",
|
|
58
|
+
]
|
|
59
|
+
dev = [
|
|
60
|
+
"pytest>=7.0",
|
|
61
|
+
"pytest-cov>=4.0",
|
|
62
|
+
"ruff>=0.4",
|
|
63
|
+
"build>=1.0",
|
|
64
|
+
"twine>=5.0",
|
|
65
|
+
]
|
|
66
|
+
|
|
67
|
+
[project.urls]
|
|
68
|
+
Homepage = "https://github.com/behradmoeini/swiftrag"
|
|
69
|
+
Repository = "https://github.com/behradmoeini/swiftrag"
|
|
70
|
+
Issues = "https://github.com/behradmoeini/swiftrag/issues"
|
|
71
|
+
|
|
72
|
+
[tool.hatch.build.targets.wheel]
|
|
73
|
+
packages = ["src/swiftrag"]
|
|
74
|
+
|
|
75
|
+
[tool.hatch.build.targets.sdist]
|
|
76
|
+
include = ["src/swiftrag", "README.md", "LICENSE", "tests"]
|
|
77
|
+
|
|
78
|
+
[tool.ruff]
|
|
79
|
+
line-length = 100
|
|
80
|
+
target-version = "py39"
|
|
81
|
+
|
|
82
|
+
[tool.ruff.lint]
|
|
83
|
+
select = ["E", "F", "I", "UP", "B", "W"]
|
|
84
|
+
ignore = ["E501"]
|
|
85
|
+
|
|
86
|
+
[tool.pytest.ini_options]
|
|
87
|
+
testpaths = ["tests"]
|
|
88
|
+
addopts = "-q"
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""swiftrag — instant, optimized Retrieval-Augmented Generation.
|
|
2
|
+
|
|
3
|
+
Pass your text and (optionally) a model. Get a RAG-powered LLM in one line.
|
|
4
|
+
|
|
5
|
+
from swiftrag import RAG
|
|
6
|
+
|
|
7
|
+
rag = RAG(
|
|
8
|
+
documents="your knowledge as a string (or a list of strings/dicts)",
|
|
9
|
+
embedding_model="openai:text-embedding-3-small",
|
|
10
|
+
llm_model="openai:gpt-4o-mini",
|
|
11
|
+
)
|
|
12
|
+
print(rag.query("your question"))
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
from .chunking import chunk_text, count_tokens
|
|
18
|
+
from .core import RAG
|
|
19
|
+
from .embeddings import (
|
|
20
|
+
EmbeddingProvider,
|
|
21
|
+
HashEmbeddings,
|
|
22
|
+
OpenAIEmbeddings,
|
|
23
|
+
SentenceTransformerEmbeddings,
|
|
24
|
+
)
|
|
25
|
+
from .exceptions import (
|
|
26
|
+
ConfigurationError,
|
|
27
|
+
DependencyError,
|
|
28
|
+
EmptyCorpusError,
|
|
29
|
+
SwiftRagError,
|
|
30
|
+
)
|
|
31
|
+
from .llms import AnthropicLLM, CallableLLM, EchoLLM, LLMProvider, OpenAILLM
|
|
32
|
+
from .types import Chunk, Document, RAGResponse, ScoredChunk
|
|
33
|
+
|
|
34
|
+
__version__ = "0.1.0"
|
|
35
|
+
|
|
36
|
+
__all__ = [
|
|
37
|
+
"RAG",
|
|
38
|
+
"chunk_text",
|
|
39
|
+
"count_tokens",
|
|
40
|
+
"Document",
|
|
41
|
+
"Chunk",
|
|
42
|
+
"ScoredChunk",
|
|
43
|
+
"RAGResponse",
|
|
44
|
+
"EmbeddingProvider",
|
|
45
|
+
"HashEmbeddings",
|
|
46
|
+
"OpenAIEmbeddings",
|
|
47
|
+
"SentenceTransformerEmbeddings",
|
|
48
|
+
"LLMProvider",
|
|
49
|
+
"OpenAILLM",
|
|
50
|
+
"AnthropicLLM",
|
|
51
|
+
"EchoLLM",
|
|
52
|
+
"CallableLLM",
|
|
53
|
+
"SwiftRagError",
|
|
54
|
+
"ConfigurationError",
|
|
55
|
+
"DependencyError",
|
|
56
|
+
"EmptyCorpusError",
|
|
57
|
+
"__version__",
|
|
58
|
+
]
|