visual-rag-toolkit 0.1.1__tar.gz → 0.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/PKG-INFO +98 -17
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/README.md +97 -16
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/demo/app.py +20 -8
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/demo/evaluation.py +5 -45
- visual_rag_toolkit-0.1.3/demo/indexing.py +274 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/demo/qdrant_utils.py +12 -5
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/demo/ui/playground.py +1 -1
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/demo/ui/sidebar.py +4 -3
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/demo/ui/upload.py +5 -4
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/examples/config.yaml +6 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/examples/process_pdfs.py +6 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/examples/search_demo.py +6 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/pyproject.toml +1 -1
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/visual_rag/__init__.py +43 -1
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/visual_rag/config.py +4 -7
- visual_rag_toolkit-0.1.3/visual_rag/indexing/__init__.py +38 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/visual_rag/indexing/qdrant_indexer.py +92 -42
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/visual_rag/retrieval/multi_vector.py +63 -65
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/visual_rag/retrieval/single_stage.py +7 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/visual_rag/retrieval/two_stage.py +8 -10
- visual_rag_toolkit-0.1.1/demo/indexing.py +0 -286
- visual_rag_toolkit-0.1.1/visual_rag/indexing/__init__.py +0 -21
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/.github/workflows/ci.yaml +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/.github/workflows/publish_pypi.yaml +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/.gitignore +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/LICENSE +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/benchmarks/README.md +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/benchmarks/__init__.py +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/benchmarks/analyze_results.py +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/benchmarks/benchmark_datasets.txt +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/benchmarks/prepare_submission.py +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/benchmarks/quick_test.py +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/benchmarks/run_vidore.py +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/benchmarks/vidore_beir_qdrant/run_qdrant_beir.py +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/benchmarks/vidore_tatdqa_test/__init__.py +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/benchmarks/vidore_tatdqa_test/dataset_loader.py +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/benchmarks/vidore_tatdqa_test/metrics.py +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/benchmarks/vidore_tatdqa_test/run_qdrant.py +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/benchmarks/vidore_tatdqa_test/sweep_eval.py +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/demo/__init__.py +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/demo/commands.py +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/demo/config.py +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/demo/download_models.py +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/demo/example_metadata_mapping_sigir.json +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/demo/results.py +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/demo/test_qdrant_connection.py +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/demo/ui/__init__.py +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/demo/ui/benchmark.py +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/demo/ui/header.py +0 -0
- {visual_rag_toolkit-0.1.1/benchmarks/vidore_tatdqa_test → visual_rag_toolkit-0.1.3/examples}/COMMANDS.md +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/requirements.txt +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/tests/__init__.py +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/tests/test_config.py +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/tests/test_pdf_processor.py +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/tests/test_pooling.py +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/tests/test_strategies.py +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/visual_rag/cli/__init__.py +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/visual_rag/cli/main.py +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/visual_rag/demo_runner.py +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/visual_rag/embedding/__init__.py +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/visual_rag/embedding/pooling.py +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/visual_rag/embedding/visual_embedder.py +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/visual_rag/indexing/cloudinary_uploader.py +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/visual_rag/indexing/pdf_processor.py +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/visual_rag/indexing/pipeline.py +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/visual_rag/preprocessing/__init__.py +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/visual_rag/preprocessing/crop_empty.py +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/visual_rag/qdrant_admin.py +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/visual_rag/retrieval/__init__.py +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/visual_rag/retrieval/three_stage.py +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/visual_rag/visualization/__init__.py +0 -0
- {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/visual_rag/visualization/saliency.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: visual-rag-toolkit
|
|
3
|
-
Version: 0.1.1
|
|
3
|
+
Version: 0.1.3
|
|
4
4
|
Summary: End-to-end visual document retrieval with ColPali, featuring two-stage pooling for scalable search
|
|
5
5
|
Project-URL: Homepage, https://github.com/Ara-Yeroyan/visual-rag-toolkit
|
|
6
6
|
Project-URL: Documentation, https://github.com/Ara-Yeroyan/visual-rag-toolkit#readme
|
|
@@ -85,10 +85,9 @@ Description-Content-Type: text/markdown
|
|
|
85
85
|
|
|
86
86
|
# Visual RAG Toolkit
|
|
87
87
|
|
|
88
|
-
[Python](https://www.python.org/downloads/)
|
|
88
|
+
[PyPI version](https://pypi.org/project/visual-rag-toolkit/)
|
|
89
|
+
[Python versions](https://pypi.org/project/visual-rag-toolkit/)
|
|
90
|
+
[License: MIT](LICENSE)
|
|
92
91
|
|
|
93
92
|
End-to-end visual document retrieval toolkit featuring **fast multi-stage retrieval** (prefetch with pooled vectors + exact MaxSim reranking).
|
|
94
93
|
|
|
@@ -112,11 +111,10 @@ This repo contains:
|
|
|
112
111
|
pip install visual-rag-toolkit
|
|
113
112
|
|
|
114
113
|
# With specific features
|
|
115
|
-
pip install visual-rag-toolkit[
|
|
116
|
-
pip install visual-rag-toolkit[pdf] # PDF processing
|
|
114
|
+
pip install visual-rag-toolkit[ui] # Streamlit demo dependencies
|
|
117
115
|
pip install visual-rag-toolkit[qdrant] # Vector database
|
|
116
|
+
pip install visual-rag-toolkit[embedding] # ColSmol/ColPali embedding support
|
|
118
117
|
pip install visual-rag-toolkit[cloudinary] # Image CDN
|
|
119
|
-
pip install visual-rag-toolkit[ui] # Streamlit demo dependencies
|
|
120
118
|
|
|
121
119
|
# All dependencies
|
|
122
120
|
pip install visual-rag-toolkit[all]
|
|
@@ -157,6 +155,95 @@ for r in results[:3]:
|
|
|
157
155
|
print(r["id"], r["score_final"])
|
|
158
156
|
```
|
|
159
157
|
|
|
158
|
+
### End-to-end: ingest PDFs (with cropping) → index in Qdrant
|
|
159
|
+
|
|
160
|
+
This is the "SDK-style" pipeline: PDF → images → optional crop → embed → store vectors + payload in Qdrant.
|
|
161
|
+
|
|
162
|
+
```python
|
|
163
|
+
import os
|
|
164
|
+
from pathlib import Path
|
|
165
|
+
|
|
166
|
+
import numpy as np
|
|
167
|
+
import torch
|
|
168
|
+
|
|
169
|
+
from visual_rag import VisualEmbedder
|
|
170
|
+
from visual_rag.indexing import ProcessingPipeline, QdrantIndexer
|
|
171
|
+
|
|
172
|
+
QDRANT_URL = os.environ["QDRANT_URL"]
|
|
173
|
+
QDRANT_KEY = os.getenv("QDRANT_API_KEY", "")
|
|
174
|
+
|
|
175
|
+
collection = "my_visual_docs"
|
|
176
|
+
|
|
177
|
+
embedder = VisualEmbedder(
|
|
178
|
+
model_name="vidore/colSmol-500M",
|
|
179
|
+
torch_dtype=torch.float16,
|
|
180
|
+
output_dtype=np.float16,
|
|
181
|
+
batch_size=8,
|
|
182
|
+
)
|
|
183
|
+
|
|
184
|
+
indexer = QdrantIndexer(
|
|
185
|
+
url=QDRANT_URL,
|
|
186
|
+
api_key=QDRANT_KEY,
|
|
187
|
+
collection_name=collection,
|
|
188
|
+
prefer_grpc=True,
|
|
189
|
+
vector_datatype="float16",
|
|
190
|
+
)
|
|
191
|
+
|
|
192
|
+
# Creates collection + required payload indexes (e.g., "filename" for skip_existing)
|
|
193
|
+
indexer.create_collection(force_recreate=False)
|
|
194
|
+
|
|
195
|
+
pipeline = ProcessingPipeline(
|
|
196
|
+
embedder=embedder,
|
|
197
|
+
indexer=indexer,
|
|
198
|
+
embedding_strategy="all", # store full tokens + pooled vectors in one pass
|
|
199
|
+
crop_empty=True,
|
|
200
|
+
crop_empty_percentage_to_remove=0.99, # kept for traceability
|
|
201
|
+
crop_empty_remove_page_number=True,
|
|
202
|
+
crop_empty_preserve_border_px=1,
|
|
203
|
+
crop_empty_uniform_rowcol_std_threshold=3.0,
|
|
204
|
+
)
|
|
205
|
+
|
|
206
|
+
pdfs = [Path("docs/a.pdf"), Path("docs/b.pdf")]
|
|
207
|
+
for pdf_path in pdfs:
|
|
208
|
+
result = pipeline.process_pdf(
|
|
209
|
+
pdf_path,
|
|
210
|
+
skip_existing=True, # Skip pages already in Qdrant (uses filename index)
|
|
211
|
+
upload_to_cloudinary=False,
|
|
212
|
+
upload_to_qdrant=True,
|
|
213
|
+
)
|
|
214
|
+
# Logs automatically shown:
|
|
215
|
+
# [10:23:45] 📚 Processing PDF: a.pdf
|
|
216
|
+
# [10:23:45] 🖼️ Converting PDF to images...
|
|
217
|
+
# [10:23:46] ✅ Converted 12 pages
|
|
218
|
+
# [10:23:46] 📦 Processing pages 1-8/12
|
|
219
|
+
# [10:23:46] 🤖 Generating embeddings for 8 pages...
|
|
220
|
+
# [10:23:48] 📤 Uploading batch of 8 pages...
|
|
221
|
+
# [10:23:48] ✅ Uploaded 8 points to Qdrant
|
|
222
|
+
# [10:23:48] 📦 Processing pages 9-12/12
|
|
223
|
+
# [10:23:48] 🤖 Generating embeddings for 4 pages...
|
|
224
|
+
# [10:23:50] 📤 Uploading batch of 4 pages...
|
|
225
|
+
# [10:23:50] ✅ Uploaded 4 points to Qdrant
|
|
226
|
+
# [10:23:50] ✅ Completed a.pdf: 12 uploaded, 0 skipped, 0 failed
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
CLI equivalent:
|
|
230
|
+
|
|
231
|
+
```bash
|
|
232
|
+
export QDRANT_URL="https://YOUR_QDRANT"
|
|
233
|
+
export QDRANT_API_KEY="YOUR_KEY"
|
|
234
|
+
|
|
235
|
+
visual-rag process \
|
|
236
|
+
--reports-dir ./docs \
|
|
237
|
+
--collection my_visual_docs \
|
|
238
|
+
--model vidore/colSmol-500M \
|
|
239
|
+
--strategy all \
|
|
240
|
+
--batch-size 8 \
|
|
241
|
+
--qdrant-vector-dtype float16 \
|
|
242
|
+
--prefer-grpc \
|
|
243
|
+
--crop-empty \
|
|
244
|
+
--crop-empty-remove-page-number
|
|
245
|
+
```
|
|
246
|
+
|
|
160
247
|
### Process a PDF into images (no embedding, no vector DB)
|
|
161
248
|
|
|
162
249
|
```python
|
|
@@ -186,7 +273,7 @@ Stage 2: Exact MaxSim reranking on candidates
|
|
|
186
273
|
└── Return top-k results (e.g., 10)
|
|
187
274
|
```
|
|
188
275
|
|
|
189
|
-
Three-stage extends this with an additional “cheap prefetch” stage before stage 2.
|
|
276
|
+
Three-stage extends this with an additional "cheap prefetch" stage before stage 2.
|
|
190
277
|
|
|
191
278
|
## 📁 Package Structure
|
|
192
279
|
|
|
@@ -209,16 +296,11 @@ visual-rag-toolkit/
|
|
|
209
296
|
Configure via environment variables or YAML:
|
|
210
297
|
|
|
211
298
|
```bash
|
|
212
|
-
# Qdrant credentials (preferred names used by the demo + scripts)
|
|
213
|
-
export SIGIR_QDRANT_URL="https://your-cluster.qdrant.io"
|
|
214
|
-
export SIGIR_QDRANT_KEY="your-api-key"
|
|
215
299
|
|
|
216
|
-
#
|
|
300
|
+
# Qdrant credentials (preferred names used by the demo + scripts)
|
|
217
301
|
export QDRANT_URL="https://your-cluster.qdrant.io"
|
|
218
302
|
export QDRANT_API_KEY="your-api-key"
|
|
219
303
|
|
|
220
|
-
export VISUALRAG_MODEL="vidore/colSmol-500M"
|
|
221
|
-
|
|
222
304
|
# Special token handling (default: filter them out)
|
|
223
305
|
export VISUALRAG_INCLUDE_SPECIAL_TOKENS=true # Include special tokens
|
|
224
306
|
```
|
|
@@ -269,7 +351,7 @@ python -m benchmarks.vidore_beir_qdrant.run_qdrant_beir \
|
|
|
269
351
|
```
|
|
270
352
|
|
|
271
353
|
More commands (including multi-stage variants and cropping configs) live in:
|
|
272
|
-
- `benchmarks/vidore_tatdqa_test/COMMANDS.md`
|
|
354
|
+
- `examples/COMMANDS.md`
|
|
273
355
|
|
|
274
356
|
## 🔧 Development
|
|
275
357
|
|
|
@@ -302,4 +384,3 @@ MIT License - see [LICENSE](LICENSE) for details.
|
|
|
302
384
|
- [Qdrant](https://qdrant.tech/) - Vector database with multi-vector support
|
|
303
385
|
- [ColPali](https://github.com/illuin-tech/colpali) - Visual document retrieval models
|
|
304
386
|
- [ViDoRe](https://huggingface.co/spaces/vidore/vidore-leaderboard) - Benchmark dataset
|
|
305
|
-
|
|
@@ -1,9 +1,8 @@
|
|
|
1
1
|
# Visual RAG Toolkit
|
|
2
2
|
|
|
3
|
-
[Python](https://www.python.org/downloads/)
|
|
3
|
+
[PyPI version](https://pypi.org/project/visual-rag-toolkit/)
|
|
4
|
+
[Python versions](https://pypi.org/project/visual-rag-toolkit/)
|
|
5
|
+
[License: MIT](LICENSE)
|
|
7
6
|
|
|
8
7
|
End-to-end visual document retrieval toolkit featuring **fast multi-stage retrieval** (prefetch with pooled vectors + exact MaxSim reranking).
|
|
9
8
|
|
|
@@ -27,11 +26,10 @@ This repo contains:
|
|
|
27
26
|
pip install visual-rag-toolkit
|
|
28
27
|
|
|
29
28
|
# With specific features
|
|
30
|
-
pip install visual-rag-toolkit[
|
|
31
|
-
pip install visual-rag-toolkit[pdf] # PDF processing
|
|
29
|
+
pip install visual-rag-toolkit[ui] # Streamlit demo dependencies
|
|
32
30
|
pip install visual-rag-toolkit[qdrant] # Vector database
|
|
31
|
+
pip install visual-rag-toolkit[embedding] # ColSmol/ColPali embedding support
|
|
33
32
|
pip install visual-rag-toolkit[cloudinary] # Image CDN
|
|
34
|
-
pip install visual-rag-toolkit[ui] # Streamlit demo dependencies
|
|
35
33
|
|
|
36
34
|
# All dependencies
|
|
37
35
|
pip install visual-rag-toolkit[all]
|
|
@@ -72,6 +70,95 @@ for r in results[:3]:
|
|
|
72
70
|
print(r["id"], r["score_final"])
|
|
73
71
|
```
|
|
74
72
|
|
|
73
|
+
### End-to-end: ingest PDFs (with cropping) → index in Qdrant
|
|
74
|
+
|
|
75
|
+
This is the "SDK-style" pipeline: PDF → images → optional crop → embed → store vectors + payload in Qdrant.
|
|
76
|
+
|
|
77
|
+
```python
|
|
78
|
+
import os
|
|
79
|
+
from pathlib import Path
|
|
80
|
+
|
|
81
|
+
import numpy as np
|
|
82
|
+
import torch
|
|
83
|
+
|
|
84
|
+
from visual_rag import VisualEmbedder
|
|
85
|
+
from visual_rag.indexing import ProcessingPipeline, QdrantIndexer
|
|
86
|
+
|
|
87
|
+
QDRANT_URL = os.environ["QDRANT_URL"]
|
|
88
|
+
QDRANT_KEY = os.getenv("QDRANT_API_KEY", "")
|
|
89
|
+
|
|
90
|
+
collection = "my_visual_docs"
|
|
91
|
+
|
|
92
|
+
embedder = VisualEmbedder(
|
|
93
|
+
model_name="vidore/colSmol-500M",
|
|
94
|
+
torch_dtype=torch.float16,
|
|
95
|
+
output_dtype=np.float16,
|
|
96
|
+
batch_size=8,
|
|
97
|
+
)
|
|
98
|
+
|
|
99
|
+
indexer = QdrantIndexer(
|
|
100
|
+
url=QDRANT_URL,
|
|
101
|
+
api_key=QDRANT_KEY,
|
|
102
|
+
collection_name=collection,
|
|
103
|
+
prefer_grpc=True,
|
|
104
|
+
vector_datatype="float16",
|
|
105
|
+
)
|
|
106
|
+
|
|
107
|
+
# Creates collection + required payload indexes (e.g., "filename" for skip_existing)
|
|
108
|
+
indexer.create_collection(force_recreate=False)
|
|
109
|
+
|
|
110
|
+
pipeline = ProcessingPipeline(
|
|
111
|
+
embedder=embedder,
|
|
112
|
+
indexer=indexer,
|
|
113
|
+
embedding_strategy="all", # store full tokens + pooled vectors in one pass
|
|
114
|
+
crop_empty=True,
|
|
115
|
+
crop_empty_percentage_to_remove=0.99, # kept for traceability
|
|
116
|
+
crop_empty_remove_page_number=True,
|
|
117
|
+
crop_empty_preserve_border_px=1,
|
|
118
|
+
crop_empty_uniform_rowcol_std_threshold=3.0,
|
|
119
|
+
)
|
|
120
|
+
|
|
121
|
+
pdfs = [Path("docs/a.pdf"), Path("docs/b.pdf")]
|
|
122
|
+
for pdf_path in pdfs:
|
|
123
|
+
result = pipeline.process_pdf(
|
|
124
|
+
pdf_path,
|
|
125
|
+
skip_existing=True, # Skip pages already in Qdrant (uses filename index)
|
|
126
|
+
upload_to_cloudinary=False,
|
|
127
|
+
upload_to_qdrant=True,
|
|
128
|
+
)
|
|
129
|
+
# Logs automatically shown:
|
|
130
|
+
# [10:23:45] 📚 Processing PDF: a.pdf
|
|
131
|
+
# [10:23:45] 🖼️ Converting PDF to images...
|
|
132
|
+
# [10:23:46] ✅ Converted 12 pages
|
|
133
|
+
# [10:23:46] 📦 Processing pages 1-8/12
|
|
134
|
+
# [10:23:46] 🤖 Generating embeddings for 8 pages...
|
|
135
|
+
# [10:23:48] 📤 Uploading batch of 8 pages...
|
|
136
|
+
# [10:23:48] ✅ Uploaded 8 points to Qdrant
|
|
137
|
+
# [10:23:48] 📦 Processing pages 9-12/12
|
|
138
|
+
# [10:23:48] 🤖 Generating embeddings for 4 pages...
|
|
139
|
+
# [10:23:50] 📤 Uploading batch of 4 pages...
|
|
140
|
+
# [10:23:50] ✅ Uploaded 4 points to Qdrant
|
|
141
|
+
# [10:23:50] ✅ Completed a.pdf: 12 uploaded, 0 skipped, 0 failed
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
CLI equivalent:
|
|
145
|
+
|
|
146
|
+
```bash
|
|
147
|
+
export QDRANT_URL="https://YOUR_QDRANT"
|
|
148
|
+
export QDRANT_API_KEY="YOUR_KEY"
|
|
149
|
+
|
|
150
|
+
visual-rag process \
|
|
151
|
+
--reports-dir ./docs \
|
|
152
|
+
--collection my_visual_docs \
|
|
153
|
+
--model vidore/colSmol-500M \
|
|
154
|
+
--strategy all \
|
|
155
|
+
--batch-size 8 \
|
|
156
|
+
--qdrant-vector-dtype float16 \
|
|
157
|
+
--prefer-grpc \
|
|
158
|
+
--crop-empty \
|
|
159
|
+
--crop-empty-remove-page-number
|
|
160
|
+
```
|
|
161
|
+
|
|
75
162
|
### Process a PDF into images (no embedding, no vector DB)
|
|
76
163
|
|
|
77
164
|
```python
|
|
@@ -101,7 +188,7 @@ Stage 2: Exact MaxSim reranking on candidates
|
|
|
101
188
|
└── Return top-k results (e.g., 10)
|
|
102
189
|
```
|
|
103
190
|
|
|
104
|
-
Three-stage extends this with an additional “cheap prefetch” stage before stage 2.
|
|
191
|
+
Three-stage extends this with an additional "cheap prefetch" stage before stage 2.
|
|
105
192
|
|
|
106
193
|
## 📁 Package Structure
|
|
107
194
|
|
|
@@ -124,16 +211,11 @@ visual-rag-toolkit/
|
|
|
124
211
|
Configure via environment variables or YAML:
|
|
125
212
|
|
|
126
213
|
```bash
|
|
127
|
-
# Qdrant credentials (preferred names used by the demo + scripts)
|
|
128
|
-
export SIGIR_QDRANT_URL="https://your-cluster.qdrant.io"
|
|
129
|
-
export SIGIR_QDRANT_KEY="your-api-key"
|
|
130
214
|
|
|
131
|
-
#
|
|
215
|
+
# Qdrant credentials (preferred names used by the demo + scripts)
|
|
132
216
|
export QDRANT_URL="https://your-cluster.qdrant.io"
|
|
133
217
|
export QDRANT_API_KEY="your-api-key"
|
|
134
218
|
|
|
135
|
-
export VISUALRAG_MODEL="vidore/colSmol-500M"
|
|
136
|
-
|
|
137
219
|
# Special token handling (default: filter them out)
|
|
138
220
|
export VISUALRAG_INCLUDE_SPECIAL_TOKENS=true # Include special tokens
|
|
139
221
|
```
|
|
@@ -184,7 +266,7 @@ python -m benchmarks.vidore_beir_qdrant.run_qdrant_beir \
|
|
|
184
266
|
```
|
|
185
267
|
|
|
186
268
|
More commands (including multi-stage variants and cropping configs) live in:
|
|
187
|
-
- `benchmarks/vidore_tatdqa_test/COMMANDS.md`
|
|
269
|
+
- `examples/COMMANDS.md`
|
|
188
270
|
|
|
189
271
|
## 🔧 Development
|
|
190
272
|
|
|
@@ -217,4 +299,3 @@ MIT License - see [LICENSE](LICENSE) for details.
|
|
|
217
299
|
- [Qdrant](https://qdrant.tech/) - Vector database with multi-vector support
|
|
218
300
|
- [ColPali](https://github.com/illuin-tech/colpali) - Visual document retrieval models
|
|
219
301
|
- [ViDoRe](https://huggingface.co/spaces/vidore/vidore-leaderboard) - Benchmark dataset
|
|
220
|
-
|
|
@@ -1,13 +1,23 @@
|
|
|
1
1
|
"""Main entry point for the Visual RAG Toolkit demo application."""
|
|
2
2
|
|
|
3
|
+
import os
|
|
3
4
|
import sys
|
|
4
5
|
from pathlib import Path
|
|
5
6
|
|
|
6
|
-
|
|
7
|
-
|
|
7
|
+
# Ensure repo root is in sys.path for local development
|
|
8
|
+
# (In HF Space / Docker, PYTHONPATH is already set correctly)
|
|
9
|
+
_app_dir = Path(__file__).resolve().parent
|
|
10
|
+
_repo_root = _app_dir.parent
|
|
11
|
+
if str(_repo_root) not in sys.path:
|
|
12
|
+
sys.path.insert(0, str(_repo_root))
|
|
8
13
|
|
|
9
14
|
from dotenv import load_dotenv
|
|
10
|
-
|
|
15
|
+
|
|
16
|
+
# Load .env from the repo root (works both locally and in Docker)
|
|
17
|
+
if (_repo_root / ".env").exists():
|
|
18
|
+
load_dotenv(_repo_root / ".env")
|
|
19
|
+
if (_app_dir / ".env").exists():
|
|
20
|
+
load_dotenv(_app_dir / ".env")
|
|
11
21
|
|
|
12
22
|
import streamlit as st
|
|
13
23
|
|
|
@@ -28,15 +38,17 @@ from demo.ui.benchmark import render_benchmark_tab
|
|
|
28
38
|
def main():
|
|
29
39
|
render_header()
|
|
30
40
|
render_sidebar()
|
|
31
|
-
|
|
32
|
-
tab_upload, tab_playground, tab_benchmark = st.tabs(
|
|
33
|
-
|
|
41
|
+
|
|
42
|
+
tab_upload, tab_playground, tab_benchmark = st.tabs(
|
|
43
|
+
["📤 Upload", "🎮 Playground", "📊 Benchmarking"]
|
|
44
|
+
)
|
|
45
|
+
|
|
34
46
|
with tab_upload:
|
|
35
47
|
render_upload_tab()
|
|
36
|
-
|
|
48
|
+
|
|
37
49
|
with tab_playground:
|
|
38
50
|
render_playground_tab()
|
|
39
|
-
|
|
51
|
+
|
|
40
52
|
with tab_benchmark:
|
|
41
53
|
render_benchmark_tab()
|
|
42
54
|
|
|
@@ -1,20 +1,23 @@
|
|
|
1
1
|
"""Evaluation runner with UI updates."""
|
|
2
2
|
|
|
3
3
|
import hashlib
|
|
4
|
-
import importlib.util
|
|
5
4
|
import json
|
|
6
5
|
import logging
|
|
7
6
|
import time
|
|
8
7
|
import traceback
|
|
9
8
|
from datetime import datetime
|
|
10
|
-
from pathlib import Path
|
|
11
9
|
from typing import Any, Dict, List, Optional
|
|
12
10
|
|
|
13
11
|
import numpy as np
|
|
14
12
|
import streamlit as st
|
|
15
13
|
import torch
|
|
14
|
+
from qdrant_client.models import FieldCondition, Filter, MatchValue
|
|
16
15
|
|
|
17
16
|
from visual_rag import VisualEmbedder
|
|
17
|
+
from visual_rag.retrieval import MultiVectorRetriever
|
|
18
|
+
from benchmarks.vidore_tatdqa_test.dataset_loader import load_vidore_beir_dataset
|
|
19
|
+
from benchmarks.vidore_tatdqa_test.metrics import ndcg_at_k, mrr_at_k, recall_at_k
|
|
20
|
+
from demo.qdrant_utils import get_qdrant_credentials
|
|
18
21
|
|
|
19
22
|
|
|
20
23
|
TORCH_DTYPE_MAP = {
|
|
@@ -22,49 +25,6 @@ TORCH_DTYPE_MAP = {
|
|
|
22
25
|
"float32": torch.float32,
|
|
23
26
|
"bfloat16": torch.bfloat16,
|
|
24
27
|
}
|
|
25
|
-
from qdrant_client.models import Filter, FieldCondition, MatchValue
|
|
26
|
-
|
|
27
|
-
from visual_rag.retrieval import MultiVectorRetriever
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
def _load_local_benchmark_module(module_filename: str):
|
|
31
|
-
"""
|
|
32
|
-
Load `benchmarks/vidore_tatdqa_test/<module_filename>` via file path.
|
|
33
|
-
|
|
34
|
-
Motivation:
|
|
35
|
-
- Some environments (notably containers / Spaces) can have a third-party
|
|
36
|
-
`benchmarks` package installed, causing `import benchmarks...` to resolve
|
|
37
|
-
to the wrong module.
|
|
38
|
-
- This fallback guarantees we load the repo's benchmark utilities.
|
|
39
|
-
"""
|
|
40
|
-
root = Path(__file__).resolve().parents[1] # demo/.. = repo root
|
|
41
|
-
target = root / "benchmarks" / "vidore_tatdqa_test" / module_filename
|
|
42
|
-
if not target.exists():
|
|
43
|
-
raise ModuleNotFoundError(f"Missing local benchmark module file: {target}")
|
|
44
|
-
|
|
45
|
-
name = f"_visual_rag_toolkit_local_{target.stem}"
|
|
46
|
-
spec = importlib.util.spec_from_file_location(name, str(target))
|
|
47
|
-
if spec is None or spec.loader is None:
|
|
48
|
-
raise ModuleNotFoundError(f"Could not load module spec for: {target}")
|
|
49
|
-
mod = importlib.util.module_from_spec(spec)
|
|
50
|
-
spec.loader.exec_module(mod) # type: ignore[attr-defined]
|
|
51
|
-
return mod
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
try:
|
|
55
|
-
# Preferred: normal import
|
|
56
|
-
from benchmarks.vidore_tatdqa_test.dataset_loader import load_vidore_beir_dataset
|
|
57
|
-
from benchmarks.vidore_tatdqa_test.metrics import ndcg_at_k, mrr_at_k, recall_at_k
|
|
58
|
-
except ModuleNotFoundError:
|
|
59
|
-
# Robust fallback: load from local file paths
|
|
60
|
-
_dl = _load_local_benchmark_module("dataset_loader.py")
|
|
61
|
-
_mx = _load_local_benchmark_module("metrics.py")
|
|
62
|
-
load_vidore_beir_dataset = _dl.load_vidore_beir_dataset
|
|
63
|
-
ndcg_at_k = _mx.ndcg_at_k
|
|
64
|
-
mrr_at_k = _mx.mrr_at_k
|
|
65
|
-
recall_at_k = _mx.recall_at_k
|
|
66
|
-
|
|
67
|
-
from demo.qdrant_utils import get_qdrant_credentials
|
|
68
28
|
|
|
69
29
|
logger = logging.getLogger(__name__)
|
|
70
30
|
logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s")
|