visual-rag-toolkit 0.1.1__tar.gz → 0.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/PKG-INFO +98 -17
  2. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/README.md +97 -16
  3. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/demo/app.py +20 -8
  4. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/demo/evaluation.py +5 -45
  5. visual_rag_toolkit-0.1.3/demo/indexing.py +274 -0
  6. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/demo/qdrant_utils.py +12 -5
  7. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/demo/ui/playground.py +1 -1
  8. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/demo/ui/sidebar.py +4 -3
  9. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/demo/ui/upload.py +5 -4
  10. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/examples/config.yaml +6 -0
  11. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/examples/process_pdfs.py +6 -0
  12. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/examples/search_demo.py +6 -0
  13. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/pyproject.toml +1 -1
  14. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/visual_rag/__init__.py +43 -1
  15. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/visual_rag/config.py +4 -7
  16. visual_rag_toolkit-0.1.3/visual_rag/indexing/__init__.py +38 -0
  17. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/visual_rag/indexing/qdrant_indexer.py +92 -42
  18. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/visual_rag/retrieval/multi_vector.py +63 -65
  19. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/visual_rag/retrieval/single_stage.py +7 -0
  20. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/visual_rag/retrieval/two_stage.py +8 -10
  21. visual_rag_toolkit-0.1.1/demo/indexing.py +0 -286
  22. visual_rag_toolkit-0.1.1/visual_rag/indexing/__init__.py +0 -21
  23. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/.github/workflows/ci.yaml +0 -0
  24. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/.github/workflows/publish_pypi.yaml +0 -0
  25. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/.gitignore +0 -0
  26. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/LICENSE +0 -0
  27. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/benchmarks/README.md +0 -0
  28. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/benchmarks/__init__.py +0 -0
  29. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/benchmarks/analyze_results.py +0 -0
  30. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/benchmarks/benchmark_datasets.txt +0 -0
  31. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/benchmarks/prepare_submission.py +0 -0
  32. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/benchmarks/quick_test.py +0 -0
  33. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/benchmarks/run_vidore.py +0 -0
  34. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/benchmarks/vidore_beir_qdrant/run_qdrant_beir.py +0 -0
  35. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/benchmarks/vidore_tatdqa_test/__init__.py +0 -0
  36. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/benchmarks/vidore_tatdqa_test/dataset_loader.py +0 -0
  37. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/benchmarks/vidore_tatdqa_test/metrics.py +0 -0
  38. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/benchmarks/vidore_tatdqa_test/run_qdrant.py +0 -0
  39. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/benchmarks/vidore_tatdqa_test/sweep_eval.py +0 -0
  40. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/demo/__init__.py +0 -0
  41. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/demo/commands.py +0 -0
  42. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/demo/config.py +0 -0
  43. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/demo/download_models.py +0 -0
  44. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/demo/example_metadata_mapping_sigir.json +0 -0
  45. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/demo/results.py +0 -0
  46. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/demo/test_qdrant_connection.py +0 -0
  47. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/demo/ui/__init__.py +0 -0
  48. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/demo/ui/benchmark.py +0 -0
  49. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/demo/ui/header.py +0 -0
  50. {visual_rag_toolkit-0.1.1/benchmarks/vidore_tatdqa_test → visual_rag_toolkit-0.1.3/examples}/COMMANDS.md +0 -0
  51. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/requirements.txt +0 -0
  52. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/tests/__init__.py +0 -0
  53. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/tests/test_config.py +0 -0
  54. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/tests/test_pdf_processor.py +0 -0
  55. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/tests/test_pooling.py +0 -0
  56. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/tests/test_strategies.py +0 -0
  57. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/visual_rag/cli/__init__.py +0 -0
  58. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/visual_rag/cli/main.py +0 -0
  59. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/visual_rag/demo_runner.py +0 -0
  60. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/visual_rag/embedding/__init__.py +0 -0
  61. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/visual_rag/embedding/pooling.py +0 -0
  62. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/visual_rag/embedding/visual_embedder.py +0 -0
  63. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/visual_rag/indexing/cloudinary_uploader.py +0 -0
  64. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/visual_rag/indexing/pdf_processor.py +0 -0
  65. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/visual_rag/indexing/pipeline.py +0 -0
  66. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/visual_rag/preprocessing/__init__.py +0 -0
  67. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/visual_rag/preprocessing/crop_empty.py +0 -0
  68. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/visual_rag/qdrant_admin.py +0 -0
  69. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/visual_rag/retrieval/__init__.py +0 -0
  70. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/visual_rag/retrieval/three_stage.py +0 -0
  71. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/visual_rag/visualization/__init__.py +0 -0
  72. {visual_rag_toolkit-0.1.1 → visual_rag_toolkit-0.1.3}/visual_rag/visualization/saliency.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: visual-rag-toolkit
3
- Version: 0.1.1
3
+ Version: 0.1.3
4
4
  Summary: End-to-end visual document retrieval with ColPali, featuring two-stage pooling for scalable search
5
5
  Project-URL: Homepage, https://github.com/Ara-Yeroyan/visual-rag-toolkit
6
6
  Project-URL: Documentation, https://github.com/Ara-Yeroyan/visual-rag-toolkit#readme
@@ -85,10 +85,9 @@ Description-Content-Type: text/markdown
85
85
 
86
86
  # Visual RAG Toolkit
87
87
 
88
- [![PyPI version](https://badge.fury.io/py/visual-rag-toolkit.svg)](https://badge.fury.io/py/visual-rag-toolkit)
89
- [![CI](https://github.com/Ara-Yeroyan/visual-rag-toolkit/actions/workflows/ci.yaml/badge.svg)](https://github.com/Ara-Yeroyan/visual-rag-toolkit/actions/workflows/ci.yaml)
90
- [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
91
- [![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/)
88
+ [![PyPI](https://img.shields.io/pypi/v/visual-rag-toolkit)](https://pypi.org/project/visual-rag-toolkit/)
89
+ [![Python](https://img.shields.io/pypi/pyversions/visual-rag-toolkit)](https://pypi.org/project/visual-rag-toolkit/)
90
+ [![License](https://img.shields.io/pypi/l/visual-rag-toolkit)](LICENSE)
92
91
 
93
92
  End-to-end visual document retrieval toolkit featuring **fast multi-stage retrieval** (prefetch with pooled vectors + exact MaxSim reranking).
94
93
 
@@ -112,11 +111,10 @@ This repo contains:
112
111
  pip install visual-rag-toolkit
113
112
 
114
113
  # With specific features
115
- pip install visual-rag-toolkit[embedding] # ColSmol/ColPali embedding support
116
- pip install visual-rag-toolkit[pdf] # PDF processing
114
+ pip install visual-rag-toolkit[ui] # Streamlit demo dependencies
117
115
  pip install visual-rag-toolkit[qdrant] # Vector database
116
+ pip install visual-rag-toolkit[embedding] # ColSmol/ColPali embedding support
118
117
  pip install visual-rag-toolkit[cloudinary] # Image CDN
119
- pip install visual-rag-toolkit[ui] # Streamlit demo dependencies
120
118
 
121
119
  # All dependencies
122
120
  pip install visual-rag-toolkit[all]
@@ -157,6 +155,95 @@ for r in results[:3]:
157
155
  print(r["id"], r["score_final"])
158
156
  ```
159
157
 
158
+ ### End-to-end: ingest PDFs (with cropping) → index in Qdrant
159
+
160
+ This is the "SDK-style" pipeline: PDF → images → optional crop → embed → store vectors + payload in Qdrant.
161
+
162
+ ```python
163
+ import os
164
+ from pathlib import Path
165
+
166
+ import numpy as np
167
+ import torch
168
+
169
+ from visual_rag import VisualEmbedder
170
+ from visual_rag.indexing import ProcessingPipeline, QdrantIndexer
171
+
172
+ QDRANT_URL = os.environ["QDRANT_URL"]
173
+ QDRANT_KEY = os.getenv("QDRANT_API_KEY", "")
174
+
175
+ collection = "my_visual_docs"
176
+
177
+ embedder = VisualEmbedder(
178
+ model_name="vidore/colSmol-500M",
179
+ torch_dtype=torch.float16,
180
+ output_dtype=np.float16,
181
+ batch_size=8,
182
+ )
183
+
184
+ indexer = QdrantIndexer(
185
+ url=QDRANT_URL,
186
+ api_key=QDRANT_KEY,
187
+ collection_name=collection,
188
+ prefer_grpc=True,
189
+ vector_datatype="float16",
190
+ )
191
+
192
+ # Creates collection + required payload indexes (e.g., "filename" for skip_existing)
193
+ indexer.create_collection(force_recreate=False)
194
+
195
+ pipeline = ProcessingPipeline(
196
+ embedder=embedder,
197
+ indexer=indexer,
198
+ embedding_strategy="all", # store full tokens + pooled vectors in one pass
199
+ crop_empty=True,
200
+ crop_empty_percentage_to_remove=0.99, # kept for traceability
201
+ crop_empty_remove_page_number=True,
202
+ crop_empty_preserve_border_px=1,
203
+ crop_empty_uniform_rowcol_std_threshold=3.0,
204
+ )
205
+
206
+ pdfs = [Path("docs/a.pdf"), Path("docs/b.pdf")]
207
+ for pdf_path in pdfs:
208
+ result = pipeline.process_pdf(
209
+ pdf_path,
210
+ skip_existing=True, # Skip pages already in Qdrant (uses filename index)
211
+ upload_to_cloudinary=False,
212
+ upload_to_qdrant=True,
213
+ )
214
+ # Logs automatically shown:
215
+ # [10:23:45] 📚 Processing PDF: a.pdf
216
+ # [10:23:45] 🖼️ Converting PDF to images...
217
+ # [10:23:46] ✅ Converted 12 pages
218
+ # [10:23:46] 📦 Processing pages 1-8/12
219
+ # [10:23:46] 🤖 Generating embeddings for 8 pages...
220
+ # [10:23:48] 📤 Uploading batch of 8 pages...
221
+ # [10:23:48] ✅ Uploaded 8 points to Qdrant
222
+ # [10:23:48] 📦 Processing pages 9-12/12
223
+ # [10:23:48] 🤖 Generating embeddings for 4 pages...
224
+ # [10:23:50] 📤 Uploading batch of 4 pages...
225
+ # [10:23:50] ✅ Uploaded 4 points to Qdrant
226
+ # [10:23:50] ✅ Completed a.pdf: 12 uploaded, 0 skipped, 0 failed
227
+ ```
228
+
229
+ CLI equivalent:
230
+
231
+ ```bash
232
+ export QDRANT_URL="https://YOUR_QDRANT"
233
+ export QDRANT_API_KEY="YOUR_KEY"
234
+
235
+ visual-rag process \
236
+ --reports-dir ./docs \
237
+ --collection my_visual_docs \
238
+ --model vidore/colSmol-500M \
239
+ --strategy all \
240
+ --batch-size 8 \
241
+ --qdrant-vector-dtype float16 \
242
+ --prefer-grpc \
243
+ --crop-empty \
244
+ --crop-empty-remove-page-number
245
+ ```
246
+
160
247
  ### Process a PDF into images (no embedding, no vector DB)
161
248
 
162
249
  ```python
@@ -186,7 +273,7 @@ Stage 2: Exact MaxSim reranking on candidates
186
273
  └── Return top-k results (e.g., 10)
187
274
  ```
188
275
 
189
- Three-stage extends this with an additional cheap prefetch stage before stage 2.
276
+ Three-stage extends this with an additional "cheap prefetch" stage before stage 2.
190
277
 
191
278
  ## 📁 Package Structure
192
279
 
@@ -209,16 +296,11 @@ visual-rag-toolkit/
209
296
  Configure via environment variables or YAML:
210
297
 
211
298
  ```bash
212
- # Qdrant credentials (preferred names used by the demo + scripts)
213
- export SIGIR_QDRANT_URL="https://your-cluster.qdrant.io"
214
- export SIGIR_QDRANT_KEY="your-api-key"
215
299
 
216
- # Backwards-compatible fallbacks (also supported)
300
+ # Qdrant credentials (preferred names used by the demo + scripts)
217
301
  export QDRANT_URL="https://your-cluster.qdrant.io"
218
302
  export QDRANT_API_KEY="your-api-key"
219
303
 
220
- export VISUALRAG_MODEL="vidore/colSmol-500M"
221
-
222
304
  # Special token handling (default: filter them out)
223
305
  export VISUALRAG_INCLUDE_SPECIAL_TOKENS=true # Include special tokens
224
306
  ```
@@ -269,7 +351,7 @@ python -m benchmarks.vidore_beir_qdrant.run_qdrant_beir \
269
351
  ```
270
352
 
271
353
  More commands (including multi-stage variants and cropping configs) live in:
272
- - `benchmarks/vidore_tatdqa_test/COMMANDS.md`
354
+ - `examples/COMMANDS.md`
273
355
 
274
356
  ## 🔧 Development
275
357
 
@@ -302,4 +384,3 @@ MIT License - see [LICENSE](LICENSE) for details.
302
384
  - [Qdrant](https://qdrant.tech/) - Vector database with multi-vector support
303
385
  - [ColPali](https://github.com/illuin-tech/colpali) - Visual document retrieval models
304
386
  - [ViDoRe](https://huggingface.co/spaces/vidore/vidore-leaderboard) - Benchmark dataset
305
-
@@ -1,9 +1,8 @@
1
1
  # Visual RAG Toolkit
2
2
 
3
- [![PyPI version](https://badge.fury.io/py/visual-rag-toolkit.svg)](https://badge.fury.io/py/visual-rag-toolkit)
4
- [![CI](https://github.com/Ara-Yeroyan/visual-rag-toolkit/actions/workflows/ci.yaml/badge.svg)](https://github.com/Ara-Yeroyan/visual-rag-toolkit/actions/workflows/ci.yaml)
5
- [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
6
- [![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/)
3
+ [![PyPI](https://img.shields.io/pypi/v/visual-rag-toolkit)](https://pypi.org/project/visual-rag-toolkit/)
4
+ [![Python](https://img.shields.io/pypi/pyversions/visual-rag-toolkit)](https://pypi.org/project/visual-rag-toolkit/)
5
+ [![License](https://img.shields.io/pypi/l/visual-rag-toolkit)](LICENSE)
7
6
 
8
7
  End-to-end visual document retrieval toolkit featuring **fast multi-stage retrieval** (prefetch with pooled vectors + exact MaxSim reranking).
9
8
 
@@ -27,11 +26,10 @@ This repo contains:
27
26
  pip install visual-rag-toolkit
28
27
 
29
28
  # With specific features
30
- pip install visual-rag-toolkit[embedding] # ColSmol/ColPali embedding support
31
- pip install visual-rag-toolkit[pdf] # PDF processing
29
+ pip install visual-rag-toolkit[ui] # Streamlit demo dependencies
32
30
  pip install visual-rag-toolkit[qdrant] # Vector database
31
+ pip install visual-rag-toolkit[embedding] # ColSmol/ColPali embedding support
33
32
  pip install visual-rag-toolkit[cloudinary] # Image CDN
34
- pip install visual-rag-toolkit[ui] # Streamlit demo dependencies
35
33
 
36
34
  # All dependencies
37
35
  pip install visual-rag-toolkit[all]
@@ -72,6 +70,95 @@ for r in results[:3]:
72
70
  print(r["id"], r["score_final"])
73
71
  ```
74
72
 
73
+ ### End-to-end: ingest PDFs (with cropping) → index in Qdrant
74
+
75
+ This is the "SDK-style" pipeline: PDF → images → optional crop → embed → store vectors + payload in Qdrant.
76
+
77
+ ```python
78
+ import os
79
+ from pathlib import Path
80
+
81
+ import numpy as np
82
+ import torch
83
+
84
+ from visual_rag import VisualEmbedder
85
+ from visual_rag.indexing import ProcessingPipeline, QdrantIndexer
86
+
87
+ QDRANT_URL = os.environ["QDRANT_URL"]
88
+ QDRANT_KEY = os.getenv("QDRANT_API_KEY", "")
89
+
90
+ collection = "my_visual_docs"
91
+
92
+ embedder = VisualEmbedder(
93
+ model_name="vidore/colSmol-500M",
94
+ torch_dtype=torch.float16,
95
+ output_dtype=np.float16,
96
+ batch_size=8,
97
+ )
98
+
99
+ indexer = QdrantIndexer(
100
+ url=QDRANT_URL,
101
+ api_key=QDRANT_KEY,
102
+ collection_name=collection,
103
+ prefer_grpc=True,
104
+ vector_datatype="float16",
105
+ )
106
+
107
+ # Creates collection + required payload indexes (e.g., "filename" for skip_existing)
108
+ indexer.create_collection(force_recreate=False)
109
+
110
+ pipeline = ProcessingPipeline(
111
+ embedder=embedder,
112
+ indexer=indexer,
113
+ embedding_strategy="all", # store full tokens + pooled vectors in one pass
114
+ crop_empty=True,
115
+ crop_empty_percentage_to_remove=0.99, # kept for traceability
116
+ crop_empty_remove_page_number=True,
117
+ crop_empty_preserve_border_px=1,
118
+ crop_empty_uniform_rowcol_std_threshold=3.0,
119
+ )
120
+
121
+ pdfs = [Path("docs/a.pdf"), Path("docs/b.pdf")]
122
+ for pdf_path in pdfs:
123
+ result = pipeline.process_pdf(
124
+ pdf_path,
125
+ skip_existing=True, # Skip pages already in Qdrant (uses filename index)
126
+ upload_to_cloudinary=False,
127
+ upload_to_qdrant=True,
128
+ )
129
+ # Logs automatically shown:
130
+ # [10:23:45] 📚 Processing PDF: a.pdf
131
+ # [10:23:45] 🖼️ Converting PDF to images...
132
+ # [10:23:46] ✅ Converted 12 pages
133
+ # [10:23:46] 📦 Processing pages 1-8/12
134
+ # [10:23:46] 🤖 Generating embeddings for 8 pages...
135
+ # [10:23:48] 📤 Uploading batch of 8 pages...
136
+ # [10:23:48] ✅ Uploaded 8 points to Qdrant
137
+ # [10:23:48] 📦 Processing pages 9-12/12
138
+ # [10:23:48] 🤖 Generating embeddings for 4 pages...
139
+ # [10:23:50] 📤 Uploading batch of 4 pages...
140
+ # [10:23:50] ✅ Uploaded 4 points to Qdrant
141
+ # [10:23:50] ✅ Completed a.pdf: 12 uploaded, 0 skipped, 0 failed
142
+ ```
143
+
144
+ CLI equivalent:
145
+
146
+ ```bash
147
+ export QDRANT_URL="https://YOUR_QDRANT"
148
+ export QDRANT_API_KEY="YOUR_KEY"
149
+
150
+ visual-rag process \
151
+ --reports-dir ./docs \
152
+ --collection my_visual_docs \
153
+ --model vidore/colSmol-500M \
154
+ --strategy all \
155
+ --batch-size 8 \
156
+ --qdrant-vector-dtype float16 \
157
+ --prefer-grpc \
158
+ --crop-empty \
159
+ --crop-empty-remove-page-number
160
+ ```
161
+
75
162
  ### Process a PDF into images (no embedding, no vector DB)
76
163
 
77
164
  ```python
@@ -101,7 +188,7 @@ Stage 2: Exact MaxSim reranking on candidates
101
188
  └── Return top-k results (e.g., 10)
102
189
  ```
103
190
 
104
- Three-stage extends this with an additional cheap prefetch stage before stage 2.
191
+ Three-stage extends this with an additional "cheap prefetch" stage before stage 2.
105
192
 
106
193
  ## 📁 Package Structure
107
194
 
@@ -124,16 +211,11 @@ visual-rag-toolkit/
124
211
  Configure via environment variables or YAML:
125
212
 
126
213
  ```bash
127
- # Qdrant credentials (preferred names used by the demo + scripts)
128
- export SIGIR_QDRANT_URL="https://your-cluster.qdrant.io"
129
- export SIGIR_QDRANT_KEY="your-api-key"
130
214
 
131
- # Backwards-compatible fallbacks (also supported)
215
+ # Qdrant credentials (preferred names used by the demo + scripts)
132
216
  export QDRANT_URL="https://your-cluster.qdrant.io"
133
217
  export QDRANT_API_KEY="your-api-key"
134
218
 
135
- export VISUALRAG_MODEL="vidore/colSmol-500M"
136
-
137
219
  # Special token handling (default: filter them out)
138
220
  export VISUALRAG_INCLUDE_SPECIAL_TOKENS=true # Include special tokens
139
221
  ```
@@ -184,7 +266,7 @@ python -m benchmarks.vidore_beir_qdrant.run_qdrant_beir \
184
266
  ```
185
267
 
186
268
  More commands (including multi-stage variants and cropping configs) live in:
187
- - `benchmarks/vidore_tatdqa_test/COMMANDS.md`
269
+ - `examples/COMMANDS.md`
188
270
 
189
271
  ## 🔧 Development
190
272
 
@@ -217,4 +299,3 @@ MIT License - see [LICENSE](LICENSE) for details.
217
299
  - [Qdrant](https://qdrant.tech/) - Vector database with multi-vector support
218
300
  - [ColPali](https://github.com/illuin-tech/colpali) - Visual document retrieval models
219
301
  - [ViDoRe](https://huggingface.co/spaces/vidore/vidore-leaderboard) - Benchmark dataset
220
-
@@ -1,13 +1,23 @@
1
1
  """Main entry point for the Visual RAG Toolkit demo application."""
2
2
 
3
+ import os
3
4
  import sys
4
5
  from pathlib import Path
5
6
 
6
- ROOT_DIR = Path(__file__).parent.parent
7
- sys.path.insert(0, str(ROOT_DIR))
7
+ # Ensure repo root is in sys.path for local development
8
+ # (In HF Space / Docker, PYTHONPATH is already set correctly)
9
+ _app_dir = Path(__file__).resolve().parent
10
+ _repo_root = _app_dir.parent
11
+ if str(_repo_root) not in sys.path:
12
+ sys.path.insert(0, str(_repo_root))
8
13
 
9
14
  from dotenv import load_dotenv
10
- load_dotenv(ROOT_DIR / ".env")
15
+
16
+ # Load .env from the repo root (works both locally and in Docker)
17
+ if (_repo_root / ".env").exists():
18
+ load_dotenv(_repo_root / ".env")
19
+ if (_app_dir / ".env").exists():
20
+ load_dotenv(_app_dir / ".env")
11
21
 
12
22
  import streamlit as st
13
23
 
@@ -28,15 +38,17 @@ from demo.ui.benchmark import render_benchmark_tab
28
38
  def main():
29
39
  render_header()
30
40
  render_sidebar()
31
-
32
- tab_upload, tab_playground, tab_benchmark = st.tabs(["📤 Upload", "🎮 Playground", "📊 Benchmarking"])
33
-
41
+
42
+ tab_upload, tab_playground, tab_benchmark = st.tabs(
43
+ ["📤 Upload", "🎮 Playground", "📊 Benchmarking"]
44
+ )
45
+
34
46
  with tab_upload:
35
47
  render_upload_tab()
36
-
48
+
37
49
  with tab_playground:
38
50
  render_playground_tab()
39
-
51
+
40
52
  with tab_benchmark:
41
53
  render_benchmark_tab()
42
54
 
@@ -1,20 +1,23 @@
1
1
  """Evaluation runner with UI updates."""
2
2
 
3
3
  import hashlib
4
- import importlib.util
5
4
  import json
6
5
  import logging
7
6
  import time
8
7
  import traceback
9
8
  from datetime import datetime
10
- from pathlib import Path
11
9
  from typing import Any, Dict, List, Optional
12
10
 
13
11
  import numpy as np
14
12
  import streamlit as st
15
13
  import torch
14
+ from qdrant_client.models import FieldCondition, Filter, MatchValue
16
15
 
17
16
  from visual_rag import VisualEmbedder
17
+ from visual_rag.retrieval import MultiVectorRetriever
18
+ from benchmarks.vidore_tatdqa_test.dataset_loader import load_vidore_beir_dataset
19
+ from benchmarks.vidore_tatdqa_test.metrics import ndcg_at_k, mrr_at_k, recall_at_k
20
+ from demo.qdrant_utils import get_qdrant_credentials
18
21
 
19
22
 
20
23
  TORCH_DTYPE_MAP = {
@@ -22,49 +25,6 @@ TORCH_DTYPE_MAP = {
22
25
  "float32": torch.float32,
23
26
  "bfloat16": torch.bfloat16,
24
27
  }
25
- from qdrant_client.models import Filter, FieldCondition, MatchValue
26
-
27
- from visual_rag.retrieval import MultiVectorRetriever
28
-
29
-
30
- def _load_local_benchmark_module(module_filename: str):
31
- """
32
- Load `benchmarks/vidore_tatdqa_test/<module_filename>` via file path.
33
-
34
- Motivation:
35
- - Some environments (notably containers / Spaces) can have a third-party
36
- `benchmarks` package installed, causing `import benchmarks...` to resolve
37
- to the wrong module.
38
- - This fallback guarantees we load the repo's benchmark utilities.
39
- """
40
- root = Path(__file__).resolve().parents[1] # demo/.. = repo root
41
- target = root / "benchmarks" / "vidore_tatdqa_test" / module_filename
42
- if not target.exists():
43
- raise ModuleNotFoundError(f"Missing local benchmark module file: {target}")
44
-
45
- name = f"_visual_rag_toolkit_local_{target.stem}"
46
- spec = importlib.util.spec_from_file_location(name, str(target))
47
- if spec is None or spec.loader is None:
48
- raise ModuleNotFoundError(f"Could not load module spec for: {target}")
49
- mod = importlib.util.module_from_spec(spec)
50
- spec.loader.exec_module(mod) # type: ignore[attr-defined]
51
- return mod
52
-
53
-
54
- try:
55
- # Preferred: normal import
56
- from benchmarks.vidore_tatdqa_test.dataset_loader import load_vidore_beir_dataset
57
- from benchmarks.vidore_tatdqa_test.metrics import ndcg_at_k, mrr_at_k, recall_at_k
58
- except ModuleNotFoundError:
59
- # Robust fallback: load from local file paths
60
- _dl = _load_local_benchmark_module("dataset_loader.py")
61
- _mx = _load_local_benchmark_module("metrics.py")
62
- load_vidore_beir_dataset = _dl.load_vidore_beir_dataset
63
- ndcg_at_k = _mx.ndcg_at_k
64
- mrr_at_k = _mx.mrr_at_k
65
- recall_at_k = _mx.recall_at_k
66
-
67
- from demo.qdrant_utils import get_qdrant_credentials
68
28
 
69
29
  logger = logging.getLogger(__name__)
70
30
  logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s")