visual-rag-toolkit 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. visual_rag_toolkit-0.1.1/.github/workflows/ci.yaml +94 -0
  2. visual_rag_toolkit-0.1.1/.github/workflows/publish_pypi.yaml +42 -0
  3. visual_rag_toolkit-0.1.1/.gitignore +67 -0
  4. visual_rag_toolkit-0.1.1/LICENSE +22 -0
  5. visual_rag_toolkit-0.1.1/PKG-INFO +305 -0
  6. visual_rag_toolkit-0.1.1/README.md +220 -0
  7. visual_rag_toolkit-0.1.1/benchmarks/README.md +101 -0
  8. visual_rag_toolkit-0.1.1/benchmarks/__init__.py +11 -0
  9. visual_rag_toolkit-0.1.1/benchmarks/analyze_results.py +187 -0
  10. visual_rag_toolkit-0.1.1/benchmarks/benchmark_datasets.txt +105 -0
  11. visual_rag_toolkit-0.1.1/benchmarks/prepare_submission.py +205 -0
  12. visual_rag_toolkit-0.1.1/benchmarks/quick_test.py +566 -0
  13. visual_rag_toolkit-0.1.1/benchmarks/run_vidore.py +513 -0
  14. visual_rag_toolkit-0.1.1/benchmarks/vidore_beir_qdrant/run_qdrant_beir.py +1365 -0
  15. visual_rag_toolkit-0.1.1/benchmarks/vidore_tatdqa_test/COMMANDS.md +83 -0
  16. visual_rag_toolkit-0.1.1/benchmarks/vidore_tatdqa_test/__init__.py +6 -0
  17. visual_rag_toolkit-0.1.1/benchmarks/vidore_tatdqa_test/dataset_loader.py +363 -0
  18. visual_rag_toolkit-0.1.1/benchmarks/vidore_tatdqa_test/metrics.py +44 -0
  19. visual_rag_toolkit-0.1.1/benchmarks/vidore_tatdqa_test/run_qdrant.py +799 -0
  20. visual_rag_toolkit-0.1.1/benchmarks/vidore_tatdqa_test/sweep_eval.py +372 -0
  21. visual_rag_toolkit-0.1.1/demo/__init__.py +10 -0
  22. visual_rag_toolkit-0.1.1/demo/app.py +45 -0
  23. visual_rag_toolkit-0.1.1/demo/commands.py +334 -0
  24. visual_rag_toolkit-0.1.1/demo/config.py +34 -0
  25. visual_rag_toolkit-0.1.1/demo/download_models.py +75 -0
  26. visual_rag_toolkit-0.1.1/demo/evaluation.py +602 -0
  27. visual_rag_toolkit-0.1.1/demo/example_metadata_mapping_sigir.json +37 -0
  28. visual_rag_toolkit-0.1.1/demo/indexing.py +286 -0
  29. visual_rag_toolkit-0.1.1/demo/qdrant_utils.py +211 -0
  30. visual_rag_toolkit-0.1.1/demo/results.py +35 -0
  31. visual_rag_toolkit-0.1.1/demo/test_qdrant_connection.py +119 -0
  32. visual_rag_toolkit-0.1.1/demo/ui/__init__.py +15 -0
  33. visual_rag_toolkit-0.1.1/demo/ui/benchmark.py +355 -0
  34. visual_rag_toolkit-0.1.1/demo/ui/header.py +30 -0
  35. visual_rag_toolkit-0.1.1/demo/ui/playground.py +339 -0
  36. visual_rag_toolkit-0.1.1/demo/ui/sidebar.py +162 -0
  37. visual_rag_toolkit-0.1.1/demo/ui/upload.py +487 -0
  38. visual_rag_toolkit-0.1.1/examples/config.yaml +83 -0
  39. visual_rag_toolkit-0.1.1/examples/process_pdfs.py +196 -0
  40. visual_rag_toolkit-0.1.1/examples/search_demo.py +161 -0
  41. visual_rag_toolkit-0.1.1/pyproject.toml +153 -0
  42. visual_rag_toolkit-0.1.1/requirements.txt +25 -0
  43. visual_rag_toolkit-0.1.1/tests/__init__.py +8 -0
  44. visual_rag_toolkit-0.1.1/tests/test_config.py +116 -0
  45. visual_rag_toolkit-0.1.1/tests/test_pdf_processor.py +132 -0
  46. visual_rag_toolkit-0.1.1/tests/test_pooling.py +199 -0
  47. visual_rag_toolkit-0.1.1/tests/test_strategies.py +101 -0
  48. visual_rag_toolkit-0.1.1/visual_rag/__init__.py +98 -0
  49. visual_rag_toolkit-0.1.1/visual_rag/cli/__init__.py +1 -0
  50. visual_rag_toolkit-0.1.1/visual_rag/cli/main.py +629 -0
  51. visual_rag_toolkit-0.1.1/visual_rag/config.py +230 -0
  52. visual_rag_toolkit-0.1.1/visual_rag/demo_runner.py +90 -0
  53. visual_rag_toolkit-0.1.1/visual_rag/embedding/__init__.py +26 -0
  54. visual_rag_toolkit-0.1.1/visual_rag/embedding/pooling.py +343 -0
  55. visual_rag_toolkit-0.1.1/visual_rag/embedding/visual_embedder.py +622 -0
  56. visual_rag_toolkit-0.1.1/visual_rag/indexing/__init__.py +21 -0
  57. visual_rag_toolkit-0.1.1/visual_rag/indexing/cloudinary_uploader.py +274 -0
  58. visual_rag_toolkit-0.1.1/visual_rag/indexing/pdf_processor.py +324 -0
  59. visual_rag_toolkit-0.1.1/visual_rag/indexing/pipeline.py +628 -0
  60. visual_rag_toolkit-0.1.1/visual_rag/indexing/qdrant_indexer.py +478 -0
  61. visual_rag_toolkit-0.1.1/visual_rag/preprocessing/__init__.py +3 -0
  62. visual_rag_toolkit-0.1.1/visual_rag/preprocessing/crop_empty.py +120 -0
  63. visual_rag_toolkit-0.1.1/visual_rag/qdrant_admin.py +222 -0
  64. visual_rag_toolkit-0.1.1/visual_rag/retrieval/__init__.py +19 -0
  65. visual_rag_toolkit-0.1.1/visual_rag/retrieval/multi_vector.py +222 -0
  66. visual_rag_toolkit-0.1.1/visual_rag/retrieval/single_stage.py +126 -0
  67. visual_rag_toolkit-0.1.1/visual_rag/retrieval/three_stage.py +173 -0
  68. visual_rag_toolkit-0.1.1/visual_rag/retrieval/two_stage.py +471 -0
  69. visual_rag_toolkit-0.1.1/visual_rag/visualization/__init__.py +19 -0
  70. visual_rag_toolkit-0.1.1/visual_rag/visualization/saliency.py +335 -0
@@ -0,0 +1,94 @@
1
+ # Modern CI/CD with GitHub Actions
2
+ # Replaces legacy travis.yml
3
+ name: CI
4
+
5
+ on:
6
+ push:
7
+ branches: [main, develop]
8
+ pull_request:
9
+ branches: [main]
10
+
11
+ jobs:
12
+ lint:
13
+ runs-on: ubuntu-latest
14
+ steps:
15
+ - uses: actions/checkout@v4
16
+
17
+ - name: Set up Python
18
+ uses: actions/setup-python@v5
19
+ with:
20
+ python-version: "3.11"
21
+
22
+ - name: Install linting tools
23
+ run: pip install ruff black mypy
24
+
25
+ - name: Run ruff
26
+ run: ruff check visual_rag/
27
+
28
+ - name: Run black --check
29
+ run: black --check visual_rag/
30
+
31
+ test:
32
+ runs-on: ${{ matrix.os }}
33
+ strategy:
34
+ fail-fast: false
35
+ matrix:
36
+ os: [ubuntu-latest, macos-latest]
37
+ python-version: ["3.9", "3.10", "3.11", "3.12"]
38
+
39
+ steps:
40
+ - uses: actions/checkout@v4
41
+
42
+ - name: Set up Python ${{ matrix.python-version }}
43
+ uses: actions/setup-python@v5
44
+ with:
45
+ python-version: ${{ matrix.python-version }}
46
+
47
+ - name: Install dependencies
48
+ run: |
49
+ python -m pip install --upgrade pip
50
+ pip install -e ".[dev]"
51
+
52
+ - name: Run tests
53
+ run: pytest tests/ -v --cov=visual_rag --cov-report=xml
54
+
55
+ - name: Upload coverage
56
+ uses: codecov/codecov-action@v4
57
+ if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.11'
58
+ with:
59
+ file: ./coverage.xml
60
+
61
+ # GPU tests (optional, needs self-hosted runner)
62
+ # test-gpu:
63
+ # runs-on: [self-hosted, gpu]
64
+ # steps:
65
+ # - uses: actions/checkout@v4
66
+ # - name: Run GPU tests
67
+ # run: pytest tests/gpu/ -v
68
+
69
+ build:
70
+ runs-on: ubuntu-latest
71
+ needs: [lint, test]
72
+
73
+ steps:
74
+ - uses: actions/checkout@v4
75
+
76
+ - name: Set up Python
77
+ uses: actions/setup-python@v5
78
+ with:
79
+ python-version: "3.11"
80
+
81
+ - name: Install build tools
82
+ run: pip install build twine
83
+
84
+ - name: Build package
85
+ run: python -m build
86
+
87
+ - name: Check distribution
88
+ run: twine check dist/*
89
+
90
+ - name: Upload artifacts
91
+ uses: actions/upload-artifact@v4
92
+ with:
93
+ name: dist
94
+ path: dist/
@@ -0,0 +1,42 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - "v*"
7
+ workflow_dispatch: {}
8
+
9
+ permissions:
10
+ contents: read
11
+ id-token: write # required for PyPI trusted publishing (OIDC)
12
+
13
+ jobs:
14
+ build-and-publish:
15
+ runs-on: ubuntu-latest
16
+ steps:
17
+ - name: Checkout
18
+ uses: actions/checkout@v4
19
+
20
+ - name: Set up Python
21
+ uses: actions/setup-python@v5
22
+ with:
23
+ python-version: "3.11"
24
+
25
+ - name: Install build tools
26
+ run: |
27
+ python -m pip install --upgrade pip
28
+ pip install build twine
29
+
30
+ - name: Build package
31
+ run: python -m build
32
+
33
+ - name: Check distribution
34
+ run: twine check dist/*
35
+
36
+ - name: Publish to PyPI
37
+ uses: pypa/gh-action-pypi-publish@release/v1
38
+ with:
39
+ # Uses trusted publishing (no API token) once configured on PyPI:
40
+ # PyPI → Your project → Settings → Publishing → Add GitHub publisher
41
+ skip-existing: true
42
+
@@ -0,0 +1,67 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ build/
8
+ develop-eggs/
9
+ dist/
10
+ downloads/
11
+ eggs/
12
+ .eggs/
13
+ lib/
14
+ lib64/
15
+ parts/
16
+ sdist/
17
+ var/
18
+ wheels/
19
+ *.egg-info/
20
+ .installed.cfg
21
+ *.egg
22
+
23
+ # Virtual environments
24
+ venv/
25
+ env/
26
+ ENV/
27
+
28
+ # IDE
29
+ .vscode/
30
+ .idea/
31
+ *.swp
32
+ *.swo
33
+ *~
34
+
35
+ # OS
36
+ .DS_Store
37
+ Thumbs.db
38
+
39
+ # Project specific
40
+ checkpoints/
41
+ results/
42
+ *.pkl
43
+ *.pickle
44
+
45
+ # Environment variables
46
+ .env
47
+ .env.local
48
+
49
+ # Test coverage
50
+ .coverage
51
+ htmlcov/
52
+ .pytest_cache/
53
+
54
+ # Jupyter
55
+ .ipynb_checkpoints/
56
+
57
+ # MyPy
58
+ .mypy_cache/
59
+ .dmypy.json
60
+ dmypy.json
61
+
62
+
63
+
64
+
65
+
66
+
67
+
@@ -0,0 +1,22 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Ara Yeroyan
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
22
+
@@ -0,0 +1,305 @@
1
+ Metadata-Version: 2.4
2
+ Name: visual-rag-toolkit
3
+ Version: 0.1.1
4
+ Summary: End-to-end visual document retrieval with ColPali, featuring two-stage pooling for scalable search
5
+ Project-URL: Homepage, https://github.com/Ara-Yeroyan/visual-rag-toolkit
6
+ Project-URL: Documentation, https://github.com/Ara-Yeroyan/visual-rag-toolkit#readme
7
+ Project-URL: Repository, https://github.com/Ara-Yeroyan/visual-rag-toolkit
8
+ Project-URL: Issues, https://github.com/Ara-Yeroyan/visual-rag-toolkit/issues
9
+ Author: Visual RAG Team
10
+ License: MIT License
11
+
12
+ Copyright (c) 2026 Ara Yeroyan
13
+
14
+ Permission is hereby granted, free of charge, to any person obtaining a copy
15
+ of this software and associated documentation files (the "Software"), to deal
16
+ in the Software without restriction, including without limitation the rights
17
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
18
+ copies of the Software, and to permit persons to whom the Software is
19
+ furnished to do so, subject to the following conditions:
20
+
21
+ The above copyright notice and this permission notice shall be included in all
22
+ copies or substantial portions of the Software.
23
+
24
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
29
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30
+ SOFTWARE.
31
+
32
+ License-File: LICENSE
33
+ Keywords: colbert,colpali,document-retrieval,late-interaction,multimodal-rag,pdf-processing,qdrant,visual-rag,visual-search
34
+ Classifier: Development Status :: 4 - Beta
35
+ Classifier: Intended Audience :: Developers
36
+ Classifier: Intended Audience :: Science/Research
37
+ Classifier: License :: OSI Approved :: MIT License
38
+ Classifier: Programming Language :: Python :: 3
39
+ Classifier: Programming Language :: Python :: 3.9
40
+ Classifier: Programming Language :: Python :: 3.10
41
+ Classifier: Programming Language :: Python :: 3.11
42
+ Classifier: Programming Language :: Python :: 3.12
43
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
44
+ Classifier: Topic :: Scientific/Engineering :: Image Processing
45
+ Requires-Python: >=3.9
46
+ Requires-Dist: numpy>=1.21.0
47
+ Requires-Dist: pillow>=9.0.0
48
+ Requires-Dist: python-dotenv>=0.19.0
49
+ Requires-Dist: pyyaml>=6.0
50
+ Requires-Dist: torch>=2.0.0
51
+ Requires-Dist: tqdm>=4.60.0
52
+ Provides-Extra: all
53
+ Requires-Dist: altair>=5.0.0; extra == 'all'
54
+ Requires-Dist: cloudinary>=1.30.0; extra == 'all'
55
+ Requires-Dist: colpali-engine>=0.3.0; extra == 'all'
56
+ Requires-Dist: httpx>=0.24.0; extra == 'all'
57
+ Requires-Dist: pandas>=2.0.0; extra == 'all'
58
+ Requires-Dist: pdf2image>=1.16.0; extra == 'all'
59
+ Requires-Dist: pypdf>=3.0.0; extra == 'all'
60
+ Requires-Dist: qdrant-client>=1.7.0; extra == 'all'
61
+ Requires-Dist: streamlit>=1.25.0; extra == 'all'
62
+ Requires-Dist: transformers>=4.35.0; extra == 'all'
63
+ Provides-Extra: cloudinary
64
+ Requires-Dist: cloudinary>=1.30.0; extra == 'cloudinary'
65
+ Provides-Extra: dev
66
+ Requires-Dist: black>=23.0.0; extra == 'dev'
67
+ Requires-Dist: mypy>=1.0.0; extra == 'dev'
68
+ Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
69
+ Requires-Dist: pytest>=7.0.0; extra == 'dev'
70
+ Requires-Dist: ruff>=0.1.0; extra == 'dev'
71
+ Provides-Extra: embedding
72
+ Requires-Dist: colpali-engine>=0.3.0; extra == 'embedding'
73
+ Requires-Dist: transformers>=4.35.0; extra == 'embedding'
74
+ Provides-Extra: pdf
75
+ Requires-Dist: pdf2image>=1.16.0; extra == 'pdf'
76
+ Requires-Dist: pypdf>=3.0.0; extra == 'pdf'
77
+ Provides-Extra: qdrant
78
+ Requires-Dist: qdrant-client>=1.7.0; extra == 'qdrant'
79
+ Provides-Extra: ui
80
+ Requires-Dist: altair>=5.0.0; extra == 'ui'
81
+ Requires-Dist: httpx>=0.24.0; extra == 'ui'
82
+ Requires-Dist: pandas>=2.0.0; extra == 'ui'
83
+ Requires-Dist: streamlit>=1.25.0; extra == 'ui'
84
+ Description-Content-Type: text/markdown
85
+
86
+ # Visual RAG Toolkit
87
+
88
+ [![PyPI version](https://badge.fury.io/py/visual-rag-toolkit.svg)](https://badge.fury.io/py/visual-rag-toolkit)
89
+ [![CI](https://github.com/Ara-Yeroyan/visual-rag-toolkit/actions/workflows/ci.yaml/badge.svg)](https://github.com/Ara-Yeroyan/visual-rag-toolkit/actions/workflows/ci.yaml)
90
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
91
+ [![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/)
92
+
93
+ End-to-end visual document retrieval toolkit featuring **fast multi-stage retrieval** (prefetch with pooled vectors + exact MaxSim reranking).
94
+
95
+ This repo contains:
96
+ - a **Python package** (`visual_rag`)
97
+ - a **Streamlit demo app** (`demo/`)
98
+ - **benchmark & evaluation scripts** for ViDoRe v2 (`benchmarks/`)
99
+
100
+ ## 🎯 Key Features
101
+
102
+ - **Modular**: PDF → images, embedding, Qdrant indexing, retrieval can be used independently.
103
+ - **Multi-stage retrieval**: two-stage and three-stage retrieval modes built for Qdrant named vectors.
104
+ - **Model-aware embedding**: ColSmol + ColPali support behind a single `VisualEmbedder` interface.
105
+ - **Token hygiene**: query special-token filtering by default for more stable MaxSim behavior.
106
+ - **Practical pipelines**: robust indexing, retries, optional Cloudinary image URLs, evaluation reporting.
107
+
108
+ ## 📦 Installation
109
+
110
+ ```bash
111
+ # Core package (minimal dependencies)
112
+ pip install visual-rag-toolkit
113
+
114
+ # With specific features
115
+ pip install visual-rag-toolkit[embedding] # ColSmol/ColPali embedding support
116
+ pip install visual-rag-toolkit[pdf] # PDF processing
117
+ pip install visual-rag-toolkit[qdrant] # Vector database
118
+ pip install visual-rag-toolkit[cloudinary] # Image CDN
119
+ pip install visual-rag-toolkit[ui] # Streamlit demo dependencies
120
+
121
+ # All dependencies
122
+ pip install visual-rag-toolkit[all]
123
+ ```
124
+
125
+ ### System dependencies (PDF)
126
+
127
+ `pdf2image` requires Poppler.
128
+
129
+ - macOS: `brew install poppler`
130
+ - Ubuntu/Debian: `sudo apt-get update && sudo apt-get install -y poppler-utils`
131
+
132
+ ## 🚀 Quick Start
133
+
134
+ ### Minimal: embed a query and run two-stage search (server-side)
135
+
136
+ ```python
137
+ from qdrant_client import QdrantClient
138
+ from visual_rag import VisualEmbedder, TwoStageRetriever
139
+
140
+ client = QdrantClient(url="https://YOUR_QDRANT", api_key="YOUR_KEY")
141
+ collection_name = "your_collection"
142
+
143
+ # Embed query tokens
144
+ embedder = VisualEmbedder(model_name="vidore/colpali-v1.3")
145
+ q = embedder.embed_query("What is the budget allocation?")
146
+
147
+ # Fast path: all stages computed in Qdrant (prefetch + exact rerank)
148
+ retriever = TwoStageRetriever(client, collection_name)
149
+ results = retriever.search_server_side(
150
+ query_embedding=q,
151
+ top_k=10,
152
+ prefetch_k=256,
153
+ stage1_mode="tokens_vs_experimental", # or: tokens_vs_tiles / pooled_query_vs_tiles / pooled_query_vs_global
154
+ )
155
+
156
+ for r in results[:3]:
157
+ print(r["id"], r["score_final"])
158
+ ```
159
+
160
+ ### Process a PDF into images (no embedding, no vector DB)
161
+
162
+ ```python
163
+ from pathlib import Path
164
+ from visual_rag import PDFProcessor
165
+
166
+ processor = PDFProcessor(dpi=140)
167
+ images, texts = processor.process_pdf(Path("report.pdf"))
168
+ print(len(images), "pages")
169
+ ```
170
+
171
+ ## 🔬 Multi-stage Retrieval (Two-stage / Three-stage)
172
+
173
+ Traditional ColBERT-style MaxSim scoring compares all query tokens vs all document tokens, which becomes expensive at scale.
174
+
175
+ **Our approach:**
176
+
177
+ ```
178
+ Stage 1: Fast prefetch with tile-level pooled vectors
179
+ ├── Pool each tile (64 patches) → num_tiles vectors
180
+ ├── Use HNSW index for O(log N) retrieval
181
+ └── Retrieve top-K candidates (e.g., 200)
182
+
183
+ Stage 2: Exact MaxSim reranking on candidates
184
+ ├── Load full multi-vector embeddings
185
+ ├── Compute exact ColBERT MaxSim scores
186
+ └── Return top-k results (e.g., 10)
187
+ ```
188
+
189
+ Three-stage extends this with an additional “cheap prefetch” stage before stage 2.
190
+
191
+ ## 📁 Package Structure
192
+
193
+ ```
194
+ visual-rag-toolkit/
195
+ ├── visual_rag/ # Import as: from visual_rag import ...
196
+ │ ├── embedding/ # VisualEmbedder, pooling functions
197
+ │ ├── indexing/ # PDFProcessor, QdrantIndexer, CloudinaryUploader
198
+ │ ├── retrieval/ # TwoStageRetriever
199
+ │ ├── visualization/ # Saliency maps
200
+ │ ├── cli/ # Command-line: visual-rag process/search
201
+ │ └── config.py # load_config, get, get_section
202
+ │
203
+ ├── benchmarks/ # ViDoRe evaluation scripts
204
+ └── examples/ # Usage examples
205
+ ```
206
+
207
+ ## ⚙️ Configuration
208
+
209
+ Configure via environment variables or YAML:
210
+
211
+ ```bash
212
+ # Qdrant credentials (preferred names used by the demo + scripts)
213
+ export SIGIR_QDRANT_URL="https://your-cluster.qdrant.io"
214
+ export SIGIR_QDRANT_KEY="your-api-key"
215
+
216
+ # Backwards-compatible fallbacks (also supported)
217
+ export QDRANT_URL="https://your-cluster.qdrant.io"
218
+ export QDRANT_API_KEY="your-api-key"
219
+
220
+ export VISUALRAG_MODEL="vidore/colSmol-500M"
221
+
222
+ # Special token handling (default: filter them out)
223
+ export VISUALRAG_INCLUDE_SPECIAL_TOKENS=true # Include special tokens
224
+ ```
225
+
226
+ Or use a config file (`visual_rag.yaml`):
227
+
228
+ ```yaml
229
+ model:
230
+ name: "vidore/colSmol-500M"
231
+ batch_size: 4
232
+
233
+ qdrant:
234
+ url: "https://your-cluster.qdrant.io"
235
+ collection: "my_documents"
236
+
237
+ search:
238
+ strategy: "two_stage" # or "multi_vector", "pooled"
239
+ prefetch_k: 200
240
+ top_k: 10
241
+ ```
242
+
243
+ ## 🖥️ Demo (Streamlit)
244
+
245
+ ```bash
246
+ pip install "visual-rag-toolkit[ui,qdrant,embedding,pdf]"
247
+
248
+ # Option A: from Python
249
+ python -c "import visual_rag; visual_rag.demo()"
250
+
251
+ # Option B: CLI launcher
252
+ visual-rag-demo
253
+ ```
254
+
255
+ ## 📊 Benchmark Evaluation
256
+
257
+ Run ViDoRe benchmark evaluation:
258
+
259
+ ```bash
260
+ # Example: evaluate a collection against ViDoRe BEIR datasets in Qdrant
261
+ python -m benchmarks.vidore_beir_qdrant.run_qdrant_beir \
262
+ --datasets vidore/esg_reports_v2 vidore/biomedical_lectures_v2 \
263
+ --collection YOUR_COLLECTION \
264
+ --mode two_stage \
265
+ --stage1-mode tokens_vs_experimental \
266
+ --prefetch-k 256 \
267
+ --top-k 100 \
268
+ --evaluation-scope union
269
+ ```
270
+
271
+ More commands (including multi-stage variants and cropping configs) live in:
272
+ - `benchmarks/vidore_tatdqa_test/COMMANDS.md`
273
+
274
+ ## 🔧 Development
275
+
276
+ ```bash
277
+ git clone https://github.com/Ara-Yeroyan/visual-rag-toolkit
278
+ cd visual-rag-toolkit
279
+ pip install -e ".[dev]"
280
+ pytest tests/ -v
281
+ ```
282
+
283
+ ## 📄 Citation
284
+
285
+ If you use this toolkit in your research, please cite:
286
+
287
+ ```bibtex
288
+ @software{visual_rag_toolkit,
289
+ title = {Visual RAG Toolkit: Scalable Visual Document Retrieval with Two-Stage Pooling},
290
+ author = {Ara Yeroyan},
291
+ year = {2026},
292
+ url = {https://github.com/Ara-Yeroyan/visual-rag-toolkit}
293
+ }
294
+ ```
295
+
296
+ ## 📝 License
297
+
298
+ MIT License - see [LICENSE](LICENSE) for details.
299
+
300
+ ## 🙏 Acknowledgments
301
+
302
+ - [Qdrant](https://qdrant.tech/) - Vector database with multi-vector support
303
+ - [ColPali](https://github.com/illuin-tech/colpali) - Visual document retrieval models
304
+ - [ViDoRe](https://huggingface.co/spaces/vidore/vidore-leaderboard) - Benchmark dataset
305
+