visual-rag-toolkit 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- visual_rag_toolkit-0.1.1/.github/workflows/ci.yaml +94 -0
- visual_rag_toolkit-0.1.1/.github/workflows/publish_pypi.yaml +42 -0
- visual_rag_toolkit-0.1.1/.gitignore +67 -0
- visual_rag_toolkit-0.1.1/LICENSE +22 -0
- visual_rag_toolkit-0.1.1/PKG-INFO +305 -0
- visual_rag_toolkit-0.1.1/README.md +220 -0
- visual_rag_toolkit-0.1.1/benchmarks/README.md +101 -0
- visual_rag_toolkit-0.1.1/benchmarks/__init__.py +11 -0
- visual_rag_toolkit-0.1.1/benchmarks/analyze_results.py +187 -0
- visual_rag_toolkit-0.1.1/benchmarks/benchmark_datasets.txt +105 -0
- visual_rag_toolkit-0.1.1/benchmarks/prepare_submission.py +205 -0
- visual_rag_toolkit-0.1.1/benchmarks/quick_test.py +566 -0
- visual_rag_toolkit-0.1.1/benchmarks/run_vidore.py +513 -0
- visual_rag_toolkit-0.1.1/benchmarks/vidore_beir_qdrant/run_qdrant_beir.py +1365 -0
- visual_rag_toolkit-0.1.1/benchmarks/vidore_tatdqa_test/COMMANDS.md +83 -0
- visual_rag_toolkit-0.1.1/benchmarks/vidore_tatdqa_test/__init__.py +6 -0
- visual_rag_toolkit-0.1.1/benchmarks/vidore_tatdqa_test/dataset_loader.py +363 -0
- visual_rag_toolkit-0.1.1/benchmarks/vidore_tatdqa_test/metrics.py +44 -0
- visual_rag_toolkit-0.1.1/benchmarks/vidore_tatdqa_test/run_qdrant.py +799 -0
- visual_rag_toolkit-0.1.1/benchmarks/vidore_tatdqa_test/sweep_eval.py +372 -0
- visual_rag_toolkit-0.1.1/demo/__init__.py +10 -0
- visual_rag_toolkit-0.1.1/demo/app.py +45 -0
- visual_rag_toolkit-0.1.1/demo/commands.py +334 -0
- visual_rag_toolkit-0.1.1/demo/config.py +34 -0
- visual_rag_toolkit-0.1.1/demo/download_models.py +75 -0
- visual_rag_toolkit-0.1.1/demo/evaluation.py +602 -0
- visual_rag_toolkit-0.1.1/demo/example_metadata_mapping_sigir.json +37 -0
- visual_rag_toolkit-0.1.1/demo/indexing.py +286 -0
- visual_rag_toolkit-0.1.1/demo/qdrant_utils.py +211 -0
- visual_rag_toolkit-0.1.1/demo/results.py +35 -0
- visual_rag_toolkit-0.1.1/demo/test_qdrant_connection.py +119 -0
- visual_rag_toolkit-0.1.1/demo/ui/__init__.py +15 -0
- visual_rag_toolkit-0.1.1/demo/ui/benchmark.py +355 -0
- visual_rag_toolkit-0.1.1/demo/ui/header.py +30 -0
- visual_rag_toolkit-0.1.1/demo/ui/playground.py +339 -0
- visual_rag_toolkit-0.1.1/demo/ui/sidebar.py +162 -0
- visual_rag_toolkit-0.1.1/demo/ui/upload.py +487 -0
- visual_rag_toolkit-0.1.1/examples/config.yaml +83 -0
- visual_rag_toolkit-0.1.1/examples/process_pdfs.py +196 -0
- visual_rag_toolkit-0.1.1/examples/search_demo.py +161 -0
- visual_rag_toolkit-0.1.1/pyproject.toml +153 -0
- visual_rag_toolkit-0.1.1/requirements.txt +25 -0
- visual_rag_toolkit-0.1.1/tests/__init__.py +8 -0
- visual_rag_toolkit-0.1.1/tests/test_config.py +116 -0
- visual_rag_toolkit-0.1.1/tests/test_pdf_processor.py +132 -0
- visual_rag_toolkit-0.1.1/tests/test_pooling.py +199 -0
- visual_rag_toolkit-0.1.1/tests/test_strategies.py +101 -0
- visual_rag_toolkit-0.1.1/visual_rag/__init__.py +98 -0
- visual_rag_toolkit-0.1.1/visual_rag/cli/__init__.py +1 -0
- visual_rag_toolkit-0.1.1/visual_rag/cli/main.py +629 -0
- visual_rag_toolkit-0.1.1/visual_rag/config.py +230 -0
- visual_rag_toolkit-0.1.1/visual_rag/demo_runner.py +90 -0
- visual_rag_toolkit-0.1.1/visual_rag/embedding/__init__.py +26 -0
- visual_rag_toolkit-0.1.1/visual_rag/embedding/pooling.py +343 -0
- visual_rag_toolkit-0.1.1/visual_rag/embedding/visual_embedder.py +622 -0
- visual_rag_toolkit-0.1.1/visual_rag/indexing/__init__.py +21 -0
- visual_rag_toolkit-0.1.1/visual_rag/indexing/cloudinary_uploader.py +274 -0
- visual_rag_toolkit-0.1.1/visual_rag/indexing/pdf_processor.py +324 -0
- visual_rag_toolkit-0.1.1/visual_rag/indexing/pipeline.py +628 -0
- visual_rag_toolkit-0.1.1/visual_rag/indexing/qdrant_indexer.py +478 -0
- visual_rag_toolkit-0.1.1/visual_rag/preprocessing/__init__.py +3 -0
- visual_rag_toolkit-0.1.1/visual_rag/preprocessing/crop_empty.py +120 -0
- visual_rag_toolkit-0.1.1/visual_rag/qdrant_admin.py +222 -0
- visual_rag_toolkit-0.1.1/visual_rag/retrieval/__init__.py +19 -0
- visual_rag_toolkit-0.1.1/visual_rag/retrieval/multi_vector.py +222 -0
- visual_rag_toolkit-0.1.1/visual_rag/retrieval/single_stage.py +126 -0
- visual_rag_toolkit-0.1.1/visual_rag/retrieval/three_stage.py +173 -0
- visual_rag_toolkit-0.1.1/visual_rag/retrieval/two_stage.py +471 -0
- visual_rag_toolkit-0.1.1/visual_rag/visualization/__init__.py +19 -0
- visual_rag_toolkit-0.1.1/visual_rag/visualization/saliency.py +335 -0
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# Modern CI/CD with GitHub Actions
|
|
2
|
+
# Replaces legacy travis.yml
|
|
3
|
+
name: CI
|
|
4
|
+
|
|
5
|
+
on:
|
|
6
|
+
push:
|
|
7
|
+
branches: [main, develop]
|
|
8
|
+
pull_request:
|
|
9
|
+
branches: [main]
|
|
10
|
+
|
|
11
|
+
jobs:
|
|
12
|
+
lint:
|
|
13
|
+
runs-on: ubuntu-latest
|
|
14
|
+
steps:
|
|
15
|
+
- uses: actions/checkout@v4
|
|
16
|
+
|
|
17
|
+
- name: Set up Python
|
|
18
|
+
uses: actions/setup-python@v5
|
|
19
|
+
with:
|
|
20
|
+
python-version: "3.11"
|
|
21
|
+
|
|
22
|
+
- name: Install linting tools
|
|
23
|
+
run: pip install ruff black mypy
|
|
24
|
+
|
|
25
|
+
- name: Run ruff
|
|
26
|
+
run: ruff check visual_rag/
|
|
27
|
+
|
|
28
|
+
- name: Run black --check
|
|
29
|
+
run: black --check visual_rag/
|
|
30
|
+
|
|
31
|
+
test:
|
|
32
|
+
runs-on: ${{ matrix.os }}
|
|
33
|
+
strategy:
|
|
34
|
+
fail-fast: false
|
|
35
|
+
matrix:
|
|
36
|
+
os: [ubuntu-latest, macos-latest]
|
|
37
|
+
python-version: ["3.9", "3.10", "3.11", "3.12"]
|
|
38
|
+
|
|
39
|
+
steps:
|
|
40
|
+
- uses: actions/checkout@v4
|
|
41
|
+
|
|
42
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
43
|
+
uses: actions/setup-python@v5
|
|
44
|
+
with:
|
|
45
|
+
python-version: ${{ matrix.python-version }}
|
|
46
|
+
|
|
47
|
+
- name: Install dependencies
|
|
48
|
+
run: |
|
|
49
|
+
python -m pip install --upgrade pip
|
|
50
|
+
pip install -e ".[dev]"
|
|
51
|
+
|
|
52
|
+
- name: Run tests
|
|
53
|
+
run: pytest tests/ -v --cov=visual_rag --cov-report=xml
|
|
54
|
+
|
|
55
|
+
- name: Upload coverage
|
|
56
|
+
uses: codecov/codecov-action@v4
|
|
57
|
+
if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.11'
|
|
58
|
+
with:
|
|
59
|
+
file: ./coverage.xml
|
|
60
|
+
|
|
61
|
+
# GPU tests (optional, needs self-hosted runner)
|
|
62
|
+
# test-gpu:
|
|
63
|
+
# runs-on: [self-hosted, gpu]
|
|
64
|
+
# steps:
|
|
65
|
+
# - uses: actions/checkout@v4
|
|
66
|
+
# - name: Run GPU tests
|
|
67
|
+
# run: pytest tests/gpu/ -v
|
|
68
|
+
|
|
69
|
+
build:
|
|
70
|
+
runs-on: ubuntu-latest
|
|
71
|
+
needs: [lint, test]
|
|
72
|
+
|
|
73
|
+
steps:
|
|
74
|
+
- uses: actions/checkout@v4
|
|
75
|
+
|
|
76
|
+
- name: Set up Python
|
|
77
|
+
uses: actions/setup-python@v5
|
|
78
|
+
with:
|
|
79
|
+
python-version: "3.11"
|
|
80
|
+
|
|
81
|
+
- name: Install build tools
|
|
82
|
+
run: pip install build twine
|
|
83
|
+
|
|
84
|
+
- name: Build package
|
|
85
|
+
run: python -m build
|
|
86
|
+
|
|
87
|
+
- name: Check distribution
|
|
88
|
+
run: twine check dist/*
|
|
89
|
+
|
|
90
|
+
- name: Upload artifacts
|
|
91
|
+
uses: actions/upload-artifact@v4
|
|
92
|
+
with:
|
|
93
|
+
name: dist
|
|
94
|
+
path: dist/
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- "v*"
|
|
7
|
+
workflow_dispatch: {}
|
|
8
|
+
|
|
9
|
+
permissions:
|
|
10
|
+
contents: read
|
|
11
|
+
id-token: write # required for PyPI trusted publishing (OIDC)
|
|
12
|
+
|
|
13
|
+
jobs:
|
|
14
|
+
build-and-publish:
|
|
15
|
+
runs-on: ubuntu-latest
|
|
16
|
+
steps:
|
|
17
|
+
- name: Checkout
|
|
18
|
+
uses: actions/checkout@v4
|
|
19
|
+
|
|
20
|
+
- name: Set up Python
|
|
21
|
+
uses: actions/setup-python@v5
|
|
22
|
+
with:
|
|
23
|
+
python-version: "3.11"
|
|
24
|
+
|
|
25
|
+
- name: Install build tools
|
|
26
|
+
run: |
|
|
27
|
+
python -m pip install --upgrade pip
|
|
28
|
+
pip install build twine
|
|
29
|
+
|
|
30
|
+
- name: Build package
|
|
31
|
+
run: python -m build
|
|
32
|
+
|
|
33
|
+
- name: Check distribution
|
|
34
|
+
run: twine check dist/*
|
|
35
|
+
|
|
36
|
+
- name: Publish to PyPI
|
|
37
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
38
|
+
with:
|
|
39
|
+
# Uses trusted publishing (no API token) once configured on PyPI:
|
|
40
|
+
# PyPI → Your project → Settings → Publishing → Add GitHub publisher
|
|
41
|
+
skip-existing: true
|
|
42
|
+
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.so
|
|
6
|
+
.Python
|
|
7
|
+
build/
|
|
8
|
+
develop-eggs/
|
|
9
|
+
dist/
|
|
10
|
+
downloads/
|
|
11
|
+
eggs/
|
|
12
|
+
.eggs/
|
|
13
|
+
lib/
|
|
14
|
+
lib64/
|
|
15
|
+
parts/
|
|
16
|
+
sdist/
|
|
17
|
+
var/
|
|
18
|
+
wheels/
|
|
19
|
+
*.egg-info/
|
|
20
|
+
.installed.cfg
|
|
21
|
+
*.egg
|
|
22
|
+
|
|
23
|
+
# Virtual environments
|
|
24
|
+
venv/
|
|
25
|
+
env/
|
|
26
|
+
ENV/
|
|
27
|
+
|
|
28
|
+
# IDE
|
|
29
|
+
.vscode/
|
|
30
|
+
.idea/
|
|
31
|
+
*.swp
|
|
32
|
+
*.swo
|
|
33
|
+
*~
|
|
34
|
+
|
|
35
|
+
# OS
|
|
36
|
+
.DS_Store
|
|
37
|
+
Thumbs.db
|
|
38
|
+
|
|
39
|
+
# Project specific
|
|
40
|
+
checkpoints/
|
|
41
|
+
results/
|
|
42
|
+
*.pkl
|
|
43
|
+
*.pickle
|
|
44
|
+
|
|
45
|
+
# Environment variables
|
|
46
|
+
.env
|
|
47
|
+
.env.local
|
|
48
|
+
|
|
49
|
+
# Test coverage
|
|
50
|
+
.coverage
|
|
51
|
+
htmlcov/
|
|
52
|
+
.pytest_cache/
|
|
53
|
+
|
|
54
|
+
# Jupyter
|
|
55
|
+
.ipynb_checkpoints/
|
|
56
|
+
|
|
57
|
+
# MyPy
|
|
58
|
+
.mypy_cache/
|
|
59
|
+
.dmypy.json
|
|
60
|
+
dmypy.json
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Ara Yeroyan
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
22
|
+
|
|
@@ -0,0 +1,305 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: visual-rag-toolkit
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: End-to-end visual document retrieval with ColPali, featuring two-stage pooling for scalable search
|
|
5
|
+
Project-URL: Homepage, https://github.com/Ara-Yeroyan/visual-rag-toolkit
|
|
6
|
+
Project-URL: Documentation, https://github.com/Ara-Yeroyan/visual-rag-toolkit#readme
|
|
7
|
+
Project-URL: Repository, https://github.com/Ara-Yeroyan/visual-rag-toolkit
|
|
8
|
+
Project-URL: Issues, https://github.com/Ara-Yeroyan/visual-rag-toolkit/issues
|
|
9
|
+
Author: Visual RAG Team
|
|
10
|
+
License: MIT License
|
|
11
|
+
|
|
12
|
+
Copyright (c) 2026 Ara Yeroyan
|
|
13
|
+
|
|
14
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
15
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
16
|
+
in the Software without restriction, including without limitation the rights
|
|
17
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
18
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
19
|
+
furnished to do so, subject to the following conditions:
|
|
20
|
+
|
|
21
|
+
The above copyright notice and this permission notice shall be included in all
|
|
22
|
+
copies or substantial portions of the Software.
|
|
23
|
+
|
|
24
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
25
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
26
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
27
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
28
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
29
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
30
|
+
SOFTWARE.
|
|
31
|
+
|
|
32
|
+
License-File: LICENSE
|
|
33
|
+
Keywords: colbert,colpali,document-retrieval,late-interaction,multimodal-rag,pdf-processing,qdrant,visual-rag,visual-search
|
|
34
|
+
Classifier: Development Status :: 4 - Beta
|
|
35
|
+
Classifier: Intended Audience :: Developers
|
|
36
|
+
Classifier: Intended Audience :: Science/Research
|
|
37
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
38
|
+
Classifier: Programming Language :: Python :: 3
|
|
39
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
40
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
41
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
42
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
43
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
44
|
+
Classifier: Topic :: Scientific/Engineering :: Image Processing
|
|
45
|
+
Requires-Python: >=3.9
|
|
46
|
+
Requires-Dist: numpy>=1.21.0
|
|
47
|
+
Requires-Dist: pillow>=9.0.0
|
|
48
|
+
Requires-Dist: python-dotenv>=0.19.0
|
|
49
|
+
Requires-Dist: pyyaml>=6.0
|
|
50
|
+
Requires-Dist: torch>=2.0.0
|
|
51
|
+
Requires-Dist: tqdm>=4.60.0
|
|
52
|
+
Provides-Extra: all
|
|
53
|
+
Requires-Dist: altair>=5.0.0; extra == 'all'
|
|
54
|
+
Requires-Dist: cloudinary>=1.30.0; extra == 'all'
|
|
55
|
+
Requires-Dist: colpali-engine>=0.3.0; extra == 'all'
|
|
56
|
+
Requires-Dist: httpx>=0.24.0; extra == 'all'
|
|
57
|
+
Requires-Dist: pandas>=2.0.0; extra == 'all'
|
|
58
|
+
Requires-Dist: pdf2image>=1.16.0; extra == 'all'
|
|
59
|
+
Requires-Dist: pypdf>=3.0.0; extra == 'all'
|
|
60
|
+
Requires-Dist: qdrant-client>=1.7.0; extra == 'all'
|
|
61
|
+
Requires-Dist: streamlit>=1.25.0; extra == 'all'
|
|
62
|
+
Requires-Dist: transformers>=4.35.0; extra == 'all'
|
|
63
|
+
Provides-Extra: cloudinary
|
|
64
|
+
Requires-Dist: cloudinary>=1.30.0; extra == 'cloudinary'
|
|
65
|
+
Provides-Extra: dev
|
|
66
|
+
Requires-Dist: black>=23.0.0; extra == 'dev'
|
|
67
|
+
Requires-Dist: mypy>=1.0.0; extra == 'dev'
|
|
68
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
|
|
69
|
+
Requires-Dist: pytest>=7.0.0; extra == 'dev'
|
|
70
|
+
Requires-Dist: ruff>=0.1.0; extra == 'dev'
|
|
71
|
+
Provides-Extra: embedding
|
|
72
|
+
Requires-Dist: colpali-engine>=0.3.0; extra == 'embedding'
|
|
73
|
+
Requires-Dist: transformers>=4.35.0; extra == 'embedding'
|
|
74
|
+
Provides-Extra: pdf
|
|
75
|
+
Requires-Dist: pdf2image>=1.16.0; extra == 'pdf'
|
|
76
|
+
Requires-Dist: pypdf>=3.0.0; extra == 'pdf'
|
|
77
|
+
Provides-Extra: qdrant
|
|
78
|
+
Requires-Dist: qdrant-client>=1.7.0; extra == 'qdrant'
|
|
79
|
+
Provides-Extra: ui
|
|
80
|
+
Requires-Dist: altair>=5.0.0; extra == 'ui'
|
|
81
|
+
Requires-Dist: httpx>=0.24.0; extra == 'ui'
|
|
82
|
+
Requires-Dist: pandas>=2.0.0; extra == 'ui'
|
|
83
|
+
Requires-Dist: streamlit>=1.25.0; extra == 'ui'
|
|
84
|
+
Description-Content-Type: text/markdown
|
|
85
|
+
|
|
86
|
+
# Visual RAG Toolkit
|
|
87
|
+
|
|
88
|
+
[![PyPI version](https://badge.fury.io/py/visual-rag-toolkit.svg)](https://badge.fury.io/py/visual-rag-toolkit)
|
|
89
|
+
[![CI](https://github.com/Ara-Yeroyan/visual-rag-toolkit/actions/workflows/ci.yaml/badge.svg)](https://github.com/Ara-Yeroyan/visual-rag-toolkit/actions/workflows/ci.yaml)
|
|
90
|
+
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
|
|
91
|
+
[![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/)
|
|
92
|
+
|
|
93
|
+
End-to-end visual document retrieval toolkit featuring **fast multi-stage retrieval** (prefetch with pooled vectors + exact MaxSim reranking).
|
|
94
|
+
|
|
95
|
+
This repo contains:
|
|
96
|
+
- a **Python package** (`visual_rag`)
|
|
97
|
+
- a **Streamlit demo app** (`demo/`)
|
|
98
|
+
- **benchmark & evaluation scripts** for ViDoRe v2 (`benchmarks/`)
|
|
99
|
+
|
|
100
|
+
## 🎯 Key Features
|
|
101
|
+
|
|
102
|
+
- **Modular**: PDF → images, embedding, Qdrant indexing, retrieval can be used independently.
|
|
103
|
+
- **Multi-stage retrieval**: two-stage and three-stage retrieval modes built for Qdrant named vectors.
|
|
104
|
+
- **Model-aware embedding**: ColSmol + ColPali support behind a single `VisualEmbedder` interface.
|
|
105
|
+
- **Token hygiene**: query special-token filtering by default for more stable MaxSim behavior.
|
|
106
|
+
- **Practical pipelines**: robust indexing, retries, optional Cloudinary image URLs, evaluation reporting.
|
|
107
|
+
|
|
108
|
+
## 📦 Installation
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
# Core package (minimal dependencies)
|
|
112
|
+
pip install visual-rag-toolkit
|
|
113
|
+
|
|
114
|
+
# With specific features
|
|
115
|
+
pip install visual-rag-toolkit[embedding] # ColSmol/ColPali embedding support
|
|
116
|
+
pip install visual-rag-toolkit[pdf] # PDF processing
|
|
117
|
+
pip install visual-rag-toolkit[qdrant] # Vector database
|
|
118
|
+
pip install visual-rag-toolkit[cloudinary] # Image CDN
|
|
119
|
+
pip install visual-rag-toolkit[ui] # Streamlit demo dependencies
|
|
120
|
+
|
|
121
|
+
# All dependencies
|
|
122
|
+
pip install visual-rag-toolkit[all]
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
### System dependencies (PDF)
|
|
126
|
+
|
|
127
|
+
`pdf2image` requires Poppler.
|
|
128
|
+
|
|
129
|
+
- macOS: `brew install poppler`
|
|
130
|
+
- Ubuntu/Debian: `sudo apt-get update && sudo apt-get install -y poppler-utils`
|
|
131
|
+
|
|
132
|
+
## 🚀 Quick Start
|
|
133
|
+
|
|
134
|
+
### Minimal: embed a query and run two-stage search (server-side)
|
|
135
|
+
|
|
136
|
+
```python
|
|
137
|
+
from qdrant_client import QdrantClient
|
|
138
|
+
from visual_rag import VisualEmbedder, TwoStageRetriever
|
|
139
|
+
|
|
140
|
+
client = QdrantClient(url="https://YOUR_QDRANT", api_key="YOUR_KEY")
|
|
141
|
+
collection_name = "your_collection"
|
|
142
|
+
|
|
143
|
+
# Embed query tokens
|
|
144
|
+
embedder = VisualEmbedder(model_name="vidore/colpali-v1.3")
|
|
145
|
+
q = embedder.embed_query("What is the budget allocation?")
|
|
146
|
+
|
|
147
|
+
# Fast path: all stages computed in Qdrant (prefetch + exact rerank)
|
|
148
|
+
retriever = TwoStageRetriever(client, collection_name)
|
|
149
|
+
results = retriever.search_server_side(
|
|
150
|
+
query_embedding=q,
|
|
151
|
+
top_k=10,
|
|
152
|
+
prefetch_k=256,
|
|
153
|
+
stage1_mode="tokens_vs_experimental", # or: tokens_vs_tiles / pooled_query_vs_tiles / pooled_query_vs_global
|
|
154
|
+
)
|
|
155
|
+
|
|
156
|
+
for r in results[:3]:
|
|
157
|
+
print(r["id"], r["score_final"])
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
### Process a PDF into images (no embedding, no vector DB)
|
|
161
|
+
|
|
162
|
+
```python
|
|
163
|
+
from pathlib import Path
|
|
164
|
+
from visual_rag import PDFProcessor
|
|
165
|
+
|
|
166
|
+
processor = PDFProcessor(dpi=140)
|
|
167
|
+
images, texts = processor.process_pdf(Path("report.pdf"))
|
|
168
|
+
print(len(images), "pages")
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
## 🔬 Multi-stage Retrieval (Two-stage / Three-stage)
|
|
172
|
+
|
|
173
|
+
Traditional ColBERT-style MaxSim scoring compares all query tokens vs all document tokens, which becomes expensive at scale.
|
|
174
|
+
|
|
175
|
+
**Our approach:**
|
|
176
|
+
|
|
177
|
+
```
|
|
178
|
+
Stage 1: Fast prefetch with tile-level pooled vectors
|
|
179
|
+
├── Pool each tile (64 patches) → num_tiles vectors
|
|
180
|
+
├── Use HNSW index for O(log N) retrieval
|
|
181
|
+
└── Retrieve top-K candidates (e.g., 200)
|
|
182
|
+
|
|
183
|
+
Stage 2: Exact MaxSim reranking on candidates
|
|
184
|
+
├── Load full multi-vector embeddings
|
|
185
|
+
├── Compute exact ColBERT MaxSim scores
|
|
186
|
+
└── Return top-k results (e.g., 10)
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
Three-stage extends this with an additional “cheap prefetch” stage before stage 2.
|
|
190
|
+
|
|
191
|
+
## 📁 Package Structure
|
|
192
|
+
|
|
193
|
+
```
|
|
194
|
+
visual-rag-toolkit/
|
|
195
|
+
├── visual_rag/              # Import as: from visual_rag import ...
|
|
196
|
+
│   ├── embedding/           # VisualEmbedder, pooling functions
|
|
197
|
+
│   ├── indexing/            # PDFProcessor, QdrantIndexer, CloudinaryUploader
|
|
198
|
+
│   ├── retrieval/           # TwoStageRetriever
|
|
199
|
+
│   ├── visualization/       # Saliency maps
|
|
200
|
+
│   ├── cli/                 # Command-line: visual-rag process/search
|
|
201
|
+
│   └── config.py            # load_config, get, get_section
|
|
202
|
+
│
|
|
203
|
+
├── benchmarks/              # ViDoRe evaluation scripts
|
|
204
|
+
└── examples/                # Usage examples
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
## ⚙️ Configuration
|
|
208
|
+
|
|
209
|
+
Configure via environment variables or YAML:
|
|
210
|
+
|
|
211
|
+
```bash
|
|
212
|
+
# Qdrant credentials (preferred names used by the demo + scripts)
|
|
213
|
+
export SIGIR_QDRANT_URL="https://your-cluster.qdrant.io"
|
|
214
|
+
export SIGIR_QDRANT_KEY="your-api-key"
|
|
215
|
+
|
|
216
|
+
# Backwards-compatible fallbacks (also supported)
|
|
217
|
+
export QDRANT_URL="https://your-cluster.qdrant.io"
|
|
218
|
+
export QDRANT_API_KEY="your-api-key"
|
|
219
|
+
|
|
220
|
+
export VISUALRAG_MODEL="vidore/colSmol-500M"
|
|
221
|
+
|
|
222
|
+
# Special token handling (default: filter them out)
|
|
223
|
+
export VISUALRAG_INCLUDE_SPECIAL_TOKENS=true # Include special tokens
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
Or use a config file (`visual_rag.yaml`):
|
|
227
|
+
|
|
228
|
+
```yaml
|
|
229
|
+
model:
|
|
230
|
+
name: "vidore/colSmol-500M"
|
|
231
|
+
batch_size: 4
|
|
232
|
+
|
|
233
|
+
qdrant:
|
|
234
|
+
url: "https://your-cluster.qdrant.io"
|
|
235
|
+
collection: "my_documents"
|
|
236
|
+
|
|
237
|
+
search:
|
|
238
|
+
strategy: "two_stage" # or "multi_vector", "pooled"
|
|
239
|
+
prefetch_k: 200
|
|
240
|
+
top_k: 10
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
## 🖥️ Demo (Streamlit)
|
|
244
|
+
|
|
245
|
+
```bash
|
|
246
|
+
pip install "visual-rag-toolkit[ui,qdrant,embedding,pdf]"
|
|
247
|
+
|
|
248
|
+
# Option A: from Python
|
|
249
|
+
python -c "import visual_rag; visual_rag.demo()"
|
|
250
|
+
|
|
251
|
+
# Option B: CLI launcher
|
|
252
|
+
visual-rag-demo
|
|
253
|
+
```
|
|
254
|
+
|
|
255
|
+
## 📊 Benchmark Evaluation
|
|
256
|
+
|
|
257
|
+
Run ViDoRe benchmark evaluation:
|
|
258
|
+
|
|
259
|
+
```bash
|
|
260
|
+
# Example: evaluate a collection against ViDoRe BEIR datasets in Qdrant
|
|
261
|
+
python -m benchmarks.vidore_beir_qdrant.run_qdrant_beir \
|
|
262
|
+
--datasets vidore/esg_reports_v2 vidore/biomedical_lectures_v2 \
|
|
263
|
+
--collection YOUR_COLLECTION \
|
|
264
|
+
--mode two_stage \
|
|
265
|
+
--stage1-mode tokens_vs_experimental \
|
|
266
|
+
--prefetch-k 256 \
|
|
267
|
+
--top-k 100 \
|
|
268
|
+
--evaluation-scope union
|
|
269
|
+
```
|
|
270
|
+
|
|
271
|
+
More commands (including multi-stage variants and cropping configs) live in:
|
|
272
|
+
- `benchmarks/vidore_tatdqa_test/COMMANDS.md`
|
|
273
|
+
|
|
274
|
+
## 🔧 Development
|
|
275
|
+
|
|
276
|
+
```bash
|
|
277
|
+
git clone https://github.com/Ara-Yeroyan/visual-rag-toolkit
|
|
278
|
+
cd visual-rag-toolkit
|
|
279
|
+
pip install -e ".[dev]"
|
|
280
|
+
pytest tests/ -v
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
## 📖 Citation
|
|
284
|
+
|
|
285
|
+
If you use this toolkit in your research, please cite:
|
|
286
|
+
|
|
287
|
+
```bibtex
|
|
288
|
+
@software{visual_rag_toolkit,
|
|
289
|
+
title = {Visual RAG Toolkit: Scalable Visual Document Retrieval with Two-Stage Pooling},
|
|
290
|
+
author = {Ara Yeroyan},
|
|
291
|
+
year = {2026},
|
|
292
|
+
url = {https://github.com/Ara-Yeroyan/visual-rag-toolkit}
|
|
293
|
+
}
|
|
294
|
+
```
|
|
295
|
+
|
|
296
|
+
## 📄 License
|
|
297
|
+
|
|
298
|
+
MIT License - see [LICENSE](LICENSE) for details.
|
|
299
|
+
|
|
300
|
+
## 🙏 Acknowledgments
|
|
301
|
+
|
|
302
|
+
- [Qdrant](https://qdrant.tech/) - Vector database with multi-vector support
|
|
303
|
+
- [ColPali](https://github.com/illuin-tech/colpali) - Visual document retrieval models
|
|
304
|
+
- [ViDoRe](https://huggingface.co/spaces/vidore/vidore-leaderboard) - Benchmark dataset
|
|
305
|
+
|