visual-rag-toolkit 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
demo/indexing.py CHANGED
@@ -1,10 +1,12 @@
1
1
  """Indexing runner with UI updates."""
2
2
 
3
3
  import hashlib
4
+ import importlib.util
4
5
  import json
5
6
  import time
6
7
  import traceback
7
8
  from datetime import datetime
9
+ from pathlib import Path
8
10
  from typing import Any, Dict, Optional
9
11
 
10
12
  import numpy as np
@@ -19,8 +21,35 @@ TORCH_DTYPE_MAP = {
19
21
  "float32": torch.float32,
20
22
  "bfloat16": torch.bfloat16,
21
23
  }
22
- from visual_rag.indexing import QdrantIndexer
23
- from benchmarks.vidore_tatdqa_test.dataset_loader import load_vidore_beir_dataset
24
+
25
+ # --- Robust imports (Spaces-friendly) ---
26
+ # Some environments have an unrelated third-party `benchmarks` package installed, or
27
+ # fail to import `QdrantIndexer` from `visual_rag.indexing`. The fallbacks below keep the demo working.
28
+ try:
29
+ from visual_rag.indexing import QdrantIndexer
30
+ except Exception: # pragma: no cover
31
+ from visual_rag.indexing.qdrant_indexer import QdrantIndexer
32
+
33
+
34
+ def _load_local_benchmark_module(module_filename: str):
35
+ root = Path(__file__).resolve().parents[1] # demo/.. = repo root
36
+ target = root / "benchmarks" / "vidore_tatdqa_test" / module_filename
37
+ if not target.exists():
38
+ raise ModuleNotFoundError(f"Missing local benchmark module file: {target}")
39
+ name = f"_visual_rag_toolkit_local_{target.stem}"
40
+ spec = importlib.util.spec_from_file_location(name, str(target))
41
+ if spec is None or spec.loader is None:
42
+ raise ModuleNotFoundError(f"Could not load module spec for: {target}")
43
+ mod = importlib.util.module_from_spec(spec)
44
+ spec.loader.exec_module(mod) # type: ignore[attr-defined]
45
+ return mod
46
+
47
+
48
+ try:
49
+ from benchmarks.vidore_tatdqa_test.dataset_loader import load_vidore_beir_dataset
50
+ except ModuleNotFoundError: # pragma: no cover
51
+ _dl = _load_local_benchmark_module("dataset_loader.py")
52
+ load_vidore_beir_dataset = _dl.load_vidore_beir_dataset
24
53
 
25
54
  from demo.qdrant_utils import get_qdrant_credentials
26
55
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: visual-rag-toolkit
3
- Version: 0.1.1
3
+ Version: 0.1.2
4
4
  Summary: End-to-end visual document retrieval with ColPali, featuring two-stage pooling for scalable search
5
5
  Project-URL: Homepage, https://github.com/Ara-Yeroyan/visual-rag-toolkit
6
6
  Project-URL: Documentation, https://github.com/Ara-Yeroyan/visual-rag-toolkit#readme
@@ -85,10 +85,14 @@ Description-Content-Type: text/markdown
85
85
 
86
86
  # Visual RAG Toolkit
87
87
 
88
- [![PyPI version](https://badge.fury.io/py/visual-rag-toolkit.svg)](https://badge.fury.io/py/visual-rag-toolkit)
89
- [![CI](https://github.com/Ara-Yeroyan/visual-rag-toolkit/actions/workflows/ci.yaml/badge.svg)](https://github.com/Ara-Yeroyan/visual-rag-toolkit/actions/workflows/ci.yaml)
90
- [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
91
- [![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/)
88
+ [![PyPI](https://img.shields.io/pypi/v/visual-rag-toolkit)](https://pypi.org/project/visual-rag-toolkit/)
89
+ [![Python](https://img.shields.io/pypi/pyversions/visual-rag-toolkit)](https://pypi.org/project/visual-rag-toolkit/)
90
+ [![License](https://img.shields.io/pypi/l/visual-rag-toolkit)](LICENSE)
91
+ [![CI](https://img.shields.io/github/actions/workflow/status/Ara-Yeroyan/visual-rag-toolkit/ci.yaml?branch=main)](https://github.com/Ara-Yeroyan/visual-rag-toolkit/actions/workflows/ci.yaml)
92
+
93
+ Note:
94
+ - The **PyPI badge** shows “not found” until the first release is published.
95
+ - The **CI badge** requires the GitHub repo to be **public** (GitHub does not serve Actions badges for private repos).
92
96
 
93
97
  End-to-end visual document retrieval toolkit featuring **fast multi-stage retrieval** (prefetch with pooled vectors + exact MaxSim reranking).
94
98
 
@@ -112,11 +116,10 @@ This repo contains:
112
116
  pip install visual-rag-toolkit
113
117
 
114
118
  # With specific features
115
- pip install visual-rag-toolkit[embedding] # ColSmol/ColPali embedding support
116
- pip install visual-rag-toolkit[pdf] # PDF processing
119
+ pip install visual-rag-toolkit[ui] # Streamlit demo dependencies
117
120
  pip install visual-rag-toolkit[qdrant] # Vector database
121
+ pip install visual-rag-toolkit[embedding] # ColSmol/ColPali embedding support
118
122
  pip install visual-rag-toolkit[cloudinary] # Image CDN
119
- pip install visual-rag-toolkit[ui] # Streamlit demo dependencies
120
123
 
121
124
  # All dependencies
122
125
  pip install visual-rag-toolkit[all]
@@ -157,6 +160,80 @@ for r in results[:3]:
157
160
  print(r["id"], r["score_final"])
158
161
  ```
159
162
 
163
+ ### End-to-end: ingest PDFs (with cropping) → index in Qdrant
164
+
165
+ This is the “SDK-style” pipeline: PDF → images → optional crop → embed → store vectors + payload in Qdrant.
166
+
167
+ ```python
168
+ import os
169
+ from pathlib import Path
170
+
171
+ import numpy as np
172
+ import torch
173
+
174
+ from visual_rag import VisualEmbedder
175
+ from visual_rag.indexing import ProcessingPipeline, QdrantIndexer
176
+
177
+ QDRANT_URL = os.environ["SIGIR_QDRANT_URL"] # or QDRANT_URL
178
+ QDRANT_KEY = os.getenv("SIGIR_QDRANT_KEY", "") # or QDRANT_API_KEY
179
+
180
+ collection = "my_visual_docs"
181
+
182
+ embedder = VisualEmbedder(
183
+ model_name="vidore/colSmol-500M",
184
+ torch_dtype=torch.float16,
185
+ output_dtype=np.float16,
186
+ batch_size=8,
187
+ )
188
+
189
+ indexer = QdrantIndexer(
190
+ url=QDRANT_URL,
191
+ api_key=QDRANT_KEY,
192
+ collection_name=collection,
193
+ prefer_grpc=True,
194
+ vector_datatype="float16",
195
+ )
196
+ indexer.create_collection(force_recreate=False)
197
+
198
+ pipeline = ProcessingPipeline(
199
+ embedder=embedder,
200
+ indexer=indexer,
201
+ embedding_strategy="all", # store full tokens + pooled vectors in one pass
202
+ crop_empty=True,
203
+ crop_empty_percentage_to_remove=0.99, # kept for traceability
204
+ crop_empty_remove_page_number=True,
205
+ crop_empty_preserve_border_px=1,
206
+ crop_empty_uniform_rowcol_std_threshold=3.0,
207
+ )
208
+
209
+ pdfs = [Path("docs/a.pdf"), Path("docs/b.pdf")]
210
+ for pdf_path in pdfs:
211
+ pipeline.process_pdf(
212
+ pdf_path,
213
+ skip_existing=True,
214
+ upload_to_cloudinary=False,
215
+ upload_to_qdrant=True,
216
+ )
217
+ ```
218
+
219
+ CLI equivalent:
220
+
221
+ ```bash
222
+ export SIGIR_QDRANT_URL="https://YOUR_QDRANT"
223
+ export SIGIR_QDRANT_KEY="YOUR_KEY"
224
+
225
+ visual-rag process \
226
+ --reports-dir ./docs \
227
+ --collection my_visual_docs \
228
+ --model vidore/colSmol-500M \
229
+ --strategy all \
230
+ --batch-size 8 \
231
+ --qdrant-vector-dtype float16 \
232
+ --prefer-grpc \
233
+ --crop-empty \
234
+ --crop-empty-remove-page-number
235
+ ```
236
+
160
237
  ### Process a PDF into images (no embedding, no vector DB)
161
238
 
162
239
  ```python
@@ -209,16 +286,11 @@ visual-rag-toolkit/
209
286
  Configure via environment variables or YAML:
210
287
 
211
288
  ```bash
212
- # Qdrant credentials (preferred names used by the demo + scripts)
213
- export SIGIR_QDRANT_URL="https://your-cluster.qdrant.io"
214
- export SIGIR_QDRANT_KEY="your-api-key"
215
289
 
216
- # Backwards-compatible fallbacks (also supported)
290
+ # Qdrant credentials (preferred names used by the demo + scripts)
217
291
  export QDRANT_URL="https://your-cluster.qdrant.io"
218
292
  export QDRANT_API_KEY="your-api-key"
219
293
 
220
- export VISUALRAG_MODEL="vidore/colSmol-500M"
221
-
222
294
  # Special token handling (default: filter them out)
223
295
  export VISUALRAG_INCLUDE_SPECIAL_TOKENS=true # Include special tokens
224
296
  ```
@@ -269,7 +341,7 @@ python -m benchmarks.vidore_beir_qdrant.run_qdrant_beir \
269
341
  ```
270
342
 
271
343
  More commands (including multi-stage variants and cropping configs) live in:
272
- - `benchmarks/vidore_tatdqa_test/COMMANDS.md`
344
+ - `examples/COMMANDS.md`
273
345
 
274
346
  ## 🔧 Development
275
347
 
@@ -6,7 +6,6 @@ benchmarks/prepare_submission.py,sha256=wD9sLWDqkQw_OANmVOdwe7OQlv4ZVf4sTQiQs7La
6
6
  benchmarks/quick_test.py,sha256=Mdcf2FNYSqWpYVfCmQLQzUVWLG-FiKUnyHyHKnAR3z4,20531
7
7
  benchmarks/run_vidore.py,sha256=RuDaEJ0wIV-hLHRtcd8PsRGOEEUFYDcrjUlor-HAajc,16373
8
8
  benchmarks/vidore_beir_qdrant/run_qdrant_beir.py,sha256=0lqIA6Qv53CreJpOg-h48sl4c8m7c_pVoQCp-oscnG0,56715
9
- benchmarks/vidore_tatdqa_test/COMMANDS.md,sha256=lhobkqHLZJjIPE-Lo3VuBuKh5XpbT2WS_sK-6dasPcE,1890
10
9
  benchmarks/vidore_tatdqa_test/__init__.py,sha256=WZiwKx8BGNuc0-oz1V3yiq8m_gWc5woEWy-WGb4F14E,18
11
10
  benchmarks/vidore_tatdqa_test/dataset_loader.py,sha256=gCCneGAKWQm0WlJHLvGjoMrAbm5b9cPEflkoMimtA2s,12795
12
11
  benchmarks/vidore_tatdqa_test/metrics.py,sha256=cLdYbRt5VcxInO1cN79ve6ZLP3kaSxRkdzRX3IbPPMs,1112
@@ -19,7 +18,7 @@ demo/config.py,sha256=BNkV4NSEEMIV9e6Z-cxds2v247uVmTPCgL-M5ItPzMg,757
19
18
  demo/download_models.py,sha256=J10qQt2TpEshVOxvCX_ZSbV7YozIBqDATZnt8fUKFHs,2868
20
19
  demo/evaluation.py,sha256=wiVxzRu3UZ5wAwHlpSKQ6srZjnSR06dgQw3G0OOV2Eg,28954
21
20
  demo/example_metadata_mapping_sigir.json,sha256=UCgqZtr6Wnq_vS7zxPxpvuokk9gxOVgKydC7f1lauw8,824
22
- demo/indexing.py,sha256=NLtGYnuCCb3uHGCgs8KHlLqKR-FSD6sxW3PlEw9UhYM,12853
21
+ demo/indexing.py,sha256=u80FSQo5ahHfry7fQNWa3kvKDngURRkLhaze3zWro1o,14077
23
22
  demo/qdrant_utils.py,sha256=VWEC7BwhMjjB7iIS5iaVDMGt_CMh9mQG4F94k1Pt0yA,7677
24
23
  demo/results.py,sha256=dprvxnyHwxJvkAQuh4deaCsiEG1wm0n9svPyxI37vJg,1050
25
24
  demo/test_qdrant_connection.py,sha256=hkbyl3zGsw_GdBBp5MkW_3SBKTHXbwH3Sr_pUE54_po,3866
@@ -52,8 +51,8 @@ visual_rag/retrieval/three_stage.py,sha256=YC0CVEohxTT5zhilcQHI7nYAk08E5jC3zkQ3-
52
51
  visual_rag/retrieval/two_stage.py,sha256=_RnEgIx_qY4yu2iIk0a3w47D7WiKHlmBivm5gLEpyI4,16779
53
52
  visual_rag/visualization/__init__.py,sha256=SITKNvBEseDp7F3K6UzLPA-6OQFqYfY5azS5nlDdihQ,447
54
53
  visual_rag/visualization/saliency.py,sha256=F3Plc18Sf3tzWcyncuaruTmENm1IfW5j9NFGEQR93cY,11248
55
- visual_rag_toolkit-0.1.1.dist-info/METADATA,sha256=SL55eEexz2ogZPD5q-gfzpF2TVZ_U1ZwykPlHaggEdU,11070
56
- visual_rag_toolkit-0.1.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
57
- visual_rag_toolkit-0.1.1.dist-info/entry_points.txt,sha256=6Tob1GPg_ILGELjYTPsAnNMZ1W0NS939nfI7xyW2DIY,102
58
- visual_rag_toolkit-0.1.1.dist-info/licenses/LICENSE,sha256=hEg_weKnHXJakQRR3sw2ygcZ101zCI00zMhBOPb3yfA,1069
59
- visual_rag_toolkit-0.1.1.dist-info/RECORD,,
54
+ visual_rag_toolkit-0.1.2.dist-info/METADATA,sha256=LrZ-EUezUsmUJpnNofm5TTM5IJIUerznFDcBSucI7rc,12830
55
+ visual_rag_toolkit-0.1.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
56
+ visual_rag_toolkit-0.1.2.dist-info/entry_points.txt,sha256=6Tob1GPg_ILGELjYTPsAnNMZ1W0NS939nfI7xyW2DIY,102
57
+ visual_rag_toolkit-0.1.2.dist-info/licenses/LICENSE,sha256=hEg_weKnHXJakQRR3sw2ygcZ101zCI00zMhBOPb3yfA,1069
58
+ visual_rag_toolkit-0.1.2.dist-info/RECORD,,
@@ -1,83 +0,0 @@
1
- # ViDoRe TAT-DQA (Qdrant) — commands
2
-
3
- ## Environment
4
-
5
- Either export:
6
-
7
- ```bash
8
- export QDRANT_URL="..."
9
- export QDRANT_API_KEY="..." # optional
10
- ```
11
-
12
- Or create a `.env` file in `visual-rag-toolkit/` with the same variables.
13
-
14
- ## Index + evaluate (single run)
15
-
16
- This is the “all-in-one” script (indexes, then evaluates once):
17
-
18
- ```bash
19
- python -m benchmarks.vidore_tatdqa_test.run_qdrant \
20
- --dataset vidore/tatdqa_test \
21
- --collection vidore_tatdqa_test \
22
- --recreate --index \
23
- --indexing-threshold 0 \
24
- --batch-size 6 \
25
- --upload-batch-size 12 \
26
- --upload-workers 0 \
27
- --loader-workers 0 \
28
- --prefer-grpc \
29
- --torch-dtype float16 \
30
- --no-upsert-wait \
31
- --qdrant-vector-dtype float16
32
- ```
33
-
34
- ## Evaluate only (no re-index) — baseline + sweeps
35
-
36
- These commands assume the Qdrant collection already exists and is populated.
37
-
38
- ### Baseline: single-stage full MaxSim
39
-
40
- ```bash
41
- python -m benchmarks.vidore_tatdqa_test.sweep_eval \
42
- --dataset vidore/tatdqa_test \
43
- --collection vidore_tatdqa_test \
44
- --prefer-grpc \
45
- --mode single_full \
46
- --torch-dtype auto \
47
- --query-batch-size 32 \
48
- --top-k 10 \
49
- --out-dir results/sweeps
50
- ```
51
-
52
- ### Two-stage sweep (preferred): stage-1 tokens vs tiles, stage-2 full rerank
53
-
54
- ```bash
55
- python -m benchmarks.vidore_tatdqa_test.sweep_eval \
56
- --dataset vidore/tatdqa_test \
57
- --collection vidore_tatdqa_test \
58
- --prefer-grpc \
59
- --mode two_stage \
60
- --stage1-mode tokens_vs_tiles \
61
- --prefetch-ks 20,50,100,200,400 \
62
- --torch-dtype auto \
63
- --query-batch-size 32 \
64
- --top-k 10 \
65
- --out-dir results/sweeps
66
- ```
67
-
68
- ### Smoke test (optional): run only N queries
69
-
70
- ```bash
71
- python -m benchmarks.vidore_tatdqa_test.sweep_eval \
72
- --dataset vidore/tatdqa_test \
73
- --collection vidore_tatdqa_test \
74
- --prefer-grpc \
75
- --mode single_full \
76
- --torch-dtype auto \
77
- --query-batch-size 32 \
78
- --top-k 10 \
79
- --max-queries 50 \
80
- --out-dir results/sweeps
81
- ```
82
-
83
-