visual-rag-toolkit 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
visual_rag/retrieval/multi_vector.py

@@ -2,6 +2,25 @@ import os
  from typing import Any, Dict, List, Optional
  from urllib.parse import urlparse

+ import numpy as np
+ import torch
+
+ try:
+     from dotenv import load_dotenv
+
+     DOTENV_AVAILABLE = True
+ except ImportError:
+     DOTENV_AVAILABLE = False
+     load_dotenv = None
+
+ try:
+     from qdrant_client import QdrantClient
+
+     QDRANT_AVAILABLE = True
+ except ImportError:
+     QDRANT_AVAILABLE = False
+     QdrantClient = None
+
  from visual_rag.embedding.visual_embedder import VisualEmbedder
  from visual_rag.retrieval.single_stage import SingleStageRetriever
  from visual_rag.retrieval.three_stage import ThreeStageRetriever
@@ -11,9 +30,7 @@ from visual_rag.retrieval.two_stage import TwoStageRetriever
  class MultiVectorRetriever:
      @staticmethod
      def _maybe_load_dotenv() -> None:
-         try:
-             from dotenv import load_dotenv
-         except ImportError:
+         if not DOTENV_AVAILABLE:
              return
          if os.path.exists(".env"):
              load_dotenv(".env")
@@ -33,87 +50,83 @@ class MultiVectorRetriever:
      ):
          if qdrant_client is None:
              self._maybe_load_dotenv()
-             try:
-                 from qdrant_client import QdrantClient
-             except ImportError as e:
+             if not QDRANT_AVAILABLE:
                  raise ImportError(
                      "Qdrant client not installed. Install with: pip install visual-rag-toolkit[qdrant]"
-                 ) from e
+                 )

              qdrant_url = (
-                 qdrant_url
-                 or os.getenv("SIGIR_QDRANT_URL")
-                 or os.getenv("DEST_QDRANT_URL")
-                 or os.getenv("QDRANT_URL")
+                 qdrant_url or os.getenv("QDRANT_URL") or os.getenv("SIGIR_QDRANT_URL") # legacy
              )
              if not qdrant_url:
-                 raise ValueError(
-                     "QDRANT_URL is required (pass qdrant_url or set env var). "
-                     "You can also set DEST_QDRANT_URL to override."
-                 )
+                 raise ValueError("QDRANT_URL is required (pass qdrant_url or set env var).")

              qdrant_api_key = (
                  qdrant_api_key
-                 or os.getenv("SIGIR_QDRANT_KEY")
-                 or os.getenv("SIGIR_QDRANT_API_KEY")
-                 or os.getenv("DEST_QDRANT_API_KEY")
                  or os.getenv("QDRANT_API_KEY")
+                 or os.getenv("SIGIR_QDRANT_KEY") # legacy
              )

              grpc_port = None
              if prefer_grpc:
                  try:
-                     if urlparse(qdrant_url).port == 6333:
+                     parsed = urlparse(qdrant_url)
+                     port = parsed.port
+                     if port == 6333:
                          grpc_port = 6334
                  except Exception:
-                     grpc_port = None
+                     pass

              def _make_client(use_grpc: bool):
                  return QdrantClient(
                      url=qdrant_url,
                      api_key=qdrant_api_key,
+                     timeout=request_timeout,
                      prefer_grpc=bool(use_grpc),
                      grpc_port=grpc_port,
-                     timeout=int(request_timeout),
                      check_compatibility=False,
                  )

-             qdrant_client = _make_client(prefer_grpc)
+             client = _make_client(prefer_grpc)
              if prefer_grpc:
                  try:
-                     _ = qdrant_client.get_collections()
+                     _ = client.get_collections()
                  except Exception as e:
                      msg = str(e)
                      if (
                          "StatusCode.PERMISSION_DENIED" in msg
                          or "http2 header with status: 403" in msg
                      ):
-                         qdrant_client = _make_client(False)
+                         client = _make_client(False)
                      else:
                          raise
+             qdrant_client = client

          self.client = qdrant_client
          self.collection_name = collection_name
+
          self.embedder = embedder or VisualEmbedder(model_name=model_name)

          self._two_stage = TwoStageRetriever(
-             self.client,
-             collection_name=self.collection_name,
-             request_timeout=int(request_timeout),
-             max_retries=int(max_retries),
-             retry_sleep=float(retry_sleep),
+             qdrant_client=qdrant_client,
+             collection_name=collection_name,
+             request_timeout=request_timeout,
+             max_retries=max_retries,
+             retry_sleep=retry_sleep,
          )
          self._three_stage = ThreeStageRetriever(
-             self.client,
-             collection_name=self.collection_name,
-             request_timeout=int(request_timeout),
-             max_retries=int(max_retries),
-             retry_sleep=float(retry_sleep),
+             qdrant_client=qdrant_client,
+             collection_name=collection_name,
+             request_timeout=request_timeout,
+             max_retries=max_retries,
+             retry_sleep=retry_sleep,
          )
          self._single_stage = SingleStageRetriever(
-             self.client,
-             collection_name=self.collection_name,
-             request_timeout=int(request_timeout),
+             qdrant_client=qdrant_client,
+             collection_name=collection_name,
+             request_timeout=request_timeout,
+             max_retries=max_retries,
+             retry_sleep=retry_sleep,
          )

      def build_filter(
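For context on the environment-variable resolution in the hunk above (explicit argument first, then `QDRANT_URL`, then the legacy `SIGIR_QDRANT_URL`), here is a minimal usage sketch. The import path, the keyword form of the constructor arguments, and all placeholder values are assumptions beyond what this diff shows:

```python
import os

# Hypothetical import path; the class lives in visual_rag/retrieval/multi_vector.py.
from visual_rag.retrieval.multi_vector import MultiVectorRetriever

# With no explicit qdrant_client/qdrant_url, the constructor reads QDRANT_URL and
# QDRANT_API_KEY, falling back to the legacy SIGIR_* variables.
os.environ.setdefault("QDRANT_URL", "https://your-qdrant-host:6333")  # placeholder
os.environ.setdefault("QDRANT_API_KEY", "your-api-key")               # placeholder

retriever = MultiVectorRetriever(
    collection_name="my_visual_docs",  # collection name reused from the README example
    prefer_grpc=True,                  # falls back to HTTP on gRPC 403 / PERMISSION_DENIED
    request_timeout=120,
)
```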
@@ -143,14 +156,10 @@ class MultiVectorRetriever:
          return_embeddings: bool = False,
      ) -> List[Dict[str, Any]]:
          q = self.embedder.embed_query(query)
-         try:
-             import torch
-         except ImportError:
-             torch = None
-         if torch is not None and isinstance(q, torch.Tensor):
+         if isinstance(q, torch.Tensor):
              query_embedding = q.detach().cpu().numpy()
          else:
-             query_embedding = q.numpy()
+             query_embedding = np.asarray(q)

          return self.search_embedded(
              query_embedding=query_embedding,
@@ -179,27 +188,17 @@
              return self._single_stage.search(
                  query_embedding=query_embedding,
                  top_k=top_k,
-                 strategy="multi_vector",
-                 filter_obj=filter_obj,
-             )
-
-         if mode == "single_tiles":
-             return self._single_stage.search(
-                 query_embedding=query_embedding,
-                 top_k=top_k,
-                 strategy="tiles_maxsim",
                  filter_obj=filter_obj,
+                 using="initial",
              )
-
-         if mode == "single_global":
+         elif mode == "single_pooled":
              return self._single_stage.search(
                  query_embedding=query_embedding,
                  top_k=top_k,
-                 strategy="pooled_global",
                  filter_obj=filter_obj,
+                 using="mean_pooling",
              )
-
-         if mode == "two_stage":
+         elif mode == "two_stage":
              return self._two_stage.search_server_side(
                  query_embedding=query_embedding,
                  top_k=top_k,
@@ -207,16 +206,14 @@
                  filter_obj=filter_obj,
                  stage1_mode=stage1_mode,
              )
-
-         if mode == "three_stage":
-             s1 = int(stage1_k) if stage1_k is not None else 1000
-             s2 = int(stage2_k) if stage2_k is not None else 300
+         elif mode == "three_stage":
              return self._three_stage.search_server_side(
                  query_embedding=query_embedding,
                  top_k=top_k,
-                 stage1_k=s1,
-                 stage2_k=s2,
+                 stage1_k=stage1_k,
+                 stage2_k=stage2_k,
                  filter_obj=filter_obj,
+                 stage1_mode=stage1_mode,
              )
-
-         raise ValueError(f"Unknown mode: {mode}")
+         else:
+             raise ValueError(f"Unknown mode: {mode}")
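The dispatch above maps retrieval modes onto named vector fields in the collection (`initial` for the full multi-vector field, `mean_pooling` for the pooled field). A hypothetical call sketch, reusing the `retriever` from the earlier snippet; the exact `search` signature and result keys are assumptions beyond what the diff shows:

```python
# A text query goes through embed_query() and then into the mode dispatch shown above.
results = retriever.search(
    "What does the revenue table on page 3 report?",  # placeholder query
    top_k=10,
    mode="two_stage",   # other branches: "single_pooled", "three_stage", default multi-vector
    stage1_k=1000,      # pooled-prefetch size before exact MaxSim reranking
    filter_obj=None,
)

for hit in results[:3]:
    print(hit["id"], hit.get("score"))
```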
visual_rag/retrieval/single_stage.py

@@ -30,6 +30,9 @@ class SingleStageRetriever:
      Args:
          qdrant_client: Connected Qdrant client
          collection_name: Name of the Qdrant collection
+         request_timeout: Timeout for Qdrant requests (seconds)
+         max_retries: Number of retry attempts on failure
+         retry_sleep: Sleep time between retries (seconds)

      Example:
          >>> retriever = SingleStageRetriever(client, "my_collection")
@@ -41,10 +44,14 @@
          qdrant_client,
          collection_name: str,
          request_timeout: int = 120,
+         max_retries: int = 3,
+         retry_sleep: float = 1.0,
      ):
          self.client = qdrant_client
          self.collection_name = collection_name
          self.request_timeout = int(request_timeout)
+         self.max_retries = max_retries
+         self.retry_sleep = retry_sleep

      def search(
          self,
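The two new constructor arguments expose the same retry knobs the two- and three-stage retrievers already take. A short instantiation sketch based on the docstring example above; the `QdrantClient` connection values are placeholders:

```python
from qdrant_client import QdrantClient

from visual_rag.retrieval.single_stage import SingleStageRetriever

client = QdrantClient(url="https://your-qdrant-host:6333", api_key="your-api-key")  # placeholders

retriever = SingleStageRetriever(
    client,
    "my_collection",
    request_timeout=120,  # seconds, stored as int
    max_retries=3,        # added in this release: retry attempts on failure
    retry_sleep=1.0,      # added in this release: seconds between retries
)
```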
visual_rag/retrieval/two_stage.py

@@ -17,10 +17,15 @@ Research Context:
  """

  import logging
+ import time
  from typing import Any, Dict, List, Optional, Union

  import numpy as np
  import torch
+ from qdrant_client.http import models as qdrant_models
+ from qdrant_client.models import FieldCondition, Filter, MatchAny, MatchValue
+
+ from visual_rag.embedding.pooling import compute_maxsim_score

  logger = logging.getLogger(__name__)

@@ -82,8 +87,6 @@ class TwoStageRetriever:
          self.retry_sleep = float(retry_sleep)

      def _retry_call(self, fn):
-         import time
-
          last_err = None
          for attempt in range(self.max_retries):
              try:
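Only the opening lines of `_retry_call` appear in this hunk. For readers of the diff, a standalone sketch of the same retry-with-sleep pattern (an illustration, not the library's exact implementation):

```python
import time


def retry_call(fn, max_retries: int = 3, retry_sleep: float = 1.0):
    """Call fn() up to max_retries times, sleeping retry_sleep seconds between attempts."""
    last_err = None
    for attempt in range(max_retries):
        try:
            return fn()
        except Exception as e:  # the real method may be more selective about exceptions
            last_err = e
            if attempt < max_retries - 1:
                time.sleep(retry_sleep)
    raise last_err
```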
@@ -120,8 +123,6 @@
          Returns:
              List of results with scores
          """
-         from qdrant_client.http import models
-
          query_np = self._to_numpy(query_embedding)

          if prefetch_k is None:
@@ -155,9 +156,9 @@
              limit=top_k,
              query_filter=filter_obj,
              with_payload=True,
-             search_params=models.SearchParams(exact=True),
+             search_params=qdrant_models.SearchParams(exact=True),
              prefetch=[
-                 models.Prefetch(
+                 qdrant_models.Prefetch(
                      query=prefetch_query,
                      using=prefetch_using,
                      limit=prefetch_k,
@@ -363,8 +364,6 @@
          return_embeddings: bool = False,
      ) -> List[Dict[str, Any]]:
          """Stage 2: Rerank with full multi-vector MaxSim scoring."""
-         from visual_rag.embedding.pooling import compute_maxsim_score
-
          # Fetch full embeddings for candidates
          candidate_ids = [c["id"] for c in candidates]

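`compute_maxsim_score`, now imported at module level per the earlier hunk, implements the late-interaction scoring used in this reranking step. As a reference for readers, a minimal NumPy sketch of the standard MaxSim formulation, not the library's actual implementation:

```python
import numpy as np


def maxsim_score(query_vecs: np.ndarray, doc_vecs: np.ndarray) -> float:
    """Late-interaction MaxSim.

    query_vecs: (num_query_tokens, dim), doc_vecs: (num_doc_tokens, dim).
    Assumes both sides are L2-normalized so the dot product is cosine similarity.
    For each query token, take its best-matching document token, then sum.
    """
    sim = query_vecs @ doc_vecs.T  # (num_query_tokens, num_doc_tokens)
    return float(sim.max(axis=1).sum())
```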
@@ -435,8 +434,6 @@

          Supports single values or lists (using MatchAny).
          """
-         from qdrant_client.models import FieldCondition, Filter, MatchAny, MatchValue
-
          conditions = []

          if year is not None:
visual_rag_toolkit-0.1.2.dist-info/METADATA → visual_rag_toolkit-0.1.4.dist-info/METADATA

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: visual-rag-toolkit
- Version: 0.1.2
+ Version: 0.1.4
  Summary: End-to-end visual document retrieval with ColPali, featuring two-stage pooling for scalable search
  Project-URL: Homepage, https://github.com/Ara-Yeroyan/visual-rag-toolkit
  Project-URL: Documentation, https://github.com/Ara-Yeroyan/visual-rag-toolkit#readme
@@ -88,14 +88,12 @@ Description-Content-Type: text/markdown
  [![PyPI](https://img.shields.io/pypi/v/visual-rag-toolkit)](https://pypi.org/project/visual-rag-toolkit/)
  [![Python](https://img.shields.io/pypi/pyversions/visual-rag-toolkit)](https://pypi.org/project/visual-rag-toolkit/)
  [![License](https://img.shields.io/pypi/l/visual-rag-toolkit)](LICENSE)
- [![CI](https://img.shields.io/github/actions/workflow/status/Ara-Yeroyan/visual-rag-toolkit/ci.yaml?branch=main)](https://github.com/Ara-Yeroyan/visual-rag-toolkit/actions/workflows/ci.yaml)
-
- Note:
- - The **PyPI badge** shows “not found” until the first release is published.
- - The **CI badge** requires the GitHub repo to be **public** (GitHub does not serve Actions badges for private repos).
+ [![Demo](https://img.shields.io/badge/Demo-Hugging%20Face-yellow)](https://huggingface.co/spaces/Yeroyan/visual-rag-toolkit)

  End-to-end visual document retrieval toolkit featuring **fast multi-stage retrieval** (prefetch with pooled vectors + exact MaxSim reranking).

+ **[Try the Live Demo](https://huggingface.co/spaces/Yeroyan/visual-rag-toolkit)** - Upload PDFs, index to Qdrant, and query with visual retrieval.
+
  This repo contains:
  - a **Python package** (`visual_rag`)
  - a **Streamlit demo app** (`demo/`)
@@ -162,7 +160,7 @@ for r in results[:3]:

  ### End-to-end: ingest PDFs (with cropping) → index in Qdrant

- This is the SDK-style pipeline: PDF → images → optional crop → embed → store vectors + payload in Qdrant.
+ This is the "SDK-style" pipeline: PDF → images → optional crop → embed → store vectors + payload in Qdrant.

  ```python
  import os
@@ -174,8 +172,8 @@ import torch
  from visual_rag import VisualEmbedder
  from visual_rag.indexing import ProcessingPipeline, QdrantIndexer

- QDRANT_URL = os.environ["SIGIR_QDRANT_URL"] # or QDRANT_URL
- QDRANT_KEY = os.getenv("SIGIR_QDRANT_KEY", "") # or QDRANT_API_KEY
+ QDRANT_URL = os.environ["QDRANT_URL"]
+ QDRANT_KEY = os.getenv("QDRANT_API_KEY", "")

  collection = "my_visual_docs"

@@ -193,6 +191,8 @@ indexer = QdrantIndexer(
      prefer_grpc=True,
      vector_datatype="float16",
  )
+
+ # Creates collection + required payload indexes (e.g., "filename" for skip_existing)
  indexer.create_collection(force_recreate=False)

  pipeline = ProcessingPipeline(
@@ -208,19 +208,32 @@ pipeline = ProcessingPipeline(

  pdfs = [Path("docs/a.pdf"), Path("docs/b.pdf")]
  for pdf_path in pdfs:
-     pipeline.process_pdf(
+     result = pipeline.process_pdf(
          pdf_path,
-         skip_existing=True,
+         skip_existing=True,  # Skip pages already in Qdrant (uses filename index)
          upload_to_cloudinary=False,
          upload_to_qdrant=True,
      )
+     # Logs automatically shown:
+     # [10:23:45] 📚 Processing PDF: a.pdf
+     # [10:23:45] 🖼️ Converting PDF to images...
+     # [10:23:46] ✅ Converted 12 pages
+     # [10:23:46] 📦 Processing pages 1-8/12
+     # [10:23:46] 🤖 Generating embeddings for 8 pages...
+     # [10:23:48] 📤 Uploading batch of 8 pages...
+     # [10:23:48] ✅ Uploaded 8 points to Qdrant
+     # [10:23:48] 📦 Processing pages 9-12/12
+     # [10:23:48] 🤖 Generating embeddings for 4 pages...
+     # [10:23:50] 📤 Uploading batch of 4 pages...
+     # [10:23:50] ✅ Uploaded 4 points to Qdrant
+     # [10:23:50] ✅ Completed a.pdf: 12 uploaded, 0 skipped, 0 failed
  ```

  CLI equivalent:

  ```bash
- export SIGIR_QDRANT_URL="https://YOUR_QDRANT"
- export SIGIR_QDRANT_KEY="YOUR_KEY"
+ export QDRANT_URL="https://YOUR_QDRANT"
+ export QDRANT_API_KEY="YOUR_KEY"

  visual-rag process \
    --reports-dir ./docs \
@@ -263,7 +276,7 @@ Stage 2: Exact MaxSim reranking on candidates
      └── Return top-k results (e.g., 10)
  ```

- Three-stage extends this with an additional cheap prefetch stage before stage 2.
+ Three-stage extends this with an additional "cheap prefetch" stage before stage 2.

  ## 📁 Package Structure

@@ -358,7 +371,7 @@ If you use this toolkit in your research, please cite:

  ```bibtex
  @software{visual_rag_toolkit,
-   title = {Visual RAG Toolkit: Scalable Visual Document Retrieval with Two-Stage Pooling},
+   title = {Visual RAG Toolkit: Scalable Visual Document Retrieval with 1D Convolutional Pooling},
    author = {Ara Yeroyan},
    year = {2026},
    url = {https://github.com/Ara-Yeroyan/visual-rag-toolkit}
@@ -374,4 +387,3 @@ MIT License - see [LICENSE](LICENSE) for details.
  - [Qdrant](https://qdrant.tech/) - Vector database with multi-vector support
  - [ColPali](https://github.com/illuin-tech/colpali) - Visual document retrieval models
  - [ViDoRe](https://huggingface.co/spaces/vidore/vidore-leaderboard) - Benchmark dataset
-
visual_rag_toolkit-0.1.2.dist-info/RECORD → visual_rag_toolkit-0.1.4.dist-info/RECORD

@@ -11,48 +11,47 @@ benchmarks/vidore_tatdqa_test/dataset_loader.py,sha256=gCCneGAKWQm0WlJHLvGjoMrAb
  benchmarks/vidore_tatdqa_test/metrics.py,sha256=cLdYbRt5VcxInO1cN79ve6ZLP3kaSxRkdzRX3IbPPMs,1112
  benchmarks/vidore_tatdqa_test/run_qdrant.py,sha256=_PikeqIYpWPim-KEQOwvT-aqwYoAWASjqJVisi8PfQg,28681
  benchmarks/vidore_tatdqa_test/sweep_eval.py,sha256=d_kbyNTJ1LoFfIVnsZyiRO1nKyMqmRB5jEweZL6kYd4,12688
- demo/__init__.py,sha256=jVzjsVKZl5ZZuFxawA8Pxj3yuIKL7llkao3rBpde-aQ,204
- demo/app.py,sha256=1GZJ_JhVWvqoBewngc8tHeiuM1fNbxddEO6ZsEdwBfg,1029
+ demo/__init__.py,sha256=QtvjqZ94WpbJ915Xik5ZzYFDtK7APmsQK6m0-Sxe8uw,204
+ demo/app.py,sha256=nZbCz1mpRK-GZTgOHyz4m4AfgKFgsH-09JwXeL3d3ng,1405
  demo/commands.py,sha256=qxRE2x610yZvcjwEfSKiR9CyFonX-vRxFqQNJCUKfyA,13690
  demo/config.py,sha256=BNkV4NSEEMIV9e6Z-cxds2v247uVmTPCgL-M5ItPzMg,757
  demo/download_models.py,sha256=J10qQt2TpEshVOxvCX_ZSbV7YozIBqDATZnt8fUKFHs,2868
- demo/evaluation.py,sha256=wiVxzRu3UZ5wAwHlpSKQ6srZjnSR06dgQw3G0OOV2Eg,28954
- demo/example_metadata_mapping_sigir.json,sha256=UCgqZtr6Wnq_vS7zxPxpvuokk9gxOVgKydC7f1lauw8,824
- demo/indexing.py,sha256=u80FSQo5ahHfry7fQNWa3kvKDngURRkLhaze3zWro1o,14077
- demo/qdrant_utils.py,sha256=VWEC7BwhMjjB7iIS5iaVDMGt_CMh9mQG4F94k1Pt0yA,7677
+ demo/evaluation.py,sha256=4ixJGg50KAVNiZ_mr5FMVv-QKCrZRooJ80LbrjKXM1s,27467
+ demo/indexing.py,sha256=qUVEB3QrIolS53Ggxurccbh-QyeLLbzcY5TLyVBVKME,10620
+ demo/qdrant_utils.py,sha256=Xh-thLIrACrYkFCrqazYNH0p3vS8_yMCaTbvt4HAy98,7778
  demo/results.py,sha256=dprvxnyHwxJvkAQuh4deaCsiEG1wm0n9svPyxI37vJg,1050
  demo/test_qdrant_connection.py,sha256=hkbyl3zGsw_GdBBp5MkW_3SBKTHXbwH3Sr_pUE54_po,3866
  demo/ui/__init__.py,sha256=EyBCvnXYfPbdyxJzyp9TjQBeJJUgmOY1yRHkUeC6JFQ,412
  demo/ui/benchmark.py,sha256=HiGCN4HrqeOC7L6t2kuzIiyWdcVE_cP2JTxoewrmPSo,14218
  demo/ui/header.py,sha256=J2hXr_nNyg1H9rmrd-EGx3WUl7lYo-Ca30ptgzBCfBs,806
- demo/ui/playground.py,sha256=Z3OgCWOzzTld1I3eN1IcTadaSzsqDQf7MiHwTbxbvJA,13692
- demo/ui/sidebar.py,sha256=muVCnvoeMOm1rHx7UPt68yLXlG3OERdXvJ3QqIXAUoc,7839
- demo/ui/upload.py,sha256=BHJmbIQOAYdMF_svxlRSYIe163Y5UX5P_gilJ09YHSA,20372
- visual_rag/__init__.py,sha256=UkGFXjPmjbO6Iad8ty1uJOMQsVMpV_s63ihchHltLx8,2555
- visual_rag/config.py,sha256=pd48M3j3n8ZV1HhaabMmP_uoEJnqhBC-Bma9vuvc8V4,7368
- visual_rag/demo_runner.py,sha256=wi0Wz3gZ39l4aovMd6zURq_CKUSgma4kGjF6hpQHwGY,2793
+ demo/ui/playground.py,sha256=yRlWWzJgsc596vALn5f0PHhmhtJCMmfv61nYakW75GQ,13672
+ demo/ui/sidebar.py,sha256=k7CZ5V7SxwbPOL9nQTWUct5y9mPY-YnLIeBgha7aIEE,8379
+ demo/ui/upload.py,sha256=NVmd1-xRj_dfgq14x_eqcN6MjDAZ3hbPBD0YRUz8OOY,20508
+ visual_rag/__init__.py,sha256=O8I4wQdQcQxAs7_SIPxnAzUOmRv5I9ezau8sZx9yqXQ,4384
+ visual_rag/config.py,sha256=qqSQk2lM5MiRji-6xQNGS2gSiXA4NgyJnCbgGx7uGJQ,7395
+ visual_rag/demo_runner.py,sha256=ahIKYhHQSLOF_kWbj0u_29SLX4qTdyxb3Smt_cMMfzI,2763
  visual_rag/qdrant_admin.py,sha256=NNczko2S5-K3qATNUxgYn51hNWgWb6boheL7vlCQGpM,7055
  visual_rag/cli/__init__.py,sha256=WgBRXm0VACfLltvVlLcSs3FTM1uQ7Uuw3CVD4-zWZwc,46
  visual_rag/cli/main.py,sha256=QmpnQ0lbC6Q9lwxaSCDh6paEEzI78IPY1jwc3_9y7VI,21083
  visual_rag/embedding/__init__.py,sha256=7QIENmxwRnwnUzsYKRY3VQTyF3HJkRiL1D7Au9XHF0w,682
  visual_rag/embedding/pooling.py,sha256=x8uY4VHbxEnsJRM2JeOkzPHDiwOkbi5NK4XW21U1hAc,11401
  visual_rag/embedding/visual_embedder.py,sha256=he9JpVHmo_szOiXCwtJdrCseGmf2y5Gi0UEFjwazzVY,23198
- visual_rag/indexing/__init__.py,sha256=pMLuinCIERbwWechn176nMrtlmTp0ySfuj8gdkNvRks,679
+ visual_rag/indexing/__init__.py,sha256=rloBEBt3x8BQut1Tj1n8fuaQ3iXMS3pm64o8n-NlSAw,985
  visual_rag/indexing/cloudinary_uploader.py,sha256=e-G5du4D7z6mWWl2lahMidG-Wdc-baImFFILTojebpA,8826
  visual_rag/indexing/pdf_processor.py,sha256=V3RAKpwgIFicqUaXzaaljePxh_oP4UV5W0aiJyfv0BY,10247
  visual_rag/indexing/pipeline.py,sha256=1ScpVRlLCq2FWi3IPvlQcIfDCQQ2F64IlRd9ZZHiTaA,25037
- visual_rag/indexing/qdrant_indexer.py,sha256=uUOA-6Qkd_vEeP1LdgGyoh1FHu1ZNEyYKuNxJAqetBU,17121
+ visual_rag/indexing/qdrant_indexer.py,sha256=E9uDwOL3A1nFJhFhJcI8V3JR8FL7nDahEYiv886oG1s,19526
  visual_rag/preprocessing/__init__.py,sha256=rCzfBO0jaVKp6MpPRRused_4gasHfobAbG-139Y806E,121
  visual_rag/preprocessing/crop_empty.py,sha256=iHXITFkRlF40VPJ4k9d432RUAi_89BhAEvK4wOEn96Q,5211
  visual_rag/retrieval/__init__.py,sha256=J9pnbeB83Fqs9n4g3GcIp1VR9dnuyAlcsIDVsf0lSb8,601
- visual_rag/retrieval/multi_vector.py,sha256=m5PKjkj0TFeWNccKNmCqghTM5b9ARr43Lq3sRhOxnjw,7381
- visual_rag/retrieval/single_stage.py,sha256=TSndnh4Kz9aT_0kKhNyLEvokbDLkgq--lXuyldzP5sU,4105
+ visual_rag/retrieval/multi_vector.py,sha256=KyJg1VAxODr5rY80vXmsjSD9ueRpay4QGCJizPKiiBw,7008
+ visual_rag/retrieval/single_stage.py,sha256=Ba06V-KRSFSZm0xzbjFR3EBEWaQkDo7U_pWNx25W8H0,4425
  visual_rag/retrieval/three_stage.py,sha256=YC0CVEohxTT5zhilcQHI7nYAk08E5jC3zkQ3-rNdLMw,5951
- visual_rag/retrieval/two_stage.py,sha256=_RnEgIx_qY4yu2iIk0a3w47D7WiKHlmBivm5gLEpyI4,16779
+ visual_rag/retrieval/two_stage.py,sha256=eUbyvdf0-3f0qIiAdU6Wmlos5hGrGNyXjuvc2-Be41M,16775
  visual_rag/visualization/__init__.py,sha256=SITKNvBEseDp7F3K6UzLPA-6OQFqYfY5azS5nlDdihQ,447
  visual_rag/visualization/saliency.py,sha256=F3Plc18Sf3tzWcyncuaruTmENm1IfW5j9NFGEQR93cY,11248
- visual_rag_toolkit-0.1.2.dist-info/METADATA,sha256=LrZ-EUezUsmUJpnNofm5TTM5IJIUerznFDcBSucI7rc,12830
- visual_rag_toolkit-0.1.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
- visual_rag_toolkit-0.1.2.dist-info/entry_points.txt,sha256=6Tob1GPg_ILGELjYTPsAnNMZ1W0NS939nfI7xyW2DIY,102
- visual_rag_toolkit-0.1.2.dist-info/licenses/LICENSE,sha256=hEg_weKnHXJakQRR3sw2ygcZ101zCI00zMhBOPb3yfA,1069
- visual_rag_toolkit-0.1.2.dist-info/RECORD,,
+ visual_rag_toolkit-0.1.4.dist-info/METADATA,sha256=2nnhqsq3P_OG8GRk48sz569VB1I-aaKcOjKXYl-OvJo,13460
+ visual_rag_toolkit-0.1.4.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+ visual_rag_toolkit-0.1.4.dist-info/entry_points.txt,sha256=6Tob1GPg_ILGELjYTPsAnNMZ1W0NS939nfI7xyW2DIY,102
+ visual_rag_toolkit-0.1.4.dist-info/licenses/LICENSE,sha256=hEg_weKnHXJakQRR3sw2ygcZ101zCI00zMhBOPb3yfA,1069
+ visual_rag_toolkit-0.1.4.dist-info/RECORD,,
demo/example_metadata_mapping_sigir.json (deleted)

@@ -1,37 +0,0 @@
- {
-   "filenames": {
-     "sigir2025-llms": {
-       "year": 2025,
-       "source": "Conference Paper",
-       "district": null,
-       "doc_type": "paper",
-       "project": "sigir-demo",
-       "tags": ["llms", "retrieval"]
-     },
-     "sigir2025-ginger": {
-       "year": 2025,
-       "source": "Conference Paper",
-       "district": null,
-       "doc_type": "paper",
-       "project": "sigir-demo",
-       "tags": ["ginger", "case-study"]
-     },
-     "2505.15859v1": {
-       "year": 2025,
-       "source": "arXiv",
-       "district": null,
-       "doc_type": "preprint",
-       "project": "sigir-demo",
-       "tags": ["arxiv", "ranking"]
-     },
-     "2507.04942v2": {
-       "year": 2025,
-       "source": "arXiv",
-       "district": null,
-       "doc_type": "preprint",
-       "project": "sigir-demo",
-       "tags": ["arxiv", "rag"]
-     }
-   }
- }
-