vision-agents-plugins-qdrant 0.6.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,102 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .cursor/*
7
+ # Distribution / packaging
8
+ .Python
9
+ build/
10
+ dist/
11
+ downloads/
12
+ develop-eggs/
13
+ eggs/
14
+ .eggs/
15
+ lib64/
16
+ parts/
17
+ sdist/
18
+ var/
19
+ wheels/
20
+ share/python-wheels/
21
+ pip-wheel-metadata/
22
+ MANIFEST
23
+ *.egg-info/
24
+ *.egg
25
+
26
+ # Installer logs
27
+ pip-log.txt
28
+ pip-delete-this-directory.txt
29
+
30
+ # Unit test / coverage reports
31
+ htmlcov/
32
+ .tox/
33
+ .nox/
34
+ .coverage
35
+ .coverage.*
36
+ .cache
37
+ coverage.xml
38
+ nosetests.xml
39
+ *.cover
40
+ *.py,cover
41
+ .hypothesis/
42
+ .pytest_cache/
43
+
44
+ # Type checker / lint caches
45
+ .mypy_cache/
46
+ .dmypy.json
47
+ dmypy.json
48
+ .pytype/
49
+ .pyre/
50
+ .ruff_cache/
51
+
52
+ # Environments
53
+ .venv
54
+ env/
55
+ venv/
56
+ ENV/
57
+ env.bak/
58
+ venv.bak/
59
+ .env
60
+ .env.local
61
+ .env.*.local
62
+ .env.bak
63
+ pyvenv.cfg
64
+ .python-version
65
+
66
+ # Editors / IDEs
67
+ .vscode/
68
+ .idea/
69
+
70
+ # Jupyter Notebook
71
+ .ipynb_checkpoints/
72
+
73
+ # OS / Misc
74
+ .DS_Store
75
+ *.log
76
+
77
+ # Tooling & repo-specific
78
+ pyrightconfig.json
79
+ shell.nix
80
+ bin/*
81
+ lib/*
82
+ stream-py/
83
+
84
+ # Example lock files (regenerated by uv sync)
85
+ examples/*/uv.lock
86
+ plugins/*/example/uv.lock
87
+
88
+ # Artifacts / assets
89
+ *.pt
90
+ *.kef
91
+ *.onnx
92
+ profile.html
93
+
94
+ /opencode.json
95
+ .ralph-tui/
96
+ .claude/*
97
+ !.claude/skills/
98
+
99
+ .uv-cache/
100
+
101
+ # pytest json report
102
+ .report.json
@@ -0,0 +1,90 @@
1
+ Metadata-Version: 2.4
2
+ Name: vision-agents-plugins-qdrant
3
+ Version: 0.6.3
4
+ Summary: Qdrant RAG integration for Vision Agents with hybrid search
5
+ Project-URL: Documentation, https://visionagents.ai/
6
+ Project-URL: Website, https://visionagents.ai/
7
+ Project-URL: Source, https://github.com/GetStream/Vision-Agents
8
+ License-Expression: MIT
9
+ Keywords: AI,RAG,agents,hybrid-search,qdrant,vector-search,voice agents
10
+ Requires-Python: >=3.10
11
+ Requires-Dist: langchain-text-splitters>=1.1.1
12
+ Requires-Dist: qdrant-client[fastembed]<1.18.0,>=1.12.0
13
+ Requires-Dist: vision-agents
14
+ Description-Content-Type: text/markdown
15
+
16
+ # Qdrant RAG Plugin
17
+
18
+ Hybrid search RAG (Retrieval Augmented Generation) using Qdrant's built-in fastembed integration for dense and BM25 sparse embeddings.
19
+
20
+ ## Features
21
+
22
+ - **Hybrid Search**: Dense vector (semantic) + BM25 sparse (keyword) via native Qdrant RRF fusion
23
+ - **fastembed Native**: No external embedding dependencies — Qdrant client handles everything
24
+ - **Implements RAG Interface**: Compatible with Vision Agents RAG base class
25
+
26
+ ## Installation
27
+
28
+ ```bash
29
+ uv add "vision-agents[qdrant]"
30
+ # or directly
31
+ uv add vision-agents-plugins-qdrant
32
+ ```
33
+
34
+ ## Usage
35
+
36
+ ```python
37
+ from vision_agents.plugins import qdrant
38
+
39
+ # Initialize RAG (connects to local Qdrant by default)
40
+ rag = qdrant.QdrantRAG(collection="my-knowledge")
41
+ await rag.add_directory("./knowledge")
42
+
43
+ # Hybrid search (default)
44
+ results = await rag.search("How does the chat API work?")
45
+
46
+ # Vector-only search
47
+ results = await rag.search("How does the chat API work?", mode="vector")
48
+
49
+ # BM25 search
50
+ results = await rag.search("chat API pricing", mode="bm25")
51
+
52
+ # Or use convenience function
53
+ rag = await qdrant.create_rag(
54
+ collection="product-knowledge",
55
+ knowledge_dir="./knowledge"
56
+ )
57
+ ```
58
+
59
+ ## Configuration
60
+
61
+ | Parameter | Description | Default |
62
+ |----------------|-----------------------------------------------|--------------------------------------------|
63
+ | `collection` | Qdrant collection name | Required |
64
+ | `url` | Qdrant server URL | `http://localhost:6333` |
65
+ | `api_key` | Qdrant API key (for Qdrant Cloud) | `QDRANT_API_KEY` env var |
66
+ | `dense_model` | fastembed dense model for semantic search | `sentence-transformers/all-MiniLM-L6-v2` |
67
+ | `sparse_model` | fastembed sparse model for BM25 search | `Qdrant/bm25` |
68
+ | `chunk_size`   | Size of text chunks (in characters) for splitting documents | `10000`                       |
69
+ | `chunk_overlap`| Overlap between chunks (in characters) for context continuity | `200`                       |
70
+ | `cloud_inference` | Use Qdrant Cloud server-side inference instead of local fastembed | `False` |
71
+
72
+ ## Environment Variables
73
+
74
+ - `QDRANT_API_KEY`: Qdrant API key (for Qdrant Cloud; not needed for local)
75
+
76
+ ## Running Qdrant locally
77
+
78
+ ```bash
79
+ docker run -p 6333:6333 qdrant/qdrant
80
+ ```
81
+
82
+ ## Dependencies
83
+
84
+ - `qdrant-client[fastembed]`: Qdrant async client with built-in fastembed support
85
+ - `langchain-text-splitters`: Text chunking utilities
86
+
87
+ ## References
88
+
89
+ - [Qdrant Hybrid Queries](https://qdrant.tech/documentation/concepts/hybrid-queries/)
90
+ - [fastembed Models](https://qdrant.github.io/fastembed/examples/Supported_Models/)
@@ -0,0 +1,75 @@
1
+ # Qdrant RAG Plugin
2
+
3
+ Hybrid search RAG (Retrieval Augmented Generation) using Qdrant's built-in fastembed integration for dense and BM25 sparse embeddings.
4
+
5
+ ## Features
6
+
7
+ - **Hybrid Search**: Dense vector (semantic) + BM25 sparse (keyword) via native Qdrant RRF fusion
8
+ - **fastembed Native**: No external embedding dependencies — Qdrant client handles everything
9
+ - **Implements RAG Interface**: Compatible with Vision Agents RAG base class
10
+
11
+ ## Installation
12
+
13
+ ```bash
14
+ uv add "vision-agents[qdrant]"
15
+ # or directly
16
+ uv add vision-agents-plugins-qdrant
17
+ ```
18
+
19
+ ## Usage
20
+
21
+ ```python
22
+ from vision_agents.plugins import qdrant
23
+
24
+ # Initialize RAG (connects to local Qdrant by default)
25
+ rag = qdrant.QdrantRAG(collection="my-knowledge")
26
+ await rag.add_directory("./knowledge")
27
+
28
+ # Hybrid search (default)
29
+ results = await rag.search("How does the chat API work?")
30
+
31
+ # Vector-only search
32
+ results = await rag.search("How does the chat API work?", mode="vector")
33
+
34
+ # BM25 search
35
+ results = await rag.search("chat API pricing", mode="bm25")
36
+
37
+ # Or use convenience function
38
+ rag = await qdrant.create_rag(
39
+ collection="product-knowledge",
40
+ knowledge_dir="./knowledge"
41
+ )
42
+ ```
43
+
44
+ ## Configuration
45
+
46
+ | Parameter | Description | Default |
47
+ |----------------|-----------------------------------------------|--------------------------------------------|
48
+ | `collection` | Qdrant collection name | Required |
49
+ | `url` | Qdrant server URL | `http://localhost:6333` |
50
+ | `api_key` | Qdrant API key (for Qdrant Cloud) | `QDRANT_API_KEY` env var |
51
+ | `dense_model` | fastembed dense model for semantic search | `sentence-transformers/all-MiniLM-L6-v2` |
52
+ | `sparse_model` | fastembed sparse model for BM25 search | `Qdrant/bm25` |
53
+ | `chunk_size`   | Size of text chunks (in characters) for splitting documents | `10000`                       |
54
+ | `chunk_overlap`| Overlap between chunks (in characters) for context continuity | `200`                       |
55
+ | `cloud_inference` | Use Qdrant Cloud server-side inference instead of local fastembed | `False` |
56
+
57
+ ## Environment Variables
58
+
59
+ - `QDRANT_API_KEY`: Qdrant API key (for Qdrant Cloud; not needed for local)
60
+
61
+ ## Running Qdrant locally
62
+
63
+ ```bash
64
+ docker run -p 6333:6333 qdrant/qdrant
65
+ ```
66
+
67
+ ## Dependencies
68
+
69
+ - `qdrant-client[fastembed]`: Qdrant async client with built-in fastembed support
70
+ - `langchain-text-splitters`: Text chunking utilities
71
+
72
+ ## References
73
+
74
+ - [Qdrant Hybrid Queries](https://qdrant.tech/documentation/concepts/hybrid-queries/)
75
+ - [fastembed Models](https://qdrant.github.io/fastembed/examples/Supported_Models/)
@@ -0,0 +1,40 @@
1
+ [build-system]
2
+ requires = ["hatchling", "hatch-vcs"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "vision-agents-plugins-qdrant"
7
+ dynamic = ["version"]
8
+ description = "Qdrant RAG integration for Vision Agents with hybrid search"
9
+ readme = "README.md"
10
+ keywords = ["qdrant", "RAG", "vector-search", "hybrid-search", "AI", "voice agents", "agents"]
11
+ requires-python = ">=3.10"
12
+ license = "MIT"
13
+ dependencies = [
14
+ "vision-agents",
15
+ "qdrant-client[fastembed]>=1.12.0,<1.18.0",
16
+ "langchain-text-splitters>=1.1.1",
17
+ ]
18
+
19
+ [project.urls]
20
+ Documentation = "https://visionagents.ai/"
21
+ Website = "https://visionagents.ai/"
22
+ Source = "https://github.com/GetStream/Vision-Agents"
23
+
24
+ [tool.hatch.version]
25
+ source = "vcs"
26
+ raw-options = { root = "..", search_parent_directories = true, fallback_version = "0.0.0" }
27
+
28
+ [tool.hatch.build.targets.wheel]
29
+ packages = ["vision_agents"]
30
+
31
+ [tool.hatch.build.targets.sdist]
32
+ include = ["/vision_agents"]
33
+
34
+
35
+ [dependency-groups]
36
+ dev = [
37
+ "pytest>=8.4.1",
38
+ "pytest-asyncio>=1.0.0",
39
+ "testcontainers[redis,qdrant]>=4.0.0",
40
+ ]
@@ -0,0 +1,3 @@
1
+ from .qdrant_rag import QdrantRAG, create_rag
2
+
3
+ __all__ = ["QdrantRAG", "create_rag"]
@@ -0,0 +1,233 @@
1
+ """
2
+ Qdrant Hybrid Search RAG implementation.
3
+
4
+ Uses Qdrant's built-in fastembed integration for dense and BM25 sparse embeddings.
5
+ Hybrid search uses Qdrant's native Reciprocal Rank Fusion (RRF).
6
+ See: https://qdrant.tech/documentation/concepts/hybrid-queries/
7
+
8
+ Usage:
9
+ from vision_agents.plugins import qdrant
10
+
11
+ # Initialize with a Qdrant collection
12
+ rag = qdrant.QdrantRAG(collection="my-knowledge")
13
+ await rag.add_directory("./knowledge")
14
+
15
+ # Hybrid search (vector + BM25)
16
+ results = await rag.search("How does the chat API work?")
17
+
18
+ # Vector-only search
19
+ results = await rag.search("How does the chat API work?", mode="vector")
20
+
21
+ # BM25-only search
22
+ results = await rag.search("chat API pricing", mode="bm25")
23
+
24
+ Environment variables:
25
+ QDRANT_API_KEY: Qdrant API key (Optional)
26
+ """
27
+
28
+ import logging
29
+ import os
30
+ import uuid
31
+ from pathlib import Path
32
+ from typing import Literal
33
+
34
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
35
+ from qdrant_client import AsyncQdrantClient
36
+ from qdrant_client import models
37
+
38
+ from vision_agents.core.rag import RAG, Document
39
+
40
+ logger = logging.getLogger(__name__)
41
+
42
# Names of the two named vectors stored on every point. They are used as keys
# when the collection is created and when points are upserted, and as the
# `using=` selector when querying.
+ _DENSE = "dense"
43
+ _SPARSE = "sparse"
44
# Default embedding model identifiers; both can be overridden per instance
# through the `dense_model` / `sparse_model` constructor arguments.
+ _DEFAULT_DENSE_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
45
+ _DEFAULT_SPARSE_MODEL = "Qdrant/bm25"
46
+
47
+
48
class QdrantRAG(RAG):
    """RAG backend that stores document chunks in a Qdrant collection.

    Every chunk is written as one point carrying two named vectors (a dense
    one and a sparse one). Both are passed to the client as
    ``models.Document`` objects, so the text is embedded by the Qdrant client
    integration rather than by this class. Searches can use either vector on
    its own or fuse both with Reciprocal Rank Fusion (RRF).
    """

    def __init__(
        self,
        collection: str,
        url: str = "http://localhost:6333",
        api_key: str | None = None,
        dense_model: str = _DEFAULT_DENSE_MODEL,
        sparse_model: str = _DEFAULT_SPARSE_MODEL,
        chunk_size: int = 10000,
        chunk_overlap: int = 200,
        cloud_inference: bool = False,
    ) -> None:
        """Create the client and text splitter; no network call is made here.

        Args:
            collection: Name of the Qdrant collection to read and write.
            url: Qdrant server URL.
            api_key: Qdrant API key. When omitted (or empty), the
                ``QDRANT_API_KEY`` environment variable is used if set.
            dense_model: Model identifier used for the dense vector.
            sparse_model: Model identifier used for the sparse vector.
            chunk_size: Chunk size handed to the text splitter; measured
                with ``len``, i.e. in characters.
            chunk_overlap: Overlap between chunks, also in characters.
            cloud_inference: Forwarded unchanged to ``AsyncQdrantClient``.
        """
        # NOTE(review): the RAG base class initialiser is not invoked here;
        # confirm the base class needs no initialisation of its own.
        self._collection = collection
        self._client = AsyncQdrantClient(
            url=url,
            api_key=api_key or os.environ.get("QDRANT_API_KEY"),
            cloud_inference=cloud_inference,
        )
        self._dense_model = dense_model
        self._sparse_model = sparse_model
        self._splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            length_function=len,
        )
        self._indexed_files: list[str] = []

    @property
    def indexed_files(self) -> list[str]:
        """Sources indexed through this instance since creation or ``clear()``.

        The internal list itself is returned, not a copy.
        """
        return self._indexed_files

    async def _ensure_collection(self) -> None:
        """Create the collection with dense and sparse vectors if missing."""
        if await self._client.collection_exists(self._collection):
            return

        await self._client.create_collection(
            collection_name=self._collection,
            vectors_config={
                _DENSE: models.VectorParams(
                    # The dense vector size is derived from the chosen model.
                    size=self._client.get_embedding_size(self._dense_model),
                    distance=models.Distance.COSINE,
                ),
            },
            sparse_vectors_config={
                _SPARSE: models.SparseVectorParams(
                    index=models.SparseIndexParams(on_disk=False),
                ),
            },
        )

    async def add_documents(self, documents: list[Document]) -> int:
        """Split documents into chunks and upsert them into the collection.

        Documents that yield no chunks are skipped with a warning. The
        collection is created on first use.

        Args:
            documents: Documents to index; each one's ``text`` is split and
                its ``source`` is stored alongside every chunk.

        Returns:
            The number of chunks written (0 if nothing was indexable).
        """
        if not documents:
            return 0

        all_chunks: list[str] = []
        chunk_sources: list[tuple[str, int]] = []
        indexed_sources: list[str] = []

        for doc in documents:
            chunks = self._splitter.split_text(doc.text)
            if not chunks:
                logger.warning("No chunks generated from document: %s", doc.source)
                continue
            all_chunks.extend(chunks)
            chunk_sources.extend((doc.source, i) for i in range(len(chunks)))
            indexed_sources.append(doc.source)

        if not all_chunks:
            return 0

        await self._ensure_collection()
        await self._client.upsert(
            collection_name=self._collection,
            points=[
                models.PointStruct(
                    # Deterministic ID: the same (source, chunk index) pair
                    # always maps to the same point ID.
                    id=str(uuid.uuid5(uuid.NAMESPACE_DNS, f"{source}_{idx}")),
                    vector={
                        _DENSE: models.Document(text=chunk, model=self._dense_model),
                        _SPARSE: models.Document(text=chunk, model=self._sparse_model),
                    },
                    payload={"text": chunk, "source": source, "chunk_index": idx},
                )
                for chunk, (source, idx) in zip(all_chunks, chunk_sources)
            ],
        )

        self._indexed_files.extend(indexed_sources)
        # Report only the documents that actually produced chunks; skipped
        # (empty) documents were already reported by the warning above.
        logger.info(
            "Indexed %d chunks from %d documents",
            len(all_chunks),
            len(indexed_sources),
        )
        return len(all_chunks)

    async def _search_single(
        self, query: str, using: str, limit: int
    ) -> list[models.ScoredPoint]:
        """Query a single named vector (``_DENSE`` or ``_SPARSE``).

        Any ``using`` value other than ``_DENSE`` selects the sparse model.
        """
        model = self._dense_model if using == _DENSE else self._sparse_model
        response = await self._client.query_points(
            collection_name=self._collection,
            query=models.Document(text=query, model=model),
            using=using,
            limit=limit,
            with_payload=["text", "source"],
        )
        return response.points

    async def search(
        self,
        query: str,
        top_k: int = 3,
        mode: Literal["hybrid", "vector", "bm25"] = "hybrid",
    ) -> str:
        """Search the collection and return the hits as formatted text.

        Args:
            query: Natural-language query text.
            top_k: Maximum number of results to return.
            mode: ``"vector"`` queries the dense vector only, ``"bm25"`` the
                sparse vector only; any other value runs the hybrid query
                (dense + sparse prefetch fused with RRF).

        Returns:
            Results formatted as ``"[n] From <source>:\\n<text>"`` blocks
            separated by blank lines, or a fixed "no relevant information"
            message when the collection does not exist or nothing matched.
        """
        no_results = "No relevant information found in the knowledge base."

        if not await self._client.collection_exists(self._collection):
            return no_results

        if mode == "vector":
            points = await self._search_single(query, _DENSE, top_k)
        elif mode == "bm25":
            points = await self._search_single(query, _SPARSE, top_k)
        else:
            # Over-fetch candidates for each branch. With only top_k
            # candidates per branch, a chunk ranked just outside top_k in
            # BOTH lists is never seen by the fusion step, although its
            # combined RRF score would beat a chunk that ranks first in only
            # one list. The final `limit` still caps the output at top_k.
            candidate_limit = top_k * 4
            results = await self._client.query_points(
                collection_name=self._collection,
                prefetch=[
                    models.Prefetch(
                        query=models.Document(text=query, model=self._dense_model),
                        using=_DENSE,
                        limit=candidate_limit,
                    ),
                    models.Prefetch(
                        query=models.Document(text=query, model=self._sparse_model),
                        using=_SPARSE,
                        limit=candidate_limit,
                    ),
                ],
                query=models.FusionQuery(fusion=models.Fusion.RRF),
                limit=top_k,
                with_payload=["text", "source"],
            )
            points = results.points

        if not points:
            return no_results

        formatted_results = []
        for i, p in enumerate(points, 1):
            # A point may come back without a payload; fall back to defaults.
            payload = p.payload or {}
            formatted_results.append(
                f"[{i}] From {payload.get('source', 'unknown')}:\n{payload.get('text', '')}"
            )
        return "\n\n".join(formatted_results)

    async def clear(self) -> None:
        """Delete the collection (if it exists) and reset ``indexed_files``."""
        if await self._client.collection_exists(self._collection):
            await self._client.delete_collection(self._collection)
        self._indexed_files = []
        logger.info("Cleared collection: %s", self._collection)

    async def close(self) -> None:
        """Close the underlying Qdrant client."""
        await self._client.close()
204
+
205
+
206
async def create_rag(
    collection: str,
    knowledge_dir: str | Path,
    extensions: list[str] | None = None,
    url: str = "http://localhost:6333",
    api_key: str | None = None,
    dense_model: str = _DEFAULT_DENSE_MODEL,
    sparse_model: str = _DEFAULT_SPARSE_MODEL,
    chunk_size: int = 10000,
    chunk_overlap: int = 200,
    cloud_inference: bool = False,
) -> QdrantRAG:
    """Build a ``QdrantRAG`` and index a directory into it in one call.

    Args:
        collection: Name of the Qdrant collection to use.
        knowledge_dir: Directory passed to ``QdrantRAG.add_directory``.
        extensions: Passed through to ``add_directory`` as ``extensions``
            (presumably a file-extension filter; confirm against the RAG
            base class, which defines ``add_directory``).
        url: Qdrant server URL.
        api_key: Qdrant API key; see ``QdrantRAG`` for the env-var fallback.
        dense_model: Model identifier for the dense vector.
        sparse_model: Model identifier for the sparse vector.
        chunk_size: Chunk size for the text splitter.
        chunk_overlap: Overlap between consecutive chunks.
        cloud_inference: Forwarded to ``QdrantRAG``.

    Returns:
        The ready-to-use ``QdrantRAG``; the caller owns it and is
        responsible for calling ``close()``.

    Raises:
        Whatever ``add_directory`` raises; the client is closed first.
    """
    # Local import keeps this block self-contained; asyncio is standard library.
    import asyncio

    rag = QdrantRAG(
        collection=collection,
        url=url,
        api_key=api_key,
        dense_model=dense_model,
        sparse_model=sparse_model,
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        cloud_inference=cloud_inference,
    )
    try:
        await rag.add_directory(knowledge_dir, extensions=extensions)
    except (Exception, asyncio.CancelledError):
        # CancelledError derives from BaseException, so `except Exception`
        # alone would leak the open client when the task is cancelled while
        # indexing. The caller never receives `rag` on failure, so this is
        # the only place that can close it.
        await rag.close()
        raise
    return rag