vision-agents-plugins-qdrant 0.6.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agents_plugins_qdrant-0.6.3/.gitignore +102 -0
- vision_agents_plugins_qdrant-0.6.3/PKG-INFO +90 -0
- vision_agents_plugins_qdrant-0.6.3/README.md +75 -0
- vision_agents_plugins_qdrant-0.6.3/pyproject.toml +40 -0
- vision_agents_plugins_qdrant-0.6.3/vision_agents/plugins/qdrant/__init__.py +3 -0
- vision_agents_plugins_qdrant-0.6.3/vision_agents/plugins/qdrant/qdrant_rag.py +233 -0
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.so
|
|
6
|
+
.cursor/*
|
|
7
|
+
# Distribution / packaging
|
|
8
|
+
.Python
|
|
9
|
+
build/
|
|
10
|
+
dist/
|
|
11
|
+
downloads/
|
|
12
|
+
develop-eggs/
|
|
13
|
+
eggs/
|
|
14
|
+
.eggs/
|
|
15
|
+
lib64/
|
|
16
|
+
parts/
|
|
17
|
+
sdist/
|
|
18
|
+
var/
|
|
19
|
+
wheels/
|
|
20
|
+
share/python-wheels/
|
|
21
|
+
pip-wheel-metadata/
|
|
22
|
+
MANIFEST
|
|
23
|
+
*.egg-info/
|
|
24
|
+
*.egg
|
|
25
|
+
|
|
26
|
+
# Installer logs
|
|
27
|
+
pip-log.txt
|
|
28
|
+
pip-delete-this-directory.txt
|
|
29
|
+
|
|
30
|
+
# Unit test / coverage reports
|
|
31
|
+
htmlcov/
|
|
32
|
+
.tox/
|
|
33
|
+
.nox/
|
|
34
|
+
.coverage
|
|
35
|
+
.coverage.*
|
|
36
|
+
.cache
|
|
37
|
+
coverage.xml
|
|
38
|
+
nosetests.xml
|
|
39
|
+
*.cover
|
|
40
|
+
*.py,cover
|
|
41
|
+
.hypothesis/
|
|
42
|
+
.pytest_cache/
|
|
43
|
+
|
|
44
|
+
# Type checker / lint caches
|
|
45
|
+
.mypy_cache/
|
|
46
|
+
.dmypy.json
|
|
47
|
+
dmypy.json
|
|
48
|
+
.pytype/
|
|
49
|
+
.pyre/
|
|
50
|
+
.ruff_cache/
|
|
51
|
+
|
|
52
|
+
# Environments
|
|
53
|
+
.venv
|
|
54
|
+
env/
|
|
55
|
+
venv/
|
|
56
|
+
ENV/
|
|
57
|
+
env.bak/
|
|
58
|
+
venv.bak/
|
|
59
|
+
.env
|
|
60
|
+
.env.local
|
|
61
|
+
.env.*.local
|
|
62
|
+
.env.bak
|
|
63
|
+
pyvenv.cfg
|
|
64
|
+
.python-version
|
|
65
|
+
|
|
66
|
+
# Editors / IDEs
|
|
67
|
+
.vscode/
|
|
68
|
+
.idea/
|
|
69
|
+
|
|
70
|
+
# Jupyter Notebook
|
|
71
|
+
.ipynb_checkpoints/
|
|
72
|
+
|
|
73
|
+
# OS / Misc
|
|
74
|
+
.DS_Store
|
|
75
|
+
*.log
|
|
76
|
+
|
|
77
|
+
# Tooling & repo-specific
|
|
78
|
+
pyrightconfig.json
|
|
79
|
+
shell.nix
|
|
80
|
+
bin/*
|
|
81
|
+
lib/*
|
|
82
|
+
stream-py/
|
|
83
|
+
|
|
84
|
+
# Example lock files (regenerated by uv sync)
|
|
85
|
+
examples/*/uv.lock
|
|
86
|
+
plugins/*/example/uv.lock
|
|
87
|
+
|
|
88
|
+
# Artifacts / assets
|
|
89
|
+
*.pt
|
|
90
|
+
*.kef
|
|
91
|
+
*.onnx
|
|
92
|
+
profile.html
|
|
93
|
+
|
|
94
|
+
/opencode.json
|
|
95
|
+
.ralph-tui/
|
|
96
|
+
.claude/*
|
|
97
|
+
!.claude/skills/
|
|
98
|
+
|
|
99
|
+
.uv-cache/
|
|
100
|
+
|
|
101
|
+
# pytest json report
|
|
102
|
+
.report.json
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: vision-agents-plugins-qdrant
|
|
3
|
+
Version: 0.6.3
|
|
4
|
+
Summary: Qdrant RAG integration for Vision Agents with hybrid search
|
|
5
|
+
Project-URL: Documentation, https://visionagents.ai/
|
|
6
|
+
Project-URL: Website, https://visionagents.ai/
|
|
7
|
+
Project-URL: Source, https://github.com/GetStream/Vision-Agents
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
Keywords: AI,RAG,agents,hybrid-search,qdrant,vector-search,voice agents
|
|
10
|
+
Requires-Python: >=3.10
|
|
11
|
+
Requires-Dist: langchain-text-splitters>=1.1.1
|
|
12
|
+
Requires-Dist: qdrant-client[fastembed]<1.18.0,>=1.12.0
|
|
13
|
+
Requires-Dist: vision-agents
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
|
|
16
|
+
# Qdrant RAG Plugin
|
|
17
|
+
|
|
18
|
+
Hybrid search RAG (Retrieval Augmented Generation) using Qdrant's built-in fastembed integration for dense and BM25 sparse embeddings.
|
|
19
|
+
|
|
20
|
+
## Features
|
|
21
|
+
|
|
22
|
+
- **Hybrid Search**: Dense vector (semantic) + BM25 sparse (keyword) via native Qdrant RRF fusion
|
|
23
|
+
- **fastembed Native**: No external embedding dependencies — Qdrant client handles everything
|
|
24
|
+
- **Implements RAG Interface**: Compatible with Vision Agents RAG base class
|
|
25
|
+
|
|
26
|
+
## Installation
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
uv add "vision-agents[qdrant]"
|
|
30
|
+
# or directly
|
|
31
|
+
uv add vision-agents-plugins-qdrant
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Usage
|
|
35
|
+
|
|
36
|
+
```python
|
|
37
|
+
from vision_agents.plugins import qdrant
|
|
38
|
+
|
|
39
|
+
# Initialize RAG (connects to local Qdrant by default)
|
|
40
|
+
rag = qdrant.QdrantRAG(collection="my-knowledge")
|
|
41
|
+
await rag.add_directory("./knowledge")
|
|
42
|
+
|
|
43
|
+
# Hybrid search (default)
|
|
44
|
+
results = await rag.search("How does the chat API work?")
|
|
45
|
+
|
|
46
|
+
# Vector-only search
|
|
47
|
+
results = await rag.search("How does the chat API work?", mode="vector")
|
|
48
|
+
|
|
49
|
+
# BM25 search
|
|
50
|
+
results = await rag.search("chat API pricing", mode="bm25")
|
|
51
|
+
|
|
52
|
+
# Or use convenience function
|
|
53
|
+
rag = await qdrant.create_rag(
|
|
54
|
+
collection="product-knowledge",
|
|
55
|
+
knowledge_dir="./knowledge"
|
|
56
|
+
)
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## Configuration
|
|
60
|
+
|
|
61
|
+
| Parameter | Description | Default |
|
|
62
|
+
|----------------|-----------------------------------------------|--------------------------------------------|
|
|
63
|
+
| `collection` | Qdrant collection name | Required |
|
|
64
|
+
| `url` | Qdrant server URL | `http://localhost:6333` |
|
|
65
|
+
| `api_key` | Qdrant API key (for Qdrant Cloud) | `QDRANT_API_KEY` env var |
|
|
66
|
+
| `dense_model` | fastembed dense model for semantic search | `sentence-transformers/all-MiniLM-L6-v2` |
|
|
67
|
+
| `sparse_model` | fastembed sparse model for BM25 search | `Qdrant/bm25` |
|
|
68
|
+
| `chunk_size` | Maximum size of each text chunk, in characters | `10000` |
|
|
69
|
+
| `chunk_overlap`| Overlap between chunks for context continuity | `200` |
|
|
70
|
+
| `cloud_inference` | Use Qdrant Cloud server-side inference instead of local fastembed | `False` |
|
|
71
|
+
|
|
72
|
+
## Environment Variables
|
|
73
|
+
|
|
74
|
+
- `QDRANT_API_KEY`: Qdrant API key (for Qdrant Cloud; not needed for local)
|
|
75
|
+
|
|
76
|
+
## Running Qdrant locally
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
docker run -p 6333:6333 qdrant/qdrant
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
## Dependencies
|
|
83
|
+
|
|
84
|
+
- `qdrant-client[fastembed]`: Qdrant async client with built-in fastembed support
|
|
85
|
+
- `langchain-text-splitters`: Text chunking utilities
|
|
86
|
+
|
|
87
|
+
## References
|
|
88
|
+
|
|
89
|
+
- [Qdrant Hybrid Queries](https://qdrant.tech/documentation/concepts/hybrid-queries/)
|
|
90
|
+
- [fastembed Models](https://qdrant.github.io/fastembed/examples/Supported_Models/)
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# Qdrant RAG Plugin
|
|
2
|
+
|
|
3
|
+
Hybrid search RAG (Retrieval Augmented Generation) using Qdrant's built-in fastembed integration for dense and BM25 sparse embeddings.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- **Hybrid Search**: Dense vector (semantic) + BM25 sparse (keyword) via native Qdrant RRF fusion
|
|
8
|
+
- **fastembed Native**: No external embedding dependencies — Qdrant client handles everything
|
|
9
|
+
- **Implements RAG Interface**: Compatible with Vision Agents RAG base class
|
|
10
|
+
|
|
11
|
+
## Installation
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
uv add "vision-agents[qdrant]"
|
|
15
|
+
# or directly
|
|
16
|
+
uv add vision-agents-plugins-qdrant
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## Usage
|
|
20
|
+
|
|
21
|
+
```python
|
|
22
|
+
from vision_agents.plugins import qdrant
|
|
23
|
+
|
|
24
|
+
# Initialize RAG (connects to local Qdrant by default)
|
|
25
|
+
rag = qdrant.QdrantRAG(collection="my-knowledge")
|
|
26
|
+
await rag.add_directory("./knowledge")
|
|
27
|
+
|
|
28
|
+
# Hybrid search (default)
|
|
29
|
+
results = await rag.search("How does the chat API work?")
|
|
30
|
+
|
|
31
|
+
# Vector-only search
|
|
32
|
+
results = await rag.search("How does the chat API work?", mode="vector")
|
|
33
|
+
|
|
34
|
+
# BM25 search
|
|
35
|
+
results = await rag.search("chat API pricing", mode="bm25")
|
|
36
|
+
|
|
37
|
+
# Or use convenience function
|
|
38
|
+
rag = await qdrant.create_rag(
|
|
39
|
+
collection="product-knowledge",
|
|
40
|
+
knowledge_dir="./knowledge"
|
|
41
|
+
)
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Configuration
|
|
45
|
+
|
|
46
|
+
| Parameter | Description | Default |
|
|
47
|
+
|----------------|-----------------------------------------------|--------------------------------------------|
|
|
48
|
+
| `collection` | Qdrant collection name | Required |
|
|
49
|
+
| `url` | Qdrant server URL | `http://localhost:6333` |
|
|
50
|
+
| `api_key` | Qdrant API key (for Qdrant Cloud) | `QDRANT_API_KEY` env var |
|
|
51
|
+
| `dense_model` | fastembed dense model for semantic search | `sentence-transformers/all-MiniLM-L6-v2` |
|
|
52
|
+
| `sparse_model` | fastembed sparse model for BM25 search | `Qdrant/bm25` |
|
|
53
|
+
| `chunk_size` | Maximum size of each text chunk, in characters | `10000` |
|
|
54
|
+
| `chunk_overlap`| Overlap between chunks for context continuity | `200` |
|
|
55
|
+
| `cloud_inference` | Use Qdrant Cloud server-side inference instead of local fastembed | `False` |
|
|
56
|
+
|
|
57
|
+
## Environment Variables
|
|
58
|
+
|
|
59
|
+
- `QDRANT_API_KEY`: Qdrant API key (for Qdrant Cloud; not needed for local)
|
|
60
|
+
|
|
61
|
+
## Running Qdrant locally
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
docker run -p 6333:6333 qdrant/qdrant
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
## Dependencies
|
|
68
|
+
|
|
69
|
+
- `qdrant-client[fastembed]`: Qdrant async client with built-in fastembed support
|
|
70
|
+
- `langchain-text-splitters`: Text chunking utilities
|
|
71
|
+
|
|
72
|
+
## References
|
|
73
|
+
|
|
74
|
+
- [Qdrant Hybrid Queries](https://qdrant.tech/documentation/concepts/hybrid-queries/)
|
|
75
|
+
- [fastembed Models](https://qdrant.github.io/fastembed/examples/Supported_Models/)
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling", "hatch-vcs"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "vision-agents-plugins-qdrant"
|
|
7
|
+
dynamic = ["version"]
|
|
8
|
+
description = "Qdrant RAG integration for Vision Agents with hybrid search"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
keywords = ["qdrant", "RAG", "vector-search", "hybrid-search", "AI", "voice agents", "agents"]
|
|
11
|
+
requires-python = ">=3.10"
|
|
12
|
+
license = "MIT"
|
|
13
|
+
dependencies = [
|
|
14
|
+
"vision-agents",
|
|
15
|
+
"qdrant-client[fastembed]>=1.12.0,<1.18.0",
|
|
16
|
+
"langchain-text-splitters>=1.1.1",
|
|
17
|
+
]
|
|
18
|
+
|
|
19
|
+
[project.urls]
|
|
20
|
+
Documentation = "https://visionagents.ai/"
|
|
21
|
+
Website = "https://visionagents.ai/"
|
|
22
|
+
Source = "https://github.com/GetStream/Vision-Agents"
|
|
23
|
+
|
|
24
|
+
[tool.hatch.version]
|
|
25
|
+
source = "vcs"
|
|
26
|
+
raw-options = { root = "..", search_parent_directories = true, fallback_version = "0.0.0" }
|
|
27
|
+
|
|
28
|
+
[tool.hatch.build.targets.wheel]
|
|
29
|
+
packages = ["vision_agents"]
|
|
30
|
+
|
|
31
|
+
[tool.hatch.build.targets.sdist]
|
|
32
|
+
include = ["/vision_agents"]
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
[dependency-groups]
|
|
36
|
+
dev = [
|
|
37
|
+
"pytest>=8.4.1",
|
|
38
|
+
"pytest-asyncio>=1.0.0",
|
|
39
|
+
"testcontainers[redis,qdrant]>=4.0.0",
|
|
40
|
+
]
|
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Qdrant Hybrid Search RAG implementation.
|
|
3
|
+
|
|
4
|
+
Uses Qdrant's built-in fastembed integration for dense and BM25 sparse embeddings.
|
|
5
|
+
Hybrid search uses Qdrant's native Reciprocal Rank Fusion (RRF).
|
|
6
|
+
See: https://qdrant.tech/documentation/concepts/hybrid-queries/
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
from vision_agents.plugins import qdrant
|
|
10
|
+
|
|
11
|
+
# Initialize with a Qdrant collection
|
|
12
|
+
rag = qdrant.QdrantRAG(collection="my-knowledge")
|
|
13
|
+
await rag.add_directory("./knowledge")
|
|
14
|
+
|
|
15
|
+
# Hybrid search (vector + BM25)
|
|
16
|
+
results = await rag.search("How does the chat API work?")
|
|
17
|
+
|
|
18
|
+
# Vector-only search
|
|
19
|
+
results = await rag.search("How does the chat API work?", mode="vector")
|
|
20
|
+
|
|
21
|
+
# BM25-only search
|
|
22
|
+
results = await rag.search("chat API pricing", mode="bm25")
|
|
23
|
+
|
|
24
|
+
Environment variables:
|
|
25
|
+
QDRANT_API_KEY: Qdrant API key (optional; used as a fallback when no api_key argument is passed)
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
import logging
|
|
29
|
+
import os
|
|
30
|
+
import uuid
|
|
31
|
+
from pathlib import Path
|
|
32
|
+
from typing import Literal
|
|
33
|
+
|
|
34
|
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
|
35
|
+
from qdrant_client import AsyncQdrantClient
|
|
36
|
+
from qdrant_client import models
|
|
37
|
+
|
|
38
|
+
from vision_agents.core.rag import RAG, Document
|
|
39
|
+
|
|
40
|
+
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
|
41
|
+
|
|
42
|
+
# Name of the dense named vector; used as the key in ``vectors_config`` and as
# the ``using=`` selector for vector queries.
_DENSE = "dense"
|
|
43
|
+
# Name of the sparse named vector; used as the key in ``sparse_vectors_config``
# and as the ``using=`` selector for BM25 queries.
_SPARSE = "sparse"
|
|
44
|
+
# Default value of the ``dense_model`` parameter.
_DEFAULT_DENSE_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
|
|
45
|
+
# Default value of the ``sparse_model`` parameter.
_DEFAULT_SPARSE_MODEL = "Qdrant/bm25"
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class QdrantRAG(RAG):
    """RAG backend that stores and searches text chunks in a Qdrant collection.

    Each chunk is stored as one point carrying two named vectors: a dense
    vector (``_DENSE``) and a sparse vector (``_SPARSE``). Embedding is
    delegated to the Qdrant client by passing ``models.Document`` objects, so
    this class never computes vectors itself.

    Search modes:
        * ``"hybrid"`` (default): dense and sparse prefetches fused with
          Reciprocal Rank Fusion (RRF).
        * ``"vector"``: dense vector only.
        * ``"bm25"``: sparse vector only.
    """

    # Sparse models that emit term weights and rely on Qdrant applying the
    # inverse-document-frequency factor server-side. Collections created for
    # these models must enable the IDF modifier on the sparse vector, otherwise
    # BM25 / hybrid ranking degrades to plain term-frequency matching.
    # Compared case-insensitively.
    _IDF_SPARSE_MODELS = frozenset(
        {
            "qdrant/bm25",
            "qdrant/bm42-all-minilm-l6-v2-attentions",
        }
    )

    # Message returned by ``search`` when there is nothing to report.
    _NO_RESULTS = "No relevant information found in the knowledge base."

    def __init__(
        self,
        collection: str,
        url: str = "http://localhost:6333",
        api_key: str | None = None,
        dense_model: str = _DEFAULT_DENSE_MODEL,
        sparse_model: str = _DEFAULT_SPARSE_MODEL,
        chunk_size: int = 10000,
        chunk_overlap: int = 200,
        cloud_inference: bool = False,
    ):
        """Create the client and the text splitter; no network call is made here.

        Args:
            collection: Name of the Qdrant collection to read and write.
            url: Qdrant server URL.
            api_key: Qdrant API key. Falls back to the ``QDRANT_API_KEY``
                environment variable when falsy.
            dense_model: Model name used for the dense vector.
            sparse_model: Model name used for the sparse vector.
            chunk_size: Maximum chunk size, measured with ``len`` (characters).
            chunk_overlap: Overlap between consecutive chunks, in characters.
            cloud_inference: Forwarded to ``AsyncQdrantClient``.
        """
        self._collection = collection
        self._client = AsyncQdrantClient(
            url=url,
            api_key=api_key or os.environ.get("QDRANT_API_KEY"),
            cloud_inference=cloud_inference,
        )
        self._dense_model = dense_model
        self._sparse_model = sparse_model
        self._splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            length_function=len,
        )
        self._indexed_files: list[str] = []

    @property
    def indexed_files(self) -> list[str]:
        """Sources indexed through this instance since creation or last ``clear``."""
        return self._indexed_files

    async def _ensure_collection(self) -> None:
        """Create the collection with dense and sparse vectors if it is missing.

        An existing collection is left untouched, so its vector configuration
        is never altered by this method.
        """
        if await self._client.collection_exists(self._collection):
            return

        # BM25-style sparse models need server-side IDF weighting; other
        # sparse models keep the default (no modifier).
        needs_idf = self._sparse_model.lower() in self._IDF_SPARSE_MODELS
        await self._client.create_collection(
            collection_name=self._collection,
            vectors_config={
                _DENSE: models.VectorParams(
                    size=self._client.get_embedding_size(self._dense_model),
                    distance=models.Distance.COSINE,
                ),
            },
            sparse_vectors_config={
                _SPARSE: models.SparseVectorParams(
                    index=models.SparseIndexParams(on_disk=False),
                    modifier=models.Modifier.IDF if needs_idf else None,
                ),
            },
        )

    async def add_documents(self, documents: list[Document]) -> int:
        """Split documents into chunks and upsert them into the collection.

        Point ids are derived deterministically from ``"{source}_{chunk_index}"``,
        so re-adding a document overwrites its earlier chunks instead of
        duplicating them.

        NOTE(review): if a document is re-added with fewer chunks than before,
        the surplus old chunks are not removed here.

        Args:
            documents: Documents to index; each must expose ``text`` and ``source``.

        Returns:
            Number of chunks upserted (0 when nothing produced any chunk).
        """
        if not documents:
            return 0

        points: list[models.PointStruct] = []
        indexed_sources: list[str] = []

        for doc in documents:
            chunks = self._splitter.split_text(doc.text)
            if not chunks:
                logger.warning("No chunks generated from document: %s", doc.source)
                continue
            indexed_sources.append(doc.source)
            for idx, chunk in enumerate(chunks):
                points.append(
                    models.PointStruct(
                        id=str(uuid.uuid5(uuid.NAMESPACE_DNS, f"{doc.source}_{idx}")),
                        vector={
                            _DENSE: models.Document(text=chunk, model=self._dense_model),
                            _SPARSE: models.Document(text=chunk, model=self._sparse_model),
                        },
                        payload={"text": chunk, "source": doc.source, "chunk_index": idx},
                    )
                )

        if not points:
            return 0

        await self._ensure_collection()
        await self._client.upsert(collection_name=self._collection, points=points)

        # Record each source once: re-indexing overwrites points, so listing
        # the same source repeatedly would misreport what is stored.
        known = set(self._indexed_files)
        for source in indexed_sources:
            if source not in known:
                known.add(source)
                self._indexed_files.append(source)

        # Count only documents that actually produced chunks.
        logger.info(
            "Indexed %d chunks from %d documents", len(points), len(indexed_sources)
        )
        return len(points)

    async def _search_single(
        self, query: str, using: str, limit: int
    ) -> list[models.ScoredPoint]:
        """Query a single named vector (``_DENSE`` or ``_SPARSE``).

        Args:
            query: Query text; embedded with the model matching ``using``.
            using: Name of the vector to search.
            limit: Maximum number of points to return.

        Returns:
            Scored points with only the ``text`` and ``source`` payload fields.
        """
        model = self._dense_model if using == _DENSE else self._sparse_model
        response = await self._client.query_points(
            collection_name=self._collection,
            query=models.Document(text=query, model=model),
            using=using,
            limit=limit,
            with_payload=["text", "source"],
        )
        return response.points

    async def search(
        self,
        query: str,
        top_k: int = 3,
        mode: Literal["hybrid", "vector", "bm25"] = "hybrid",
    ) -> str:
        """Search the collection and return the hits as formatted text.

        Args:
            query: Query text.
            top_k: Maximum number of results.
            mode: ``"vector"`` for dense only, ``"bm25"`` for sparse only; any
                other value runs the hybrid (RRF-fused) query.

        Returns:
            Results formatted as ``"[i] From <source>:\\n<text>"`` blocks joined
            by blank lines, or a fixed "no relevant information" message when
            the collection does not exist or nothing matched.
        """
        if not await self._client.collection_exists(self._collection):
            return self._NO_RESULTS

        if mode == "vector":
            points = await self._search_single(query, _DENSE, top_k)
        elif mode == "bm25":
            points = await self._search_single(query, _SPARSE, top_k)
        else:
            # Hybrid: fetch candidates from both vectors, then fuse with RRF.
            results = await self._client.query_points(
                collection_name=self._collection,
                prefetch=[
                    models.Prefetch(
                        query=models.Document(text=query, model=self._dense_model),
                        using=_DENSE,
                        limit=top_k,
                    ),
                    models.Prefetch(
                        query=models.Document(text=query, model=self._sparse_model),
                        using=_SPARSE,
                        limit=top_k,
                    ),
                ],
                query=models.FusionQuery(fusion=models.Fusion.RRF),
                limit=top_k,
                with_payload=["text", "source"],
            )
            points = results.points

        if not points:
            return self._NO_RESULTS

        formatted_results = []
        for i, p in enumerate(points, 1):
            payload = p.payload or {}
            formatted_results.append(
                f"[{i}] From {payload.get('source', 'unknown')}:\n{payload.get('text', '')}"
            )
        return "\n\n".join(formatted_results)

    async def clear(self) -> None:
        """Delete the collection if it exists and reset the indexed-file list."""
        if await self._client.collection_exists(self._collection):
            await self._client.delete_collection(self._collection)
        self._indexed_files = []
        logger.info("Cleared collection: %s", self._collection)

    async def close(self) -> None:
        """Close the underlying Qdrant client."""
        await self._client.close()
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
async def create_rag(
    collection: str,
    knowledge_dir: str | Path,
    extensions: list[str] | None = None,
    url: str = "http://localhost:6333",
    api_key: str | None = None,
    dense_model: str = _DEFAULT_DENSE_MODEL,
    sparse_model: str = _DEFAULT_SPARSE_MODEL,
    chunk_size: int = 10000,
    chunk_overlap: int = 200,
    cloud_inference: bool = False,
) -> QdrantRAG:
    """Build a ``QdrantRAG`` and index a directory into it in one call.

    Args:
        collection: Name of the Qdrant collection.
        knowledge_dir: Directory passed to ``rag.add_directory``.
        extensions: Forwarded to ``add_directory`` as ``extensions=``.
            NOTE(review): presumably a file-extension filter; confirm against
            the ``RAG`` base class, which defines ``add_directory``.
        url: Qdrant server URL.
        api_key: Qdrant API key (``QdrantRAG`` falls back to the
            ``QDRANT_API_KEY`` environment variable when this is falsy).
        dense_model: Model name for the dense vector.
        sparse_model: Model name for the sparse vector.
        chunk_size: Maximum chunk size in characters.
        chunk_overlap: Overlap between consecutive chunks in characters.
        cloud_inference: Forwarded to ``QdrantRAG``.

    Returns:
        The ready-to-use ``QdrantRAG``; the caller owns it and must close it.

    Raises:
        Whatever ``add_directory`` raises; the client is closed before the
        exception propagates.
    """
    rag = QdrantRAG(
        collection=collection,
        url=url,
        api_key=api_key,
        dense_model=dense_model,
        sparse_model=sparse_model,
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        cloud_inference=cloud_inference,
    )
    try:
        await rag.add_directory(knowledge_dir, extensions=extensions)
    except BaseException:
        # BaseException rather than Exception: asyncio.CancelledError derives
        # from BaseException, so a cancelled indexing run would otherwise skip
        # the cleanup and leak the open client.
        await rag.close()
        raise
    return rag
|