vision-agents-plugins-turbopuffer 0.3.0 (tar.gz)

@@ -0,0 +1,90 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+ *.so
+ .cursor/*
+ # Distribution / packaging
+ .Python
+ build/
+ dist/
+ downloads/
+ develop-eggs/
+ eggs/
+ .eggs/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ pip-wheel-metadata/
+ MANIFEST
+ *.egg-info/
+ *.egg
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ coverage.xml
+ nosetests.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+
+ # Type checker / lint caches
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+ .pytype/
+ .pyre/
+ .ruff_cache/
+
+ # Environments
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+ .env
+ .env.local
+ .env.*.local
+ .env.bak
+ pyvenv.cfg
+ .python-version
+
+ # Editors / IDEs
+ .vscode/
+ .idea/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints/
+
+ # OS / Misc
+ .DS_Store
+ *.log
+
+ # Tooling & repo-specific
+ pyrightconfig.json
+ shell.nix
+ bin/*
+ lib/*
+ stream-py/
+
+ # Artifacts / assets
+ *.pt
+ *.kef
+ *.onnx
+ profile.html
+
+ /opencode.json
@@ -0,0 +1,84 @@
+ Metadata-Version: 2.4
+ Name: vision-agents-plugins-turbopuffer
+ Version: 0.3.0
+ Summary: TurboPuffer RAG integration for Vision Agents with hybrid search
+ Project-URL: Documentation, https://visionagents.ai/
+ Project-URL: Website, https://visionagents.ai/
+ Project-URL: Source, https://github.com/GetStream/Vision-Agents
+ License-Expression: MIT
+ Keywords: AI,RAG,agents,hybrid-search,turbopuffer,vector-search,voice agents
+ Requires-Python: >=3.10
+ Requires-Dist: langchain-google-genai>=2.1.4
+ Requires-Dist: langchain-text-splitters>=0.3.8
+ Requires-Dist: turbopuffer>=0.3.3
+ Requires-Dist: vision-agents
+ Description-Content-Type: text/markdown
+
+ # TurboPuffer RAG Plugin
+
+ A hybrid-search RAG (Retrieval-Augmented Generation) implementation that uses TurboPuffer for combined vector and BM25 full-text search, with Gemini for embeddings.
+
+ ## Features
+
+ - **Hybrid Search**: Combines vector (semantic) and BM25 (keyword) search for better retrieval quality
+ - **Reciprocal Rank Fusion**: Merges results from multiple search strategies into a single ranking
+ - **Gemini Embeddings**: Uses Google's Gemini embedding model for high-quality vectors
+ - **Low-latency Queries**: Supports cache warming for fast query responses
+ - **Implements RAG Interface**: Compatible with the Vision Agents RAG base class
+
+ ## Installation
+
+ ```bash
+ uv add "vision-agents[turbopuffer]"
+ ```
+
+ ## Usage
+
+ ```python
+ from vision_agents.plugins import turbopuffer
+
+ # Initialize RAG
+ rag = turbopuffer.TurboPufferRAG(namespace="my-knowledge")
+ await rag.add_directory("./knowledge")
+
+ # Hybrid search (default)
+ results = await rag.search("How does the chat API work?")
+
+ # Vector-only search
+ results = await rag.search("How does the chat API work?", mode="vector")
+
+ # BM25-only search
+ results = await rag.search("chat API pricing", mode="bm25")
+
+ # Or use the convenience function
+ rag = await turbopuffer.create_rag(
+     namespace="product-knowledge",
+     knowledge_dir="./knowledge",
+ )
+ ```
+
+ ## Configuration
+
+ | Parameter | Description | Default |
+ |-----------|-------------|---------|
+ | `namespace` | TurboPuffer namespace for storing vectors | Required |
+ | `embedding_model` | Gemini embedding model | `models/gemini-embedding-001` |
+ | `chunk_size` | Size of text chunks for splitting documents | `10000` |
+ | `chunk_overlap` | Overlap between chunks for context continuity | `200` |
+ | `region` | TurboPuffer region | `gcp-us-central1` |
+
+ ## Environment Variables
+
+ - `TURBO_PUFFER_KEY`: TurboPuffer API key
+ - `GOOGLE_API_KEY`: Google API key (for Gemini embeddings)
+
+ ## Dependencies
+
+ - `turbopuffer`: TurboPuffer vector database client
+ - `langchain-google-genai`: Gemini embeddings
+ - `langchain-text-splitters`: Text chunking utilities
+
+ ## References
+
+ - [TurboPuffer Hybrid Search](https://turbopuffer.com/docs/hybrid)
+ - [MTEB Embedding Leaderboard](https://huggingface.co/spaces/mteb/leaderboard)
@@ -0,0 +1,120 @@
+ # TurboPuffer RAG Plugin
+
+ A hybrid-search RAG (Retrieval-Augmented Generation) implementation that uses TurboPuffer for combined vector and BM25 full-text search, with Gemini for embeddings.
+
+ ## Features
+
+ - **Hybrid Search**: Combines vector (semantic) and BM25 (keyword) search for better retrieval quality
+ - **Reciprocal Rank Fusion**: Merges results from multiple search strategies into a single ranking (sketched below)
+ - **Gemini Embeddings**: Uses Google's Gemini embedding model for high-quality vectors
+ - **Low-latency Queries**: Supports cache warming for fast query responses
+ - **Implements RAG Interface**: Compatible with the Vision Agents RAG base class
+
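+ Reciprocal Rank Fusion needs no score calibration: a document's fused score is simply the sum of `1 / (k + rank)` over every result list it appears in, with `k = 60` by default. A minimal sketch of the idea; the plugin's `reciprocal_rank_fusion` is equivalent but takes `(id, score)` pairs:
+
+ ```python
+ from collections import defaultdict
+
+ def rrf(ranked_lists: list[list[str]], k: int = 60) -> list[tuple[str, float]]:
+     # Only rank positions matter; raw scores from each engine are ignored.
+     scores: dict[str, float] = defaultdict(float)
+     for ranked in ranked_lists:
+         for rank, doc_id in enumerate(ranked, start=1):
+             scores[doc_id] += 1.0 / (k + rank)
+     return sorted(scores.items(), key=lambda x: x[1], reverse=True)
+
+ # "a" places well in both lists, so it outranks documents found by only one:
+ print(rrf([["a", "b"], ["c", "a"]]))  # 'a' first, then 'c', then 'b'
+ ```
+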
+ ## Installation
+
+ ```bash
+ uv add "vision-agents[turbopuffer]"
+ ```
+
+ ## Usage
+
+ ```python
+ from vision_agents.plugins import turbopuffer
+
+ # Initialize RAG
+ rag = turbopuffer.TurboPufferRAG(namespace="my-knowledge")
+ await rag.add_directory("./knowledge")
+
+ # Hybrid search (default)
+ results = await rag.search("How does the chat API work?")
+
+ # Vector-only search
+ results = await rag.search("How does the chat API work?", mode="vector")
+
+ # BM25-only search
+ results = await rag.search("chat API pricing", mode="bm25")
+
+ # Or use the convenience function
+ rag = await turbopuffer.create_rag(
+     namespace="product-knowledge",
+     knowledge_dir="./knowledge",
+ )
+ ```
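+
+ `search()` returns a single preformatted string rather than structured hits: each result is numbered and prefixed with its source file. A small illustration of the exact format (the sources and texts here are hypothetical):
+
+ ```python
+ # Mirrors the formatting loop at the end of TurboPufferRAG.search().
+ hits = [
+     ("chat.md", "The chat API authenticates with a user token."),  # hypothetical hit
+     ("webhooks.md", "Webhooks retry with exponential backoff."),  # hypothetical hit
+ ]
+ print("\n\n".join(f"[{i}] From {source}:\n{text}" for i, (source, text) in enumerate(hits, 1)))
+ ```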
+
+ ## Configuration
+
+ | Parameter | Description | Default |
+ |-----------|-------------|---------|
+ | `namespace` | TurboPuffer namespace for storing vectors | Required |
+ | `embedding_model` | Gemini embedding model | `models/gemini-embedding-001` |
+ | `chunk_size` | Size of text chunks for splitting documents | `10000` |
+ | `chunk_overlap` | Overlap between chunks for context continuity | `200` |
+ | `region` | TurboPuffer region | `gcp-us-central1` |
+
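+ Only `namespace` is required; with every default spelled out, construction looks like this:
+
+ ```python
+ from vision_agents.plugins import turbopuffer
+
+ rag = turbopuffer.TurboPufferRAG(
+     namespace="my-knowledge",
+     embedding_model="models/gemini-embedding-001",  # Gemini, via langchain-google-genai
+     chunk_size=10000,  # characters per chunk (RecursiveCharacterTextSplitter)
+     chunk_overlap=200,  # characters shared between adjacent chunks
+     region="gcp-us-central1",
+ )
+ ```
+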
+ ## Environment Variables
+
+ - `TURBO_PUFFER_KEY`: TurboPuffer API key
+ - `GOOGLE_API_KEY`: Google API key (for Gemini embeddings)
+
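+ `TURBO_PUFFER_KEY` is read by the plugin when it constructs the TurboPuffer client; `GOOGLE_API_KEY` is picked up by the Gemini embeddings client. A quick start-up guard, as a sketch:
+
+ ```python
+ import os
+
+ missing = [key for key in ("TURBO_PUFFER_KEY", "GOOGLE_API_KEY") if not os.environ.get(key)]
+ if missing:
+     raise RuntimeError(f"Missing environment variables: {', '.join(missing)}")
+ ```
+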
+ ## Dependencies
+
+ - `turbopuffer`: TurboPuffer vector database client
+ - `langchain-google-genai`: Gemini embeddings
+ - `langchain-text-splitters`: Text chunking utilities
+
+ ## References
+
+ - [TurboPuffer Hybrid Search](https://turbopuffer.com/docs/hybrid)
+ - [MTEB Embedding Leaderboard](https://huggingface.co/spaces/mteb/leaderboard)
@@ -0,0 +1,40 @@
+ [build-system]
+ requires = ["hatchling", "hatch-vcs"]
+ build-backend = "hatchling.build"
+
+ [project]
+ name = "vision-agents-plugins-turbopuffer"
+ dynamic = ["version"]
+ description = "TurboPuffer RAG integration for Vision Agents with hybrid search"
+ readme = "README.md"
+ keywords = ["turbopuffer", "RAG", "vector-search", "hybrid-search", "AI", "voice agents", "agents"]
+ requires-python = ">=3.10"
+ license = "MIT"
+ dependencies = [
+     "vision-agents",
+     "turbopuffer>=0.3.3",
+     "langchain-google-genai>=2.1.4",
+     "langchain-text-splitters>=0.3.8",
+ ]
+
+ [project.urls]
+ Documentation = "https://visionagents.ai/"
+ Website = "https://visionagents.ai/"
+ Source = "https://github.com/GetStream/Vision-Agents"
+
+ [tool.hatch.version]
+ source = "vcs"
+ raw-options = { root = "..", search_parent_directories = true, fallback_version = "0.0.0" }
+
+ [tool.hatch.build.targets.wheel]
+ packages = ["."]
+
+ [tool.hatch.build.targets.sdist]
+ include = ["/vision_agents"]
+
+
+ [dependency-groups]
+ dev = [
+     "pytest>=8.4.1",
+     "pytest-asyncio>=1.0.0",
+ ]
@@ -0,0 +1,3 @@
+ from .turbopuffer_rag import TurboPufferRAG, create_rag
+
+ __all__ = ["TurboPufferRAG", "create_rag"]
@@ -0,0 +1,401 @@
+ """
+ TurboPuffer Hybrid Search RAG implementation.
+
+ This module provides a hybrid search RAG (Retrieval Augmented Generation) implementation
+ using TurboPuffer for vector + BM25 full-text search, with Gemini for embeddings.
+
+ Hybrid search combines:
+ - Vector search: Semantic similarity using embeddings
+ - BM25 full-text search: Keyword matching for exact terms (SKUs, names, etc.)
+
+ Results are combined using Reciprocal Rank Fusion (RRF) for better retrieval quality.
+ See: https://turbopuffer.com/docs/hybrid
+
+ Usage:
+     from vision_agents.plugins import turbopuffer
+
+     # Initialize with knowledge directory
+     rag = turbopuffer.TurboPufferRAG(namespace="my-knowledge")
+     await rag.add_directory("./knowledge")
+
+     # Hybrid search (vector + BM25)
+     results = await rag.search("How does the chat API work?")
+
+     # Vector-only search
+     results = await rag.search("How does the chat API work?", mode="vector")
+
+     # BM25-only search
+     results = await rag.search("chat API pricing", mode="bm25")
+
+ Environment variables:
+     TURBO_PUFFER_KEY: TurboPuffer API key
+     GOOGLE_API_KEY: Google API key (for Gemini embeddings)
+
+ Note:
+     For embedding model selection best practices and benchmarks, see:
+     https://huggingface.co/spaces/mteb/leaderboard
+ """
+
+ import asyncio
+ import logging
+ import os
+ from collections import defaultdict
+ from pathlib import Path
+ from typing import Literal
+
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
+ from turbopuffer import AsyncTurbopuffer, NotFoundError
+
+ from vision_agents.core.rag import RAG, Document
+
+ logger = logging.getLogger(__name__)
+
+ # Schema for hybrid search - enables BM25 full-text search on the text field
+ HYBRID_SCHEMA = {
+     "text": {
+         "type": "string",
+         "full_text_search": True,
+     },
+     "source": {"type": "string"},
+     "chunk_index": {"type": "uint"},
+ }
+
+
+ def reciprocal_rank_fusion(
+     ranked_lists: list[list[tuple[str, float]]],
+     k: int = 60,
+ ) -> list[tuple[str, float]]:
+     """
+     Combine multiple ranked lists using Reciprocal Rank Fusion (RRF).
+
+     RRF is a simple but effective rank fusion algorithm that combines
+     results from multiple search strategies.
+
+     Args:
+         ranked_lists: List of ranked results, each as [(id, score), ...].
+         k: RRF constant (default 60, as per original paper).
+
+     Returns:
+         Fused ranking as [(id, rrf_score), ...] sorted by score descending.
+     """
+     rrf_scores: dict[str, float] = defaultdict(float)
+
+     for ranked_list in ranked_lists:
+         for rank, (doc_id, _) in enumerate(ranked_list, start=1):
+             rrf_scores[doc_id] += 1.0 / (k + rank)
+
+     return sorted(rrf_scores.items(), key=lambda x: x[1], reverse=True)
+
+
+ class TurboPufferRAG(RAG):
+     """
+     Hybrid search RAG using TurboPuffer (vector + BM25) and Gemini embeddings.
+
+     Combines semantic vector search with BM25 keyword search for better
+     retrieval quality. Uses Reciprocal Rank Fusion to merge results.
+
+     For hybrid search best practices, see:
+     https://turbopuffer.com/docs/hybrid
+
+     For embedding model benchmarks, see the MTEB leaderboard:
+     https://huggingface.co/spaces/mteb/leaderboard
+     """
+
+     def __init__(
+         self,
+         namespace: str,
+         embedding_model: str = "models/gemini-embedding-001",
+         chunk_size: int = 10000,
+         chunk_overlap: int = 200,
+         region: str = "gcp-us-central1",
+     ):
+         """
+         Initialize the TurboPuffer Hybrid RAG.
+
+         Args:
+             namespace: TurboPuffer namespace for storing vectors.
+             embedding_model: Gemini embedding model (default: gemini-embedding-001).
+             chunk_size: Size of text chunks for splitting documents.
+             chunk_overlap: Overlap between chunks for context continuity.
+             region: TurboPuffer region (default "gcp-us-central1").
+         """
+         self._namespace_name = namespace
+
+         # Initialize async TurboPuffer client
+         self._client = AsyncTurbopuffer(
+             api_key=os.environ.get("TURBO_PUFFER_KEY"),
+             region=region,
+         )
+
+         # Initialize Gemini embeddings
+         self._embeddings = GoogleGenerativeAIEmbeddings(model=embedding_model)
+
+         # Initialize text splitter
+         self._splitter = RecursiveCharacterTextSplitter(
+             chunk_size=chunk_size,
+             chunk_overlap=chunk_overlap,
+             length_function=len,
+         )
+
+         self._indexed_files: list[str] = []
+         # Cache for retrieved documents (id -> attributes)
+         self._doc_cache: dict[str, dict] = {}
+
+     @property
+     def indexed_files(self) -> list[str]:
+         """List of indexed file names."""
+         return self._indexed_files
+
+     async def add_documents(self, documents: list[Document]) -> int:
+         """
+         Add documents to the RAG index.
+
+         Args:
+             documents: List of documents to index.
+
+         Returns:
+             Number of chunks indexed.
+         """
+         if not documents:
+             return 0
+
+         all_chunks: list[str] = []
+         chunk_sources: list[tuple[str, int]] = []  # (source, chunk_index)
+
+         for doc in documents:
+             chunks = self._splitter.split_text(doc.text)
+             if not chunks:
+                 logger.warning(f"No chunks generated from document: {doc.source}")
+                 continue
+             for i, chunk in enumerate(chunks):
+                 all_chunks.append(chunk)
+                 chunk_sources.append((doc.source, i))
+             self._indexed_files.append(doc.source)
+
+         if not all_chunks:
+             return 0
+
+         loop = asyncio.get_running_loop()
+         embeddings = await loop.run_in_executor(
+             None, self._embeddings.embed_documents, all_chunks
+         )
+
+         rows = []
+         for chunk, embedding, (source, idx) in zip(
+             all_chunks, embeddings, chunk_sources
+         ):
+             rows.append(
+                 {
+                     "id": f"{source}_{idx}",
+                     "vector": embedding,
+                     "text": chunk,
+                     "source": source,
+                     "chunk_index": idx,
+                 }
+             )
+
+         ns = self._client.namespace(self._namespace_name)
+         await ns.write(
+             upsert_rows=rows,
+             distance_metric="cosine_distance",
+             schema=HYBRID_SCHEMA,  # type: ignore[arg-type]
+         )
+
+         logger.info(f"Indexed {len(all_chunks)} chunks from {len(documents)} documents")
+         return len(all_chunks)
+
+     async def add_directory(
+         self,
+         path: str | Path,
+         extensions: list[str] | None = None,
+     ) -> int:
+         """
+         Add all files from a directory to the RAG index.
+
+         Args:
+             path: Path to directory containing files.
+             extensions: File extensions to include (e.g., ['.md', '.txt']).
+
+         Returns:
+             Total number of chunks indexed.
+         """
+         total_chunks = await super().add_directory(path, extensions)
+
+         # Warm cache for low-latency queries
+         if total_chunks > 0:
+             await self.warm_cache()
+
+         return total_chunks
+
+     async def warm_cache(self) -> None:
+         """
+         Hint TurboPuffer to prepare for low-latency requests.
+
+         Call this after indexing to ensure fast query responses.
+         See: https://turbopuffer.com/docs/warm-cache
+         """
+         ns = self._client.namespace(self._namespace_name)
+         await ns.hint_cache_warm()
+         logger.info(f"Cache warmed for namespace: {self._namespace_name}")
+
+     async def _vector_search(self, query: str, top_k: int) -> list[tuple[str, float]]:
+         """Run vector similarity search."""
+         loop = asyncio.get_running_loop()
+         query_embedding = await loop.run_in_executor(
+             None, self._embeddings.embed_query, query
+         )
+
+         ns = self._client.namespace(self._namespace_name)
+         try:
+             results = await ns.query(
+                 rank_by=("vector", "ANN", query_embedding),
+                 top_k=top_k,
+                 include_attributes=["text", "source"],
+             )
+         except NotFoundError:
+             return []
+
+         ranked = []
+         for row in results.rows or []:
+             doc_id = str(row.id)
+             # Cache the document for later retrieval
+             self._doc_cache[doc_id] = {
+                 "text": row["text"] or "",
+                 "source": row["source"] or "unknown",
+             }
+             # Lower distance = better, so we use negative for ranking
+             dist = row["$dist"] or 0
+             ranked.append((doc_id, -dist))
+
+         return ranked
+
+     async def _bm25_search(self, query: str, top_k: int) -> list[tuple[str, float]]:
+         """Run BM25 full-text search."""
+         ns = self._client.namespace(self._namespace_name)
+         try:
+             results = await ns.query(
+                 rank_by=("text", "BM25", query),
+                 top_k=top_k,
+                 include_attributes=["text", "source"],
+             )
+         except NotFoundError:
+             return []
+
+         ranked = []
+         for row in results.rows or []:
+             doc_id = str(row.id)
+             # Cache the document for later retrieval
+             self._doc_cache[doc_id] = {
+                 "text": row["text"] or "",
+                 "source": row["source"] or "unknown",
+             }
+             # BM25 score (higher = better)
+             score = row["$dist"] or 0
+             ranked.append((doc_id, score))
+
+         return ranked
+
+     async def search(
+         self,
+         query: str,
+         top_k: int = 3,
+         mode: Literal["hybrid", "vector", "bm25"] = "hybrid",
+     ) -> str:
+         """
+         Search the knowledge base using hybrid, vector, or BM25 search.
+
+         Hybrid search combines vector (semantic) and BM25 (keyword) search
+         using Reciprocal Rank Fusion for better retrieval quality.
+
+         Args:
+             query: Search query.
+             top_k: Number of results to return.
+             mode: Search mode - "hybrid" (default), "vector", or "bm25".
+
+         Returns:
+             Formatted string with search results.
+         """
+         # Clear doc cache for fresh search
+         self._doc_cache.clear()
+
+         # Fetch more candidates for fusion, then trim to top_k
+         fetch_k = top_k * 3
+
+         if mode == "vector":
+             ranked = await self._vector_search(query, fetch_k)
+             final_ids = [doc_id for doc_id, _ in ranked[:top_k]]
+         elif mode == "bm25":
+             ranked = await self._bm25_search(query, fetch_k)
+             final_ids = [doc_id for doc_id, _ in ranked[:top_k]]
+         else:
+             # Hybrid: run both searches in parallel and fuse
+             vector_results, bm25_results = await asyncio.gather(
+                 self._vector_search(query, fetch_k),
+                 self._bm25_search(query, fetch_k),
+             )
+
+             # Combine using Reciprocal Rank Fusion
+             fused = reciprocal_rank_fusion([vector_results, bm25_results])
+             final_ids = [doc_id for doc_id, _ in fused[:top_k]]
+
+         if not final_ids:
+             return "No relevant information found in the knowledge base."
+
+         # Format results from cache
+         formatted_results = []
+         for i, doc_id in enumerate(final_ids, 1):
+             doc = self._doc_cache.get(doc_id, {})
+             source = doc.get("source", "unknown")
+             text = doc.get("text", "")
+             formatted_results.append(f"[{i}] From {source}:\n{text}")
+
+         return "\n\n".join(formatted_results)
+
+     async def clear(self) -> None:
+         """Clear all vectors from the namespace."""
+         ns = self._client.namespace(self._namespace_name)
+         try:
+             await ns.delete_all()
+         except NotFoundError:
+             pass  # Namespace doesn't exist, nothing to clear
+         self._indexed_files = []
+         self._doc_cache.clear()
+         logger.info(f"Cleared namespace: {self._namespace_name}")
+
+     async def close(self) -> None:
+         """Close the TurboPuffer client."""
+         await self._client.close()
+
+
+ async def create_rag(
+     namespace: str,
+     knowledge_dir: str | Path,
+     extensions: list[str] | None = None,
+     region: str = "gcp-us-central1",
+ ) -> TurboPufferRAG:
+     """
+     Convenience function to create and initialize a TurboPuffer Hybrid RAG.
+
+     Args:
+         namespace: TurboPuffer namespace name.
+         knowledge_dir: Directory containing knowledge files.
+         extensions: File extensions to include.
+         region: TurboPuffer region.
+
+     Returns:
+         Initialized TurboPufferRAG with files indexed.
+
+     Example:
+         rag = await create_rag(
+             namespace="product-knowledge",
+             knowledge_dir="./knowledge"
+         )
+
+         @llm.register_function(description="Search knowledge base")
+         async def search_knowledge(query: str) -> str:
+             return await rag.search(query)  # Uses hybrid search by default
+     """
+     rag = TurboPufferRAG(namespace=namespace, region=region)
+     await rag.add_directory(knowledge_dir, extensions=extensions)
+     return rag