stache-ai 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- stache_ai-0.1.0/PKG-INFO +263 -0
- stache_ai-0.1.0/README.md +206 -0
- stache_ai-0.1.0/pyproject.toml +158 -0
- stache_ai-0.1.0/setup.cfg +4 -0
- stache_ai-0.1.0/src/stache_ai/__init__.py +3 -0
- stache_ai-0.1.0/src/stache_ai/api/__init__.py +1 -0
- stache_ai-0.1.0/src/stache_ai/api/main.py +92 -0
- stache_ai-0.1.0/src/stache_ai/api/routes/__init__.py +1 -0
- stache_ai-0.1.0/src/stache_ai/api/routes/capture.py +53 -0
- stache_ai-0.1.0/src/stache_ai/api/routes/documents.py +854 -0
- stache_ai-0.1.0/src/stache_ai/api/routes/health.py +40 -0
- stache_ai-0.1.0/src/stache_ai/api/routes/insights.py +139 -0
- stache_ai-0.1.0/src/stache_ai/api/routes/models.py +46 -0
- stache_ai-0.1.0/src/stache_ai/api/routes/namespaces.py +458 -0
- stache_ai-0.1.0/src/stache_ai/api/routes/pending.py +196 -0
- stache_ai-0.1.0/src/stache_ai/api/routes/query.py +54 -0
- stache_ai-0.1.0/src/stache_ai/api/routes/upload.py +211 -0
- stache_ai-0.1.0/src/stache_ai/chunking/__init__.py +6 -0
- stache_ai-0.1.0/src/stache_ai/chunking/base.py +43 -0
- stache_ai-0.1.0/src/stache_ai/chunking/character.py +57 -0
- stache_ai-0.1.0/src/stache_ai/chunking/factory.py +53 -0
- stache_ai-0.1.0/src/stache_ai/chunking/hierarchical.py +204 -0
- stache_ai-0.1.0/src/stache_ai/chunking/markdown.py +124 -0
- stache_ai-0.1.0/src/stache_ai/chunking/recursive.py +151 -0
- stache_ai-0.1.0/src/stache_ai/chunking/semantic.py +107 -0
- stache_ai-0.1.0/src/stache_ai/chunking/strategies/__init__.py +26 -0
- stache_ai-0.1.0/src/stache_ai/chunking/transcript.py +275 -0
- stache_ai-0.1.0/src/stache_ai/cli/__init__.py +6 -0
- stache_ai-0.1.0/src/stache_ai/cli/__main__.py +6 -0
- stache_ai-0.1.0/src/stache_ai/cli/backfill_document_index.py +447 -0
- stache_ai-0.1.0/src/stache_ai/cli/dump_cmd.py +177 -0
- stache_ai-0.1.0/src/stache_ai/cli/import_cmd.py +244 -0
- stache_ai-0.1.0/src/stache_ai/cli/main.py +46 -0
- stache_ai-0.1.0/src/stache_ai/cli/migrate_cmd.py +238 -0
- stache_ai-0.1.0/src/stache_ai/cli/namespace_cmd.py +239 -0
- stache_ai-0.1.0/src/stache_ai/cli/providers_cmd.py +24 -0
- stache_ai-0.1.0/src/stache_ai/cli/redis_export.py +139 -0
- stache_ai-0.1.0/src/stache_ai/cli/vectors_cmd.py +461 -0
- stache_ai-0.1.0/src/stache_ai/config.py +315 -0
- stache_ai-0.1.0/src/stache_ai/core/__init__.py +0 -0
- stache_ai-0.1.0/src/stache_ai/core/operations.py +439 -0
- stache_ai-0.1.0/src/stache_ai/loaders/__init__.py +164 -0
- stache_ai-0.1.0/src/stache_ai/models/__init__.py +5 -0
- stache_ai-0.1.0/src/stache_ai/models/insight.py +21 -0
- stache_ai-0.1.0/src/stache_ai/providers/__init__.py +54 -0
- stache_ai-0.1.0/src/stache_ai/providers/base.py +706 -0
- stache_ai-0.1.0/src/stache_ai/providers/document_index/__init__.py +7 -0
- stache_ai-0.1.0/src/stache_ai/providers/embeddings/__init__.py +7 -0
- stache_ai-0.1.0/src/stache_ai/providers/embeddings/fallback.py +131 -0
- stache_ai-0.1.0/src/stache_ai/providers/factories.py +449 -0
- stache_ai-0.1.0/src/stache_ai/providers/llm/__init__.py +7 -0
- stache_ai-0.1.0/src/stache_ai/providers/llm/fallback.py +82 -0
- stache_ai-0.1.0/src/stache_ai/providers/namespace/__init__.py +7 -0
- stache_ai-0.1.0/src/stache_ai/providers/namespace/sqlite.py +301 -0
- stache_ai-0.1.0/src/stache_ai/providers/plugin_loader.py +221 -0
- stache_ai-0.1.0/src/stache_ai/providers/reranker/__init__.py +11 -0
- stache_ai-0.1.0/src/stache_ai/providers/reranker/base.py +33 -0
- stache_ai-0.1.0/src/stache_ai/providers/reranker/simple.py +139 -0
- stache_ai-0.1.0/src/stache_ai/providers/resilience/__init__.py +18 -0
- stache_ai-0.1.0/src/stache_ai/providers/resilience/circuit_breaker.py +227 -0
- stache_ai-0.1.0/src/stache_ai/providers/resilience/decorators.py +76 -0
- stache_ai-0.1.0/src/stache_ai/providers/resilience/http_client.py +543 -0
- stache_ai-0.1.0/src/stache_ai/providers/vectordb/__init__.py +7 -0
- stache_ai-0.1.0/src/stache_ai/rag/__init__.py +1 -0
- stache_ai-0.1.0/src/stache_ai/rag/embedding_resilience.py +447 -0
- stache_ai-0.1.0/src/stache_ai/rag/pipeline.py +994 -0
- stache_ai-0.1.0/src/stache_ai/testing/__init__.py +31 -0
- stache_ai-0.1.0/src/stache_ai/testing/document_index.py +95 -0
- stache_ai-0.1.0/src/stache_ai/testing/embedding.py +75 -0
- stache_ai-0.1.0/src/stache_ai/testing/llm.py +63 -0
- stache_ai-0.1.0/src/stache_ai/testing/namespace.py +81 -0
- stache_ai-0.1.0/src/stache_ai/testing/reranker.py +56 -0
- stache_ai-0.1.0/src/stache_ai/testing/vectordb.py +232 -0
- stache_ai-0.1.0/src/stache_ai.egg-info/PKG-INFO +263 -0
- stache_ai-0.1.0/src/stache_ai.egg-info/SOURCES.txt +82 -0
- stache_ai-0.1.0/src/stache_ai.egg-info/dependency_links.txt +1 -0
- stache_ai-0.1.0/src/stache_ai.egg-info/entry_points.txt +15 -0
- stache_ai-0.1.0/src/stache_ai.egg-info/requires.txt +38 -0
- stache_ai-0.1.0/src/stache_ai.egg-info/top_level.txt +1 -0
- stache_ai-0.1.0/tests/test_api.py +299 -0
- stache_ai-0.1.0/tests/test_chunking.py +301 -0
- stache_ai-0.1.0/tests/test_config.py +336 -0
- stache_ai-0.1.0/tests/test_embedding_resilience.py +356 -0
- stache_ai-0.1.0/tests/test_pipeline.py +328 -0
stache_ai-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: stache-ai
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Personal AI-powered knowledge base with RAG
|
|
5
|
+
Author: Stache Contributors
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/stache-ai/stache-ai
|
|
8
|
+
Project-URL: Documentation, https://github.com/stache-ai/stache#readme
|
|
9
|
+
Project-URL: Repository, https://github.com/stache-ai/stache-ai
|
|
10
|
+
Project-URL: Issues, https://github.com/stache-ai/stache/issues
|
|
11
|
+
Keywords: stache,rag,ai,knowledge-base,llm,vector-database,semantic-search
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
21
|
+
Requires-Python: >=3.10
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
Requires-Dist: fastapi>=0.109.0
|
|
24
|
+
Requires-Dist: uvicorn[standard]>=0.27.0
|
|
25
|
+
Requires-Dist: python-multipart>=0.0.6
|
|
26
|
+
Requires-Dist: pypdf2>=3.0.1
|
|
27
|
+
Requires-Dist: pdfplumber>=0.10.3
|
|
28
|
+
Requires-Dist: ebooklib>=0.18
|
|
29
|
+
Requires-Dist: python-docx>=1.1.0
|
|
30
|
+
Requires-Dist: python-pptx>=0.6.23
|
|
31
|
+
Requires-Dist: beautifulsoup4>=4.12.3
|
|
32
|
+
Requires-Dist: lxml>=5.1.0
|
|
33
|
+
Requires-Dist: markdown>=3.5.2
|
|
34
|
+
Requires-Dist: webvtt-py>=0.4.6
|
|
35
|
+
Requires-Dist: python-dotenv>=1.0.1
|
|
36
|
+
Requires-Dist: tiktoken>=0.5.2
|
|
37
|
+
Requires-Dist: pydantic>=2.7.0
|
|
38
|
+
Requires-Dist: pydantic-settings>=2.3.0
|
|
39
|
+
Requires-Dist: click>=8.1.7
|
|
40
|
+
Requires-Dist: rich>=13.7.0
|
|
41
|
+
Requires-Dist: tqdm>=4.66.1
|
|
42
|
+
Provides-Extra: dev
|
|
43
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
44
|
+
Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
|
|
45
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
|
46
|
+
Requires-Dist: httpx>=0.25.0; extra == "dev"
|
|
47
|
+
Requires-Dist: mypy>=1.0.0; extra == "dev"
|
|
48
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
49
|
+
Provides-Extra: aws
|
|
50
|
+
Requires-Dist: stache-ai-s3vectors>=0.1.0; extra == "aws"
|
|
51
|
+
Requires-Dist: stache-ai-dynamodb>=0.1.0; extra == "aws"
|
|
52
|
+
Requires-Dist: stache-ai-bedrock>=0.1.0; extra == "aws"
|
|
53
|
+
Provides-Extra: ollama
|
|
54
|
+
Requires-Dist: stache-ai-ollama>=0.1.0; extra == "ollama"
|
|
55
|
+
Provides-Extra: openai
|
|
56
|
+
Requires-Dist: stache-ai-openai>=0.1.0; extra == "openai"
|
|
57
|
+
|
|
58
|
+
# stache-ai
|
|
59
|
+
|
|
60
|
+
A Python library for building AI-powered knowledge bases using Retrieval-Augmented Generation (RAG).
|
|
61
|
+
|
|
62
|
+
## Overview
|
|
63
|
+
|
|
64
|
+
stache-ai provides a pluggable framework for ingesting documents, storing embeddings, and executing semantic search with optional reranking. It includes support for multiple vector databases, LLM providers, embedding models, and document formats.
|
|
65
|
+
|
|
66
|
+
## Installation
|
|
67
|
+
|
|
68
|
+
Install the core package:
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
pip install stache-ai
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## Quick Start
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
from stache_ai.rag.pipeline import get_pipeline
|
|
78
|
+
|
|
79
|
+
# Get the pipeline (uses configured providers)
|
|
80
|
+
pipeline = get_pipeline()
|
|
81
|
+
|
|
82
|
+
# Ingest text
|
|
83
|
+
result = pipeline.ingest_text(
|
|
84
|
+
text="Your knowledge base content here",
|
|
85
|
+
metadata={"source": "example"}
|
|
86
|
+
)
|
|
87
|
+
print(f"Created {result['chunks_created']} chunks")
|
|
88
|
+
|
|
89
|
+
# Search
|
|
90
|
+
results = pipeline.query(
|
|
91
|
+
question="What is this about?",
|
|
92
|
+
top_k=5
|
|
93
|
+
)
|
|
94
|
+
for source in results['sources']:
|
|
95
|
+
print(f"- {source['text'][:100]}...")
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
## Provider Packages
|
|
99
|
+
|
|
100
|
+
stache-ai uses a provider pattern to support different backends. Install optional provider packages to enable specific functionality:
|
|
101
|
+
|
|
102
|
+
### AWS Providers
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
pip install "stache-ai[aws]"
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
Includes:
|
|
109
|
+
- `stache-ai-s3vectors` - Amazon S3 Vectors for semantic search
|
|
110
|
+
- `stache-ai-dynamodb` - Amazon DynamoDB for namespace and document index storage
|
|
111
|
+
- `stache-ai-bedrock` - Amazon Bedrock for LLMs and embeddings
|
|
112
|
+
|
|
113
|
+
### Ollama
|
|
114
|
+
|
|
115
|
+
```bash
|
|
116
|
+
pip install "stache-ai[ollama]"
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
Includes:
|
|
120
|
+
- `stache-ai-ollama` - Ollama for local LLM and embedding models
|
|
121
|
+
|
|
122
|
+
### OpenAI
|
|
123
|
+
|
|
124
|
+
```bash
|
|
125
|
+
pip install "stache-ai[openai]"
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
Includes:
|
|
129
|
+
- `stache-ai-openai` - OpenAI for GPT models and embeddings
|
|
130
|
+
|
|
131
|
+
## Configuration
|
|
132
|
+
|
|
133
|
+
Configure stache-ai via environment variables or a `.env` file:
|
|
134
|
+
|
|
135
|
+
```bash
|
|
136
|
+
# Vector Database
|
|
137
|
+
VECTORDB_PROVIDER=s3vectors
|
|
138
|
+
VECTORDB_S3_REGION=us-east-1
|
|
139
|
+
VECTORDB_S3_INDEX_NAME=stache
|
|
140
|
+
|
|
141
|
+
# Embeddings
|
|
142
|
+
EMBEDDING_PROVIDER=bedrock
|
|
143
|
+
EMBEDDING_MODEL=cohere.embed-english-v3
|
|
144
|
+
|
|
145
|
+
# Namespaces
|
|
146
|
+
NAMESPACE_PROVIDER=dynamodb
|
|
147
|
+
NAMESPACE_DYNAMODB_TABLE=stache-namespaces
|
|
148
|
+
|
|
149
|
+
# LLM
|
|
150
|
+
LLM_PROVIDER=bedrock
|
|
151
|
+
LLM_MODEL=anthropic.claude-3-5-sonnet-20241022-v2:0
|
|
152
|
+
|
|
153
|
+
# Optional features
|
|
154
|
+
ENABLE_DOCUMENT_INDEX=true
|
|
155
|
+
EMBEDDING_AUTO_SPLIT_ENABLED=true
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
See `src/stache_ai/config.py` for all available options.
|
|
159
|
+
|
|
160
|
+
## Usage Examples
|
|
161
|
+
|
|
162
|
+
### Document Chunking
|
|
163
|
+
|
|
164
|
+
```python
|
|
165
|
+
from stache_ai.chunking import ChunkingStrategy
|
|
166
|
+
|
|
167
|
+
# Recursive character-level chunking
|
|
168
|
+
chunks = ChunkingStrategy.create(
|
|
169
|
+
strategy="recursive",
|
|
170
|
+
chunk_size=1024,
|
|
171
|
+
chunk_overlap=100
|
|
172
|
+
).chunk("Your document text")
|
|
173
|
+
|
|
174
|
+
for chunk in chunks:
|
|
175
|
+
print(chunk)
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
### Filtering Results
|
|
179
|
+
|
|
180
|
+
```python
|
|
181
|
+
# Search with metadata filter
|
|
182
|
+
results = pipeline.query(
|
|
183
|
+
question="API documentation",
|
|
184
|
+
filter={"source": "docs"}
|
|
185
|
+
)
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
### Namespace Isolation
|
|
189
|
+
|
|
190
|
+
```python
|
|
191
|
+
# Ingest to a specific namespace
|
|
192
|
+
pipeline.ingest_text(
|
|
193
|
+
text="Project A data",
|
|
194
|
+
namespace="project-a"
|
|
195
|
+
)
|
|
196
|
+
|
|
197
|
+
# Search within a namespace
|
|
198
|
+
results = pipeline.query(
|
|
199
|
+
question="Find related content",
|
|
200
|
+
namespace="project-a"
|
|
201
|
+
)
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
## API Server
|
|
205
|
+
|
|
206
|
+
Run a FastAPI server for HTTP access:
|
|
207
|
+
|
|
208
|
+
```bash
|
|
209
|
+
pip install "stache-ai[dev]"
|
|
210
|
+
python -m stache_ai.api.main
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
Server exposes endpoints for:
|
|
214
|
+
- `/api/query` - Semantic search
|
|
215
|
+
- `/api/capture` - Text ingestion
|
|
216
|
+
- `/api/namespaces` - Manage namespaces
|
|
217
|
+
- `/api/documents` - List and retrieve documents
|
|
218
|
+
- `/api/upload` - Upload files (PDF, DOCX, etc.)
|
|
219
|
+
|
|
220
|
+
## CLI Tools
|
|
221
|
+
|
|
222
|
+
### Admin CLI (stache-admin)
|
|
223
|
+
|
|
224
|
+
```bash
|
|
225
|
+
# Import documents from a directory
|
|
226
|
+
stache-import /path/to/documents --namespace my-docs
|
|
227
|
+
|
|
228
|
+
# List namespaces
|
|
229
|
+
stache-admin namespace-list
|
|
230
|
+
|
|
231
|
+
# View vector statistics
|
|
232
|
+
stache-admin vectors stats
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
### User CLI (stache-tools)
|
|
236
|
+
|
|
237
|
+
For search, ingest, and MCP server, install [stache-tools](https://github.com/stache-ai/stache-tools):
|
|
238
|
+
|
|
239
|
+
```bash
|
|
240
|
+
pip install stache-tools
|
|
241
|
+
|
|
242
|
+
# Search
|
|
243
|
+
stache search "your query"
|
|
244
|
+
|
|
245
|
+
# Ingest text
|
|
246
|
+
stache ingest -t "your text" -n namespace
|
|
247
|
+
```
|
|
248
|
+
|
|
249
|
+
## Testing
|
|
250
|
+
|
|
251
|
+
```bash
|
|
252
|
+
pip install "stache-ai[dev]"
|
|
253
|
+
pytest
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
## Documentation
|
|
257
|
+
|
|
258
|
+
- [GitHub Repository](https://github.com/stache-ai/stache-ai)
|
|
259
|
+
- [Architecture Guide](https://github.com/stache-ai/stache-ai/tree/main/docs)
|
|
260
|
+
|
|
261
|
+
## License
|
|
262
|
+
|
|
263
|
+
MIT
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
# stache-ai
|
|
2
|
+
|
|
3
|
+
A Python library for building AI-powered knowledge bases using Retrieval-Augmented Generation (RAG).
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
stache-ai provides a pluggable framework for ingesting documents, storing embeddings, and executing semantic search with optional reranking. It includes support for multiple vector databases, LLM providers, embedding models, and document formats.
|
|
8
|
+
|
|
9
|
+
## Installation
|
|
10
|
+
|
|
11
|
+
Install the core package:
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
pip install stache-ai
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## Quick Start
|
|
18
|
+
|
|
19
|
+
```python
|
|
20
|
+
from stache_ai.rag.pipeline import get_pipeline
|
|
21
|
+
|
|
22
|
+
# Get the pipeline (uses configured providers)
|
|
23
|
+
pipeline = get_pipeline()
|
|
24
|
+
|
|
25
|
+
# Ingest text
|
|
26
|
+
result = pipeline.ingest_text(
|
|
27
|
+
text="Your knowledge base content here",
|
|
28
|
+
metadata={"source": "example"}
|
|
29
|
+
)
|
|
30
|
+
print(f"Created {result['chunks_created']} chunks")
|
|
31
|
+
|
|
32
|
+
# Search
|
|
33
|
+
results = pipeline.query(
|
|
34
|
+
question="What is this about?",
|
|
35
|
+
top_k=5
|
|
36
|
+
)
|
|
37
|
+
for source in results['sources']:
|
|
38
|
+
print(f"- {source['text'][:100]}...")
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## Provider Packages
|
|
42
|
+
|
|
43
|
+
stache-ai uses a provider pattern to support different backends. Install optional provider packages to enable specific functionality:
|
|
44
|
+
|
|
45
|
+
### AWS Providers
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
pip install "stache-ai[aws]"
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
Includes:
|
|
52
|
+
- `stache-ai-s3vectors` - Amazon S3 Vectors for semantic search
|
|
53
|
+
- `stache-ai-dynamodb` - Amazon DynamoDB for namespace and document index storage
|
|
54
|
+
- `stache-ai-bedrock` - Amazon Bedrock for LLMs and embeddings
|
|
55
|
+
|
|
56
|
+
### Ollama
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
pip install "stache-ai[ollama]"
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
Includes:
|
|
63
|
+
- `stache-ai-ollama` - Ollama for local LLM and embedding models
|
|
64
|
+
|
|
65
|
+
### OpenAI
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
pip install "stache-ai[openai]"
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
Includes:
|
|
72
|
+
- `stache-ai-openai` - OpenAI for GPT models and embeddings
|
|
73
|
+
|
|
74
|
+
## Configuration
|
|
75
|
+
|
|
76
|
+
Configure stache-ai via environment variables or a `.env` file:
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
# Vector Database
|
|
80
|
+
VECTORDB_PROVIDER=s3vectors
|
|
81
|
+
VECTORDB_S3_REGION=us-east-1
|
|
82
|
+
VECTORDB_S3_INDEX_NAME=stache
|
|
83
|
+
|
|
84
|
+
# Embeddings
|
|
85
|
+
EMBEDDING_PROVIDER=bedrock
|
|
86
|
+
EMBEDDING_MODEL=cohere.embed-english-v3
|
|
87
|
+
|
|
88
|
+
# Namespaces
|
|
89
|
+
NAMESPACE_PROVIDER=dynamodb
|
|
90
|
+
NAMESPACE_DYNAMODB_TABLE=stache-namespaces
|
|
91
|
+
|
|
92
|
+
# LLM
|
|
93
|
+
LLM_PROVIDER=bedrock
|
|
94
|
+
LLM_MODEL=anthropic.claude-3-5-sonnet-20241022-v2:0
|
|
95
|
+
|
|
96
|
+
# Optional features
|
|
97
|
+
ENABLE_DOCUMENT_INDEX=true
|
|
98
|
+
EMBEDDING_AUTO_SPLIT_ENABLED=true
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
See `src/stache_ai/config.py` for all available options.
|
|
102
|
+
|
|
103
|
+
## Usage Examples
|
|
104
|
+
|
|
105
|
+
### Document Chunking
|
|
106
|
+
|
|
107
|
+
```python
|
|
108
|
+
from stache_ai.chunking import ChunkingStrategy
|
|
109
|
+
|
|
110
|
+
# Recursive character-level chunking
|
|
111
|
+
chunks = ChunkingStrategy.create(
|
|
112
|
+
strategy="recursive",
|
|
113
|
+
chunk_size=1024,
|
|
114
|
+
chunk_overlap=100
|
|
115
|
+
).chunk("Your document text")
|
|
116
|
+
|
|
117
|
+
for chunk in chunks:
|
|
118
|
+
print(chunk)
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
### Filtering Results
|
|
122
|
+
|
|
123
|
+
```python
|
|
124
|
+
# Search with metadata filter
|
|
125
|
+
results = pipeline.query(
|
|
126
|
+
question="API documentation",
|
|
127
|
+
filter={"source": "docs"}
|
|
128
|
+
)
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
### Namespace Isolation
|
|
132
|
+
|
|
133
|
+
```python
|
|
134
|
+
# Ingest to a specific namespace
|
|
135
|
+
pipeline.ingest_text(
|
|
136
|
+
text="Project A data",
|
|
137
|
+
namespace="project-a"
|
|
138
|
+
)
|
|
139
|
+
|
|
140
|
+
# Search within a namespace
|
|
141
|
+
results = pipeline.query(
|
|
142
|
+
question="Find related content",
|
|
143
|
+
namespace="project-a"
|
|
144
|
+
)
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
## API Server
|
|
148
|
+
|
|
149
|
+
Run a FastAPI server for HTTP access:
|
|
150
|
+
|
|
151
|
+
```bash
|
|
152
|
+
pip install "stache-ai[dev]"
|
|
153
|
+
python -m stache_ai.api.main
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
Server exposes endpoints for:
|
|
157
|
+
- `/api/query` - Semantic search
|
|
158
|
+
- `/api/capture` - Text ingestion
|
|
159
|
+
- `/api/namespaces` - Manage namespaces
|
|
160
|
+
- `/api/documents` - List and retrieve documents
|
|
161
|
+
- `/api/upload` - Upload files (PDF, DOCX, etc.)
|
|
162
|
+
|
|
163
|
+
## CLI Tools
|
|
164
|
+
|
|
165
|
+
### Admin CLI (stache-admin)
|
|
166
|
+
|
|
167
|
+
```bash
|
|
168
|
+
# Import documents from a directory
|
|
169
|
+
stache-import /path/to/documents --namespace my-docs
|
|
170
|
+
|
|
171
|
+
# List namespaces
|
|
172
|
+
stache-admin namespace-list
|
|
173
|
+
|
|
174
|
+
# View vector statistics
|
|
175
|
+
stache-admin vectors stats
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
### User CLI (stache-tools)
|
|
179
|
+
|
|
180
|
+
For search, ingest, and MCP server, install [stache-tools](https://github.com/stache-ai/stache-tools):
|
|
181
|
+
|
|
182
|
+
```bash
|
|
183
|
+
pip install stache-tools
|
|
184
|
+
|
|
185
|
+
# Search
|
|
186
|
+
stache search "your query"
|
|
187
|
+
|
|
188
|
+
# Ingest text
|
|
189
|
+
stache ingest -t "your text" -n namespace
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
## Testing
|
|
193
|
+
|
|
194
|
+
```bash
|
|
195
|
+
pip install "stache-ai[dev]"
|
|
196
|
+
pytest
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
## Documentation
|
|
200
|
+
|
|
201
|
+
- [GitHub Repository](https://github.com/stache-ai/stache-ai)
|
|
202
|
+
- [Architecture Guide](https://github.com/stache-ai/stache-ai/tree/main/docs)
|
|
203
|
+
|
|
204
|
+
## License
|
|
205
|
+
|
|
206
|
+
MIT
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "stache-ai"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
readme = "README.md"
|
|
9
|
+
description = "Personal AI-powered knowledge base with RAG"
|
|
10
|
+
license = {text = "MIT"}
|
|
11
|
+
requires-python = ">=3.10"
|
|
12
|
+
authors = [
|
|
13
|
+
{name = "Stache Contributors"}
|
|
14
|
+
]
|
|
15
|
+
keywords = ["stache", "rag", "ai", "knowledge-base", "llm", "vector-database", "semantic-search"]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 4 - Beta",
|
|
18
|
+
"Intended Audience :: Developers",
|
|
19
|
+
"License :: OSI Approved :: MIT License",
|
|
20
|
+
"Operating System :: OS Independent",
|
|
21
|
+
"Programming Language :: Python :: 3",
|
|
22
|
+
"Programming Language :: Python :: 3.10",
|
|
23
|
+
"Programming Language :: Python :: 3.11",
|
|
24
|
+
"Programming Language :: Python :: 3.12",
|
|
25
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
26
|
+
]
|
|
27
|
+
dependencies = [
|
|
28
|
+
"fastapi>=0.109.0",
|
|
29
|
+
"uvicorn[standard]>=0.27.0",
|
|
30
|
+
"python-multipart>=0.0.6",
|
|
31
|
+
"pypdf2>=3.0.1",
|
|
32
|
+
"pdfplumber>=0.10.3",
|
|
33
|
+
"ebooklib>=0.18",
|
|
34
|
+
"python-docx>=1.1.0",
|
|
35
|
+
"python-pptx>=0.6.23",
|
|
36
|
+
"beautifulsoup4>=4.12.3",
|
|
37
|
+
"lxml>=5.1.0",
|
|
38
|
+
"markdown>=3.5.2",
|
|
39
|
+
"webvtt-py>=0.4.6",
|
|
40
|
+
"python-dotenv>=1.0.1",
|
|
41
|
+
"tiktoken>=0.5.2",
|
|
42
|
+
"pydantic>=2.7.0",
|
|
43
|
+
"pydantic-settings>=2.3.0",
|
|
44
|
+
"click>=8.1.7",
|
|
45
|
+
"rich>=13.7.0",
|
|
46
|
+
"tqdm>=4.66.1",
|
|
47
|
+
]
|
|
48
|
+
|
|
49
|
+
[project.optional-dependencies]
|
|
50
|
+
dev = [
|
|
51
|
+
"pytest>=7.0.0",
|
|
52
|
+
"pytest-asyncio>=0.21.0",
|
|
53
|
+
"pytest-cov>=4.0.0",
|
|
54
|
+
"httpx>=0.25.0",
|
|
55
|
+
"mypy>=1.0.0",
|
|
56
|
+
"ruff>=0.1.0",
|
|
57
|
+
]
|
|
58
|
+
# Provider bundles for convenience
|
|
59
|
+
aws = [
|
|
60
|
+
"stache-ai-s3vectors>=0.1.0",
|
|
61
|
+
"stache-ai-dynamodb>=0.1.0",
|
|
62
|
+
"stache-ai-bedrock>=0.1.0",
|
|
63
|
+
]
|
|
64
|
+
ollama = [
|
|
65
|
+
"stache-ai-ollama>=0.1.0",
|
|
66
|
+
]
|
|
67
|
+
openai = [
|
|
68
|
+
"stache-ai-openai>=0.1.0",
|
|
69
|
+
]
|
|
70
|
+
|
|
71
|
+
[project.scripts]
|
|
72
|
+
stache-admin = "stache_ai.cli:cli"
|
|
73
|
+
stache-import = "stache_ai.cli.import_cmd:import_directory"
|
|
74
|
+
|
|
75
|
+
# =============================================================================
|
|
76
|
+
# Provider Entry Points - Core Built-in Providers Only
|
|
77
|
+
# =============================================================================
|
|
78
|
+
# External packages add providers by defining similar entry points:
|
|
79
|
+
# [project.entry-points."stache.llm"]
|
|
80
|
+
# my_provider = "my_package:MyLLMProvider"
|
|
81
|
+
# =============================================================================
|
|
82
|
+
|
|
83
|
+
[project.entry-points."stache.llm"]
|
|
84
|
+
fallback = "stache_ai.providers.llm.fallback:FallbackLLMProvider"
|
|
85
|
+
|
|
86
|
+
[project.entry-points."stache.embeddings"]
|
|
87
|
+
fallback = "stache_ai.providers.embeddings.fallback:FallbackEmbeddingProvider"
|
|
88
|
+
|
|
89
|
+
[project.entry-points."stache.namespace"]
|
|
90
|
+
sqlite = "stache_ai.providers.namespace.sqlite:SQLiteNamespaceProvider"
|
|
91
|
+
|
|
92
|
+
[project.entry-points."stache.reranker"]
|
|
93
|
+
simple = "stache_ai.providers.reranker.simple:SimpleReranker"
|
|
94
|
+
|
|
95
|
+
[project.urls]
|
|
96
|
+
Homepage = "https://github.com/stache-ai/stache-ai"
|
|
97
|
+
Documentation = "https://github.com/stache-ai/stache#readme"
|
|
98
|
+
Repository = "https://github.com/stache-ai/stache-ai"
|
|
99
|
+
Issues = "https://github.com/stache-ai/stache/issues"
|
|
100
|
+
|
|
101
|
+
[tool.setuptools.packages.find]
|
|
102
|
+
where = ["src"]
|
|
103
|
+
include = ["stache_ai*"]
|
|
104
|
+
|
|
105
|
+
[tool.pytest.ini_options]
|
|
106
|
+
pythonpath = ["src"]
|
|
107
|
+
addopts = "-v"
|
|
108
|
+
testpaths = ["tests"]
|
|
109
|
+
python_files = ["test_*.py"]
|
|
110
|
+
python_classes = ["Test*"]
|
|
111
|
+
python_functions = ["test_*"]
|
|
112
|
+
asyncio_mode = "auto"
|
|
113
|
+
filterwarnings = [
|
|
114
|
+
"ignore::DeprecationWarning",
|
|
115
|
+
"ignore::PendingDeprecationWarning",
|
|
116
|
+
]
|
|
117
|
+
markers = [
|
|
118
|
+
"integration: marks tests as integration tests (deselect with '-m \"not integration\"')",
|
|
119
|
+
"s3vectors: marks tests requiring S3 Vectors service",
|
|
120
|
+
]
|
|
121
|
+
|
|
122
|
+
[tool.coverage.run]
|
|
123
|
+
source = ["stache_ai"]
|
|
124
|
+
branch = true
|
|
125
|
+
omit = ["tests/*", "*/migrations/*"]
|
|
126
|
+
|
|
127
|
+
[tool.coverage.report]
|
|
128
|
+
exclude_lines = [
|
|
129
|
+
"pragma: no cover",
|
|
130
|
+
"def __repr__",
|
|
131
|
+
"raise AssertionError",
|
|
132
|
+
"raise NotImplementedError",
|
|
133
|
+
"if __name__ == .__main__.:",
|
|
134
|
+
"if TYPE_CHECKING:",
|
|
135
|
+
]
|
|
136
|
+
|
|
137
|
+
[tool.ruff]
|
|
138
|
+
target-version = "py310"
|
|
139
|
+
line-length = 100
|
|
140
|
+
select = [
|
|
141
|
+
"E", # pycodestyle errors
|
|
142
|
+
"W", # pycodestyle warnings
|
|
143
|
+
"F", # pyflakes
|
|
144
|
+
"I", # isort
|
|
145
|
+
"B", # flake8-bugbear
|
|
146
|
+
"C4", # flake8-comprehensions
|
|
147
|
+
"UP", # pyupgrade
|
|
148
|
+
]
|
|
149
|
+
ignore = [
|
|
150
|
+
"E501", # line too long (handled by formatter)
|
|
151
|
+
"B008", # do not perform function calls in argument defaults
|
|
152
|
+
]
|
|
153
|
+
|
|
154
|
+
[tool.mypy]
|
|
155
|
+
python_version = "3.10"
|
|
156
|
+
warn_return_any = true
|
|
157
|
+
warn_unused_configs = true
|
|
158
|
+
ignore_missing_imports = true
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""API package"""
|