stele-context 0.7.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- stele_context-0.7.0/LICENSE +21 -0
- stele_context-0.7.0/PKG-INFO +554 -0
- stele_context-0.7.0/README.md +481 -0
- stele_context-0.7.0/pyproject.toml +115 -0
- stele_context-0.7.0/setup.cfg +4 -0
- stele_context-0.7.0/stele/__init__.py +36 -0
- stele_context-0.7.0/stele/bm25.py +125 -0
- stele_context-0.7.0/stele/chunkers/__init__.py +67 -0
- stele_context-0.7.0/stele/chunkers/audio.py +198 -0
- stele_context-0.7.0/stele/chunkers/base.py +307 -0
- stele_context-0.7.0/stele/chunkers/code.py +613 -0
- stele_context-0.7.0/stele/chunkers/image.py +277 -0
- stele_context-0.7.0/stele/chunkers/numpy_compat.py +77 -0
- stele_context-0.7.0/stele/chunkers/pdf.py +173 -0
- stele_context-0.7.0/stele/chunkers/text.py +319 -0
- stele_context-0.7.0/stele/chunkers/video.py +254 -0
- stele_context-0.7.0/stele/cli.py +579 -0
- stele_context-0.7.0/stele/cli_metadata.py +140 -0
- stele_context-0.7.0/stele/config.py +195 -0
- stele_context-0.7.0/stele/coordination.py +695 -0
- stele_context-0.7.0/stele/core.py +18 -0
- stele_context-0.7.0/stele/document_lock_storage.py +440 -0
- stele_context-0.7.0/stele/engine.py +1735 -0
- stele_context-0.7.0/stele/env_checks.py +153 -0
- stele_context-0.7.0/stele/index.py +616 -0
- stele_context-0.7.0/stele/index_store.py +175 -0
- stele_context-0.7.0/stele/mcp_server.py +842 -0
- stele_context-0.7.0/stele/mcp_stdio.py +969 -0
- stele_context-0.7.0/stele/metadata_storage.py +233 -0
- stele_context-0.7.0/stele/py.typed +0 -0
- stele_context-0.7.0/stele/rwlock.py +52 -0
- stele_context-0.7.0/stele/session.py +224 -0
- stele_context-0.7.0/stele/session_storage.py +350 -0
- stele_context-0.7.0/stele/storage.py +1040 -0
- stele_context-0.7.0/stele/symbol_graph.py +327 -0
- stele_context-0.7.0/stele/symbol_storage.py +256 -0
- stele_context-0.7.0/stele/symbols.py +885 -0
- stele_context-0.7.0/stele_context.egg-info/PKG-INFO +554 -0
- stele_context-0.7.0/stele_context.egg-info/SOURCES.txt +60 -0
- stele_context-0.7.0/stele_context.egg-info/dependency_links.txt +1 -0
- stele_context-0.7.0/stele_context.egg-info/entry_points.txt +3 -0
- stele_context-0.7.0/stele_context.egg-info/requires.txt +58 -0
- stele_context-0.7.0/stele_context.egg-info/top_level.txt +1 -0
- stele_context-0.7.0/tests/test_agent_embeddings.py +182 -0
- stele_context-0.7.0/tests/test_bm25.py +137 -0
- stele_context-0.7.0/tests/test_chunk_history.py +119 -0
- stele_context-0.7.0/tests/test_chunkers.py +219 -0
- stele_context-0.7.0/tests/test_concurrency.py +366 -0
- stele_context-0.7.0/tests/test_config.py +181 -0
- stele_context-0.7.0/tests/test_conflicts.py +493 -0
- stele_context-0.7.0/tests/test_core.py +261 -0
- stele_context-0.7.0/tests/test_engine.py +293 -0
- stele_context-0.7.0/tests/test_index.py +205 -0
- stele_context-0.7.0/tests/test_index_store.py +209 -0
- stele_context-0.7.0/tests/test_mcp_server.py +389 -0
- stele_context-0.7.0/tests/test_mcp_stdio.py +236 -0
- stele_context-0.7.0/tests/test_metadata.py +370 -0
- stele_context-0.7.0/tests/test_session.py +124 -0
- stele_context-0.7.0/tests/test_storage_migration.py +208 -0
- stele_context-0.7.0/tests/test_symbols.py +874 -0
- stele_context-0.7.0/tests/test_tree_sitter.py +242 -0
- stele_context-0.7.0/tests/test_worktree_safety.py +838 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Stele Contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,554 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: stele-context
|
|
3
|
+
Version: 0.7.0
|
|
4
|
+
Summary: Local context cache for LLM agents with semantic chunking and vector search
|
|
5
|
+
Author: Stele Contributors
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/IronAdamant/Stele
|
|
8
|
+
Project-URL: Repository, https://github.com/IronAdamant/Stele
|
|
9
|
+
Project-URL: Documentation, https://github.com/IronAdamant/Stele#readme
|
|
10
|
+
Keywords: llm,context-cache,chunking,vector-search,semantic-search,offline,local
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
19
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
20
|
+
Requires-Python: >=3.9
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
License-File: LICENSE
|
|
23
|
+
Provides-Extra: performance
|
|
24
|
+
Requires-Dist: msgspec>=0.18.0; extra == "performance"
|
|
25
|
+
Requires-Dist: numpy>=1.24.0; extra == "performance"
|
|
26
|
+
Provides-Extra: image
|
|
27
|
+
Requires-Dist: Pillow>=10.0; extra == "image"
|
|
28
|
+
Provides-Extra: pdf
|
|
29
|
+
Requires-Dist: pymupdf>=1.23.0; extra == "pdf"
|
|
30
|
+
Provides-Extra: audio
|
|
31
|
+
Requires-Dist: librosa>=0.10.0; extra == "audio"
|
|
32
|
+
Requires-Dist: numpy>=1.24.0; extra == "audio"
|
|
33
|
+
Provides-Extra: video
|
|
34
|
+
Requires-Dist: opencv-python>=4.8.0; extra == "video"
|
|
35
|
+
Requires-Dist: numpy>=1.24.0; extra == "video"
|
|
36
|
+
Provides-Extra: tree-sitter
|
|
37
|
+
Requires-Dist: tree-sitter>=0.23.0; extra == "tree-sitter"
|
|
38
|
+
Requires-Dist: tree-sitter-javascript>=0.23.0; extra == "tree-sitter"
|
|
39
|
+
Requires-Dist: tree-sitter-typescript>=0.23.0; extra == "tree-sitter"
|
|
40
|
+
Requires-Dist: tree-sitter-java>=0.23.0; extra == "tree-sitter"
|
|
41
|
+
Requires-Dist: tree-sitter-c>=0.23.0; extra == "tree-sitter"
|
|
42
|
+
Requires-Dist: tree-sitter-cpp>=0.23.0; extra == "tree-sitter"
|
|
43
|
+
Requires-Dist: tree-sitter-go>=0.23.0; extra == "tree-sitter"
|
|
44
|
+
Requires-Dist: tree-sitter-rust>=0.23.0; extra == "tree-sitter"
|
|
45
|
+
Requires-Dist: tree-sitter-ruby>=0.23.0; extra == "tree-sitter"
|
|
46
|
+
Requires-Dist: tree-sitter-php>=0.23.0; extra == "tree-sitter"
|
|
47
|
+
Provides-Extra: mcp
|
|
48
|
+
Requires-Dist: mcp>=1.0.0; extra == "mcp"
|
|
49
|
+
Provides-Extra: all
|
|
50
|
+
Requires-Dist: msgspec>=0.18.0; extra == "all"
|
|
51
|
+
Requires-Dist: numpy>=1.24.0; extra == "all"
|
|
52
|
+
Requires-Dist: Pillow>=10.0; extra == "all"
|
|
53
|
+
Requires-Dist: pymupdf>=1.23.0; extra == "all"
|
|
54
|
+
Requires-Dist: librosa>=0.10.0; extra == "all"
|
|
55
|
+
Requires-Dist: opencv-python>=4.8.0; extra == "all"
|
|
56
|
+
Requires-Dist: tree-sitter>=0.23.0; extra == "all"
|
|
57
|
+
Requires-Dist: tree-sitter-javascript>=0.23.0; extra == "all"
|
|
58
|
+
Requires-Dist: tree-sitter-typescript>=0.23.0; extra == "all"
|
|
59
|
+
Requires-Dist: tree-sitter-java>=0.23.0; extra == "all"
|
|
60
|
+
Requires-Dist: tree-sitter-c>=0.23.0; extra == "all"
|
|
61
|
+
Requires-Dist: tree-sitter-cpp>=0.23.0; extra == "all"
|
|
62
|
+
Requires-Dist: tree-sitter-go>=0.23.0; extra == "all"
|
|
63
|
+
Requires-Dist: tree-sitter-rust>=0.23.0; extra == "all"
|
|
64
|
+
Requires-Dist: tree-sitter-ruby>=0.23.0; extra == "all"
|
|
65
|
+
Requires-Dist: tree-sitter-php>=0.23.0; extra == "all"
|
|
66
|
+
Requires-Dist: mcp>=1.0.0; extra == "all"
|
|
67
|
+
Provides-Extra: dev
|
|
68
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
69
|
+
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
70
|
+
Requires-Dist: mypy>=1.0; extra == "dev"
|
|
71
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
72
|
+
Dynamic: license-file
|
|
73
|
+
|
|
74
|
+
# Stele
|
|
75
|
+
|
|
76
|
+
**Local context cache for LLM agents with semantic chunking and vector search.**
|
|
77
|
+
|
|
78
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
79
|
+
[Python 3.9+](https://www.python.org/downloads/)
|
|
80
|
+
[GitHub repository](https://github.com/IronAdamant/Stele)
|
|
81
|
+
[CI status](https://github.com/IronAdamant/Stele/actions)
|
|
82
|
+
|
|
83
|
+
Stele helps LLM agents avoid re-reading unchanged files by caching chunk data with semantic search. Documents are routed through modality-specific chunkers, chunk content is stored in SQLite, and an HNSW vector index enables fast O(log n) retrieval. Only modified chunks trigger reprocessing.
|
|
84
|
+
|
|
85
|
+
## Key Features
|
|
86
|
+
|
|
87
|
+
- **100% Offline & Local-Only**: No internet access, no external API calls, no cloud components
|
|
88
|
+
- **Zero Required Dependencies**: Runs on Python stdlib alone — no supply chain risks
|
|
89
|
+
- **Multi-Modal Support**: Text, code, images, PDFs, audio, and video (optional dependencies)
|
|
90
|
+
- **HNSW Vector Index**: O(log n) semantic search across all indexed chunks
|
|
91
|
+
- **Hybrid Search**: HNSW cosine similarity + BM25 keyword matching, auto-tuned blending
|
|
92
|
+
- **Tree-Sitter Chunking**: AST-aware code chunking for 9 languages (optional, falls back to regex)
|
|
93
|
+
- **Symbol Graph**: Cross-file reference tracking — `find_references`, `find_definition`, `impact_radius`
|
|
94
|
+
- **Multi-Agent Safe**: Per-document locking, optimistic versioning, cross-worktree coordination
|
|
95
|
+
- **MCP Server**: JSON-RPC over stdio for Claude Desktop, HTTP REST for other agents
|
|
96
|
+
- **Project Config**: `.stele.toml` file for per-project settings
|
|
97
|
+
- **Session Management**: Sessions with rollback, pruning, and KV-cache persistence
|
|
98
|
+
|
|
99
|
+
## Architecture
|
|
100
|
+
|
|
101
|
+
```mermaid
|
|
102
|
+
graph TB
|
|
103
|
+
subgraph API["API Layer"]
|
|
104
|
+
CLI["CLI<br/>stele index / search / serve"]
|
|
105
|
+
HTTP["HTTP REST<br/>30 tools, threaded"]
|
|
106
|
+
MCP["MCP stdio<br/>32 tools, JSON-RPC"]
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
subgraph Engine["Engine (engine.py)"]
|
|
110
|
+
CFG["Config<br/>.stele.toml loader"]
|
|
111
|
+
SEARCH["Hybrid Search<br/>HNSW + BM25"]
|
|
112
|
+
IDX["index_documents()<br/>detect_changes()"]
|
|
113
|
+
SYM["Symbol Graph<br/>12 languages"]
|
|
114
|
+
SESS["Sessions<br/>rollback, pruning"]
|
|
115
|
+
LOCK["Document Locking<br/>ownership, versioning"]
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
subgraph Chunkers["Chunkers"]
|
|
119
|
+
TXT["TextChunker"]
|
|
120
|
+
CODE["CodeChunker<br/>Python AST<br/>tree-sitter (9 langs)<br/>regex fallback"]
|
|
121
|
+
IMG["ImageChunker<br/>(Pillow)"]
|
|
122
|
+
PDF["PDFChunker<br/>(pymupdf)"]
|
|
123
|
+
AUD["AudioChunker<br/>(librosa)"]
|
|
124
|
+
VID["VideoChunker<br/>(opencv)"]
|
|
125
|
+
end
|
|
126
|
+
|
|
127
|
+
subgraph Storage["Storage"]
|
|
128
|
+
SQLITE["SQLite<br/>chunks, symbols,<br/>sessions, history"]
|
|
129
|
+
HNSW["HNSW Index<br/>128-dim vectors"]
|
|
130
|
+
BM25["BM25 Index<br/>keyword scoring"]
|
|
131
|
+
KV["KV Cache<br/>JSON + zlib"]
|
|
132
|
+
COORD["Coordination DB<br/>cross-worktree locks"]
|
|
133
|
+
end
|
|
134
|
+
|
|
135
|
+
CLI --> Engine
|
|
136
|
+
HTTP --> Engine
|
|
137
|
+
MCP --> Engine
|
|
138
|
+
Engine --> Chunkers
|
|
139
|
+
Engine --> Storage
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
## Comparison
|
|
143
|
+
|
|
144
|
+
| Feature | Stele | LangChain | LlamaIndex | EverMemOS |
|
|
145
|
+
|---------|-------|-----------|------------|-----------|
|
|
146
|
+
| Zero dependencies | Yes | No (50+) | No (30+) | No (Mongo, Redis, Milvus) |
|
|
147
|
+
| 100% offline | Yes | No | No | No |
|
|
148
|
+
| No model downloads | Yes | No | No | No |
|
|
149
|
+
| Multi-modal | 6 modalities | Text-focused | Text-focused | Text only |
|
|
150
|
+
| Code-aware chunking | AST + tree-sitter | Basic splitting | Basic splitting | No |
|
|
151
|
+
| Symbol graph | 12 languages | No | No | No |
|
|
152
|
+
| Multi-agent safety | Locks + versioning | No | No | Yes |
|
|
153
|
+
| MCP server | Native | Plugin | Plugin | Planned |
|
|
154
|
+
| Storage | SQLite (embedded) | Vector DB (external) | Vector DB (external) | MongoDB + Milvus |
|
|
155
|
+
|
|
156
|
+
## Installation
|
|
157
|
+
|
|
158
|
+
```bash
|
|
159
|
+
# From source
|
|
160
|
+
git clone https://github.com/IronAdamant/Stele.git
|
|
161
|
+
cd Stele
|
|
162
|
+
pip install -e .
|
|
163
|
+
|
|
164
|
+
# With dev dependencies
|
|
165
|
+
pip install -e ".[dev]"
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
### Requirements
|
|
169
|
+
|
|
170
|
+
- Python 3.9+
|
|
171
|
+
- **Zero required dependencies**
|
|
172
|
+
|
|
173
|
+
### Optional Extras (all 100% offline)
|
|
174
|
+
|
|
175
|
+
| Extra | Packages | Use Case |
|
|
176
|
+
|-------|----------|----------|
|
|
177
|
+
| `performance` | msgspec, numpy | Faster serialization & vector math |
|
|
178
|
+
| `image` | Pillow | Image indexing & similarity |
|
|
179
|
+
| `pdf` | pymupdf | PDF text extraction |
|
|
180
|
+
| `audio` | librosa, numpy | Audio segmentation & features |
|
|
181
|
+
| `video` | opencv-python, numpy | Video keyframe extraction |
|
|
182
|
+
| `tree-sitter` | tree-sitter + 9 grammar packages | AST-aware code chunking for JS/TS, Java, C/C++, Go, Rust, Ruby, PHP |
|
|
183
|
+
| `mcp` | mcp | MCP stdio server for Claude Desktop |
|
|
184
|
+
| `all` | All of the above | Everything |
|
|
185
|
+
|
|
186
|
+
```bash
|
|
187
|
+
pip install "stele-context[tree-sitter]" # AST-aware code chunking
|
|
188
|
+
pip install "stele-context[image,pdf]" # Multi-modal
|
|
189
|
+
pip install "stele-context[all]" # Everything
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
## Quick Start
|
|
193
|
+
|
|
194
|
+
### 1. Index Documents
|
|
195
|
+
|
|
196
|
+
```bash
|
|
197
|
+
stele index src/*.py docs/*.md
|
|
198
|
+
stele index --force document.py # Force re-index
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
### 2. Semantic Search
|
|
202
|
+
|
|
203
|
+
```bash
|
|
204
|
+
stele search "authentication logic" --top-k 5
|
|
205
|
+
stele search "error handling" --json
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
### 3. MCP Server (for Claude Code / Claude Desktop)
|
|
209
|
+
|
|
210
|
+
```bash
|
|
211
|
+
pip install "stele-context[mcp]"
|
|
212
|
+
stele serve-mcp
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
**Claude Code** (`~/.claude/settings.json`):
|
|
216
|
+
```json
|
|
217
|
+
{
|
|
218
|
+
"mcpServers": {
|
|
219
|
+
"stele": {
|
|
220
|
+
"command": "stele",
|
|
221
|
+
"args": ["serve-mcp"]
|
|
222
|
+
}
|
|
223
|
+
}
|
|
224
|
+
}
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
**Claude Desktop** (`~/.config/Claude/claude_desktop_config.json`):
|
|
228
|
+
```json
|
|
229
|
+
{
|
|
230
|
+
"mcpServers": {
|
|
231
|
+
"stele": {
|
|
232
|
+
"command": "stele",
|
|
233
|
+
"args": ["serve-mcp"]
|
|
234
|
+
}
|
|
235
|
+
}
|
|
236
|
+
}
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
> **Tip:** If installed in a virtualenv, use the full path to the `stele` binary.
|
|
240
|
+
|
|
241
|
+
### 4. HTTP REST Server
|
|
242
|
+
|
|
243
|
+
```bash
|
|
244
|
+
stele serve --port 9876
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
### 5. Project Configuration
|
|
248
|
+
|
|
249
|
+
Create `.stele.toml` in your project root:
|
|
250
|
+
|
|
251
|
+
```toml
|
|
252
|
+
[stele]
|
|
253
|
+
chunk_size = 512
|
|
254
|
+
max_chunk_size = 8192
|
|
255
|
+
merge_threshold = 0.75
|
|
256
|
+
change_threshold = 0.90
|
|
257
|
+
search_alpha = 0.6
|
|
258
|
+
skip_dirs = [".git", "node_modules", "dist", "vendor"]
|
|
259
|
+
```
|
|
260
|
+
|
|
261
|
+
All values are optional — constructor params override config file values (see "Configuration Reference" below for the full priority order).
|
|
262
|
+
|
|
263
|
+
## Python API
|
|
264
|
+
|
|
265
|
+
```python
|
|
266
|
+
from stele import Stele
|
|
267
|
+
|
|
268
|
+
engine = Stele()
|
|
269
|
+
|
|
270
|
+
# Index documents (auto-detects modality, walks directories)
|
|
271
|
+
result = engine.index_documents(["src/", "README.md"])
|
|
272
|
+
print(f"Indexed {result['total_chunks']} chunks")
|
|
273
|
+
|
|
274
|
+
# Hybrid semantic search (HNSW + BM25)
|
|
275
|
+
results = engine.search("authentication logic", top_k=5)
|
|
276
|
+
for r in results:
|
|
277
|
+
    print(f"[{r['relevance_score']:.3f}] {r['document_path']}")
|
|
278
|
+
    print(f" {r['content'][:100]}...")
|
|
279
|
+
|
|
280
|
+
# Get cached context — unchanged chunks skip reprocessing
|
|
281
|
+
context = engine.get_context(["src/main.py", "src/utils.py"])
|
|
282
|
+
for doc in context["unchanged"]:
|
|
283
|
+
    print(f"{doc['path']}: {len(doc['chunks'])} cached chunks")
|
|
284
|
+
|
|
285
|
+
# Symbol graph — cross-file reference tracking
|
|
286
|
+
refs = engine.find_references("Stele")
|
|
287
|
+
defn = engine.find_definition("StorageBackend")
|
|
288
|
+
|
|
289
|
+
# Impact analysis — what breaks if this changes?
|
|
290
|
+
impact = engine.impact_radius(chunk_id="abc123", depth=2)
|
|
291
|
+
|
|
292
|
+
# Staleness detection — find chunks with stale dependencies
|
|
293
|
+
stale = engine.stale_chunks(threshold=0.3)
|
|
294
|
+
|
|
295
|
+
# Chunk version history
|
|
296
|
+
history = engine.get_chunk_history(document_path="src/main.py")
|
|
297
|
+
|
|
298
|
+
# Session management
|
|
299
|
+
engine.save_kv_state("session-1", {"chunk_id": {"key": "value"}})
|
|
300
|
+
engine.rollback("session-1", target_turn=2)
|
|
301
|
+
engine.prune_chunks("session-1", max_tokens=100000)
|
|
302
|
+
|
|
303
|
+
# Multi-agent document locking
|
|
304
|
+
engine.acquire_document_lock("src/main.py", agent_id="agent-alpha")
|
|
305
|
+
engine.index_documents(["src/main.py"], agent_id="agent-alpha")
|
|
306
|
+
engine.release_document_lock("src/main.py", agent_id="agent-alpha")
|
|
307
|
+
```
|
|
308
|
+
|
|
309
|
+
### Configuration
|
|
310
|
+
|
|
311
|
+
```python
|
|
312
|
+
engine = Stele(
|
|
313
|
+
chunk_size=256, # Target tokens per initial chunk
|
|
314
|
+
max_chunk_size=4096, # Maximum tokens per merged chunk
|
|
315
|
+
merge_threshold=0.7, # Similarity threshold for merging
|
|
316
|
+
change_threshold=0.85, # Similarity threshold for "unchanged"
|
|
317
|
+
search_alpha=0.7, # Blend: 1.0 = pure vector, 0.0 = pure keyword
|
|
318
|
+
)
|
|
319
|
+
```
|
|
320
|
+
|
|
321
|
+
Or use `.stele.toml` (see above) — constructor params override config file values.
|
|
322
|
+
|
|
323
|
+
### Agent-Supplied Semantic Embeddings
|
|
324
|
+
|
|
325
|
+
LLM agents already understand the semantics of every chunk they read. Instead of using a separate embedding model, Stele captures the agent's understanding directly:
|
|
326
|
+
|
|
327
|
+
```python
|
|
328
|
+
# After indexing, the agent describes what each chunk does
|
|
329
|
+
engine.store_semantic_summary(
|
|
330
|
+
chunk_id="abc123",
|
|
331
|
+
summary="JWT authentication middleware that validates bearer tokens and attaches user identity to request context"
|
|
332
|
+
)
|
|
333
|
+
|
|
334
|
+
# Now searches like "token validation" match far better than
|
|
335
|
+
# statistical signatures on raw code would
|
|
336
|
+
results = engine.search("token validation middleware")
|
|
337
|
+
```
|
|
338
|
+
|
|
339
|
+
The agent IS the embedding model. Stele just stores and indexes what the agent tells it — zero new dependencies, no model downloads, no API calls.
|
|
340
|
+
|
|
341
|
+
**How it works:**
|
|
342
|
+
- **Tier 1** (always): 128-dim statistical signatures — trigrams, bigrams, structural features. Used for change detection.
|
|
343
|
+
- **Tier 2** (optional): Agent-supplied semantic summaries. Stele computes a signature from the summary text and uses it for HNSW search. ~9% improvement on semantic queries.
|
|
344
|
+
- **Tier 2 alt**: `store_embedding(chunk_id, vector)` for agents with direct embedding API access.
|
|
345
|
+
|
|
346
|
+
## MCP Tools
|
|
347
|
+
|
|
348
|
+
### HTTP Server (30 tools)
|
|
349
|
+
|
|
350
|
+
| Category | Tools |
|
|
351
|
+
|----------|-------|
|
|
352
|
+
| **Indexing** | `index_documents`, `detect_changes_and_update`, `detect_modality`, `get_supported_formats` |
|
|
353
|
+
| **Search** | `search`, `get_context`, `get_relevant_kv` |
|
|
354
|
+
| **Sessions** | `save_kv_state`, `rollback`, `prune_chunks`, `list_sessions` |
|
|
355
|
+
| **Symbols** | `find_references`, `find_definition`, `impact_radius`, `rebuild_symbol_graph`, `stale_chunks` |
|
|
356
|
+
| **Locking** | `acquire_document_lock`, `release_document_lock`, `refresh_document_lock`, `get_document_lock_status`, `release_agent_locks`, `reap_expired_locks` |
|
|
357
|
+
| **History** | `get_conflicts`, `get_chunk_history`, `get_notifications` |
|
|
358
|
+
| **Embeddings** | `store_semantic_summary`, `store_embedding` |
|
|
359
|
+
| **Utilities** | `list_agents`, `environment_check`, `clean_bytecache` |
|
|
360
|
+
|
|
361
|
+
### MCP stdio Server (32 tools)
|
|
362
|
+
|
|
363
|
+
All HTTP tools plus: `annotate`, `get_annotations`, `delete_annotation`, `update_annotation`, `search_annotations`, `bulk_annotate`, `prune_history`, `map`, `history`, `stats`, `remove`
|
|
364
|
+
|
|
365
|
+
## How It Works
|
|
366
|
+
|
|
367
|
+
### Change Detection
|
|
368
|
+
|
|
369
|
+
```
|
|
370
|
+
For each chunk:
|
|
371
|
+
1. SHA-256 hash → exact match → instant cache hit (0 tokens)
|
|
372
|
+
2. Hash differs → compute 128-dim semantic signature
|
|
373
|
+
3. Cosine similarity > threshold → semantically similar → restore KV
|
|
374
|
+
4. Similarity ≤ threshold → significant change → reprocess
|
|
375
|
+
```
|
|
376
|
+
|
|
377
|
+
### Token Savings
|
|
378
|
+
|
|
379
|
+
| Scenario | Without Stele | With Stele | Savings |
|
|
380
|
+
|----------|---------------|------------|---------|
|
|
381
|
+
| Unchanged document | 10,000 tokens | 0 tokens | 100% |
|
|
382
|
+
| Minor edit (typo) | 10,000 tokens | ~100 tokens | 99% |
|
|
383
|
+
| Moderate edit | 10,000 tokens | ~1,000 tokens | 90% |
|
|
384
|
+
| Major rewrite | 10,000 tokens | 10,000 tokens | 0% |
|
|
385
|
+
|
|
386
|
+
### Code Chunking Strategy
|
|
387
|
+
|
|
388
|
+
| Language | Parser | Fallback |
|
|
389
|
+
|----------|--------|----------|
|
|
390
|
+
| Python | stdlib `ast` (always) | regex |
|
|
391
|
+
| JS/TS, Java, C/C++, Go, Rust, Ruby, PHP | tree-sitter (optional) | regex patterns |
|
|
392
|
+
| Shell, Swift, SQL, config files | regex patterns | line-based |
|
|
393
|
+
|
|
394
|
+
Tree-sitter provides proper AST boundary detection for function/class definitions.
|
|
395
|
+
Install with `pip install "stele-context[tree-sitter]"`.
|
|
396
|
+
|
|
397
|
+
### Storage Layout
|
|
398
|
+
|
|
399
|
+
```
|
|
400
|
+
<project_root>/.stele/ # Per-worktree (default)
|
|
401
|
+
├── stele.db # SQLite: chunks, symbols, sessions, history
|
|
402
|
+
├── kv_cache/ # JSON + zlib compressed KV states
|
|
403
|
+
└── indices/ # HNSW + BM25 persistent indices
|
|
404
|
+
|
|
405
|
+
<git-common-dir>/stele/ # Shared across worktrees
|
|
406
|
+
└── coordination.db # Agent registry, shared locks, notifications
|
|
407
|
+
```
|
|
408
|
+
|
|
409
|
+
## Multi-Agent Support
|
|
410
|
+
|
|
411
|
+
Stele supports multiple LLM agents sharing one store on the same machine.
|
|
412
|
+
|
|
413
|
+
| Layer | Protection |
|
|
414
|
+
|-------|-----------|
|
|
415
|
+
| **Thread safety** | RWLock — concurrent reads, exclusive writes |
|
|
416
|
+
| **Process safety** | `fcntl.flock()` on index files |
|
|
417
|
+
| **Document ownership** | `acquire_document_lock()` with TTL expiry |
|
|
418
|
+
| **Optimistic locking** | `doc_version` compare-and-swap |
|
|
419
|
+
| **Cross-worktree** | Shared coordination DB for locks, agent registry, notifications |
|
|
420
|
+
| **Conflict log** | Full audit trail of ownership violations |
|
|
421
|
+
|
|
422
|
+
## Performance
|
|
423
|
+
|
|
424
|
+
Run benchmarks:
|
|
425
|
+
```bash
|
|
426
|
+
python benchmarks/run_all.py # Full suite
|
|
427
|
+
python benchmarks/run_all.py --quick # CI mode
|
|
428
|
+
```
|
|
429
|
+
|
|
430
|
+
Representative results (quick mode):
|
|
431
|
+
|
|
432
|
+
| Operation | Size | Time | Throughput |
|
|
433
|
+
|-----------|------|------|------------|
|
|
434
|
+
| TextChunker | 10KB | 1.6ms | 6,100 KB/s |
|
|
435
|
+
| CodeChunker (AST) | 10KB | 5.7ms | 1,750 KB/s |
|
|
436
|
+
| store_chunk (batch) | 100 | 27ms | 3,700 ops/s |
|
|
437
|
+
| VectorIndex.search (k=10) | 500 nodes | 4.7ms | 212 qps |
|
|
438
|
+
| BM25.score_batch | 100 docs | 0.18ms | 556K docs/s |
|
|
439
|
+
| engine.search (hybrid) | 50 docs | 9.9ms | 101 qps |
|
|
440
|
+
|
|
441
|
+
## Security & Supply Chain
|
|
442
|
+
|
|
443
|
+
- **Zero required dependencies** — no supply chain attack surface for core functionality
|
|
444
|
+
- **No model downloads** — semantic signatures use statistical features, not ML models
|
|
445
|
+
- **No API calls** — everything runs locally, no data leaves your machine
|
|
446
|
+
- **No pickle** — session data serialized with JSON+zlib
|
|
447
|
+
- **Minimal codebase** — ~10,000 lines of Python, easy to audit
|
|
448
|
+
|
|
449
|
+
```bash
|
|
450
|
+
# Maximum security: install with zero dependencies
|
|
451
|
+
pip install stele-context --no-deps
|
|
452
|
+
```
|
|
453
|
+
|
|
454
|
+
## Supported Formats
|
|
455
|
+
|
|
456
|
+
### Text & Code (Zero Dependencies)
|
|
457
|
+
`.txt`, `.md`, `.rst`, `.csv`, `.log`, `.py`, `.js`, `.ts`, `.jsx`, `.tsx`, `.java`, `.cpp`, `.c`, `.h`, `.go`, `.rs`, `.rb`, `.php`, `.swift`, `.sh`, `.json`, `.yaml`, `.toml`, `.html`, `.css`, `.sql`
|
|
458
|
+
|
|
459
|
+
### Images (requires Pillow)
|
|
460
|
+
`.png`, `.jpg`, `.jpeg`, `.gif`, `.webp`, `.bmp`, `.tiff`, `.ico`
|
|
461
|
+
|
|
462
|
+
### PDFs (requires pymupdf)
|
|
463
|
+
`.pdf`
|
|
464
|
+
|
|
465
|
+
### Audio (requires librosa)
|
|
466
|
+
`.mp3`, `.wav`, `.ogg`, `.flac`, `.m4a`, `.aac`, `.wma`
|
|
467
|
+
|
|
468
|
+
### Video (requires opencv-python)
|
|
469
|
+
`.mp4`, `.avi`, `.mov`, `.mkv`, `.webm`, `.flv`, `.wmv`
|
|
470
|
+
|
|
471
|
+
## Configuration Reference
|
|
472
|
+
|
|
473
|
+
### Environment Variables
|
|
474
|
+
|
|
475
|
+
| Variable | Description |
|
|
476
|
+
|----------|-------------|
|
|
477
|
+
| `STELE_STORAGE_DIR` | Override default storage directory |
|
|
478
|
+
| `STELE_LOG_LEVEL` | Logging level (DEBUG, INFO, WARNING, ERROR) |
|
|
479
|
+
|
|
480
|
+
### Config File (`.stele.toml`)
|
|
481
|
+
|
|
482
|
+
```toml
|
|
483
|
+
[stele]
|
|
484
|
+
storage_dir = ".stele" # Storage directory (relative to project root)
|
|
485
|
+
chunk_size = 256 # Target tokens per initial chunk
|
|
486
|
+
max_chunk_size = 4096 # Maximum tokens per merged chunk
|
|
487
|
+
merge_threshold = 0.7 # Similarity threshold for merging chunks
|
|
488
|
+
change_threshold = 0.85 # Similarity threshold for "unchanged"
|
|
489
|
+
search_alpha = 0.7 # Hybrid search blend (1.0=vector, 0.0=keyword)
|
|
490
|
+
skip_dirs = [".git", "node_modules", "__pycache__"]
|
|
491
|
+
```
|
|
492
|
+
|
|
493
|
+
Priority: constructor params > `.stele.toml` > `STELE_STORAGE_DIR` env var > defaults.
|
|
494
|
+
|
|
495
|
+
## FAQ
|
|
496
|
+
|
|
497
|
+
**Q: Does Stele require an internet connection?**
|
|
498
|
+
No. Stele is 100% offline. No API calls, no model downloads, no telemetry. All operations run locally using Python stdlib.
|
|
499
|
+
|
|
500
|
+
**Q: How does Stele compare to RAG (Retrieval-Augmented Generation)?**
|
|
501
|
+
Stele is not RAG — it's a context cache. RAG retrieves chunks at query time from an external store. Stele caches chunk KV-states so the LLM skips re-reading unchanged content. It can be used alongside RAG, but its primary value is token savings through change detection.
|
|
502
|
+
|
|
503
|
+
**Q: What happens if tree-sitter isn't installed?**
|
|
504
|
+
Code chunking falls back to regex patterns for non-Python languages. Python always uses stdlib `ast`. Install tree-sitter for better accuracy on JS/TS, Java, C/C++, Go, Rust, Ruby, PHP: `pip install "stele-context[tree-sitter]"`.
|
|
505
|
+
|
|
506
|
+
**Q: Can multiple agents use Stele simultaneously?**
|
|
507
|
+
Yes. Stele provides per-document locking, optimistic versioning, and a cross-worktree coordination DB. Both HTTP and MCP servers auto-register agents and inject agent IDs into write operations.
|
|
508
|
+
|
|
509
|
+
**Q: How accurate are the semantic signatures?**
|
|
510
|
+
The 128-dim statistical signatures (trigrams, bigrams, structural features) are approximate. They're designed for change detection (same vs different), not for embedding-quality similarity. For typical code and documentation, they achieve ~95% accuracy on change detection.
|
|
511
|
+
|
|
512
|
+
**Q: Where is data stored?**
|
|
513
|
+
By default, `<project_root>/.stele/` (each git worktree gets its own). Override with `STELE_STORAGE_DIR` or `storage_dir` in `.stele.toml`. Cross-worktree coordination data lives in `<git-common-dir>/stele/coordination.db`.
|
|
514
|
+
|
|
515
|
+
## Troubleshooting
|
|
516
|
+
|
|
517
|
+
**`ImportError: No module named 'stele'`**
|
|
518
|
+
Ensure Stele is installed: `pip install -e .` from the repo root. If using a virtualenv, make sure it's activated.
|
|
519
|
+
|
|
520
|
+
**MCP server not connecting in Claude Desktop**
|
|
521
|
+
Use the full path to the `stele` binary. Check with `which stele` and update your config. If installed in a virtualenv: `/path/to/.venv/bin/stele`.
|
|
522
|
+
|
|
523
|
+
**`PermissionError` when indexing**
|
|
524
|
+
Another agent holds a lock on the document. Inspect the lock with `get_document_lock_status()`, or call `reap_expired_locks()` to clean up stale locks.
|
|
525
|
+
|
|
526
|
+
**Slow search on large indices**
|
|
527
|
+
The HNSW index adapts search width automatically. For 10K+ chunks, search uses 4x `ef_search`. If still slow, reduce `top_k` or check that the BM25 index isn't being rebuilt on every query (it's lazy-loaded once).
|
|
528
|
+
|
|
529
|
+
**Tree-sitter not working for a language**
|
|
530
|
+
Verify the grammar package is installed: `pip install tree-sitter-javascript` (etc.). Check with: `python -c "from stele.chunkers.code import HAS_TREE_SITTER; print(HAS_TREE_SITTER)"`.
|
|
531
|
+
|
|
532
|
+
**Stale `.pyc` files causing issues**
|
|
533
|
+
Run the `environment_check` MCP tool, or call `engine.check_environment()`. Use `engine.clean_bytecache()` to remove orphaned `.pyc` files.
|
|
534
|
+
|
|
535
|
+
## Development
|
|
536
|
+
|
|
537
|
+
```bash
|
|
538
|
+
pip install -e ".[dev]"
|
|
539
|
+
pytest # 412 tests
|
|
540
|
+
pytest --cov=stele # With coverage
|
|
541
|
+
python benchmarks/run_all.py # Performance benchmarks
|
|
542
|
+
mypy stele/ # Type checking
|
|
543
|
+
ruff check stele/ # Linting
|
|
544
|
+
```
|
|
545
|
+
|
|
546
|
+
Entry points: `stele` (CLI), `stele-mcp` (MCP stdio server)
|
|
547
|
+
|
|
548
|
+
## Contributing
|
|
549
|
+
|
|
550
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
|
|
551
|
+
|
|
552
|
+
## License
|
|
553
|
+
|
|
554
|
+
MIT License — see [LICENSE](LICENSE) for details.
|