sqlite-muninn 0.1.0a1__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlite_muninn/__init__.py +43 -0
- sqlite_muninn/muninn.dll +0 -0
- sqlite_muninn-0.1.0a1.dist-info/METADATA +425 -0
- sqlite_muninn-0.1.0a1.dist-info/RECORD +7 -0
- sqlite_muninn-0.1.0a1.dist-info/WHEEL +5 -0
- sqlite_muninn-0.1.0a1.dist-info/licenses/LICENSE +21 -0
- sqlite_muninn-0.1.0a1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
"""sqlite-muninn: HNSW vector search + graph traversal + Node2Vec for SQLite.
|
|
2
|
+
|
|
3
|
+
Zero-dependency C11 SQLite extension. Three subsystems in one .load:
|
|
4
|
+
HNSW approximate nearest neighbor search, graph traversal TVFs, and Node2Vec.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import importlib.metadata
|
|
8
|
+
import pathlib
|
|
9
|
+
import sqlite3
|
|
10
|
+
|
|
11
|
+
# Directory that holds this package; a wheel install places the compiled
# extension next to this file.
_PKG_DIR = pathlib.Path(__file__).parent

try:
    __version__ = importlib.metadata.version("sqlite-muninn")
except importlib.metadata.PackageNotFoundError:
    # No installed distribution metadata (e.g. the package is imported straight
    # from a source checkout). Importing must still work in that case, so fall
    # back to a placeholder instead of failing at import time.
    __version__ = "0.0.0+unknown"
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def loadable_path() -> str:
    """Return the path to the muninn loadable extension, without its file suffix.

    SQLite's load_extension() automatically appends .so/.dylib/.dll, so the
    returned path stops at the ``muninn`` stem.

    The package directory is searched first (wheel install), then the
    ``build`` directory next to the package (development / git install).
    Only files with a shared-library suffix count as a match, so build
    by-products such as ``muninn.o`` or ``muninn.lib`` are not mistaken for
    the extension.

    Returns:
        The directory containing the extension joined with ``muninn``.

    Raises:
        FileNotFoundError: If neither location contains a muninn shared library.
    """
    library_suffixes = {".so", ".dylib", ".dll"}

    # Order matters: a binary bundled in the package wins over a local build.
    for directory in (_PKG_DIR, _PKG_DIR.parent / "build"):
        has_library = any(
            candidate.suffix.lower() in library_suffixes
            for candidate in directory.glob("muninn.*")
        )
        if has_library:
            return str(directory / "muninn")

    raise FileNotFoundError("muninn extension not found. Build it with: make all")
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def load(conn: sqlite3.Connection) -> None:
    """Register the muninn extension on an open SQLite connection.

    Extension loading has to be enabled on the connection before calling
    this function, for example:

        conn.enable_load_extension(True)
        sqlite_muninn.load(conn)
        conn.enable_load_extension(False)
    """
    # Resolve the suffix-less library path first, then hand it to SQLite.
    extension_path = loadable_path()
    conn.load_extension(extension_path)
|
sqlite_muninn/muninn.dll
ADDED
|
Binary file
|
|
@@ -0,0 +1,425 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: sqlite-muninn
|
|
3
|
+
Version: 0.1.0a1
|
|
4
|
+
Summary: HNSW vector search + graph traversal + Node2Vec for SQLite
|
|
5
|
+
License-Expression: MIT
|
|
6
|
+
Project-URL: Homepage, https://github.com/neozenith/sqlite-muninn
|
|
7
|
+
Project-URL: Repository, https://github.com/neozenith/sqlite-muninn
|
|
8
|
+
Keywords: sqlite,vector,hnsw,graph,node2vec,search
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: Programming Language :: C
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Topic :: Database
|
|
14
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
15
|
+
Requires-Python: >=3.12
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
License-File: LICENSE
|
|
18
|
+
Dynamic: license-file
|
|
19
|
+
|
|
20
|
+
# sqlite-muninn
|
|
21
|
+
|
|
22
|
+
<div align="center">
|
|
23
|
+
<img src="docs/assets/muninn_logo_transparent.png" alt="Muninn Raven Logo" width="480"/>
|
|
24
|
+
<p><i>Odin's mythic <a href="https://en.wikipedia.org/wiki/Huginn_and_Muninn">raven of Memory</a>.</i></p>
|
|
25
|
+
</div>
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
A zero-dependency C extension for SQLite that adds an advanced collection of knowledge graph primitives, including Vector Similarity Search, HNSW Indexes, Graph Database queries, Community Detection, and Node2Vec embeddings.
|
|
30
|
+
|
|
31
|
+
**[Documentation](https://neozenith.github.io/sqlite-muninn/)** | **[GitHub](https://github.com/neozenith/sqlite-muninn)**
|
|
32
|
+
|
|
33
|
+
```mermaid
|
|
34
|
+
graph LR
|
|
35
|
+
subgraph Muninn Extension
|
|
36
|
+
direction TB
|
|
37
|
+
HNSW["HNSW Virtual Table<br/><i>hnsw_index</i>"]
|
|
38
|
+
GTVF["Graph TVFs<br/><i>bfs, dfs, shortest_path,<br/>components, pagerank</i>"]
|
|
39
|
+
CENT["Centrality TVFs<br/><i>degree, betweenness,<br/>closeness</i>"]
|
|
40
|
+
COMM["Community Detection<br/><i>graph_leiden</i>"]
|
|
41
|
+
N2V["Node2Vec Training<br/><i>node2vec_train()</i>"]
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
ET["Any Edge Table<br/><i>src, dst columns</i>"] --> GTVF
|
|
45
|
+
ET --> CENT
|
|
46
|
+
ET --> COMM
|
|
47
|
+
ET --> N2V
|
|
48
|
+
N2V -->|embeddings| HNSW
|
|
49
|
+
APP["Application<br/><i>embeddings from<br/>any source</i>"] -->|INSERT| HNSW
|
|
50
|
+
HNSW -->|KNN search| APP
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## Features
|
|
54
|
+
|
|
55
|
+
- **HNSW Vector Index** — O(log N) approximate nearest neighbor search with incremental insert/delete
|
|
56
|
+
- **Graph Traversal** — BFS, DFS, shortest path, connected components, PageRank on any edge table
|
|
57
|
+
- **Centrality Measures** — Degree, betweenness (Brandes), and closeness centrality with weighted/temporal support
|
|
58
|
+
- **Community Detection** — Leiden algorithm for discovering graph communities with modularity scoring
|
|
59
|
+
- **Node2Vec** — Learn structural node embeddings from graph topology, store in HNSW for similarity search
|
|
60
|
+
- **Zero dependencies** — Pure C11, compiles to a single `.dylib`/`.so`/`.dll`
|
|
61
|
+
- **SIMD accelerated** — ARM NEON and x86 SSE distance functions
|
|
62
|
+
|
|
63
|
+
## Build
|
|
64
|
+
|
|
65
|
+
Requires SQLite development headers and a C11 compiler.
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
# macOS (Homebrew SQLite recommended)
|
|
69
|
+
brew install sqlite
|
|
70
|
+
make all
|
|
71
|
+
|
|
72
|
+
# Linux
|
|
73
|
+
sudo apt-get install libsqlite3-dev
|
|
74
|
+
make all
|
|
75
|
+
|
|
76
|
+
# Run tests
|
|
77
|
+
make test # C unit tests
|
|
78
|
+
make test-python # Python integration tests
|
|
79
|
+
make test-all # Both
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
## Quick Start
|
|
83
|
+
|
|
84
|
+
```sql
|
|
85
|
+
.load ./muninn
|
|
86
|
+
|
|
87
|
+
-- Create an HNSW vector index
|
|
88
|
+
CREATE VIRTUAL TABLE my_vectors USING hnsw_index(
|
|
89
|
+
dimensions=384, metric='cosine', m=16, ef_construction=200
|
|
90
|
+
);
|
|
91
|
+
|
|
92
|
+
-- Insert vectors
|
|
93
|
+
INSERT INTO my_vectors (rowid, vector) VALUES (1, ?); -- 384-dim float32 blob
|
|
94
|
+
|
|
95
|
+
-- KNN search
|
|
96
|
+
SELECT rowid, distance FROM my_vectors
|
|
97
|
+
WHERE vector MATCH :query AND k = 10 AND ef_search = 64;
|
|
98
|
+
|
|
99
|
+
-- Graph traversal on any edge table
|
|
100
|
+
SELECT node, depth FROM graph_bfs
|
|
101
|
+
WHERE edge_table = 'friendships' AND src_col = 'user_a'
|
|
102
|
+
AND dst_col = 'user_b' AND start_node = 'alice' AND max_depth = 3
|
|
103
|
+
AND direction = 'both';
|
|
104
|
+
|
|
105
|
+
-- Connected components
|
|
106
|
+
SELECT node, component_id, component_size FROM graph_components
|
|
107
|
+
WHERE edge_table = 'friendships' AND src_col = 'user_a' AND dst_col = 'user_b';
|
|
108
|
+
|
|
109
|
+
-- PageRank
|
|
110
|
+
SELECT node, rank FROM graph_pagerank
|
|
111
|
+
WHERE edge_table = 'citations' AND src_col = 'citing' AND dst_col = 'cited'
|
|
112
|
+
AND damping = 0.85 AND iterations = 20;
|
|
113
|
+
|
|
114
|
+
-- Betweenness centrality (find bridge nodes)
|
|
115
|
+
SELECT node, centrality FROM graph_betweenness
|
|
116
|
+
WHERE edge_table = 'friendships' AND src_col = 'user_a' AND dst_col = 'user_b'
|
|
117
|
+
AND direction = 'both'
|
|
118
|
+
ORDER BY centrality DESC LIMIT 10;
|
|
119
|
+
|
|
120
|
+
-- Community detection (Leiden algorithm)
|
|
121
|
+
SELECT node, community_id, modularity FROM graph_leiden
|
|
122
|
+
WHERE edge_table = 'friendships' AND src_col = 'user_a' AND dst_col = 'user_b';
|
|
123
|
+
|
|
124
|
+
-- Learn structural embeddings from graph topology
|
|
125
|
+
SELECT node2vec_train(
|
|
126
|
+
'friendships', 'user_a', 'user_b', 'my_vectors',
|
|
127
|
+
64, 1.0, 1.0, 10, 80, 5, 5, 0.025, 5
|
|
128
|
+
);
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
## Examples
|
|
132
|
+
|
|
133
|
+
Self-contained examples in the [`examples/`](examples/) directory:
|
|
134
|
+
|
|
135
|
+
| Example | Demonstrates |
|
|
136
|
+
|---------|-------------|
|
|
137
|
+
| [Semantic Search](examples/semantic_search/) | HNSW index, KNN queries, point lookup, delete |
|
|
138
|
+
| [Movie Recommendations](examples/movie_recommendations/) | Vector similarity for content-based recommendations |
|
|
139
|
+
| [Social Network](examples/social_network/) | Graph TVFs on a social graph (BFS, components, PageRank) |
|
|
140
|
+
| [Research Papers](examples/research_papers/) | Citation graph analysis with Node2Vec embeddings |
|
|
141
|
+
| [Transit Routes](examples/transit_routes/) | Shortest path and graph traversal on route networks |
|
|
142
|
+
|
|
143
|
+
```bash
|
|
144
|
+
make all
|
|
145
|
+
python examples/semantic_search/example.py
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
## API Reference
|
|
149
|
+
|
|
150
|
+
### HNSW Virtual Table (`hnsw_index`)
|
|
151
|
+
|
|
152
|
+
```sql
|
|
153
|
+
CREATE VIRTUAL TABLE name USING hnsw_index(
|
|
154
|
+
dimensions=N, -- vector dimensionality (required)
|
|
155
|
+
metric='l2', -- 'l2' | 'cosine' | 'inner_product'
|
|
156
|
+
m=16, -- max connections per node per layer
|
|
157
|
+
ef_construction=200 -- beam width during index construction
|
|
158
|
+
);
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
**Columns:**
|
|
162
|
+
|
|
163
|
+
| Column | Type | Hidden | Description |
|
|
164
|
+
|--------|------|--------|-------------|
|
|
165
|
+
| `rowid` | INTEGER | Yes | User-assigned ID for joining with application tables |
|
|
166
|
+
| `vector` | BLOB | No | `float32[dim]` — input for INSERT, MATCH constraint for search |
|
|
167
|
+
| `distance` | REAL | No | Computed distance (output only, during search) |
|
|
168
|
+
| `k` | INTEGER | Yes | Top-k parameter (search constraint) |
|
|
169
|
+
| `ef_search` | INTEGER | Yes | Search beam width (search constraint) |
|
|
170
|
+
|
|
171
|
+
**Operations:**
|
|
172
|
+
|
|
173
|
+
```sql
|
|
174
|
+
-- Insert
|
|
175
|
+
INSERT INTO t (rowid, vector) VALUES (42, :blob);
|
|
176
|
+
|
|
177
|
+
-- KNN search
|
|
178
|
+
SELECT rowid, distance FROM t WHERE vector MATCH :query AND k = 10;
|
|
179
|
+
|
|
180
|
+
-- Point lookup
|
|
181
|
+
SELECT vector FROM t WHERE rowid = 42;
|
|
182
|
+
|
|
183
|
+
-- Delete (with automatic neighbor reconnection)
|
|
184
|
+
DELETE FROM t WHERE rowid = 42;
|
|
185
|
+
|
|
186
|
+
-- Drop (removes index and all shadow tables)
|
|
187
|
+
DROP TABLE t;
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
**Shadow tables** (auto-managed):
|
|
191
|
+
- `{name}_config` — HNSW parameters
|
|
192
|
+
- `{name}_nodes` — stored vectors and level assignments
|
|
193
|
+
- `{name}_edges` — the proximity graph (usable by graph TVFs)
|
|
194
|
+
|
|
195
|
+
### Graph Table-Valued Functions
|
|
196
|
+
|
|
197
|
+
All graph TVFs work on **any** existing SQLite table with source/target columns. Table and column names are validated against SQL injection.
|
|
198
|
+
|
|
199
|
+
#### `graph_bfs` / `graph_dfs`
|
|
200
|
+
|
|
201
|
+
Breadth-first or depth-first traversal from a start node.
|
|
202
|
+
|
|
203
|
+
```sql
|
|
204
|
+
SELECT node, depth, parent FROM graph_bfs
|
|
205
|
+
WHERE edge_table = 'edges'
|
|
206
|
+
AND src_col = 'src'
|
|
207
|
+
AND dst_col = 'dst'
|
|
208
|
+
AND start_node = 'node-42'
|
|
209
|
+
AND max_depth = 5
|
|
210
|
+
AND direction = 'forward'; -- 'forward' | 'reverse' | 'both'
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
| Output Column | Type | Description |
|
|
214
|
+
|---------------|------|-------------|
|
|
215
|
+
| `node` | TEXT | Node identifier |
|
|
216
|
+
| `depth` | INTEGER | Hop distance from start |
|
|
217
|
+
| `parent` | TEXT | Parent node in traversal tree (NULL for start) |
|
|
218
|
+
|
|
219
|
+
#### `graph_shortest_path`
|
|
220
|
+
|
|
221
|
+
Unweighted (BFS) or weighted (Dijkstra) shortest path.
|
|
222
|
+
|
|
223
|
+
```sql
|
|
224
|
+
-- Unweighted
|
|
225
|
+
SELECT node, distance, path_order FROM graph_shortest_path
|
|
226
|
+
WHERE edge_table = 'edges' AND src_col = 'src' AND dst_col = 'dst'
|
|
227
|
+
AND start_node = 'A' AND end_node = 'Z' AND weight_col IS NULL;
|
|
228
|
+
|
|
229
|
+
-- Weighted (Dijkstra)
|
|
230
|
+
SELECT node, distance, path_order FROM graph_shortest_path
|
|
231
|
+
WHERE edge_table = 'edges' AND src_col = 'src' AND dst_col = 'dst'
|
|
232
|
+
AND start_node = 'A' AND end_node = 'Z' AND weight_col = 'weight';
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
| Output Column | Type | Description |
|
|
236
|
+
|---------------|------|-------------|
|
|
237
|
+
| `node` | TEXT | Node on the path |
|
|
238
|
+
| `distance` | REAL | Cumulative distance from start |
|
|
239
|
+
| `path_order` | INTEGER | Position in path (0-indexed) |
|
|
240
|
+
|
|
241
|
+
#### `graph_components`
|
|
242
|
+
|
|
243
|
+
Connected components via Union-Find with path compression.
|
|
244
|
+
|
|
245
|
+
```sql
|
|
246
|
+
SELECT node, component_id, component_size FROM graph_components
|
|
247
|
+
WHERE edge_table = 'edges' AND src_col = 'src' AND dst_col = 'dst';
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
| Output Column | Type | Description |
|
|
251
|
+
|---------------|------|-------------|
|
|
252
|
+
| `node` | TEXT | Node identifier |
|
|
253
|
+
| `component_id` | INTEGER | Component index (0-based) |
|
|
254
|
+
| `component_size` | INTEGER | Number of nodes in this component |
|
|
255
|
+
|
|
256
|
+
#### `graph_pagerank`
|
|
257
|
+
|
|
258
|
+
Iterative power method PageRank with configurable damping and iterations.
|
|
259
|
+
|
|
260
|
+
```sql
|
|
261
|
+
SELECT node, rank FROM graph_pagerank
|
|
262
|
+
WHERE edge_table = 'edges' AND src_col = 'src' AND dst_col = 'dst'
|
|
263
|
+
AND damping = 0.85 -- optional, default 0.85
|
|
264
|
+
AND iterations = 20; -- optional, default 20
|
|
265
|
+
```
|
|
266
|
+
|
|
267
|
+
| Output Column | Type | Description |
|
|
268
|
+
|---------------|------|-------------|
|
|
269
|
+
| `node` | TEXT | Node identifier |
|
|
270
|
+
| `rank` | REAL | PageRank score (sums to ~1.0) |
|
|
271
|
+
|
|
272
|
+
#### `graph_degree`
|
|
273
|
+
|
|
274
|
+
Degree centrality for all nodes.
|
|
275
|
+
|
|
276
|
+
```sql
|
|
277
|
+
SELECT node, in_degree, out_degree, degree, centrality FROM graph_degree
|
|
278
|
+
WHERE edge_table = 'edges' AND src_col = 'src' AND dst_col = 'dst';
|
|
279
|
+
```
|
|
280
|
+
|
|
281
|
+
| Output Column | Type | Description |
|
|
282
|
+
|---------------|------|-------------|
|
|
283
|
+
| `node` | TEXT | Node identifier |
|
|
284
|
+
| `in_degree` | REAL | Count (or weighted sum) of incoming edges |
|
|
285
|
+
| `out_degree` | REAL | Count (or weighted sum) of outgoing edges |
|
|
286
|
+
| `degree` | REAL | Total degree (in + out) |
|
|
287
|
+
| `centrality` | REAL | Normalized degree centrality |
|
|
288
|
+
|
|
289
|
+
Optional constraints: `weight_col`, `direction`, `normalized`, `timestamp_col`, `time_start`, `time_end`.
|
|
290
|
+
|
|
291
|
+
#### `graph_betweenness`
|
|
292
|
+
|
|
293
|
+
Betweenness centrality via Brandes' O(VE) algorithm.
|
|
294
|
+
|
|
295
|
+
```sql
|
|
296
|
+
SELECT node, centrality FROM graph_betweenness
|
|
297
|
+
WHERE edge_table = 'edges' AND src_col = 'src' AND dst_col = 'dst'
|
|
298
|
+
AND direction = 'both';
|
|
299
|
+
```
|
|
300
|
+
|
|
301
|
+
| Output Column | Type | Description |
|
|
302
|
+
|---------------|------|-------------|
|
|
303
|
+
| `node` | TEXT | Node identifier |
|
|
304
|
+
| `centrality` | REAL | Betweenness centrality score |
|
|
305
|
+
|
|
306
|
+
Optional constraints: `weight_col`, `direction`, `normalized`, `timestamp_col`, `time_start`, `time_end`.
|
|
307
|
+
|
|
308
|
+
#### `graph_closeness`
|
|
309
|
+
|
|
310
|
+
Closeness centrality with Wasserman-Faust normalization for disconnected graphs.
|
|
311
|
+
|
|
312
|
+
```sql
|
|
313
|
+
SELECT node, centrality FROM graph_closeness
|
|
314
|
+
WHERE edge_table = 'edges' AND src_col = 'src' AND dst_col = 'dst'
|
|
315
|
+
AND direction = 'both';
|
|
316
|
+
```
|
|
317
|
+
|
|
318
|
+
| Output Column | Type | Description |
|
|
319
|
+
|---------------|------|-------------|
|
|
320
|
+
| `node` | TEXT | Node identifier |
|
|
321
|
+
| `centrality` | REAL | Closeness centrality score |
|
|
322
|
+
|
|
323
|
+
Optional constraints: `weight_col`, `direction`, `timestamp_col`, `time_start`, `time_end`.
|
|
324
|
+
|
|
325
|
+
#### `graph_leiden`
|
|
326
|
+
|
|
327
|
+
Community detection via the Leiden algorithm (Traag et al., 2019).
|
|
328
|
+
|
|
329
|
+
```sql
|
|
330
|
+
SELECT node, community_id, modularity FROM graph_leiden
|
|
331
|
+
WHERE edge_table = 'edges' AND src_col = 'src' AND dst_col = 'dst';
|
|
332
|
+
```
|
|
333
|
+
|
|
334
|
+
| Output Column | Type | Description |
|
|
335
|
+
|---------------|------|-------------|
|
|
336
|
+
| `node` | TEXT | Node identifier |
|
|
337
|
+
| `community_id` | INTEGER | Community assignment (0-based) |
|
|
338
|
+
| `modularity` | REAL | Global modularity score of the partition |
|
|
339
|
+
|
|
340
|
+
Optional constraints: `weight_col`, `resolution` (default 1.0), `timestamp_col`, `time_start`, `time_end`.
|
|
341
|
+
|
|
342
|
+
### `node2vec_train()`
|
|
343
|
+
|
|
344
|
+
Learn vector embeddings from graph structure using biased random walks (Node2Vec) and Skip-gram with Negative Sampling (SGNS).
|
|
345
|
+
|
|
346
|
+
```sql
|
|
347
|
+
SELECT node2vec_train(
|
|
348
|
+
edge_table, -- name of edge table
|
|
349
|
+
src_col, -- source column name
|
|
350
|
+
dst_col, -- destination column name
|
|
351
|
+
output_table, -- HNSW table to store embeddings (must exist)
|
|
352
|
+
dimensions, -- embedding size (must match HNSW table)
|
|
353
|
+
p, -- return parameter (1.0 = uniform/DeepWalk)
|
|
354
|
+
q, -- in-out parameter (1.0 = uniform/DeepWalk)
|
|
355
|
+
num_walks, -- walks per node
|
|
356
|
+
walk_length, -- max steps per walk
|
|
357
|
+
window_size, -- SGNS context window
|
|
358
|
+
negative_samples, -- negative samples per positive
|
|
359
|
+
learning_rate, -- initial learning rate (decays linearly)
|
|
360
|
+
epochs -- training epochs
|
|
361
|
+
);
|
|
362
|
+
-- Returns: number of nodes embedded
|
|
363
|
+
```
|
|
364
|
+
|
|
365
|
+
**p, q parameter guide:**
|
|
366
|
+
|
|
367
|
+
| Setting | Walk Behavior | Best For |
|
|
368
|
+
|---------|--------------|----------|
|
|
369
|
+
| p=1, q=1 | Uniform (DeepWalk) | General structural similarity |
|
|
370
|
+
| Low p (0.25) | BFS-like, stays local | Structural role similarity |
|
|
371
|
+
| Low q (0.5) | DFS-like, explores far | Community/cluster detection |
|
|
372
|
+
|
|
373
|
+
## Benchmarks
|
|
374
|
+
|
|
375
|
+
The project includes a comprehensive benchmark suite comparing muninn against other SQLite extensions across real-world workloads.
|
|
376
|
+
|
|
377
|
+
**Vector search** benchmarks compare against [sqlite-vector](https://github.com/nicepkg/sqlite-vector), [sqlite-vec](https://github.com/asg017/sqlite-vec), and [vectorlite](https://github.com/nicepkg/vectorlite) using 3 embedding models (MiniLM, MPNet, BGE-Large) and 2 text datasets (AG News, Wealth of Nations) at scales up to 250K vectors.
|
|
378
|
+
|
|
379
|
+
**Graph traversal** benchmarks compare muninn TVFs against recursive CTEs and [GraphQLite](https://github.com/nicepkg/graphqlite) on synthetic graphs (Erdos-Renyi, Barabasi-Albert) at scales up to 100K nodes.
|
|
380
|
+
|
|
381
|
+
Results include interactive Plotly charts for insert throughput, search latency, recall, database size, and tipping-point analysis. See the [full benchmark results](https://neozenith.github.io/sqlite-muninn/benchmarks/) on the documentation site.
|
|
382
|
+
|
|
383
|
+
```bash
|
|
384
|
+
make -C benchmarks help # List all benchmark targets
|
|
385
|
+
make -C benchmarks analyze # Generate charts and reports from existing results
|
|
386
|
+
```
|
|
387
|
+
|
|
388
|
+
## Project Structure
|
|
389
|
+
|
|
390
|
+
```
|
|
391
|
+
src/ C11 source (extension entry point, HNSW, graph TVFs, Node2Vec)
|
|
392
|
+
test/ C unit tests (custom minimal framework)
|
|
393
|
+
pytests/ Python integration tests (pytest)
|
|
394
|
+
examples/ Self-contained usage examples
|
|
395
|
+
benchmarks/
|
|
396
|
+
scripts/ Benchmark runners and analysis scripts
|
|
397
|
+
charts/ Plotly JSON chart specs (committed for docs site)
|
|
398
|
+
results/ JSONL benchmark data (generated, not committed)
|
|
399
|
+
docs/ MkDocs documentation source
|
|
400
|
+
```
|
|
401
|
+
|
|
402
|
+
## Documentation
|
|
403
|
+
|
|
404
|
+
Full documentation is published at **[neozenith.github.io/sqlite-muninn](https://neozenith.github.io/sqlite-muninn/)** via MkDocs Material with interactive Plotly charts.
|
|
405
|
+
|
|
406
|
+
```bash
|
|
407
|
+
make docs-serve # Local dev server with live reload
|
|
408
|
+
make docs-build # Build static site
|
|
409
|
+
```
|
|
410
|
+
|
|
411
|
+
## Research References
|
|
412
|
+
|
|
413
|
+
| Feature | Paper |
|
|
414
|
+
|---------|-------|
|
|
415
|
+
| HNSW | Malkov & Yashunin, TPAMI 2020 |
|
|
416
|
+
| MN-RU insert repair | arXiv:2407.07871, 2024 |
|
|
417
|
+
| Patience early termination | SISAP 2025 |
|
|
418
|
+
| Betweenness centrality | Brandes, J. Math. Sociol. 2001 |
|
|
419
|
+
| Leiden community detection | Traag, Waltman & van Eck, Sci. Rep. 2019 |
|
|
420
|
+
| Node2Vec | Grover & Leskovec, KDD 2016 |
|
|
421
|
+
| SGNS | Mikolov et al., 2013 |
|
|
422
|
+
|
|
423
|
+
## License
|
|
424
|
+
|
|
425
|
+
MIT. See [LICENSE](LICENSE).
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
sqlite_muninn/__init__.py,sha256=ZwYXrogu4GCeWaRzL5YzQ-kChpgyj14b8Zg5RCE-p-o,1465
|
|
2
|
+
sqlite_muninn/muninn.dll,sha256=Duf1w2hP2f7W81X10Ydp0glqXjoK6G9wCr7We9R_3BE,208384
|
|
3
|
+
sqlite_muninn-0.1.0a1.dist-info/licenses/LICENSE,sha256=QyyYwcmZwbq4FumXkfaKrlEDCv-bgsCNGVWuCkIlZCc,1107
|
|
4
|
+
sqlite_muninn-0.1.0a1.dist-info/METADATA,sha256=MoivHyAUAIfunBBv882TwxxZmGKdMemkS4lNp5tT8Lc,15489
|
|
5
|
+
sqlite_muninn-0.1.0a1.dist-info/WHEEL,sha256=I7N5ZCUvoGccb6ODwz-ZRxcDg9ip88rBxOm2U6c41XQ,97
|
|
6
|
+
sqlite_muninn-0.1.0a1.dist-info/top_level.txt,sha256=bHCxOUoDiepH3tv0QWQA25FwkMccry8BHRtOb2Ncae0,14
|
|
7
|
+
sqlite_muninn-0.1.0a1.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 sqlite-vec-graph contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
sqlite_muninn
|