sqlrite 0.1.14__tar.gz → 0.1.15__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sqlrite-0.1.14 → sqlrite-0.1.15}/Cargo.lock +5 -5
- {sqlrite-0.1.14 → sqlrite-0.1.15}/Cargo.toml +1 -1
- {sqlrite-0.1.14 → sqlrite-0.1.15}/PKG-INFO +1 -1
- {sqlrite-0.1.14 → sqlrite-0.1.15}/desktop/package.json +1 -1
- {sqlrite-0.1.14 → sqlrite-0.1.15}/docs/phase-7-plan.md +1 -1
- {sqlrite-0.1.14 → sqlrite-0.1.15}/docs/roadmap.md +1 -1
- {sqlrite-0.1.14 → sqlrite-0.1.15}/pyproject.toml +1 -1
- {sqlrite-0.1.14 → sqlrite-0.1.15}/sdk/python/Cargo.toml +1 -1
- {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/db/table.rs +5 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/executor.rs +29 -39
- {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/hnsw.rs +44 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/mod.rs +22 -11
- {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/pager/cell.rs +15 -0
- sqlrite-0.1.15/src/sql/pager/hnsw_cell.rs +258 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/pager/mod.rs +337 -27
- {sqlrite-0.1.14 → sqlrite-0.1.15}/.github/workflows/ci.yml +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/.github/workflows/release-pr.yml +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/.github/workflows/release.yml +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/.github/workflows/rust.yml +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/.gitignore +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/CODE_OF_CONDUCT.md +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/LICENSE +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/MAINTAINERS +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/Makefile +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/README.md +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/desktop/index.html +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/desktop/package-lock.json +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/desktop/src/App.svelte +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/desktop/src/app.css +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/desktop/src/main.ts +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/desktop/src/vite-env.d.ts +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/desktop/svelte.config.js +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/desktop/tsconfig.json +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/desktop/vite.config.ts +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/docs/_index.md +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/docs/architecture.md +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/docs/design-decisions.md +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/docs/desktop.md +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/docs/embedding.md +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/docs/file-format.md +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/docs/getting-started.md +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/docs/pager.md +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/docs/release-plan.md +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/docs/release-secrets.md +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/docs/smoke-test.md +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/docs/sql-engine.md +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/docs/storage-model.md +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/docs/supported-sql.md +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/docs/usage.md +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/examples/README.md +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/examples/c/Makefile +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/examples/c/hello.c +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/examples/go/go.mod +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/examples/go/hello.go +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/examples/nodejs/hello.mjs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/examples/python/hello.py +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/examples/rust/quickstart.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/examples/wasm/Makefile +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/examples/wasm/index.html +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/images/SQLRite - Desktop.png +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/images/SQLRite Data Structures.png +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/images/SQLRite Simple SQL Execution High Level Diagram.png +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/images/SQLRite Simple SQL INSERT Execution High Level Diagram (Insert Row).png +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/images/SQLRite Simple SQL INSERT Execution High Level Diagram.png +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/images/SQLRite_logo.png +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/images/architecture.png +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/rust-toolchain.toml +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/samples/AST.delete.example +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/samples/AST.insert.exemple +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/samples/AST.select.example +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/samples/AST.update.example +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/samples/CREATE TABLE sqlrite_schema.sql +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/samples/CREATE_TABLE with duplicate.sql +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/samples/CREATE_TABLE.sql +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/samples/INSERT.sql +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/scripts/bump-version.sh +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/sdk/go/README.md +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/sdk/go/conn.go +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/sdk/go/go.mod +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/sdk/go/rows.go +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/sdk/go/sqlrite.go +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/sdk/go/sqlrite_test.go +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/sdk/go/stmt.go +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/sdk/python/README.md +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/sdk/python/src/lib.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/sdk/python/tests/test_sqlrite.py +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/src/connection.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/src/error.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/src/lib.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/src/main.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/src/meta_command/mod.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/src/repl/mod.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/db/database.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/db/mod.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/db/secondary_index.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/pager/file.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/pager/header.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/pager/index_cell.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/pager/interior_page.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/pager/overflow.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/pager/page.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/pager/pager.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/pager/table_page.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/pager/varint.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/pager/wal.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/parser/create.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/parser/insert.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/parser/mod.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/parser/select.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/tokenizer.rs +0 -0
|
@@ -3736,7 +3736,7 @@ dependencies = [
|
|
|
3736
3736
|
|
|
3737
3737
|
[[package]]
|
|
3738
3738
|
name = "sqlrite-desktop"
|
|
3739
|
-
version = "0.1.14"
|
|
3739
|
+
version = "0.1.15"
|
|
3740
3740
|
dependencies = [
|
|
3741
3741
|
"serde",
|
|
3742
3742
|
"serde_json",
|
|
@@ -3748,7 +3748,7 @@ dependencies = [
|
|
|
3748
3748
|
|
|
3749
3749
|
[[package]]
|
|
3750
3750
|
name = "sqlrite-engine"
|
|
3751
|
-
version = "0.1.14"
|
|
3751
|
+
version = "0.1.15"
|
|
3752
3752
|
dependencies = [
|
|
3753
3753
|
"clap",
|
|
3754
3754
|
"env_logger",
|
|
@@ -3763,7 +3763,7 @@ dependencies = [
|
|
|
3763
3763
|
|
|
3764
3764
|
[[package]]
|
|
3765
3765
|
name = "sqlrite-ffi"
|
|
3766
|
-
version = "0.1.14"
|
|
3766
|
+
version = "0.1.15"
|
|
3767
3767
|
dependencies = [
|
|
3768
3768
|
"cbindgen",
|
|
3769
3769
|
"sqlrite-engine",
|
|
@@ -3771,7 +3771,7 @@ dependencies = [
|
|
|
3771
3771
|
|
|
3772
3772
|
[[package]]
|
|
3773
3773
|
name = "sqlrite-nodejs"
|
|
3774
|
-
version = "0.1.14"
|
|
3774
|
+
version = "0.1.15"
|
|
3775
3775
|
dependencies = [
|
|
3776
3776
|
"napi",
|
|
3777
3777
|
"napi-build",
|
|
@@ -3781,7 +3781,7 @@ dependencies = [
|
|
|
3781
3781
|
|
|
3782
3782
|
[[package]]
|
|
3783
3783
|
name = "sqlrite-python"
|
|
3784
|
-
version = "0.1.14"
|
|
3784
|
+
version = "0.1.15"
|
|
3785
3785
|
dependencies = [
|
|
3786
3786
|
"pyo3",
|
|
3787
3787
|
"sqlrite-engine",
|
|
@@ -27,7 +27,7 @@ resolver = "3"
|
|
|
27
27
|
# `package =` key so the import name stays `sqlrite` internally:
|
|
28
28
|
# sqlrite = { package = "sqlrite-engine", path = "…" }
|
|
29
29
|
name = "sqlrite-engine"
|
|
30
|
-
version = "0.1.14"
|
|
30
|
+
version = "0.1.15"
|
|
31
31
|
authors = ["Joao Henrique Machado Silva <joaoh82@gmail.com>"]
|
|
32
32
|
edition = "2024"
|
|
33
33
|
rust-version = "1.85"
|
|
@@ -162,7 +162,7 @@ SELECT id, title FROM docs ORDER BY embedding <-> [0.1, ...] LIMIT 10;
|
|
|
162
162
|
>
|
|
163
163
|
> - **✅ 7d.1 — Pure HNSW algorithm** *(~700 LOC, shipped in v0.1.13).* `src/sql/hnsw.rs` standalone module: insert + search + layer assignment + beam search per layer + L2/cosine/dot distance dispatch. No SQL integration yet — vectors are passed in via a `get_vec` closure so the algorithm doesn't depend on table types. Tests verify recall@k ≥ 0.95 vs brute-force on randomly-generated vector sets; deterministic via a fixed RNG seed.
|
|
164
164
|
> - **✅ 7d.2 — SQL integration** *(~500 LOC).* `CREATE INDEX … USING hnsw (col)` parser + engine, INSERT wiring (also calls `hnsw.insert()` incrementally), query optimizer hook (recognizes `ORDER BY vec_distance_l2(col, literal) LIMIT k` and probes the HNSW instead of full-scanning). HNSW lives in memory only at this point; the **CREATE INDEX SQL persists in `sqlrite_master` and reopen rebuilds the graph from current rows** — partial persistence ahead of 7d.3. DELETE/UPDATE on HNSW-indexed tables refused with helpful error pointing at 7d.3.
|
|
165
|
-
> -
|
|
165
|
+
> - **✅ 7d.3 — Persistence** *(~600 LOC).* New `KIND_HNSW` cell tag and `HnswNodeCell` encoding (varint node_id + per-layer neighbor lists). Each HNSW index gets its own page tree parallel to secondary indexes. Open path loads cells directly into `HnswIndex::from_persisted_nodes` — no algorithm runs, exact bit-for-bit reproduction. Also unblocks DELETE / UPDATE on HNSW-indexed tables: those mark the index `needs_rebuild`, save rebuilds from current rows before staging. ~2× the original 300-LOC estimate because the cell encoding + tests + rebuild path together added more than expected.
|
|
166
166
|
>
|
|
167
167
|
> Each 7d.x ships as its own PR + release wave. The user-facing value lands at 7d.2; 7d.3 closes the persistence loop. 7d.1 is foundational but ships a tested algorithmic primitive on its own — useful as documentation of the engine's "from scratch" theme.
|
|
168
168
|
|
|
@@ -473,7 +473,7 @@ Approved sub-phases (Q1–Q10 resolved):
|
|
|
473
473
|
- **✅ 7a — `VECTOR(N)` column type** *(v0.1.10)* — dense fixed-dimension f32 storage via the existing cell encoding; format bumped to v4. Bracket-array literal syntax `[0.1, 0.2, …]` (Q7).
|
|
474
474
|
- **✅ 7b — Distance functions** *(v0.1.11)* — `vec_distance_l2/cosine/dot`, plus the ORDER BY-expressions parser change so KNN queries work end-to-end. Operators (`<->` `<=>` `<#>`) deferred to **7b.1** — sqlparser doesn't parse them natively, contradicting Q6's "tiny parser change" assumption.
|
|
475
475
|
- **✅ 7c — Brute-force KNN executor optimization** — bounded `BinaryHeap` of size k for `ORDER BY <expr> LIMIT k`. ~1.8× faster than full-sort at N=10k for cheap keys; bigger gains on expensive keys like `vec_distance_l2`.
|
|
476
|
-
-
|
|
476
|
+
- **✅ 7d — HNSW ANN index** — three PRs: 7d.1 (algorithm w/ recall@10 ≥ 0.95), 7d.2 (SQL integration + query optimizer), 7d.3 (persistence + DELETE/UPDATE rebuild). `CREATE INDEX … USING hnsw (col)`; fixed defaults `M=16, ef_construction=200, ef_search=50` (Q2). New `KIND_HNSW` cell tag.
|
|
477
477
|
- **7e — JSON column type + path queries** — `JSON` data type stored as bincoded `serde_json::Value` (Q3); `json_extract` / `json_array_length` / `json_object_keys` / `json_type`.
|
|
478
478
|
- **7f — ~~Full-text search with BM25~~** — **deferred to Phase 8** (Q1).
|
|
479
479
|
- **7g — `ask()` API across the product surface** — natural-language → SQL via Anthropic API (Q4), Anthropic-first then OpenAI + Ollama follow-ups. Foundational 7g.1 introduces a new `sqlrite-ask` crate (Q10 — separate crate, not a feature flag). Thin per-product adapters in 7g.2-7g.8 cover REPL, desktop, Python, Node.js, Go, WASM (JS-callback shape per Q9), and the MCP `ask` tool.
|
|
@@ -4,7 +4,7 @@ build-backend = "maturin"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "sqlrite"
|
|
7
|
-
version = "0.1.14"
|
|
7
|
+
version = "0.1.15"
|
|
8
8
|
description = "Python bindings for SQLRite — a small, embeddable SQLite clone written in Rust."
|
|
9
9
|
authors = [{ name = "Joao Henrique Machado Silva", email = "joaoh82@gmail.com" }]
|
|
10
10
|
license = { text = "MIT" }
|
|
@@ -143,6 +143,11 @@ pub struct HnswIndexEntry {
|
|
|
143
143
|
pub column_name: String,
|
|
144
144
|
/// The graph itself.
|
|
145
145
|
pub index: HnswIndex,
|
|
146
|
+
/// Phase 7d.3 — true iff a DELETE or UPDATE-on-vector-col has
|
|
147
|
+
/// invalidated the graph since the last rebuild. INSERT maintains
|
|
148
|
+
/// the graph incrementally and leaves this false. The next save
|
|
149
|
+
/// rebuilds dirty indexes from current rows before serializing.
|
|
150
|
+
pub needs_rebuild: bool,
|
|
146
151
|
}
|
|
147
152
|
|
|
148
153
|
impl Table {
|
|
@@ -178,26 +178,6 @@ pub fn execute_delete(stmt: &Statement, db: &mut Database) -> Result<usize> {
|
|
|
178
178
|
};
|
|
179
179
|
let table_name = extract_single_table_name(tables)?;
|
|
180
180
|
|
|
181
|
-
// Phase 7d.2 limitation: HNSW lacks an in-place delete-node operation.
|
|
182
|
-
// True deletion needs either soft-delete + tombstones or a graph rebuild
|
|
183
|
-
// — both nontrivial. Until 7d.3 lands persistence we don't have a
|
|
184
|
-
// natural rebuild trigger either. So: refuse DELETE on tables carrying
|
|
185
|
-
// any HNSW index, with a message that points at the workaround
|
|
186
|
-
// (DROP the index, DELETE, recreate).
|
|
187
|
-
{
|
|
188
|
-
let table = db.get_table(table_name.clone()).map_err(|_| {
|
|
189
|
-
SQLRiteError::General(format!("DELETE references unknown table '{table_name}'"))
|
|
190
|
-
})?;
|
|
191
|
-
if !table.hnsw_indexes.is_empty() {
|
|
192
|
-
let names: Vec<&str> = table.hnsw_indexes.iter().map(|e| e.name.as_str()).collect();
|
|
193
|
-
return Err(SQLRiteError::NotImplemented(format!(
|
|
194
|
-
"DELETE on tables with HNSW indexes is not supported yet \
|
|
195
|
-
(Phase 7d.3 follow-up). DROP the index first, then DELETE, then re-CREATE. \
|
|
196
|
-
Table '{table_name}' currently has: {names:?}"
|
|
197
|
-
)));
|
|
198
|
-
}
|
|
199
|
-
}
|
|
200
|
-
|
|
201
181
|
// Compute matching rowids with an immutable borrow, then mutate.
|
|
202
182
|
let matching: Vec<i64> = {
|
|
203
183
|
let table = db
|
|
@@ -224,6 +204,15 @@ pub fn execute_delete(stmt: &Statement, db: &mut Database) -> Result<usize> {
|
|
|
224
204
|
for rowid in &matching {
|
|
225
205
|
table.delete_row(*rowid);
|
|
226
206
|
}
|
|
207
|
+
// Phase 7d.3 — any DELETE invalidates every HNSW index on this
|
|
208
|
+
// table (the deleted node could still appear in other nodes'
|
|
209
|
+
// neighbor lists, breaking subsequent searches). Mark dirty so
|
|
210
|
+
// the next save rebuilds from current rows before serializing.
|
|
211
|
+
if !matching.is_empty() {
|
|
212
|
+
for entry in &mut table.hnsw_indexes {
|
|
213
|
+
entry.needs_rebuild = true;
|
|
214
|
+
}
|
|
215
|
+
}
|
|
227
216
|
Ok(matching.len())
|
|
228
217
|
}
|
|
229
218
|
|
|
@@ -250,25 +239,6 @@ pub fn execute_update(stmt: &Statement, db: &mut Database) -> Result<usize> {
|
|
|
250
239
|
|
|
251
240
|
let table_name = extract_table_name(table)?;
|
|
252
241
|
|
|
253
|
-
// Phase 7d.2 limitation (same shape as DELETE above): we have no
|
|
254
|
-
// in-place UPDATE-an-HNSW-node primitive. UPDATE on a column NOT
|
|
255
|
-
// covered by HNSW is fine in principle, but the simplest MVP is
|
|
256
|
-
// refuse-everything-when-HNSW-is-present. Re-evaluate in 7d.3 once
|
|
257
|
-
// persistence + rebuild is in.
|
|
258
|
-
{
|
|
259
|
-
let tbl = db.get_table(table_name.clone()).map_err(|_| {
|
|
260
|
-
SQLRiteError::General(format!("UPDATE references unknown table '{table_name}'"))
|
|
261
|
-
})?;
|
|
262
|
-
if !tbl.hnsw_indexes.is_empty() {
|
|
263
|
-
let names: Vec<&str> = tbl.hnsw_indexes.iter().map(|e| e.name.as_str()).collect();
|
|
264
|
-
return Err(SQLRiteError::NotImplemented(format!(
|
|
265
|
-
"UPDATE on tables with HNSW indexes is not supported yet \
|
|
266
|
-
(Phase 7d.3 follow-up). DROP the index first if you need to mutate. \
|
|
267
|
-
Table '{table_name}' currently has: {names:?}"
|
|
268
|
-
)));
|
|
269
|
-
}
|
|
270
|
-
}
|
|
271
|
-
|
|
272
242
|
// Resolve assignment targets to plain column names and verify they exist.
|
|
273
243
|
let mut parsed_assignments: Vec<(String, Expr)> = Vec::with_capacity(assignments.len());
|
|
274
244
|
{
|
|
@@ -337,6 +307,24 @@ pub fn execute_update(stmt: &Statement, db: &mut Database) -> Result<usize> {
|
|
|
337
307
|
tbl.set_value(col, *rowid, v.clone())?;
|
|
338
308
|
}
|
|
339
309
|
}
|
|
310
|
+
|
|
311
|
+
// Phase 7d.3 — UPDATE may have changed a vector column that an
|
|
312
|
+
// HNSW index covers. Mark every covering index dirty so save
|
|
313
|
+
// rebuilds from current rows. (Updates that only touched
|
|
314
|
+
// columns no HNSW index covers leave needs_rebuild untouched: only
|
|
315
|
+
// entries whose column_name was assigned are marked below, so
|
|
316
|
+
// such updates trigger no extra rebuild on save.)
|
|
317
|
+
if !work.is_empty() {
|
|
318
|
+
let updated_columns: std::collections::HashSet<&str> = work
|
|
319
|
+
.iter()
|
|
320
|
+
.flat_map(|(_, values)| values.iter().map(|(c, _)| c.as_str()))
|
|
321
|
+
.collect();
|
|
322
|
+
for entry in &mut tbl.hnsw_indexes {
|
|
323
|
+
if updated_columns.contains(entry.column_name.as_str()) {
|
|
324
|
+
entry.needs_rebuild = true;
|
|
325
|
+
}
|
|
326
|
+
}
|
|
327
|
+
}
|
|
340
328
|
Ok(work.len())
|
|
341
329
|
}
|
|
342
330
|
|
|
@@ -609,6 +597,8 @@ fn create_hnsw_index(
|
|
|
609
597
|
name: index_name.to_string(),
|
|
610
598
|
column_name: column_name.to_string(),
|
|
611
599
|
index: idx,
|
|
600
|
+
// Freshly built — no DELETE/UPDATE has invalidated it yet.
|
|
601
|
+
needs_rebuild: false,
|
|
612
602
|
});
|
|
613
603
|
Ok(index_name.to_string())
|
|
614
604
|
}
|
|
@@ -202,6 +202,50 @@ impl HnswIndex {
|
|
|
202
202
|
self.nodes.len()
|
|
203
203
|
}
|
|
204
204
|
|
|
205
|
+
/// Phase 7d.3 — produces (node_id, layers) pairs in ascending node_id
|
|
206
|
+
/// order, suitable for serializing the graph to disk via the
|
|
207
|
+
/// `HnswNodeCell` wire format. The graph's metadata
|
|
208
|
+
/// (entry_point + top_layer) is recoverable from the nodes alone:
|
|
209
|
+
/// top_layer = max(max_layer); entry_point = any node at top_layer.
|
|
210
|
+
/// So we don't ship a separate metadata cell.
|
|
211
|
+
pub fn serialize_nodes(&self) -> Vec<(i64, Vec<Vec<i64>>)> {
|
|
212
|
+
let mut out: Vec<(i64, Vec<Vec<i64>>)> = self
|
|
213
|
+
.nodes
|
|
214
|
+
.iter()
|
|
215
|
+
.map(|(id, n)| (*id, n.layers.clone()))
|
|
216
|
+
.collect();
|
|
217
|
+
out.sort_by_key(|(id, _)| *id);
|
|
218
|
+
out
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
/// Phase 7d.3 — rebuilds an HnswIndex from a stream of (node_id, layers)
|
|
222
|
+
/// pairs as produced by `serialize_nodes` and round-tripped through
|
|
223
|
+
/// `HnswNodeCell` encode/decode. The rebuilt index has the same nodes,
|
|
224
|
+
/// same neighbor lists, and same top_layer as the source; entry_point is the first node yielded at top_layer.
|
|
225
|
+
/// `seed` is fresh; the deserialized index is never inserted into via
|
|
226
|
+
/// the algorithmic `insert` path so the seed only matters if a caller
|
|
227
|
+
/// later calls `insert` after deserializing (then it controls layer
|
|
228
|
+
/// assignment for the appended node).
|
|
229
|
+
pub fn from_persisted_nodes<I>(distance: DistanceMetric, seed: u64, nodes: I) -> Self
|
|
230
|
+
where
|
|
231
|
+
I: IntoIterator<Item = (i64, Vec<Vec<i64>>)>,
|
|
232
|
+
{
|
|
233
|
+
let mut idx = Self::new(distance, seed);
|
|
234
|
+
let mut top_layer = 0usize;
|
|
235
|
+
let mut entry_point: Option<i64> = None;
|
|
236
|
+
for (id, layers) in nodes {
|
|
237
|
+
let max_layer = layers.len().saturating_sub(1);
|
|
238
|
+
if max_layer > top_layer || entry_point.is_none() {
|
|
239
|
+
top_layer = max_layer;
|
|
240
|
+
entry_point = Some(id);
|
|
241
|
+
}
|
|
242
|
+
idx.nodes.insert(id, Node { layers });
|
|
243
|
+
}
|
|
244
|
+
idx.top_layer = top_layer;
|
|
245
|
+
idx.entry_point = entry_point;
|
|
246
|
+
idx
|
|
247
|
+
}
|
|
248
|
+
|
|
205
249
|
/// Inserts a node into the graph. The node id must be unique;
|
|
206
250
|
/// re-inserting an existing id is a no-op (returns without error).
|
|
207
251
|
/// `vec` is the new node's vector; `get_vec` looks up the vector
|
|
@@ -1377,28 +1377,39 @@ mod tests {
|
|
|
1377
1377
|
assert!(resp.contains("1 row returned"), "got: {resp}");
|
|
1378
1378
|
}
|
|
1379
1379
|
|
|
1380
|
+
// Phase 7d.3 — DELETE / UPDATE on HNSW-indexed tables now works.
|
|
1381
|
+
// The 7d.2 versions of these tests asserted a refusal; replaced
|
|
1382
|
+
// with assertions that the operation succeeds + the index entry's
|
|
1383
|
+
// needs_rebuild flag flipped so the next save will rebuild.
|
|
1384
|
+
|
|
1380
1385
|
#[test]
|
|
1381
|
-
fn
|
|
1386
|
+
fn delete_on_hnsw_indexed_table_succeeds_and_marks_dirty() {
|
|
1382
1387
|
let mut db = seed_hnsw_table();
|
|
1383
1388
|
process_command("CREATE INDEX ix_e ON docs USING hnsw (e);", &mut db).unwrap();
|
|
1384
|
-
let
|
|
1385
|
-
|
|
1389
|
+
let resp = process_command("DELETE FROM docs WHERE id = 1;", &mut db).unwrap();
|
|
1390
|
+
assert!(resp.contains("1 row"), "expected 1 row deleted: {resp}");
|
|
1391
|
+
|
|
1392
|
+
let docs = db.get_table("docs".to_string()).unwrap();
|
|
1393
|
+
let entry = docs.hnsw_indexes.iter().find(|e| e.name == "ix_e").unwrap();
|
|
1386
1394
|
assert!(
|
|
1387
|
-
|
|
1388
|
-
"
|
|
1395
|
+
entry.needs_rebuild,
|
|
1396
|
+
"DELETE should have marked HNSW index dirty for rebuild on next save"
|
|
1389
1397
|
);
|
|
1390
1398
|
}
|
|
1391
1399
|
|
|
1392
1400
|
#[test]
|
|
1393
|
-
fn
|
|
1401
|
+
fn update_on_hnsw_indexed_vector_col_succeeds_and_marks_dirty() {
|
|
1394
1402
|
let mut db = seed_hnsw_table();
|
|
1395
1403
|
process_command("CREATE INDEX ix_e ON docs USING hnsw (e);", &mut db).unwrap();
|
|
1396
|
-
let
|
|
1397
|
-
process_command("UPDATE docs SET e = [9.0, 9.0] WHERE id = 1;", &mut db).
|
|
1398
|
-
|
|
1404
|
+
let resp =
|
|
1405
|
+
process_command("UPDATE docs SET e = [9.0, 9.0] WHERE id = 1;", &mut db).unwrap();
|
|
1406
|
+
assert!(resp.contains("1 row"), "expected 1 row updated: {resp}");
|
|
1407
|
+
|
|
1408
|
+
let docs = db.get_table("docs".to_string()).unwrap();
|
|
1409
|
+
let entry = docs.hnsw_indexes.iter().find(|e| e.name == "ix_e").unwrap();
|
|
1399
1410
|
assert!(
|
|
1400
|
-
|
|
1401
|
-
"
|
|
1411
|
+
entry.needs_rebuild,
|
|
1412
|
+
"UPDATE on the vector column should have marked HNSW index dirty"
|
|
1402
1413
|
);
|
|
1403
1414
|
}
|
|
1404
1415
|
|
|
@@ -57,6 +57,21 @@ pub const KIND_LOCAL: u8 = 0x01;
|
|
|
57
57
|
pub const KIND_OVERFLOW: u8 = 0x02;
|
|
58
58
|
pub const KIND_INTERIOR: u8 = 0x03;
|
|
59
59
|
pub const KIND_INDEX: u8 = 0x04;
|
|
60
|
+
/// Phase 7d.3: a single HNSW node's per-layer neighbor lists,
|
|
61
|
+
/// serialized into one cell. Body layout (after the shared
|
|
62
|
+
/// `cell_length | kind_tag` prefix):
|
|
63
|
+
///
|
|
64
|
+
/// ```text
|
|
65
|
+
/// node_id zigzag varint the rowid this graph node represents
|
|
66
|
+
/// max_layer varint highest layer this node lives in
|
|
67
|
+
/// for each layer 0..=max_layer:
|
|
68
|
+
/// count varint number of neighbors at this layer
|
|
69
|
+
/// for each: zigzag varint neighbor node_id
|
|
70
|
+
/// ```
|
|
71
|
+
///
|
|
72
|
+
/// `peek_rowid` works uniformly on this kind because it just reads
|
|
73
|
+
/// the first varint after the kind tag — exactly the `node_id` here.
|
|
74
|
+
pub const KIND_HNSW: u8 = 0x05;
|
|
60
75
|
|
|
61
76
|
/// Value type tag stored in each non-NULL value block.
|
|
62
77
|
pub mod tag {
|
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
//! On-disk format for a single HNSW graph node (Phase 7d.3).
|
|
2
|
+
//!
|
|
3
|
+
//! Each cell carries one node's per-layer neighbor lists. The cells live
|
|
4
|
+
//! on `TableLeaf`-style pages identical to a regular table's data tree —
|
|
5
|
+
//! same slot directory, same sibling `next_page` chain, same interior-
|
|
6
|
+
//! page mechanics from Phase 3d. The only thing different is the per-cell
|
|
7
|
+
//! body, signaled by `KIND_HNSW`.
|
|
8
|
+
//!
|
|
9
|
+
//! Reusing the table-tree shape lets `Cell::peek_rowid` work uniformly
|
|
10
|
+
//! across all cell kinds: it skips `cell_length | kind_tag` and reads the
|
|
11
|
+
//! first varint, which is `node_id` here. So slot-directory binary
|
|
12
|
+
//! search by node_id works without HNSW-specific code in the page-level
|
|
13
|
+
//! plumbing.
|
|
14
|
+
//!
|
|
15
|
+
//! ```text
|
|
16
|
+
//! cell_length varint bytes after this field
|
|
17
|
+
//! kind_tag u8 = 0x05 (KIND_HNSW)
|
|
18
|
+
//! node_id zigzag varint the rowid this graph node represents
|
|
19
|
+
//! max_layer varint highest layer this node lives in
|
|
20
|
+
//! for layer in 0..=max_layer:
|
|
21
|
+
//! count varint number of neighbors at this layer
|
|
22
|
+
//! for each neighbor:
|
|
23
|
+
//! neighbor zigzag varint neighbor's node_id
|
|
24
|
+
//! ```
|
|
25
|
+
//!
|
|
26
|
+
//! No null bitmap — every field is always present. No type tag — every
|
|
27
|
+
//! field has a fixed type (varint or zigzag varint). The encoding is
|
|
28
|
+
//! deliberately minimal because HNSW indexes can have N nodes each with
|
|
29
|
+
//! up to ~M·log(N) total neighbors, and we don't want the per-cell
|
|
30
|
+
//! overhead to dominate disk usage.
|
|
31
|
+
|
|
32
|
+
use crate::error::{Result, SQLRiteError};
|
|
33
|
+
use crate::sql::pager::cell::KIND_HNSW;
|
|
34
|
+
use crate::sql::pager::varint;
|
|
35
|
+
|
|
36
|
+
/// One HNSW node's persisted form. `layers[i]` is the list of neighbor
|
|
37
|
+
/// node_ids at layer i; the node lives at every layer 0..=layers.len()-1.
|
|
38
|
+
#[derive(Debug, Clone, PartialEq)]
|
|
39
|
+
pub struct HnswNodeCell {
|
|
40
|
+
pub node_id: i64,
|
|
41
|
+
/// `layers[0]` is the densest layer (always present); `layers.len()`
|
|
42
|
+
/// equals the node's max_layer + 1.
|
|
43
|
+
pub layers: Vec<Vec<i64>>,
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
impl HnswNodeCell {
|
|
47
|
+
pub fn new(node_id: i64, layers: Vec<Vec<i64>>) -> Self {
|
|
48
|
+
Self { node_id, layers }
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
/// Encodes the cell into a freshly-allocated `Vec<u8>`. The result
|
|
52
|
+
/// starts with the shared `cell_length | kind_tag` prefix and is
|
|
53
|
+
/// directly usable as a slot-directory entry on a `TableLeaf`-style
|
|
54
|
+
/// page.
|
|
55
|
+
pub fn encode(&self) -> Result<Vec<u8>> {
|
|
56
|
+
if self.layers.is_empty() {
|
|
57
|
+
return Err(SQLRiteError::Internal(format!(
|
|
58
|
+
"HNSW node {} has zero layers — every node lives at layer 0 minimum",
|
|
59
|
+
self.node_id
|
|
60
|
+
)));
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
// Body capacity guess: 1 (kind) + 10 (node_id) + 5 (max_layer)
|
|
64
|
+
// + per-layer overhead. Most nodes are layer-0-only so the
|
|
65
|
+
// typical body is ~1 + 10 + 1 + 1 + M·10 ≈ 175 bytes for M=16.
|
|
66
|
+
let layer_bytes = self.layers.iter().map(|l| 5 + l.len() * 10).sum::<usize>();
|
|
67
|
+
let mut body = Vec::with_capacity(1 + 10 + 5 + layer_bytes);
|
|
68
|
+
|
|
69
|
+
body.push(KIND_HNSW);
|
|
70
|
+
varint::write_i64(&mut body, self.node_id);
|
|
71
|
+
// max_layer = layers.len() - 1
|
|
72
|
+
varint::write_u64(&mut body, (self.layers.len() - 1) as u64);
|
|
73
|
+
for layer in &self.layers {
|
|
74
|
+
varint::write_u64(&mut body, layer.len() as u64);
|
|
75
|
+
for n in layer {
|
|
76
|
+
varint::write_i64(&mut body, *n);
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
let mut out = Vec::with_capacity(body.len() + varint::MAX_VARINT_BYTES);
|
|
81
|
+
varint::write_u64(&mut out, body.len() as u64);
|
|
82
|
+
out.extend_from_slice(&body);
|
|
83
|
+
Ok(out)
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
/// Decodes one cell starting at `pos`. Returns the cell plus the
|
|
87
|
+
/// total bytes consumed (including the leading length varint).
|
|
88
|
+
pub fn decode(buf: &[u8], pos: usize) -> Result<(HnswNodeCell, usize)> {
|
|
89
|
+
let (body_len, len_bytes) = varint::read_u64(buf, pos)?;
|
|
90
|
+
let body_start = pos + len_bytes;
|
|
91
|
+
let body_end = body_start
|
|
92
|
+
.checked_add(body_len as usize)
|
|
93
|
+
.ok_or_else(|| SQLRiteError::Internal("HNSW cell length overflow".to_string()))?;
|
|
94
|
+
if body_end > buf.len() {
|
|
95
|
+
return Err(SQLRiteError::Internal(format!(
|
|
96
|
+
"HNSW cell extends past buffer: needs {body_start}..{body_end}, have {}",
|
|
97
|
+
buf.len()
|
|
98
|
+
)));
|
|
99
|
+
}
|
|
100
|
+
let body = &buf[body_start..body_end];
|
|
101
|
+
if body.first().copied() != Some(KIND_HNSW) {
|
|
102
|
+
return Err(SQLRiteError::Internal(format!(
|
|
103
|
+
"HnswNodeCell::decode called on non-HNSW entry (kind_tag = {:#x})",
|
|
104
|
+
body.first().copied().unwrap_or(0)
|
|
105
|
+
)));
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
let mut cur = 1usize;
|
|
109
|
+
let (node_id, n) = varint::read_i64(body, cur)?;
|
|
110
|
+
cur += n;
|
|
111
|
+
let (max_layer_u64, n) = varint::read_u64(body, cur)?;
|
|
112
|
+
cur += n;
|
|
113
|
+
|
|
114
|
+
let layer_count = (max_layer_u64 as usize)
|
|
115
|
+
.checked_add(1)
|
|
116
|
+
.ok_or_else(|| SQLRiteError::Internal("HNSW max_layer overflow".to_string()))?;
|
|
117
|
+
// Sanity: max_layer is in practice ≤ ~10 for N ≤ 1B with
|
|
118
|
+
// m_l ≈ 0.36. A wildly-large value almost certainly means a
|
|
119
|
+
// corrupt cell — bail before allocating an enormous Vec.
|
|
120
|
+
if layer_count > 64 {
|
|
121
|
+
return Err(SQLRiteError::Internal(format!(
|
|
122
|
+
"HNSW node {node_id} claims max_layer {} (>= 64) — corrupt cell?",
|
|
123
|
+
layer_count - 1
|
|
124
|
+
)));
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
let mut layers = Vec::with_capacity(layer_count);
|
|
128
|
+
for _ in 0..layer_count {
|
|
129
|
+
let (count, n) = varint::read_u64(body, cur)?;
|
|
130
|
+
cur += n;
|
|
131
|
+
// Same sanity bound — a single layer's neighbor list shouldn't
|
|
132
|
+
// exceed `2 · M_max0` even after pruning bugs. 256 is a
|
|
133
|
+
// generous cap.
|
|
134
|
+
if count > 256 {
|
|
135
|
+
return Err(SQLRiteError::Internal(format!(
|
|
136
|
+
"HNSW node {node_id} layer claims {count} neighbors (>256) — corrupt cell?"
|
|
137
|
+
)));
|
|
138
|
+
}
|
|
139
|
+
let mut neighbors = Vec::with_capacity(count as usize);
|
|
140
|
+
for _ in 0..count {
|
|
141
|
+
let (id, n) = varint::read_i64(body, cur)?;
|
|
142
|
+
cur += n;
|
|
143
|
+
neighbors.push(id);
|
|
144
|
+
}
|
|
145
|
+
layers.push(neighbors);
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
if cur != body.len() {
|
|
149
|
+
return Err(SQLRiteError::Internal(format!(
|
|
150
|
+
"HNSW cell had {} trailing bytes",
|
|
151
|
+
body.len() - cur
|
|
152
|
+
)));
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
Ok((
|
|
156
|
+
HnswNodeCell { node_id, layers },
|
|
157
|
+
len_bytes + body_len as usize,
|
|
158
|
+
))
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
#[cfg(test)]
mod tests {
    //! Unit tests for the `HnswNodeCell` encode/decode pair: round-trips
    //! for representative node shapes, plus one test per rejection branch
    //! of `decode` that can be reached with a hand-crafted buffer.

    use super::*;

    /// Encodes `cell`, decodes the bytes back from offset 0, and asserts
    /// that decoding consumed the whole buffer and reproduced `cell`.
    fn round_trip(cell: &HnswNodeCell) {
        let bytes = cell.encode().expect("encode");
        let (decoded, consumed) = HnswNodeCell::decode(&bytes, 0).expect("decode");
        assert_eq!(
            consumed,
            bytes.len(),
            "decode should consume the whole cell"
        );
        assert_eq!(&decoded, cell);
    }

    /// Wraps `body` in the on-disk framing `decode` expects: a varint
    /// length prefix followed by the body bytes.
    fn frame(body: &[u8]) -> Vec<u8> {
        let mut out = Vec::new();
        varint::write_u64(&mut out, body.len() as u64);
        out.extend_from_slice(body);
        out
    }

    /// Starts a hand-crafted HNSW body: kind tag, `node_id`, `max_layer`.
    /// Callers append the per-layer neighbor lists (or garbage) themselves.
    fn body_prefix(node_id: i64, max_layer: u64) -> Vec<u8> {
        let mut body = vec![KIND_HNSW];
        varint::write_i64(&mut body, node_id);
        varint::write_u64(&mut body, max_layer);
        body
    }

    #[test]
    fn single_layer_node_round_trips() {
        // Most common case: a layer-0-only node with a handful of neighbors.
        let cell = HnswNodeCell::new(42, vec![vec![1, 2, 3, 5, 8]]);
        round_trip(&cell);
    }

    #[test]
    fn multi_layer_node_round_trips() {
        let cell = HnswNodeCell::new(
            17,
            vec![
                vec![1, 2, 3, 4, 5, 6, 7, 8], // layer 0 (densest)
                vec![1, 3, 7],                // layer 1
                vec![3],                      // layer 2 (sparsest)
            ],
        );
        round_trip(&cell);
    }

    #[test]
    fn empty_neighbor_layer_round_trips() {
        // A node can have an empty layer (e.g. if its only neighbor was
        // pruned away). The encoding must still survive.
        let cell = HnswNodeCell::new(5, vec![vec![1, 2], vec![]]);
        round_trip(&cell);
    }

    #[test]
    fn node_id_negative_and_large() {
        // node_id is zigzag-encoded; cover both signs.
        round_trip(&HnswNodeCell::new(-1, vec![vec![]]));
        round_trip(&HnswNodeCell::new(i64::MAX, vec![vec![1, 2]]));
        round_trip(&HnswNodeCell::new(i64::MIN, vec![vec![3, 4]]));
    }

    #[test]
    fn zero_layers_is_rejected_at_encode() {
        let bad = HnswNodeCell::new(1, vec![]);
        let err = bad.encode().unwrap_err();
        assert!(err.to_string().contains("zero layers"));
    }

    #[test]
    fn decode_rejects_wrong_kind_tag() {
        // Build something that looks like a cell with an arbitrary
        // (non-HNSW) tag byte and confirm decode bails.
        let bad = frame(&[0x01]); // KIND_LOCAL, not KIND_HNSW
        let err = HnswNodeCell::decode(&bad, 0).unwrap_err();
        assert!(err.to_string().contains("non-HNSW entry"));
    }

    #[test]
    fn decode_rejects_truncated_buffer() {
        let cell = HnswNodeCell::new(1, vec![vec![10, 20, 30]]);
        let bytes = cell.encode().expect("encode");
        for chop in 1..=3 {
            let truncated = &bytes[..bytes.len() - chop];
            assert!(
                HnswNodeCell::decode(truncated, 0).is_err(),
                "expected error chopping {chop} byte(s) from end of {} byte cell",
                bytes.len()
            );
        }
    }

    #[test]
    fn decode_rejects_implausible_max_layer() {
        // Hand-craft a cell whose max_layer is 100 (above the 64 sanity bound).
        let out = frame(&body_prefix(0, 100)); // max_layer = 100 → 101 layers
        let err = HnswNodeCell::decode(&out, 0).unwrap_err();
        assert!(err.to_string().to_lowercase().contains("corrupt"));
    }

    #[test]
    fn decode_rejects_implausible_neighbor_count() {
        // A single layer claiming 300 neighbors exceeds the 256 cap, so
        // decode must bail before trying to read any neighbor ids.
        let mut body = body_prefix(0, 0); // max_layer = 0 → exactly one layer
        varint::write_u64(&mut body, 300); // neighbor count for layer 0
        let err = HnswNodeCell::decode(&frame(&body), 0).unwrap_err();
        assert!(err.to_string().contains("neighbors"));
    }

    #[test]
    fn decode_rejects_trailing_bytes() {
        // A well-formed one-layer, zero-neighbor body followed by one stray
        // byte that the length prefix still covers: decode parses the node
        // and then must notice the unconsumed remainder.
        let mut body = body_prefix(7, 0); // max_layer = 0 → exactly one layer
        varint::write_u64(&mut body, 0); // layer 0 has no neighbors
        body.push(0xFF); // stray byte inside the framed body
        let err = HnswNodeCell::decode(&frame(&body), 0).unwrap_err();
        assert!(err.to_string().contains("trailing bytes"));
    }
}
|