sqlrite 0.1.14__tar.gz → 0.1.15__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. {sqlrite-0.1.14 → sqlrite-0.1.15}/Cargo.lock +5 -5
  2. {sqlrite-0.1.14 → sqlrite-0.1.15}/Cargo.toml +1 -1
  3. {sqlrite-0.1.14 → sqlrite-0.1.15}/PKG-INFO +1 -1
  4. {sqlrite-0.1.14 → sqlrite-0.1.15}/desktop/package.json +1 -1
  5. {sqlrite-0.1.14 → sqlrite-0.1.15}/docs/phase-7-plan.md +1 -1
  6. {sqlrite-0.1.14 → sqlrite-0.1.15}/docs/roadmap.md +1 -1
  7. {sqlrite-0.1.14 → sqlrite-0.1.15}/pyproject.toml +1 -1
  8. {sqlrite-0.1.14 → sqlrite-0.1.15}/sdk/python/Cargo.toml +1 -1
  9. {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/db/table.rs +5 -0
  10. {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/executor.rs +29 -39
  11. {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/hnsw.rs +44 -0
  12. {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/mod.rs +22 -11
  13. {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/pager/cell.rs +15 -0
  14. sqlrite-0.1.15/src/sql/pager/hnsw_cell.rs +258 -0
  15. {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/pager/mod.rs +337 -27
  16. {sqlrite-0.1.14 → sqlrite-0.1.15}/.github/workflows/ci.yml +0 -0
  17. {sqlrite-0.1.14 → sqlrite-0.1.15}/.github/workflows/release-pr.yml +0 -0
  18. {sqlrite-0.1.14 → sqlrite-0.1.15}/.github/workflows/release.yml +0 -0
  19. {sqlrite-0.1.14 → sqlrite-0.1.15}/.github/workflows/rust.yml +0 -0
  20. {sqlrite-0.1.14 → sqlrite-0.1.15}/.gitignore +0 -0
  21. {sqlrite-0.1.14 → sqlrite-0.1.15}/CODE_OF_CONDUCT.md +0 -0
  22. {sqlrite-0.1.14 → sqlrite-0.1.15}/LICENSE +0 -0
  23. {sqlrite-0.1.14 → sqlrite-0.1.15}/MAINTAINERS +0 -0
  24. {sqlrite-0.1.14 → sqlrite-0.1.15}/Makefile +0 -0
  25. {sqlrite-0.1.14 → sqlrite-0.1.15}/README.md +0 -0
  26. {sqlrite-0.1.14 → sqlrite-0.1.15}/desktop/index.html +0 -0
  27. {sqlrite-0.1.14 → sqlrite-0.1.15}/desktop/package-lock.json +0 -0
  28. {sqlrite-0.1.14 → sqlrite-0.1.15}/desktop/src/App.svelte +0 -0
  29. {sqlrite-0.1.14 → sqlrite-0.1.15}/desktop/src/app.css +0 -0
  30. {sqlrite-0.1.14 → sqlrite-0.1.15}/desktop/src/main.ts +0 -0
  31. {sqlrite-0.1.14 → sqlrite-0.1.15}/desktop/src/vite-env.d.ts +0 -0
  32. {sqlrite-0.1.14 → sqlrite-0.1.15}/desktop/svelte.config.js +0 -0
  33. {sqlrite-0.1.14 → sqlrite-0.1.15}/desktop/tsconfig.json +0 -0
  34. {sqlrite-0.1.14 → sqlrite-0.1.15}/desktop/vite.config.ts +0 -0
  35. {sqlrite-0.1.14 → sqlrite-0.1.15}/docs/_index.md +0 -0
  36. {sqlrite-0.1.14 → sqlrite-0.1.15}/docs/architecture.md +0 -0
  37. {sqlrite-0.1.14 → sqlrite-0.1.15}/docs/design-decisions.md +0 -0
  38. {sqlrite-0.1.14 → sqlrite-0.1.15}/docs/desktop.md +0 -0
  39. {sqlrite-0.1.14 → sqlrite-0.1.15}/docs/embedding.md +0 -0
  40. {sqlrite-0.1.14 → sqlrite-0.1.15}/docs/file-format.md +0 -0
  41. {sqlrite-0.1.14 → sqlrite-0.1.15}/docs/getting-started.md +0 -0
  42. {sqlrite-0.1.14 → sqlrite-0.1.15}/docs/pager.md +0 -0
  43. {sqlrite-0.1.14 → sqlrite-0.1.15}/docs/release-plan.md +0 -0
  44. {sqlrite-0.1.14 → sqlrite-0.1.15}/docs/release-secrets.md +0 -0
  45. {sqlrite-0.1.14 → sqlrite-0.1.15}/docs/smoke-test.md +0 -0
  46. {sqlrite-0.1.14 → sqlrite-0.1.15}/docs/sql-engine.md +0 -0
  47. {sqlrite-0.1.14 → sqlrite-0.1.15}/docs/storage-model.md +0 -0
  48. {sqlrite-0.1.14 → sqlrite-0.1.15}/docs/supported-sql.md +0 -0
  49. {sqlrite-0.1.14 → sqlrite-0.1.15}/docs/usage.md +0 -0
  50. {sqlrite-0.1.14 → sqlrite-0.1.15}/examples/README.md +0 -0
  51. {sqlrite-0.1.14 → sqlrite-0.1.15}/examples/c/Makefile +0 -0
  52. {sqlrite-0.1.14 → sqlrite-0.1.15}/examples/c/hello.c +0 -0
  53. {sqlrite-0.1.14 → sqlrite-0.1.15}/examples/go/go.mod +0 -0
  54. {sqlrite-0.1.14 → sqlrite-0.1.15}/examples/go/hello.go +0 -0
  55. {sqlrite-0.1.14 → sqlrite-0.1.15}/examples/nodejs/hello.mjs +0 -0
  56. {sqlrite-0.1.14 → sqlrite-0.1.15}/examples/python/hello.py +0 -0
  57. {sqlrite-0.1.14 → sqlrite-0.1.15}/examples/rust/quickstart.rs +0 -0
  58. {sqlrite-0.1.14 → sqlrite-0.1.15}/examples/wasm/Makefile +0 -0
  59. {sqlrite-0.1.14 → sqlrite-0.1.15}/examples/wasm/index.html +0 -0
  60. {sqlrite-0.1.14 → sqlrite-0.1.15}/images/SQLRite - Desktop.png +0 -0
  61. {sqlrite-0.1.14 → sqlrite-0.1.15}/images/SQLRite Data Structures.png +0 -0
  62. {sqlrite-0.1.14 → sqlrite-0.1.15}/images/SQLRite Simple SQL Execution High Level Diagram.png +0 -0
  63. {sqlrite-0.1.14 → sqlrite-0.1.15}/images/SQLRite Simple SQL INSERT Execution High Level Diagram (Insert Row).png +0 -0
  64. {sqlrite-0.1.14 → sqlrite-0.1.15}/images/SQLRite Simple SQL INSERT Execution High Level Diagram.png +0 -0
  65. {sqlrite-0.1.14 → sqlrite-0.1.15}/images/SQLRite_logo.png +0 -0
  66. {sqlrite-0.1.14 → sqlrite-0.1.15}/images/architecture.png +0 -0
  67. {sqlrite-0.1.14 → sqlrite-0.1.15}/rust-toolchain.toml +0 -0
  68. {sqlrite-0.1.14 → sqlrite-0.1.15}/samples/AST.delete.example +0 -0
  69. {sqlrite-0.1.14 → sqlrite-0.1.15}/samples/AST.insert.exemple +0 -0
  70. {sqlrite-0.1.14 → sqlrite-0.1.15}/samples/AST.select.example +0 -0
  71. {sqlrite-0.1.14 → sqlrite-0.1.15}/samples/AST.update.example +0 -0
  72. {sqlrite-0.1.14 → sqlrite-0.1.15}/samples/CREATE TABLE sqlrite_schema.sql +0 -0
  73. {sqlrite-0.1.14 → sqlrite-0.1.15}/samples/CREATE_TABLE with duplicate.sql +0 -0
  74. {sqlrite-0.1.14 → sqlrite-0.1.15}/samples/CREATE_TABLE.sql +0 -0
  75. {sqlrite-0.1.14 → sqlrite-0.1.15}/samples/INSERT.sql +0 -0
  76. {sqlrite-0.1.14 → sqlrite-0.1.15}/scripts/bump-version.sh +0 -0
  77. {sqlrite-0.1.14 → sqlrite-0.1.15}/sdk/go/README.md +0 -0
  78. {sqlrite-0.1.14 → sqlrite-0.1.15}/sdk/go/conn.go +0 -0
  79. {sqlrite-0.1.14 → sqlrite-0.1.15}/sdk/go/go.mod +0 -0
  80. {sqlrite-0.1.14 → sqlrite-0.1.15}/sdk/go/rows.go +0 -0
  81. {sqlrite-0.1.14 → sqlrite-0.1.15}/sdk/go/sqlrite.go +0 -0
  82. {sqlrite-0.1.14 → sqlrite-0.1.15}/sdk/go/sqlrite_test.go +0 -0
  83. {sqlrite-0.1.14 → sqlrite-0.1.15}/sdk/go/stmt.go +0 -0
  84. {sqlrite-0.1.14 → sqlrite-0.1.15}/sdk/python/README.md +0 -0
  85. {sqlrite-0.1.14 → sqlrite-0.1.15}/sdk/python/src/lib.rs +0 -0
  86. {sqlrite-0.1.14 → sqlrite-0.1.15}/sdk/python/tests/test_sqlrite.py +0 -0
  87. {sqlrite-0.1.14 → sqlrite-0.1.15}/src/connection.rs +0 -0
  88. {sqlrite-0.1.14 → sqlrite-0.1.15}/src/error.rs +0 -0
  89. {sqlrite-0.1.14 → sqlrite-0.1.15}/src/lib.rs +0 -0
  90. {sqlrite-0.1.14 → sqlrite-0.1.15}/src/main.rs +0 -0
  91. {sqlrite-0.1.14 → sqlrite-0.1.15}/src/meta_command/mod.rs +0 -0
  92. {sqlrite-0.1.14 → sqlrite-0.1.15}/src/repl/mod.rs +0 -0
  93. {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/db/database.rs +0 -0
  94. {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/db/mod.rs +0 -0
  95. {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/db/secondary_index.rs +0 -0
  96. {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/pager/file.rs +0 -0
  97. {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/pager/header.rs +0 -0
  98. {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/pager/index_cell.rs +0 -0
  99. {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/pager/interior_page.rs +0 -0
  100. {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/pager/overflow.rs +0 -0
  101. {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/pager/page.rs +0 -0
  102. {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/pager/pager.rs +0 -0
  103. {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/pager/table_page.rs +0 -0
  104. {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/pager/varint.rs +0 -0
  105. {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/pager/wal.rs +0 -0
  106. {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/parser/create.rs +0 -0
  107. {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/parser/insert.rs +0 -0
  108. {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/parser/mod.rs +0 -0
  109. {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/parser/select.rs +0 -0
  110. {sqlrite-0.1.14 → sqlrite-0.1.15}/src/sql/tokenizer.rs +0 -0
@@ -3736,7 +3736,7 @@ dependencies = [
3736
3736
 
3737
3737
  [[package]]
3738
3738
  name = "sqlrite-desktop"
3739
- version = "0.1.14"
3739
+ version = "0.1.15"
3740
3740
  dependencies = [
3741
3741
  "serde",
3742
3742
  "serde_json",
@@ -3748,7 +3748,7 @@ dependencies = [
3748
3748
 
3749
3749
  [[package]]
3750
3750
  name = "sqlrite-engine"
3751
- version = "0.1.14"
3751
+ version = "0.1.15"
3752
3752
  dependencies = [
3753
3753
  "clap",
3754
3754
  "env_logger",
@@ -3763,7 +3763,7 @@ dependencies = [
3763
3763
 
3764
3764
  [[package]]
3765
3765
  name = "sqlrite-ffi"
3766
- version = "0.1.14"
3766
+ version = "0.1.15"
3767
3767
  dependencies = [
3768
3768
  "cbindgen",
3769
3769
  "sqlrite-engine",
@@ -3771,7 +3771,7 @@ dependencies = [
3771
3771
 
3772
3772
  [[package]]
3773
3773
  name = "sqlrite-nodejs"
3774
- version = "0.1.14"
3774
+ version = "0.1.15"
3775
3775
  dependencies = [
3776
3776
  "napi",
3777
3777
  "napi-build",
@@ -3781,7 +3781,7 @@ dependencies = [
3781
3781
 
3782
3782
  [[package]]
3783
3783
  name = "sqlrite-python"
3784
- version = "0.1.14"
3784
+ version = "0.1.15"
3785
3785
  dependencies = [
3786
3786
  "pyo3",
3787
3787
  "sqlrite-engine",
@@ -27,7 +27,7 @@ resolver = "3"
27
27
  # `package =` key so the import name stays `sqlrite` internally:
28
28
  # sqlrite = { package = "sqlrite-engine", path = "…" }
29
29
  name = "sqlrite-engine"
30
- version = "0.1.14"
30
+ version = "0.1.15"
31
31
  authors = ["Joao Henrique Machado Silva <joaoh82@gmail.com>"]
32
32
  edition = "2024"
33
33
  rust-version = "1.85"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sqlrite
3
- Version: 0.1.14
3
+ Version: 0.1.15
4
4
  Classifier: Development Status :: 3 - Alpha
5
5
  Classifier: Intended Audience :: Developers
6
6
  Classifier: License :: OSI Approved :: MIT License
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "sqlrite-desktop-frontend",
3
3
  "private": true,
4
- "version": "0.1.14",
4
+ "version": "0.1.15",
5
5
  "type": "module",
6
6
  "scripts": {
7
7
  "dev": "vite",
@@ -162,7 +162,7 @@ SELECT id, title FROM docs ORDER BY embedding <-> [0.1, ...] LIMIT 10;
162
162
  >
163
163
  > - **✅ 7d.1 — Pure HNSW algorithm** *(~700 LOC, shipped in v0.1.13).* `src/sql/hnsw.rs` standalone module: insert + search + layer assignment + beam search per layer + L2/cosine/dot distance dispatch. No SQL integration yet — vectors are passed in via a `get_vec` closure so the algorithm doesn't depend on table types. Tests verify recall@k ≥ 0.95 vs brute-force on randomly-generated vector sets; deterministic via a fixed RNG seed.
164
164
  > - **✅ 7d.2 — SQL integration** *(~500 LOC).* `CREATE INDEX … USING hnsw (col)` parser + engine, INSERT wiring (also calls `hnsw.insert()` incrementally), query optimizer hook (recognizes `ORDER BY vec_distance_l2(col, literal) LIMIT k` and probes the HNSW instead of full-scanning). HNSW lives in memory only at this point; the **CREATE INDEX SQL persists in `sqlrite_master` and reopen rebuilds the graph from current rows** — partial persistence ahead of 7d.3. DELETE/UPDATE on HNSW-indexed tables refused with helpful error pointing at 7d.3.
165
- > - **7d.3 — Persistence** *(~300 LOC).* Wire HNSW into the cell format: new `KIND_HNSW` cell tag, page-tree storage parallel to secondary indexes, save/reopen round-trip without rebuild. Also adds DELETE/UPDATE support since the persisted form gives us a natural rebuild trigger.
165
+ > - **✅ 7d.3 — Persistence** *(~600 LOC).* New `KIND_HNSW` cell tag and `HnswNodeCell` encoding (varint node_id + per-layer neighbor lists). Each HNSW index gets its own page tree parallel to secondary indexes. Open path loads cells directly into `HnswIndex::from_persisted_nodes` — no algorithm runs, exact bit-for-bit reproduction. Also unblocks DELETE / UPDATE on HNSW-indexed tables: those mark the index `needs_rebuild`, save rebuilds from current rows before staging. ~2× the original 300-LOC estimate because the cell encoding + tests + rebuild path together added more than expected.
166
166
  >
167
167
  > Each 7d.x ships as its own PR + release wave. The user-facing value lands at 7d.2; 7d.3 closes the persistence loop. 7d.1 is foundational but ships a tested algorithmic primitive on its own — useful as documentation of the engine's "from scratch" theme.
168
168
 
@@ -473,7 +473,7 @@ Approved sub-phases (Q1–Q10 resolved):
473
473
  - **✅ 7a — `VECTOR(N)` column type** *(v0.1.10)* — dense fixed-dimension f32 storage via the existing cell encoding; format bumped to v4. Bracket-array literal syntax `[0.1, 0.2, …]` (Q7).
474
474
  - **✅ 7b — Distance functions** *(v0.1.11)* — `vec_distance_l2/cosine/dot`, plus the ORDER BY-expressions parser change so KNN queries work end-to-end. Operators (`<->` `<=>` `<#>`) deferred to **7b.1** — sqlparser doesn't parse them natively, contradicting Q6's "tiny parser change" assumption.
475
475
  - **✅ 7c — Brute-force KNN executor optimization** — bounded `BinaryHeap` of size k for `ORDER BY <expr> LIMIT k`. ~1.8× faster than full-sort at N=10k for cheap keys; bigger gains on expensive keys like `vec_distance_l2`.
476
- - **7d — HNSW ANN index** — split into 7d.1 ( algorithm), 7d.2 (SQL integration), 7d.3 (persistence). `CREATE INDEX … USING hnsw (col)`; fixed defaults `M=16, ef_construction=200, ef_search=50` (Q2).
476
+ - **✅ 7d — HNSW ANN index** — three PRs: 7d.1 (algorithm w/ recall@10 ≥ 0.95), 7d.2 (SQL integration + query optimizer), 7d.3 (persistence + DELETE/UPDATE rebuild). `CREATE INDEX … USING hnsw (col)`; fixed defaults `M=16, ef_construction=200, ef_search=50` (Q2). New `KIND_HNSW` cell tag.
477
477
  - **7e — JSON column type + path queries** — `JSON` data type stored as bincoded `serde_json::Value` (Q3); `json_extract` / `json_array_length` / `json_object_keys` / `json_type`.
478
478
  - **7f — ~~Full-text search with BM25~~** — **deferred to Phase 8** (Q1).
479
479
  - **7g — `ask()` API across the product surface** — natural-language → SQL via Anthropic API (Q4), Anthropic-first then OpenAI + Ollama follow-ups. Foundational 7g.1 introduces a new `sqlrite-ask` crate (Q10 — separate crate, not a feature flag). Thin per-product adapters in 7g.2-7g.8 cover REPL, desktop, Python, Node.js, Go, WASM (JS-callback shape per Q9), and the MCP `ask` tool.
@@ -4,7 +4,7 @@ build-backend = "maturin"
4
4
 
5
5
  [project]
6
6
  name = "sqlrite"
7
- version = "0.1.14"
7
+ version = "0.1.15"
8
8
  description = "Python bindings for SQLRite — a small, embeddable SQLite clone written in Rust."
9
9
  authors = [{ name = "Joao Henrique Machado Silva", email = "joaoh82@gmail.com" }]
10
10
  license = { text = "MIT" }
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "sqlrite-python"
3
- version = "0.1.14"
3
+ version = "0.1.15"
4
4
  authors = ["Joao Henrique Machado Silva <joaoh82@gmail.com>"]
5
5
  edition = "2024"
6
6
  rust-version = "1.85"
@@ -143,6 +143,11 @@ pub struct HnswIndexEntry {
143
143
  pub column_name: String,
144
144
  /// The graph itself.
145
145
  pub index: HnswIndex,
146
+ /// Phase 7d.3 — true iff a DELETE or UPDATE-on-vector-col has
147
+ /// invalidated the graph since the last rebuild. INSERT maintains
148
+ /// the graph incrementally and leaves this false. The next save
149
+ /// rebuilds dirty indexes from current rows before serializing.
150
+ pub needs_rebuild: bool,
146
151
  }
147
152
 
148
153
  impl Table {
@@ -178,26 +178,6 @@ pub fn execute_delete(stmt: &Statement, db: &mut Database) -> Result<usize> {
178
178
  };
179
179
  let table_name = extract_single_table_name(tables)?;
180
180
 
181
- // Phase 7d.2 limitation: HNSW lacks an in-place delete-node operation.
182
- // True deletion needs either soft-delete + tombstones or a graph rebuild
183
- // — both nontrivial. Until 7d.3 lands persistence we don't have a
184
- // natural rebuild trigger either. So: refuse DELETE on tables carrying
185
- // any HNSW index, with a message that points at the workaround
186
- // (DROP the index, DELETE, recreate).
187
- {
188
- let table = db.get_table(table_name.clone()).map_err(|_| {
189
- SQLRiteError::General(format!("DELETE references unknown table '{table_name}'"))
190
- })?;
191
- if !table.hnsw_indexes.is_empty() {
192
- let names: Vec<&str> = table.hnsw_indexes.iter().map(|e| e.name.as_str()).collect();
193
- return Err(SQLRiteError::NotImplemented(format!(
194
- "DELETE on tables with HNSW indexes is not supported yet \
195
- (Phase 7d.3 follow-up). DROP the index first, then DELETE, then re-CREATE. \
196
- Table '{table_name}' currently has: {names:?}"
197
- )));
198
- }
199
- }
200
-
201
181
  // Compute matching rowids with an immutable borrow, then mutate.
202
182
  let matching: Vec<i64> = {
203
183
  let table = db
@@ -224,6 +204,15 @@ pub fn execute_delete(stmt: &Statement, db: &mut Database) -> Result<usize> {
224
204
  for rowid in &matching {
225
205
  table.delete_row(*rowid);
226
206
  }
207
+ // Phase 7d.3 — any DELETE invalidates every HNSW index on this
208
+ // table (the deleted node could still appear in other nodes'
209
+ // neighbor lists, breaking subsequent searches). Mark dirty so
210
+ // the next save rebuilds from current rows before serializing.
211
+ if !matching.is_empty() {
212
+ for entry in &mut table.hnsw_indexes {
213
+ entry.needs_rebuild = true;
214
+ }
215
+ }
227
216
  Ok(matching.len())
228
217
  }
229
218
 
@@ -250,25 +239,6 @@ pub fn execute_update(stmt: &Statement, db: &mut Database) -> Result<usize> {
250
239
 
251
240
  let table_name = extract_table_name(table)?;
252
241
 
253
- // Phase 7d.2 limitation (same shape as DELETE above): we have no
254
- // in-place UPDATE-an-HNSW-node primitive. UPDATE on a column NOT
255
- // covered by HNSW is fine in principle, but the simplest MVP is
256
- // refuse-everything-when-HNSW-is-present. Re-evaluate in 7d.3 once
257
- // persistence + rebuild is in.
258
- {
259
- let tbl = db.get_table(table_name.clone()).map_err(|_| {
260
- SQLRiteError::General(format!("UPDATE references unknown table '{table_name}'"))
261
- })?;
262
- if !tbl.hnsw_indexes.is_empty() {
263
- let names: Vec<&str> = tbl.hnsw_indexes.iter().map(|e| e.name.as_str()).collect();
264
- return Err(SQLRiteError::NotImplemented(format!(
265
- "UPDATE on tables with HNSW indexes is not supported yet \
266
- (Phase 7d.3 follow-up). DROP the index first if you need to mutate. \
267
- Table '{table_name}' currently has: {names:?}"
268
- )));
269
- }
270
- }
271
-
272
242
  // Resolve assignment targets to plain column names and verify they exist.
273
243
  let mut parsed_assignments: Vec<(String, Expr)> = Vec::with_capacity(assignments.len());
274
244
  {
@@ -337,6 +307,24 @@ pub fn execute_update(stmt: &Statement, db: &mut Database) -> Result<usize> {
337
307
  tbl.set_value(col, *rowid, v.clone())?;
338
308
  }
339
309
  }
310
+
311
+ // Phase 7d.3 — UPDATE may have changed a vector column that an
312
+ // HNSW index covers. Mark every covering index dirty so save
313
+ // rebuilds from current rows. Updates that only touched
314
+ // columns no HNSW index covers leave the flag alone: the
315
+ // loop below marks an index dirty only when its own column
316
+ // was assigned, so untouched graphs skip the rebuild on save.
317
+ if !work.is_empty() {
318
+ let updated_columns: std::collections::HashSet<&str> = work
319
+ .iter()
320
+ .flat_map(|(_, values)| values.iter().map(|(c, _)| c.as_str()))
321
+ .collect();
322
+ for entry in &mut tbl.hnsw_indexes {
323
+ if updated_columns.contains(entry.column_name.as_str()) {
324
+ entry.needs_rebuild = true;
325
+ }
326
+ }
327
+ }
340
328
  Ok(work.len())
341
329
  }
342
330
 
@@ -609,6 +597,8 @@ fn create_hnsw_index(
609
597
  name: index_name.to_string(),
610
598
  column_name: column_name.to_string(),
611
599
  index: idx,
600
+ // Freshly built — no DELETE/UPDATE has invalidated it yet.
601
+ needs_rebuild: false,
612
602
  });
613
603
  Ok(index_name.to_string())
614
604
  }
@@ -202,6 +202,50 @@ impl HnswIndex {
202
202
  self.nodes.len()
203
203
  }
204
204
 
205
+ /// Phase 7d.3 — produces (node_id, layers) pairs in ascending node_id
206
+ /// order, suitable for serializing the graph to disk via the
207
+ /// `HnswNodeCell` wire format. The graph's metadata
208
+ /// (entry_point + top_layer) is recoverable from the nodes alone:
209
+ /// top_layer = max(max_layer); entry_point = any node at top_layer.
210
+ /// So we don't ship a separate metadata cell.
211
+ pub fn serialize_nodes(&self) -> Vec<(i64, Vec<Vec<i64>>)> {
212
+ let mut out: Vec<(i64, Vec<Vec<i64>>)> = self
213
+ .nodes
214
+ .iter()
215
+ .map(|(id, n)| (*id, n.layers.clone()))
216
+ .collect();
217
+ out.sort_by_key(|(id, _)| *id);
218
+ out
219
+ }
220
+
221
+ /// Phase 7d.3 — rebuilds an HnswIndex from a stream of (node_id, layers)
222
+ /// pairs as produced by `serialize_nodes` and round-tripped through
223
+ /// `HnswNodeCell` encode/decode. The rebuilt index has the same nodes,
224
+ /// same neighbor lists, same top_layer; entry_point is the first streamed node at top_layer.
225
+ /// `seed` is passed straight to `Self::new`. Loading never runs the
226
+ /// algorithmic `insert` path, so the seed only matters if a caller
227
+ /// later calls `insert` after deserializing (then it controls layer
228
+ /// assignment for the appended node).
229
+ pub fn from_persisted_nodes<I>(distance: DistanceMetric, seed: u64, nodes: I) -> Self
230
+ where
231
+ I: IntoIterator<Item = (i64, Vec<Vec<i64>>)>,
232
+ {
233
+ let mut idx = Self::new(distance, seed);
234
+ let mut top_layer = 0usize;
235
+ let mut entry_point: Option<i64> = None;
236
+ for (id, layers) in nodes {
237
+ let max_layer = layers.len().saturating_sub(1);
238
+ if max_layer > top_layer || entry_point.is_none() {
239
+ top_layer = max_layer;
240
+ entry_point = Some(id);
241
+ }
242
+ idx.nodes.insert(id, Node { layers });
243
+ }
244
+ idx.top_layer = top_layer;
245
+ idx.entry_point = entry_point;
246
+ idx
247
+ }
248
+
205
249
  /// Inserts a node into the graph. The node id must be unique;
206
250
  /// re-inserting an existing id is a no-op (returns without error).
207
251
  /// `vec` is the new node's vector; `get_vec` looks up the vector
@@ -1377,28 +1377,39 @@ mod tests {
1377
1377
  assert!(resp.contains("1 row returned"), "got: {resp}");
1378
1378
  }
1379
1379
 
1380
+ // Phase 7d.3 — DELETE / UPDATE on HNSW-indexed tables now works.
1381
+ // The 7d.2 versions of these tests asserted a refusal; replaced
1382
+ // with assertions that the operation succeeds + the index entry's
1383
+ // needs_rebuild flag flipped so the next save will rebuild.
1384
+
1380
1385
  #[test]
1381
- fn delete_on_hnsw_indexed_table_errors_with_helpful_message() {
1386
+ fn delete_on_hnsw_indexed_table_succeeds_and_marks_dirty() {
1382
1387
  let mut db = seed_hnsw_table();
1383
1388
  process_command("CREATE INDEX ix_e ON docs USING hnsw (e);", &mut db).unwrap();
1384
- let err = process_command("DELETE FROM docs WHERE id = 1;", &mut db).unwrap_err();
1385
- let msg = format!("{err}");
1389
+ let resp = process_command("DELETE FROM docs WHERE id = 1;", &mut db).unwrap();
1390
+ assert!(resp.contains("1 row"), "expected 1 row deleted: {resp}");
1391
+
1392
+ let docs = db.get_table("docs".to_string()).unwrap();
1393
+ let entry = docs.hnsw_indexes.iter().find(|e| e.name == "ix_e").unwrap();
1386
1394
  assert!(
1387
- msg.to_lowercase().contains("hnsw") && msg.contains("ix_e"),
1388
- "expected error mentioning HNSW + index name; got: {msg}"
1395
+ entry.needs_rebuild,
1396
+ "DELETE should have marked HNSW index dirty for rebuild on next save"
1389
1397
  );
1390
1398
  }
1391
1399
 
1392
1400
  #[test]
1393
- fn update_on_hnsw_indexed_table_errors_with_helpful_message() {
1401
+ fn update_on_hnsw_indexed_vector_col_succeeds_and_marks_dirty() {
1394
1402
  let mut db = seed_hnsw_table();
1395
1403
  process_command("CREATE INDEX ix_e ON docs USING hnsw (e);", &mut db).unwrap();
1396
- let err =
1397
- process_command("UPDATE docs SET e = [9.0, 9.0] WHERE id = 1;", &mut db).unwrap_err();
1398
- let msg = format!("{err}");
1404
+ let resp =
1405
+ process_command("UPDATE docs SET e = [9.0, 9.0] WHERE id = 1;", &mut db).unwrap();
1406
+ assert!(resp.contains("1 row"), "expected 1 row updated: {resp}");
1407
+
1408
+ let docs = db.get_table("docs".to_string()).unwrap();
1409
+ let entry = docs.hnsw_indexes.iter().find(|e| e.name == "ix_e").unwrap();
1399
1410
  assert!(
1400
- msg.to_lowercase().contains("hnsw"),
1401
- "expected error mentioning HNSW; got: {msg}"
1411
+ entry.needs_rebuild,
1412
+ "UPDATE on the vector column should have marked HNSW index dirty"
1402
1413
  );
1403
1414
  }
1404
1415
 
@@ -57,6 +57,21 @@ pub const KIND_LOCAL: u8 = 0x01;
57
57
  pub const KIND_OVERFLOW: u8 = 0x02;
58
58
  pub const KIND_INTERIOR: u8 = 0x03;
59
59
  pub const KIND_INDEX: u8 = 0x04;
60
+ /// Phase 7d.3: a single HNSW node's per-layer neighbor lists,
61
+ /// serialized into one cell. Body layout (after the shared
62
+ /// `cell_length | kind_tag` prefix):
63
+ ///
64
+ /// ```text
65
+ /// node_id zigzag varint the rowid this graph node represents
66
+ /// max_layer varint highest layer this node lives in
67
+ /// for each layer 0..=max_layer:
68
+ /// count varint number of neighbors at this layer
69
+ /// for each: zigzag varint neighbor node_id
70
+ /// ```
71
+ ///
72
+ /// `peek_rowid` works uniformly on this kind because it just reads
73
+ /// the first varint after the kind tag — exactly the `node_id` here.
74
+ pub const KIND_HNSW: u8 = 0x05;
60
75
 
61
76
  /// Value type tag stored in each non-NULL value block.
62
77
  pub mod tag {
@@ -0,0 +1,258 @@
1
+ //! On-disk format for a single HNSW graph node (Phase 7d.3).
2
+ //!
3
+ //! Each cell carries one node's per-layer neighbor lists. The cells live
4
+ //! on `TableLeaf`-style pages identical to a regular table's data tree —
5
+ //! same slot directory, same sibling `next_page` chain, same interior-
6
+ //! page mechanics from Phase 3d. The only thing different is the per-cell
7
+ //! body, signaled by `KIND_HNSW`.
8
+ //!
9
+ //! Reusing the table-tree shape lets `Cell::peek_rowid` work uniformly
10
+ //! across all cell kinds: it skips `cell_length | kind_tag` and reads the
11
+ //! first varint, which is `node_id` here. So slot-directory binary
12
+ //! search by node_id works without HNSW-specific code in the page-level
13
+ //! plumbing.
14
+ //!
15
+ //! ```text
16
+ //! cell_length varint bytes after this field
17
+ //! kind_tag u8 = 0x05 (KIND_HNSW)
18
+ //! node_id zigzag varint the rowid this graph node represents
19
+ //! max_layer varint highest layer this node lives in
20
+ //! for layer in 0..=max_layer:
21
+ //! count varint number of neighbors at this layer
22
+ //! for each neighbor:
23
+ //! neighbor zigzag varint neighbor's node_id
24
+ //! ```
25
+ //!
26
+ //! No null bitmap — every field is always present. No type tag — every
27
+ //! field has a fixed type (varint or zigzag varint). The encoding is
28
+ //! deliberately minimal because HNSW indexes can have N nodes each with
29
+ //! up to ~M·log(N) total neighbors, and we don't want the per-cell
30
+ //! overhead to dominate disk usage.
31
+
32
+ use crate::error::{Result, SQLRiteError};
33
+ use crate::sql::pager::cell::KIND_HNSW;
34
+ use crate::sql::pager::varint;
35
+
36
+ /// One HNSW node's persisted form. `layers[i]` is the list of neighbor
37
+ /// node_ids at layer i; the node lives at every layer 0..=layers.len()-1.
38
+ #[derive(Debug, Clone, PartialEq)]
39
+ pub struct HnswNodeCell {
40
+ pub node_id: i64,
41
+ /// `layers[0]` is the densest layer (always present); `layers.len()`
42
+ /// equals the node's max_layer + 1.
43
+ pub layers: Vec<Vec<i64>>,
44
+ }
45
+
46
+ impl HnswNodeCell {
47
+ pub fn new(node_id: i64, layers: Vec<Vec<i64>>) -> Self {
48
+ Self { node_id, layers }
49
+ }
50
+
51
+ /// Encodes the cell into a freshly-allocated `Vec<u8>`. The result
52
+ /// starts with the shared `cell_length | kind_tag` prefix and is
53
+ /// directly usable as a slot-directory entry on a `TableLeaf`-style
54
+ /// page.
55
+ pub fn encode(&self) -> Result<Vec<u8>> {
56
+ if self.layers.is_empty() {
57
+ return Err(SQLRiteError::Internal(format!(
58
+ "HNSW node {} has zero layers — every node lives at layer 0 minimum",
59
+ self.node_id
60
+ )));
61
+ }
62
+
63
+ // Body capacity guess: 1 (kind) + 10 (node_id) + 5 (max_layer)
64
+ // + per-layer overhead. Most nodes are layer-0-only so the
65
+ // typical body is ~1 + 10 + 1 + 1 + M·10 ≈ 175 bytes for M=16.
66
+ let layer_bytes = self.layers.iter().map(|l| 5 + l.len() * 10).sum::<usize>();
67
+ let mut body = Vec::with_capacity(1 + 10 + 5 + layer_bytes);
68
+
69
+ body.push(KIND_HNSW);
70
+ varint::write_i64(&mut body, self.node_id);
71
+ // max_layer = layers.len() - 1
72
+ varint::write_u64(&mut body, (self.layers.len() - 1) as u64);
73
+ for layer in &self.layers {
74
+ varint::write_u64(&mut body, layer.len() as u64);
75
+ for n in layer {
76
+ varint::write_i64(&mut body, *n);
77
+ }
78
+ }
79
+
80
+ let mut out = Vec::with_capacity(body.len() + varint::MAX_VARINT_BYTES);
81
+ varint::write_u64(&mut out, body.len() as u64);
82
+ out.extend_from_slice(&body);
83
+ Ok(out)
84
+ }
85
+
86
+ /// Decodes one cell starting at `pos`. Returns the cell plus the
87
+ /// total bytes consumed (including the leading length varint).
88
+ pub fn decode(buf: &[u8], pos: usize) -> Result<(HnswNodeCell, usize)> {
89
+ let (body_len, len_bytes) = varint::read_u64(buf, pos)?;
90
+ let body_start = pos + len_bytes;
91
+ let body_end = body_start
92
+ .checked_add(body_len as usize)
93
+ .ok_or_else(|| SQLRiteError::Internal("HNSW cell length overflow".to_string()))?;
94
+ if body_end > buf.len() {
95
+ return Err(SQLRiteError::Internal(format!(
96
+ "HNSW cell extends past buffer: needs {body_start}..{body_end}, have {}",
97
+ buf.len()
98
+ )));
99
+ }
100
+ let body = &buf[body_start..body_end];
101
+ if body.first().copied() != Some(KIND_HNSW) {
102
+ return Err(SQLRiteError::Internal(format!(
103
+ "HnswNodeCell::decode called on non-HNSW entry (kind_tag = {:#x})",
104
+ body.first().copied().unwrap_or(0)
105
+ )));
106
+ }
107
+
108
+ let mut cur = 1usize;
109
+ let (node_id, n) = varint::read_i64(body, cur)?;
110
+ cur += n;
111
+ let (max_layer_u64, n) = varint::read_u64(body, cur)?;
112
+ cur += n;
113
+
114
+ let layer_count = (max_layer_u64 as usize)
115
+ .checked_add(1)
116
+ .ok_or_else(|| SQLRiteError::Internal("HNSW max_layer overflow".to_string()))?;
117
+ // Sanity: max_layer is in practice ≤ ~10 for N ≤ 1B with
118
+ // m_l ≈ 0.36. A wildly-large value almost certainly means a
119
+ // corrupt cell — bail before allocating an enormous Vec.
120
+ if layer_count > 64 {
121
+ return Err(SQLRiteError::Internal(format!(
122
+ "HNSW node {node_id} claims max_layer {} (>= 64) — corrupt cell?",
123
+ layer_count - 1
124
+ )));
125
+ }
126
+
127
+ let mut layers = Vec::with_capacity(layer_count);
128
+ for _ in 0..layer_count {
129
+ let (count, n) = varint::read_u64(body, cur)?;
130
+ cur += n;
131
+ // Same sanity bound — a single layer's neighbor list shouldn't
132
+ // exceed `2 · M_max0` even after pruning bugs. 256 is a
133
+ // generous cap.
134
+ if count > 256 {
135
+ return Err(SQLRiteError::Internal(format!(
136
+ "HNSW node {node_id} layer claims {count} neighbors (>256) — corrupt cell?"
137
+ )));
138
+ }
139
+ let mut neighbors = Vec::with_capacity(count as usize);
140
+ for _ in 0..count {
141
+ let (id, n) = varint::read_i64(body, cur)?;
142
+ cur += n;
143
+ neighbors.push(id);
144
+ }
145
+ layers.push(neighbors);
146
+ }
147
+
148
+ if cur != body.len() {
149
+ return Err(SQLRiteError::Internal(format!(
150
+ "HNSW cell had {} trailing bytes",
151
+ body.len() - cur
152
+ )));
153
+ }
154
+
155
+ Ok((
156
+ HnswNodeCell { node_id, layers },
157
+ len_bytes + body_len as usize,
158
+ ))
159
+ }
160
+ }
161
+
162
+ #[cfg(test)]
163
+ mod tests {
164
+ use super::*;
165
+
166
+ fn round_trip(cell: &HnswNodeCell) {
167
+ let bytes = cell.encode().expect("encode");
168
+ let (decoded, consumed) = HnswNodeCell::decode(&bytes, 0).expect("decode");
169
+ assert_eq!(
170
+ consumed,
171
+ bytes.len(),
172
+ "decode should consume the whole cell"
173
+ );
174
+ assert_eq!(&decoded, cell);
175
+ }
176
+
177
+ #[test]
178
+ fn single_layer_node_round_trips() {
179
+ // Most common case: a layer-0-only node with a handful of neighbors.
180
+ let cell = HnswNodeCell::new(42, vec![vec![1, 2, 3, 5, 8]]);
181
+ round_trip(&cell);
182
+ }
183
+
184
+ #[test]
185
+ fn multi_layer_node_round_trips() {
186
+ let cell = HnswNodeCell::new(
187
+ 17,
188
+ vec![
189
+ vec![1, 2, 3, 4, 5, 6, 7, 8], // layer 0 (densest)
190
+ vec![1, 3, 7], // layer 1
191
+ vec![3], // layer 2 (sparsest)
192
+ ],
193
+ );
194
+ round_trip(&cell);
195
+ }
196
+
197
+ #[test]
198
+ fn empty_neighbor_layer_round_trips() {
199
+ // A node can have an empty layer (e.g. if its only neighbor was
200
+ // pruned away). The encoding must still survive.
201
+ let cell = HnswNodeCell::new(5, vec![vec![1, 2], vec![]]);
202
+ round_trip(&cell);
203
+ }
204
+
205
+ #[test]
206
+ fn node_id_negative_and_large() {
207
+ // node_id is zigzag-encoded; cover both signs.
208
+ round_trip(&HnswNodeCell::new(-1, vec![vec![]]));
209
+ round_trip(&HnswNodeCell::new(i64::MAX, vec![vec![1, 2]]));
210
+ round_trip(&HnswNodeCell::new(i64::MIN, vec![vec![3, 4]]));
211
+ }
212
+
213
+ #[test]
214
+ fn zero_layers_is_rejected_at_encode() {
215
+ let bad = HnswNodeCell::new(1, vec![]);
216
+ let err = bad.encode().unwrap_err();
217
+ assert!(format!("{err}").contains("zero layers"));
218
+ }
219
+
220
+ #[test]
221
+ fn decode_rejects_wrong_kind_tag() {
222
+ // Build something that looks like a cell with an arbitrary
223
+ // (non-HNSW) tag byte and confirm decode bails.
224
+ let mut bad = Vec::new();
225
+ varint::write_u64(&mut bad, 1); // body_len
226
+ bad.push(0x01); // KIND_LOCAL, not KIND_HNSW
227
+ let err = HnswNodeCell::decode(&bad, 0).unwrap_err();
228
+ assert!(format!("{err}").contains("non-HNSW entry"));
229
+ }
230
+
231
+ #[test]
232
+ fn decode_rejects_truncated_buffer() {
233
+ let cell = HnswNodeCell::new(1, vec![vec![10, 20, 30]]);
234
+ let bytes = cell.encode().expect("encode");
235
+ for chop in 1..=3 {
236
+ let truncated = &bytes[..bytes.len() - chop];
237
+ assert!(
238
+ HnswNodeCell::decode(truncated, 0).is_err(),
239
+ "expected error chopping {chop} byte(s) from end of {} byte cell",
240
+ bytes.len()
241
+ );
242
+ }
243
+ }
244
+
245
+ #[test]
246
+ fn decode_rejects_implausible_max_layer() {
247
+ // Hand-craft a cell whose max_layer is 100 (above the 64 sanity bound).
248
+ let mut body = Vec::new();
249
+ body.push(KIND_HNSW);
250
+ varint::write_i64(&mut body, 0); // node_id
251
+ varint::write_u64(&mut body, 100); // max_layer = 100 → 101 layers
252
+ let mut out = Vec::new();
253
+ varint::write_u64(&mut out, body.len() as u64);
254
+ out.extend_from_slice(&body);
255
+ let err = HnswNodeCell::decode(&out, 0).unwrap_err();
256
+ assert!(format!("{err}").to_lowercase().contains("corrupt"));
257
+ }
258
+ }