sqlrite 0.9.0__tar.gz → 0.9.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146) hide show
  1. {sqlrite-0.9.0 → sqlrite-0.9.1}/Cargo.lock +7 -7
  2. {sqlrite-0.9.0 → sqlrite-0.9.1}/Cargo.toml +2 -2
  3. {sqlrite-0.9.0 → sqlrite-0.9.1}/PKG-INFO +1 -1
  4. {sqlrite-0.9.0 → sqlrite-0.9.1}/README.md +2 -1
  5. {sqlrite-0.9.0 → sqlrite-0.9.1}/desktop/package.json +1 -1
  6. {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/architecture.md +1 -0
  7. {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/benchmarks.md +45 -21
  8. {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/pager.md +1 -1
  9. {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/phase-7-plan.md +3 -2
  10. {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/supported-sql.md +20 -8
  11. {sqlrite-0.9.0 → sqlrite-0.9.1}/pyproject.toml +1 -1
  12. {sqlrite-0.9.0 → sqlrite-0.9.1}/sdk/python/Cargo.toml +1 -1
  13. {sqlrite-0.9.0 → sqlrite-0.9.1}/sqlrite-ask/Cargo.toml +1 -1
  14. {sqlrite-0.9.0 → sqlrite-0.9.1}/src/connection.rs +154 -2
  15. {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/db/database.rs +3 -3
  16. {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/db/table.rs +16 -8
  17. sqlrite-0.9.1/src/sql/dialect.rs +100 -0
  18. {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/executor.rs +138 -23
  19. {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/hnsw.rs +34 -0
  20. {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/mod.rs +15 -4
  21. {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/pager/mod.rs +307 -33
  22. {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/params.rs +2 -2
  23. {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/parser/create.rs +1 -1
  24. sqlrite-0.9.1/src/sql/pragma.rs +514 -0
  25. {sqlrite-0.9.0 → sqlrite-0.9.1}/.github/workflows/ci.yml +0 -0
  26. {sqlrite-0.9.0 → sqlrite-0.9.1}/.github/workflows/release-pr.yml +0 -0
  27. {sqlrite-0.9.0 → sqlrite-0.9.1}/.github/workflows/release.yml +0 -0
  28. {sqlrite-0.9.0 → sqlrite-0.9.1}/.github/workflows/rust.yml +0 -0
  29. {sqlrite-0.9.0 → sqlrite-0.9.1}/.gitignore +0 -0
  30. {sqlrite-0.9.0 → sqlrite-0.9.1}/CLAUDE.md +0 -0
  31. {sqlrite-0.9.0 → sqlrite-0.9.1}/CODE_OF_CONDUCT.md +0 -0
  32. {sqlrite-0.9.0 → sqlrite-0.9.1}/LICENSE +0 -0
  33. {sqlrite-0.9.0 → sqlrite-0.9.1}/MAINTAINERS +0 -0
  34. {sqlrite-0.9.0 → sqlrite-0.9.1}/Makefile +0 -0
  35. {sqlrite-0.9.0 → sqlrite-0.9.1}/desktop/index.html +0 -0
  36. {sqlrite-0.9.0 → sqlrite-0.9.1}/desktop/package-lock.json +0 -0
  37. {sqlrite-0.9.0 → sqlrite-0.9.1}/desktop/src/App.svelte +0 -0
  38. {sqlrite-0.9.0 → sqlrite-0.9.1}/desktop/src/app.css +0 -0
  39. {sqlrite-0.9.0 → sqlrite-0.9.1}/desktop/src/main.ts +0 -0
  40. {sqlrite-0.9.0 → sqlrite-0.9.1}/desktop/src/vite-env.d.ts +0 -0
  41. {sqlrite-0.9.0 → sqlrite-0.9.1}/desktop/svelte.config.js +0 -0
  42. {sqlrite-0.9.0 → sqlrite-0.9.1}/desktop/tsconfig.json +0 -0
  43. {sqlrite-0.9.0 → sqlrite-0.9.1}/desktop/vite.config.ts +0 -0
  44. {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/_index.md +0 -0
  45. {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/ask-backend-examples.md +0 -0
  46. {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/ask.md +0 -0
  47. {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/benchmarks-plan.md +0 -0
  48. {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/concurrent-writes-plan.md +0 -0
  49. {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/design-decisions.md +0 -0
  50. {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/desktop.md +0 -0
  51. {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/embedding.md +0 -0
  52. {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/file-format.md +0 -0
  53. {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/fts.md +0 -0
  54. {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/getting-started.md +0 -0
  55. {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/mcp.md +0 -0
  56. {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/phase-8-plan.md +0 -0
  57. {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/release-plan.md +0 -0
  58. {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/release-secrets.md +0 -0
  59. {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/roadmap.md +0 -0
  60. {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/smoke-test.md +0 -0
  61. {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/sql-engine.md +0 -0
  62. {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/storage-model.md +0 -0
  63. {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/usage.md +0 -0
  64. {sqlrite-0.9.0 → sqlrite-0.9.1}/examples/README.md +0 -0
  65. {sqlrite-0.9.0 → sqlrite-0.9.1}/examples/c/Makefile +0 -0
  66. {sqlrite-0.9.0 → sqlrite-0.9.1}/examples/c/hello.c +0 -0
  67. {sqlrite-0.9.0 → sqlrite-0.9.1}/examples/go/go.mod +0 -0
  68. {sqlrite-0.9.0 → sqlrite-0.9.1}/examples/go/hello.go +0 -0
  69. {sqlrite-0.9.0 → sqlrite-0.9.1}/examples/hybrid-retrieval/README.md +0 -0
  70. {sqlrite-0.9.0 → sqlrite-0.9.1}/examples/hybrid-retrieval/hybrid_retrieval.rs +0 -0
  71. {sqlrite-0.9.0 → sqlrite-0.9.1}/examples/nodejs/hello.mjs +0 -0
  72. {sqlrite-0.9.0 → sqlrite-0.9.1}/examples/python/hello.py +0 -0
  73. {sqlrite-0.9.0 → sqlrite-0.9.1}/examples/rust/quickstart.rs +0 -0
  74. {sqlrite-0.9.0 → sqlrite-0.9.1}/examples/wasm/Makefile +0 -0
  75. {sqlrite-0.9.0 → sqlrite-0.9.1}/examples/wasm/index.html +0 -0
  76. {sqlrite-0.9.0 → sqlrite-0.9.1}/examples/wasm/server.mjs +0 -0
  77. {sqlrite-0.9.0 → sqlrite-0.9.1}/images/SQLRite - Desktop.png +0 -0
  78. {sqlrite-0.9.0 → sqlrite-0.9.1}/images/SQLRite Data Structures.png +0 -0
  79. {sqlrite-0.9.0 → sqlrite-0.9.1}/images/SQLRite Simple SQL Execution High Level Diagram.png +0 -0
  80. {sqlrite-0.9.0 → sqlrite-0.9.1}/images/SQLRite Simple SQL INSERT Execution High Level Diagram (Insert Row).png +0 -0
  81. {sqlrite-0.9.0 → sqlrite-0.9.1}/images/SQLRite Simple SQL INSERT Execution High Level Diagram.png +0 -0
  82. {sqlrite-0.9.0 → sqlrite-0.9.1}/images/SQLRite_logo.png +0 -0
  83. {sqlrite-0.9.0 → sqlrite-0.9.1}/images/architecture.png +0 -0
  84. {sqlrite-0.9.0 → sqlrite-0.9.1}/rust-toolchain.toml +0 -0
  85. {sqlrite-0.9.0 → sqlrite-0.9.1}/samples/AST.delete.example +0 -0
  86. {sqlrite-0.9.0 → sqlrite-0.9.1}/samples/AST.insert.exemple +0 -0
  87. {sqlrite-0.9.0 → sqlrite-0.9.1}/samples/AST.select.example +0 -0
  88. {sqlrite-0.9.0 → sqlrite-0.9.1}/samples/AST.update.example +0 -0
  89. {sqlrite-0.9.0 → sqlrite-0.9.1}/samples/CREATE TABLE sqlrite_schema.sql +0 -0
  90. {sqlrite-0.9.0 → sqlrite-0.9.1}/samples/CREATE_TABLE with duplicate.sql +0 -0
  91. {sqlrite-0.9.0 → sqlrite-0.9.1}/samples/CREATE_TABLE.sql +0 -0
  92. {sqlrite-0.9.0 → sqlrite-0.9.1}/samples/INSERT.sql +0 -0
  93. {sqlrite-0.9.0 → sqlrite-0.9.1}/scripts/bump-version.sh +0 -0
  94. {sqlrite-0.9.0 → sqlrite-0.9.1}/sdk/go/README.md +0 -0
  95. {sqlrite-0.9.0 → sqlrite-0.9.1}/sdk/go/ask.go +0 -0
  96. {sqlrite-0.9.0 → sqlrite-0.9.1}/sdk/go/ask_test.go +0 -0
  97. {sqlrite-0.9.0 → sqlrite-0.9.1}/sdk/go/conn.go +0 -0
  98. {sqlrite-0.9.0 → sqlrite-0.9.1}/sdk/go/go.mod +0 -0
  99. {sqlrite-0.9.0 → sqlrite-0.9.1}/sdk/go/rows.go +0 -0
  100. {sqlrite-0.9.0 → sqlrite-0.9.1}/sdk/go/sqlrite.go +0 -0
  101. {sqlrite-0.9.0 → sqlrite-0.9.1}/sdk/go/sqlrite_test.go +0 -0
  102. {sqlrite-0.9.0 → sqlrite-0.9.1}/sdk/go/stmt.go +0 -0
  103. {sqlrite-0.9.0 → sqlrite-0.9.1}/sdk/python/README.md +0 -0
  104. {sqlrite-0.9.0 → sqlrite-0.9.1}/sdk/python/src/lib.rs +0 -0
  105. {sqlrite-0.9.0 → sqlrite-0.9.1}/sdk/python/tests/test_ask.py +0 -0
  106. {sqlrite-0.9.0 → sqlrite-0.9.1}/sdk/python/tests/test_sqlrite.py +0 -0
  107. {sqlrite-0.9.0 → sqlrite-0.9.1}/sqlrite-ask/README.md +0 -0
  108. {sqlrite-0.9.0 → sqlrite-0.9.1}/sqlrite-ask/src/lib.rs +0 -0
  109. {sqlrite-0.9.0 → sqlrite-0.9.1}/sqlrite-ask/src/prompt.rs +0 -0
  110. {sqlrite-0.9.0 → sqlrite-0.9.1}/sqlrite-ask/src/provider/anthropic.rs +0 -0
  111. {sqlrite-0.9.0 → sqlrite-0.9.1}/sqlrite-ask/src/provider/mock.rs +0 -0
  112. {sqlrite-0.9.0 → sqlrite-0.9.1}/sqlrite-ask/src/provider/mod.rs +0 -0
  113. {sqlrite-0.9.0 → sqlrite-0.9.1}/sqlrite-ask/tests/anthropic_http.rs +0 -0
  114. {sqlrite-0.9.0 → sqlrite-0.9.1}/src/ask/mod.rs +0 -0
  115. {sqlrite-0.9.0 → sqlrite-0.9.1}/src/ask/schema.rs +0 -0
  116. {sqlrite-0.9.0 → sqlrite-0.9.1}/src/error.rs +0 -0
  117. {sqlrite-0.9.0 → sqlrite-0.9.1}/src/lib.rs +0 -0
  118. {sqlrite-0.9.0 → sqlrite-0.9.1}/src/main.rs +0 -0
  119. {sqlrite-0.9.0 → sqlrite-0.9.1}/src/meta_command/mod.rs +0 -0
  120. {sqlrite-0.9.0 → sqlrite-0.9.1}/src/repl/mod.rs +0 -0
  121. {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/agg.rs +0 -0
  122. {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/db/mod.rs +0 -0
  123. {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/db/secondary_index.rs +0 -0
  124. {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/fts/bm25.rs +0 -0
  125. {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/fts/mod.rs +0 -0
  126. {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/fts/posting_list.rs +0 -0
  127. {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/fts/tokenizer.rs +0 -0
  128. {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/pager/allocator.rs +0 -0
  129. {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/pager/cell.rs +0 -0
  130. {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/pager/file.rs +0 -0
  131. {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/pager/freelist.rs +0 -0
  132. {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/pager/fts_cell.rs +0 -0
  133. {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/pager/header.rs +0 -0
  134. {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/pager/hnsw_cell.rs +0 -0
  135. {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/pager/index_cell.rs +0 -0
  136. {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/pager/interior_page.rs +0 -0
  137. {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/pager/overflow.rs +0 -0
  138. {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/pager/page.rs +0 -0
  139. {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/pager/pager.rs +0 -0
  140. {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/pager/table_page.rs +0 -0
  141. {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/pager/varint.rs +0 -0
  142. {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/pager/wal.rs +0 -0
  143. {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/parser/insert.rs +0 -0
  144. {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/parser/mod.rs +0 -0
  145. {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/parser/select.rs +0 -0
  146. {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/tokenizer.rs +0 -0
@@ -4799,7 +4799,7 @@ dependencies = [
4799
4799
 
4800
4800
  [[package]]
4801
4801
  name = "sqlrite-ask"
4802
- version = "0.9.0"
4802
+ version = "0.9.1"
4803
4803
  dependencies = [
4804
4804
  "serde",
4805
4805
  "serde_json",
@@ -4827,7 +4827,7 @@ dependencies = [
4827
4827
 
4828
4828
  [[package]]
4829
4829
  name = "sqlrite-desktop"
4830
- version = "0.9.0"
4830
+ version = "0.9.1"
4831
4831
  dependencies = [
4832
4832
  "serde",
4833
4833
  "serde_json",
@@ -4839,7 +4839,7 @@ dependencies = [
4839
4839
 
4840
4840
  [[package]]
4841
4841
  name = "sqlrite-engine"
4842
- version = "0.9.0"
4842
+ version = "0.9.1"
4843
4843
  dependencies = [
4844
4844
  "clap",
4845
4845
  "env_logger",
@@ -4856,7 +4856,7 @@ dependencies = [
4856
4856
 
4857
4857
  [[package]]
4858
4858
  name = "sqlrite-ffi"
4859
- version = "0.9.0"
4859
+ version = "0.9.1"
4860
4860
  dependencies = [
4861
4861
  "cbindgen",
4862
4862
  "serde",
@@ -4866,7 +4866,7 @@ dependencies = [
4866
4866
 
4867
4867
  [[package]]
4868
4868
  name = "sqlrite-mcp"
4869
- version = "0.9.0"
4869
+ version = "0.9.1"
4870
4870
  dependencies = [
4871
4871
  "clap",
4872
4872
  "libc",
@@ -4877,7 +4877,7 @@ dependencies = [
4877
4877
 
4878
4878
  [[package]]
4879
4879
  name = "sqlrite-nodejs"
4880
- version = "0.9.0"
4880
+ version = "0.9.1"
4881
4881
  dependencies = [
4882
4882
  "napi",
4883
4883
  "napi-build",
@@ -4887,7 +4887,7 @@ dependencies = [
4887
4887
 
4888
4888
  [[package]]
4889
4889
  name = "sqlrite-python"
4890
- version = "0.9.0"
4890
+ version = "0.9.1"
4891
4891
  dependencies = [
4892
4892
  "pyo3",
4893
4893
  "sqlrite-engine",
@@ -27,7 +27,7 @@ resolver = "3"
27
27
  # `package =` key so the import name stays `sqlrite` internally:
28
28
  # sqlrite = { package = "sqlrite-engine", path = "…" }
29
29
  name = "sqlrite-engine"
30
- version = "0.9.0"
30
+ version = "0.9.1"
31
31
  authors = ["Joao Henrique Machado Silva <joaoh82@gmail.com>"]
32
32
  edition = "2024"
33
33
  rust-version = "1.85"
@@ -141,4 +141,4 @@ fs2 = { version = "0.4", optional = true }
141
141
  # crate publishes to crates.io, and a path-only dep without a
142
142
  # version field fails the manifest verification step. See PR #58
143
143
  # retrospective in docs/roadmap.md.
144
- sqlrite-ask = { version = "0.9.0", path = "sqlrite-ask", optional = true }
144
+ sqlrite-ask = { version = "0.9.1", path = "sqlrite-ask", optional = true }
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sqlrite
3
- Version: 0.9.0
3
+ Version: 0.9.1
4
4
  Classifier: Development Status :: 3 - Alpha
5
5
  Classifier: Intended Audience :: Developers
6
6
  Classifier: License :: OSI Approved :: MIT License
@@ -167,6 +167,7 @@ sqlrite> DELETE FROM users WHERE age < 30;
167
167
  | `UPDATE` | Multi-column `SET`; `WHERE`; UNIQUE + type enforcement; arithmetic in assignments (`SET age = age + 1`) |
168
168
  | `DELETE` | `WHERE` predicate or full-table delete |
169
169
  | `BEGIN` / `COMMIT` / `ROLLBACK` | Real transactions, snapshot-based; WAL-backed commit; single-level (no savepoints); auto-rollback if `COMMIT`'s disk write fails |
170
+ | `PRAGMA auto_vacuum` | Read (`PRAGMA auto_vacuum;`) returns the trigger threshold as a single-row result set; set (`PRAGMA auto_vacuum = 0.5;` / `= OFF;` / `= NONE;`) tunes or disables auto-VACUUM at the SQL layer for SDK / FFI / MCP consumers |
170
171
 
171
172
  Expressions in `WHERE` and `UPDATE`'s `SET` RHS:
172
173
 
@@ -305,7 +306,7 @@ Lockstep versioning — one dispatch bumps every product to the same `vX.Y.Z`. T
305
306
  - [x] **7a — `VECTOR(N)` column type** *(v0.1.10)*: dense f32 vectors with bracket-array literal syntax (`[0.1, 0.2, ...]`); file format bumped to v4
306
307
  - [x] **7b — Distance functions** *(v0.1.11)*: `vec_distance_l2/cosine/dot` + `ORDER BY <expr> LIMIT k` so KNN queries work end-to-end
307
308
  - [x] **7c — Bounded-heap top-k optimization** *(v0.1.12)*
308
- - [x] **7d — HNSW ANN index** *(v0.1.13–15)*: `CREATE INDEX … USING hnsw (col)`; recall@10 ≥ 0.95 at default `M=16, ef_construction=200, ef_search=50`; persisted as a `KIND_HNSW` cell tree
309
+ - [x] **7d — HNSW ANN index** *(v0.1.13–15, +SQLR-28)*: `CREATE INDEX … USING hnsw (col) [WITH (metric = '<l2|cosine|dot>')]`; recall@10 ≥ 0.95 at default `M=16, ef_construction=200, ef_search=50`; persisted as a `KIND_HNSW` cell tree, with the metric round-tripping through the synthesized `sqlrite_master` SQL
309
310
  - [x] **7e — JSON column type + path queries** *(v0.1.16)*: `JSON` / `JSONB` columns stored as canonical text; `json_extract` / `json_type` / `json_array_length` / `json_object_keys`; `$.key`, `[N]`, chained JSONPath subset
310
311
  - [x] **7g.1 — `sqlrite-ask` crate** *(v0.1.18)*: foundational natural-language → SQL via the [Anthropic API](https://docs.anthropic.com/) (Sonnet 4.6 by default), prompt-cached schema dump, sync `ureq` HTTP.
311
312
  - [x] **7g.2 — REPL `.ask` + dep-direction flip** *(v0.1.19)*: `.ask <question>` meta-command with `Run? [Y/n]` confirmation. The wiring required dropping the engine dep from `sqlrite-ask` (cargo cycle) — `sqlrite-ask` is now pure over `&str` schemas; the `Connection`/`Database` integration moved to the engine's new `ask` feature. Public surface for callers: `use sqlrite::{Connection, ConnectionAskExt}`.
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "sqlrite-desktop-frontend",
3
3
  "private": true,
4
- "version": "0.9.0",
4
+ "version": "0.9.1",
5
5
  "type": "module",
6
6
  "scripts": {
7
7
  "dev": "vite",
@@ -104,6 +104,7 @@ The engine never depends on the SDK crates; the SDK crates each depend on the en
104
104
  | [`src/sql/hnsw.rs`](../src/sql/hnsw.rs) | Standalone HNSW algorithm — insert / search / layer assignment / beam search. Phase 7d.1. |
105
105
  | [`src/sql/fts/`](../src/sql/fts/) | Full-text search — standalone tokenizer, BM25 scorer, and in-memory `PostingList` inverted index. Wired into the executor via the `fts_match` / `bm25_score` scalar functions and the `try_fts_probe` optimizer hook. Phase 8a-8b; persistence in 8c. See [`docs/fts.md`](fts.md). |
106
106
  | [`src/sql/json.rs`](../src/sql/json.rs) | JSON column type + path-extraction functions (`json_extract`, `json_type`, `json_array_length`, `json_object_keys`). Phase 7e. |
107
+ | [`src/sql/pragma.rs`](../src/sql/pragma.rs) | `PRAGMA` dispatcher (SQLR-13). `try_parse_pragma` peeks at the SQL token stream before sqlparser sees it and routes any `PRAGMA …` shape to `execute_pragma`. First pragma wired up: `auto_vacuum` (read + set, with `OFF` / `NONE` to disable). Add new pragmas as a single arm in `execute_pragma`. |
107
108
  | [`src/sql/pager/`](../src/sql/pager/) | On-disk file format and I/O — see [file-format.md](file-format.md) and [pager.md](pager.md) for details. WAL + checkpointer + shared/exclusive lock modes (Phase 4a-4e) live here. |
108
109
 
109
110
  ## Flow of a SQL statement
@@ -90,7 +90,7 @@ A few methodology notes that change how you read the table.
90
90
  - `Statement::query_with_params(&[Value])` / `Statement::execute_with_params(&[Value])` — bind `?` placeholders at execute time without re-running sqlparser.
91
91
  - `Value::Vector(Vec<f32>)` as a first-class bind type — the 4 KB query vector for W10 is now bound directly instead of being re-lexed every iteration. The HNSW probe optimizer still recognizes the bound shape, so the algorithmic shortcut keeps firing.
92
92
 
93
- The bench harness `Driver::query_one` / `query_all` paths route through `prepare_cached` + the bound API. Every workload's `WorkloadId.version` was bumped `v1 → v2` in lockstep — old JSON envelopes keep the v1 tag and stay readable, but cross-version comparisons require an explicit acknowledgment in the comparison script. The next official pinned-host run will land the post-binding numbers; treat the v1 row above as "before" and watch this section for the "after" once republished.
93
+ The bench harness `Driver::query_one` / `query_all` paths route through `prepare_cached` + the bound API. Every workload's `WorkloadId.version` was bumped `v1 → v2` in lockstep — old JSON envelopes keep the v1 tag and stay readable, but cross-version comparisons require an explicit acknowledgment in the comparison script. The headline table below carries the v2 numbers from the post-SQLR-23 republished run (SQLR-25); the retired v1 baseline lives in the historical section underneath.
94
94
 
95
95
  **Where DuckDB is misleading.** Per-PK-probe single-row OLTP queries (W9) are SQLite's home turf, not DuckDB's. The plan flags this as "apples-to-oranges"; we still publish the number because the directional comparison is informative.
96
96
 
@@ -100,29 +100,53 @@ The bench harness `Driver::query_one` / `query_all` paths route through `prepare
100
100
 
101
101
  ## Headline numbers
102
102
 
103
- Median latency from the first official pinned-host run — [`benchmarks/results/2026-05-07-apple-9ffd55a5.json`](../benchmarks/results/2026-05-07-apple-9ffd55a5.json), Apple M1 Pro / macOS 23.5.0, criterion defaults (3 s warm-up, 5 s measurement, 100 samples on light workloads / 10 samples on heavy ones — see the JSON envelope's per-sample `samples` field). Only medians here; the JSON carries 95 % CIs, mean, std-dev, ops/s.
103
+ Median latency from the post-SQLR-23 pinned-host run — [`benchmarks/results/2026-05-08-apple-ac84d560.json`](../benchmarks/results/2026-05-08-apple-ac84d560.json), Apple M1 Pro / macOS 23.5.0, criterion defaults (3 s warm-up, 5 s measurement, 100 samples on light workloads / 10 samples on heavy ones — see the JSON envelope's per-sample `samples` field). Only medians here; the JSON carries 95 % CIs, mean, std-dev, ops/s.
104
104
 
105
105
  | Workload | SQLRite | SQLite (WAL+NORMAL) | DuckDB | Notes |
106
106
  |---|---|---|---|---|
107
- | **W1** read-by-PK | 9.87 µs | 2.05 µs | — | ~5× — parser tax |
108
- | **W2** range-100 | 23.99 ms | 60.50 µs | — | ~400× — full-scan vs index range probe |
109
- | **W2** range-1k | 24.92 ms | 585.21 µs | — | ~43× |
110
- | **W2** range-10k | 30.15 ms | 6.24 ms | — | ~5× — converges as scan dominates |
111
- | **W3** bulk insert (100k/txn) | 1.029 s | 166.43 ms | — | ~6.2× |
112
- | **W4** single-row insert | 6.76 ms | 9.78 µs | — | **~691× ⚠️** SQLR-18 |
113
- | **W5** mixed OLTP | 55.63 ms | 9.96 µs | — | **~5,580× ⚠️** SQLR-18 |
114
- | **W6** index lookup | 10.45 µs | 2.50 µs | — | ~4× — parser tax |
115
- | **W7** SUM (1M rows) | 109.47 ms | 31.14 ms | 468.74 µs | DuckDB ~66× faster than SQLite |
116
- | **W8** GROUP BY card-10 | 201.80 ms | 438.09 ms | 761.40 µs | DuckDB ~575× faster than SQLite |
117
- | **W8** GROUP BY card-1k | 1.372 s | 251.13 ms | 871.80 µs | DuckDB ~288× faster than SQLite |
118
- | **W8** GROUP BY card-100k | _skipped_ | 238.96 ms | 19.58 ms | **SQLRite skipped ⚠️** SQLR-19; DuckDB ~12× faster than SQLite |
119
- | **W9** INNER JOIN (10k×10k) | 34.25 s | 2.23 µs | 699.23 µs | **~15M× ⚠️** SQLR-20; DuckDB ~313× slower than SQLite (analytical-engine OLTP weakness) |
120
- | **W10** vector top-10 (brute-force, 10k×384) | 138.66 ms | — | — | parser cost dominates |
121
- | **W10** vector top-10 (HNSW) | 126.81 ms | — | — | masked by parser cost |
122
- | **W11** BM25 top-10 (1k docs) | 1.079 ms | 25.03 µs | — | ~43× |
123
- | **W12** hybrid (1k docs) | 713.53 µs | — | — | RAG headline |
124
-
125
- > The **canonical run** is [`benchmarks/results/2026-05-07-apple-9ffd55a5.json`](../benchmarks/results/2026-05-07-apple-9ffd55a5.json). The `dirty=true` flag in the commit metadata reflects the working-tree state when the 9.6 PR was being authored (this doc + README updates uncommitted at run time); the **measurements themselves only depend on the bench binary**, which was built from the committed bench-9.5-duckdb tip. Subsequent official runs land alongside this file with their own date / host / commit.
107
+ | **W1** read-by-PK | 3.92 µs | 2.09 µs | — | ~1.9× — gap closed by SQLR-23 (was ~4.8× in v1) |
108
+ | **W2** range-100 | 24.27 ms | 66.62 µs | — | ~364× — full-scan vs index range probe |
109
+ | **W2** range-1k | 26.64 ms | 649.30 µs | — | ~41× |
110
+ | **W2** range-10k | 30.73 ms | 7.01 ms | — | ~4.4× — converges as scan dominates |
111
+ | **W3** bulk insert (100k/txn) | 606.20 ms | 183.96 ms | — | ~3.3× — 100k INSERT plan parsed once, not per-row (was ~6.2× in v1) |
112
+ | **W4** single-row insert | 6.57 ms | 11.35 µs | — | **~579× ⚠️** SQLR-18 |
113
+ | **W5** mixed OLTP | 58.00 ms | 9.65 µs | — | **~6,010× ⚠️** SQLR-18 |
114
+ | **W6** index lookup | 4.04 µs | 2.56 µs | — | ~1.6× — gap closed by SQLR-23 (was ~4.2× in v1) |
115
+ | **W7** SUM (1M rows) | 103.62 ms | 31.57 ms | 478.78 µs | DuckDB ~66× faster than SQLite |
116
+ | **W8** GROUP BY card-10 | 197.32 ms | 366.52 ms | 949.75 µs | DuckDB ~386× faster than SQLite |
117
+ | **W8** GROUP BY card-1k | 1.380 s | 240.64 ms | 1.039 ms | DuckDB ~232× faster than SQLite |
118
+ | **W8** GROUP BY card-100k | _skipped_ | 239.72 ms | 22.93 ms | **SQLRite skipped ⚠️** SQLR-19; DuckDB ~10× faster than SQLite |
119
+ | **W9** INNER JOIN (10k×10k) | 30.30 s | 2.16 µs | 484.97 µs | **~14M× ⚠️** SQLR-20; DuckDB ~225× slower than SQLite (analytical-engine OLTP weakness) |
120
+ | **W10** vector top-10 (brute-force, 10k×384) | 120.88 ms | — | — | compute-bound; modest ~13% drop vs v1 |
121
+ | **W10** vector top-10 (HNSW) | **2.40 ms** | — | — | **~53× faster than v1** ⭐ — SQLR-23 + SQLR-28 unmasked the index; HNSW now ~50× faster than brute-force |
122
+ | **W11** BM25 top-10 (1k docs) | 501.63 µs | 23.65 µs | — | ~21× — `fts_match` / `bm25_score` no longer re-parsed (was ~43× in v1) |
123
+ | **W12** hybrid (1k docs) | 607.90 µs | — | — | RAG headline (~15% faster than v1) |
124
+
125
+ > The **canonical v2 run** is [`benchmarks/results/2026-05-08-apple-ac84d560.json`](../benchmarks/results/2026-05-08-apple-ac84d560.json). It supersedes the v1 baseline (table below) end-to-end: every workload was rerun on the same canonical Apple M1 Pro host after [SQLR-23](https://github.com/joaoh82/rust_sqlite/pulls?q=SQLR-23) bumped `WorkloadId.version` from `v1 → v2` in lockstep (W10 → `v3` after [SQLR-28](https://github.com/joaoh82/rust_sqlite/pulls?q=SQLR-28) widened the HNSW probe to cosine + dot). The `dirty=true` flag reflects the working-tree state at run time (this doc update + the new envelope itself uncommitted); the **measurements themselves only depend on the bench binary**, which was built from the clean `ac84d560` tip. Subsequent official runs land alongside this file with their own date / host / commit.
126
+
127
+ ### Historical (v1, retired)
128
+
129
+ The pre-SQLR-23 baseline from [`benchmarks/results/2026-05-07-apple-9ffd55a5.json`](../benchmarks/results/2026-05-07-apple-9ffd55a5.json), retained so the methodology shift is visible. The v1→v2 jump is not an algorithmic improvement — it's the bench-driver methodology change (per-iter `inline_params` → `prepare_cached` + bound `?` parameters; `Value::Vector` for HNSW-eligible KNN). Cross-version comparisons (`W1.v1` vs `W1.v2`) are flagged in the comparison script per Q8; the [`compare.py`](../benchmarks/scripts/compare.py) v1↔v2 report walks each one.
130
+
131
+ | Workload | SQLRite (v1) | SQLite (v1) | DuckDB (v1) |
132
+ |---|---|---|---|
133
+ | **W1** read-by-PK | 9.87 µs | 2.05 µs | — |
134
+ | **W2** range-100 | 23.99 ms | 60.50 µs | — |
135
+ | **W2** range-1k | 24.92 ms | 585.21 µs | — |
136
+ | **W2** range-10k | 30.15 ms | 6.24 ms | — |
137
+ | **W3** bulk insert (100k/txn) | 1.029 s | 166.43 ms | — |
138
+ | **W4** single-row insert | 6.76 ms | 9.78 µs | — |
139
+ | **W5** mixed OLTP | 55.63 ms | 9.96 µs | — |
140
+ | **W6** index lookup | 10.45 µs | 2.50 µs | — |
141
+ | **W7** SUM (1M rows) | 109.47 ms | 31.14 ms | 468.74 µs |
142
+ | **W8** GROUP BY card-10 | 201.80 ms | 438.09 ms | 761.40 µs |
143
+ | **W8** GROUP BY card-1k | 1.372 s | 251.13 ms | 871.80 µs |
144
+ | **W8** GROUP BY card-100k | _skipped_ | 238.96 ms | 19.58 ms |
145
+ | **W9** INNER JOIN (10k×10k) | 34.25 s | 2.23 µs | 699.23 µs |
146
+ | **W10** brute-force | 138.66 ms | — | — |
147
+ | **W10** HNSW | 126.81 ms | — | — |
148
+ | **W11** BM25 top-10 (1k docs) | 1.079 ms | 25.03 µs | — |
149
+ | **W12** hybrid (1k docs) | 713.53 µs | — | — |
126
150
 
127
151
  ---
128
152
 
@@ -205,7 +205,7 @@ Format-version side effect: a save that produces a non-empty freelist promotes t
205
205
 
206
206
  After SQLR-6, the file still required a manual `VACUUM;` to actually shrink — the freelist absorbed orphan pages but the high-water mark stayed put. SQLR-10 adds a heuristic that fires `vacuum_database` automatically after a page-releasing DDL (`DROP TABLE`, `DROP INDEX`, `ALTER TABLE DROP COLUMN`) when the freelist exceeds a configurable fraction of `page_count`.
207
207
 
208
- Configuration lives on `Database::auto_vacuum_threshold: Option<f32>` and is exposed at the connection level via `Connection::set_auto_vacuum_threshold` / `auto_vacuum_threshold`. Defaults: `Some(0.25)` (SQLite parity at 25%); pass `None` to opt out per connection. The threshold is per-`Connection` runtime state and is not persisted in the file header — every reopen starts at the default. A SQL-level `PRAGMA auto_vacuum` is tracked separately (out of scope for SQLR-10).
208
+ Configuration lives on `Database::auto_vacuum_threshold: Option<f32>` and is exposed at the connection level via `Connection::set_auto_vacuum_threshold` / `auto_vacuum_threshold`, and via SQL through `PRAGMA auto_vacuum` (SQLR-13 — see [`src/sql/pragma.rs`](../src/sql/pragma.rs)). Defaults: `Some(0.25)` (SQLite parity at 25%); pass `None` (or `PRAGMA auto_vacuum = OFF`) to opt out per connection. The threshold is per-`Connection` runtime state and is not persisted in the file header — every reopen starts at the default.
209
209
 
210
210
  The trigger lives at the end of [`process_command_with_render`](../src/sql/mod.rs), immediately after the auto-save. Order matters: the freelist isn't accurate until the bottom-up rebuild runs during save, so we save first, then check the ratio. The check itself is `freelist::should_auto_vacuum(pager, threshold)`, which:
211
211
 
@@ -163,6 +163,7 @@ SELECT id, title FROM docs ORDER BY embedding <-> [0.1, ...] LIMIT 10;
163
163
  > - **✅ 7d.1 — Pure HNSW algorithm** *(~700 LOC, shipped in v0.1.13).* `src/sql/hnsw.rs` standalone module: insert + search + layer assignment + beam search per layer + L2/cosine/dot distance dispatch. No SQL integration yet — vectors are passed in via a `get_vec` closure so the algorithm doesn't depend on table types. Tests verify recall@k ≥ 0.95 vs brute-force on randomly-generated vector sets; deterministic via a fixed RNG seed.
164
164
  > - **✅ 7d.2 — SQL integration** *(~500 LOC).* `CREATE INDEX … USING hnsw (col)` parser + engine, INSERT wiring (also calls `hnsw.insert()` incrementally), query optimizer hook (recognizes `ORDER BY vec_distance_l2(col, literal) LIMIT k` and probes the HNSW instead of full-scanning). HNSW lives in memory only at this point; the **CREATE INDEX SQL persists in `sqlrite_master` and reopen rebuilds the graph from current rows** — partial persistence ahead of 7d.3. DELETE/UPDATE on HNSW-indexed tables refused with helpful error pointing at 7d.3.
165
165
  > - **✅ 7d.3 — Persistence** *(~600 LOC).* New `KIND_HNSW` cell tag and `HnswNodeCell` encoding (varint node_id + per-layer neighbor lists). Each HNSW index gets its own page tree parallel to secondary indexes. Open path loads cells directly into `HnswIndex::from_persisted_nodes` — no algorithm runs, exact bit-for-bit reproduction. Also unblocks DELETE / UPDATE on HNSW-indexed tables: those mark the index `needs_rebuild`, save rebuilds from current rows before staging. ~2× the original 300-LOC estimate because the cell encoding + tests + rebuild path together added more than expected.
166
+ > - **✅ 7d.4 (SQLR-28) — Per-index distance metric.** Q2's "deferred per-index metric knob" lands as `CREATE INDEX … USING hnsw (col) WITH (metric = '<l2|cosine|dot>')`. The metric is stored on `HnswIndexEntry` and round-tripped via the synthesized CREATE INDEX SQL in `sqlrite_master` (no file-format bump — pre-SQLR-28 rows omit the WITH clause and decode as L2). The optimizer's `try_hnsw_probe` widens to all three `vec_distance_*` functions but only fires when the query function matches the index's metric; mismatches fall through to brute-force. Surfaced by the SQLR-23 v2 bench: W10 uses cosine, the optimizer was L2-only, and the HNSW variant had been silently brute-forcing the entire time. SQLR-25 (republish v2 numbers) was the gating consumer.
166
167
  >
167
168
  > Each 7d.x ships as its own PR + release wave. The user-facing value lands at 7d.2; 7d.3 closes the persistence loop. 7d.1 is foundational but ships a tested algorithmic primitive on its own — useful as documentation of the engine's "from scratch" theme.
168
169
 
@@ -368,12 +369,12 @@ Q1–Q10 were resolved by the project owner on 2026-04-26. Each question keeps i
368
369
 
369
370
  ### Q2. HNSW parameters: fixed defaults or per-index configurable?
370
371
 
371
- > **Decided: fixed defaults** (`M=16, ef_construction=200, ef_search=50`).
372
+ > **Decided: fixed defaults** (`M=16, ef_construction=200, ef_search=50`) for the algorithmic knobs. **Distance metric** *did* land as a per-index `WITH (metric = '<l2|cosine|dot>')` clause in **SQLR-28 / sub-phase 7d.4** — see the 7d split note above. Was deferred from the original 7d.2 cut; surfaced as a gap by the SQLR-23 v2 bench, where W10's cosine query had been silently brute-forcing because the optimizer hook was L2-only.
372
373
 
373
374
  - **Fixed:** `M=16, ef_construction=200, ef_search=50`. Simpler API, less to test. Matches sqlite-vec's defaults.
374
375
  - **Configurable:** `CREATE INDEX … USING hnsw (col) WITH (m=32, ef_construction=400)`. Power-user knobs, more code, more test matrix.
375
376
 
376
- **Recommendation:** fixed defaults for MVP. Configurable can land as a follow-up if anyone actually asks.
377
+ **Recommendation:** fixed defaults for MVP. Configurable can land as a follow-up if anyone actually asks. (`metric` already came back as a follow-up; `m` / `ef_*` haven't been requested yet.)
377
378
 
378
379
  ### Q3. JSON storage format
379
380
 
@@ -113,15 +113,18 @@ These are full-citizen indexes — they're visible via `.tables`-adjacent catalo
113
113
  ### HNSW indexes (Phase 7d)
114
114
 
115
115
  ```sql
116
- CREATE INDEX <name> ON <table> USING hnsw (<vector_column>);
116
+ CREATE INDEX <name> ON <table> USING hnsw (<vector_column>)
117
+ [WITH (metric = '<l2|cosine|dot>')];
117
118
  ```
118
119
 
119
- Builds an [HNSW](https://arxiv.org/abs/1603.09320) approximate-nearest-neighbor index over a `VECTOR(N)` column. The query optimizer recognizes `ORDER BY vec_distance_l2(col, literal) LIMIT k` (or the cosine / dot variants) on an HNSW-indexed column and probes the graph instead of full-scanning. SQLR-23 — the second arg can be either an inline `[...]` literal *or* a bound `Value::Vector(...)` parameter via `Statement::query_with_params`; the optimizer recognizes both, so prepared-statement KNN queries still take the graph shortcut.
120
+ Builds an [HNSW](https://arxiv.org/abs/1603.09320) approximate-nearest-neighbor index over a `VECTOR(N)` column. The query optimizer recognizes `ORDER BY vec_distance_l2(col, literal) LIMIT k` (or the cosine / dot variants) on an HNSW-indexed column **whose metric matches the query's distance function**, and probes the graph instead of full-scanning. SQLR-23 — the second arg can be either an inline `[...]` literal *or* a bound `Value::Vector(...)` parameter via `Statement::query_with_params`; the optimizer recognizes both, so prepared-statement KNN queries still take the graph shortcut.
120
121
 
121
- - Recall@10 0.95 at default parameters (`M=16`, `ef_construction=200`, `ef_search=50`). Parameters aren't tunable from SQL yet — see Q2 of [`docs/phase-7-plan.md`](phase-7-plan.md).
122
- - The index is built incrementally on `INSERT`. `DELETE` / `UPDATE` mark the index `needs_rebuild`; the next save rebuilds from current rows.
123
- - Persisted as a `KIND_HNSW` cell tree alongside the regular page hierarchy — open path loads the graph bit-for-bit, no algorithm runs.
124
- - Without an HNSW index, the same `ORDER BY vec_distance_… LIMIT k` query still works — it just brute-force-scans every row (Phase 7c's bounded-heap top-k optimization keeps the memory footprint to O(k)).
122
+ The `WITH (metric = '…')` clause picks the distance the graph is built for. Three values are recognized: `'l2'` (Euclidean — the default, also accepts `'euclidean'`), `'cosine'`, and `'dot'` (negated dot-product — also accepts `'inner_product'` / `'ip'`). Omitting the clause is equivalent to `metric = 'l2'`, so pre-SQLR-28 catalogs round-trip unchanged. **The metric is not a query-time choice** — the graph topology depends on the metric used during INSERT (neighbour pruning is metric-specific), so a query whose `vec_distance_*` function doesn't match the index's metric falls through to brute-force rather than getting a wrong answer back from the graph. If you need both L2 and cosine probes on the same column, create two indexes.
123
+
124
+ - Recall@10 0.95 at default parameters (`M=16`, `ef_construction=200`, `ef_search=50`). The `M` / `ef_*` knobs aren't tunable from SQL yet — see Q2 of [`docs/phase-7-plan.md`](phase-7-plan.md).
125
+ - The index is built incrementally on `INSERT`. `DELETE` / `UPDATE` mark the index `needs_rebuild`; the next save rebuilds from current rows under the same metric.
126
+ - Persisted as a `KIND_HNSW` cell tree alongside the regular page hierarchy — open path loads the graph bit-for-bit, no algorithm runs. The metric travels through the synthesized CREATE INDEX SQL in `sqlrite_master`; no file-format bump.
127
+ - Without an HNSW index — or with a metric mismatch — the same `ORDER BY vec_distance_… LIMIT k` query still works; it just brute-force-scans every row (Phase 7c's bounded-heap top-k optimization keeps the memory footprint to O(k)).
125
128
 
126
129
  ### FTS indexes (Phase 8)
127
130
 
@@ -548,7 +551,16 @@ conn.set_auto_vacuum_threshold(Some(0.5))?; // fire only when freelist > 50%
548
551
  conn.set_auto_vacuum_threshold(None)?; // disable entirely (manual VACUUM only)
549
552
  ```
550
553
 
551
- The setting is per-`Connection` runtime state — it's not persisted in the file header, so every reopen starts at the default `Some(0.25)`. A SQL-level `PRAGMA auto_vacuum` knob is on the roadmap but not yet implemented (SDK consumers currently configure it via the per-binding glue or fall back to the default).
554
+ …or via SQL (SQLR-13), which is the path SDK / FFI / MCP consumers reach for since they can't call the Rust setter directly:
555
+
556
+ ```sql
557
+ PRAGMA auto_vacuum; -- read; renders a single-row result set
558
+ PRAGMA auto_vacuum = 0.5; -- arm the trigger at 50%
559
+ PRAGMA auto_vacuum = 0; -- arm at 0% (compact on any released page)
560
+ PRAGMA auto_vacuum = OFF; -- disable; equivalent: NONE, 'OFF', 'NONE'
561
+ ```
562
+
563
+ Out-of-range values (anything outside `0.0..=1.0`, `NaN`, `±∞`) and unknown identifiers like `WAL` / `FULL` are rejected with a typed error — the trigger never silently saturates or falls back to a default. The setting is per-`Connection` runtime state — it's not persisted in the file header, so every reopen starts at the default `Some(0.25)`.
552
564
 
553
565
  ---
554
566
 
@@ -618,7 +630,7 @@ For context when you hit `NotImplemented`. See [Roadmap](roadmap.md) for when th
618
630
 
619
631
  ### Session / schema
620
632
  - Multiple attached databases (`ATTACH DATABASE`, `DETACH DATABASE`)
621
- - `PRAGMA` statements beyond what the parser accepts (none currently executed)
633
+ - `PRAGMA` statements other than `auto_vacuum` (SQLR-13). The dispatcher is in place — adding a pragma is a single arm in `execute_pragma`. `journal_mode`, `synchronous`, `cache_size`, etc. are not yet wired up
622
634
  - `REPLACE INTO`, `INSERT OR IGNORE`, `INSERT OR REPLACE` (conflict-resolution clauses)
623
635
 
624
636
  ---
@@ -4,7 +4,7 @@ build-backend = "maturin"
4
4
 
5
5
  [project]
6
6
  name = "sqlrite"
7
- version = "0.9.0"
7
+ version = "0.9.1"
8
8
  description = "Python bindings for SQLRite — a small, embeddable SQLite clone written in Rust."
9
9
  authors = [{ name = "Joao Henrique Machado Silva", email = "joaoh82@gmail.com" }]
10
10
  license = { text = "MIT" }
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "sqlrite-python"
3
- version = "0.9.0"
3
+ version = "0.9.1"
4
4
  authors = ["Joao Henrique Machado Silva <joaoh82@gmail.com>"]
5
5
  edition = "2024"
6
6
  rust-version = "1.85"
@@ -10,7 +10,7 @@
10
10
  # Published to crates.io as `sqlrite-ask`. Joins the lockstep release
11
11
  # wave (`sqlrite-ask-vX.Y.Z` tag) — see `docs/release-plan.md`.
12
12
  name = "sqlrite-ask"
13
- version = "0.9.0"
13
+ version = "0.9.1"
14
14
  authors = ["Joao Henrique Machado Silva <joaoh82@gmail.com>"]
15
15
  edition = "2024"
16
16
  rust-version = "1.85"
@@ -54,8 +54,8 @@ use std::collections::VecDeque;
54
54
  use std::path::Path;
55
55
  use std::sync::Arc;
56
56
 
57
+ use crate::sql::dialect::SqlriteDialect;
57
58
  use sqlparser::ast::Statement as AstStatement;
58
- use sqlparser::dialect::SQLiteDialect;
59
59
  use sqlparser::parser::Parser;
60
60
 
61
61
  use crate::error::{Result, SQLRiteError};
@@ -320,7 +320,7 @@ struct CachedPlan {
320
320
 
321
321
  impl CachedPlan {
322
322
  fn compile(sql: &str) -> Result<Self> {
323
- let dialect = SQLiteDialect {};
323
+ let dialect = SqlriteDialect::new();
324
324
  let mut ast = Parser::parse_sql(&dialect, sql).map_err(SQLRiteError::from)?;
325
325
  let Some(mut stmt) = ast.pop() else {
326
326
  return Err(SQLRiteError::General("no statement to prepare".to_string()));
@@ -1115,6 +1115,158 @@ mod tests {
1115
1115
  assert_eq!(rows[0].get::<i64>(0).unwrap(), 1);
1116
1116
  }
1117
1117
 
1118
+ /// SQLR-28 — cosine probe: an HNSW index built `WITH (metric =
1119
+ /// 'cosine')` must serve `ORDER BY vec_distance_cosine(col, [...])`
1120
+ /// from the graph. Self-query: querying for one of the corpus's
1121
+ /// own vectors must come back as the nearest under cosine
1122
+ /// distance.
1123
+ #[test]
1124
+ fn cosine_self_query_through_hnsw_optimizer() {
1125
+ let mut conn = Connection::open_in_memory().unwrap();
1126
+ conn.execute("CREATE TABLE v (id INTEGER PRIMARY KEY, e VECTOR(4));")
1127
+ .unwrap();
1128
+ let corpus: [(i64, [f32; 4]); 5] = [
1129
+ (1, [1.0, 0.0, 0.0, 0.0]),
1130
+ (2, [0.0, 1.0, 0.0, 0.0]),
1131
+ (3, [0.0, 0.0, 1.0, 0.0]),
1132
+ (4, [0.0, 0.0, 0.0, 1.0]),
1133
+ (5, [0.5, 0.5, 0.5, 0.5]),
1134
+ ];
1135
+ for (id, vec) in corpus {
1136
+ conn.execute(&format!(
1137
+ "INSERT INTO v (id, e) VALUES ({id}, [{}, {}, {}, {}]);",
1138
+ vec[0], vec[1], vec[2], vec[3]
1139
+ ))
1140
+ .unwrap();
1141
+ }
1142
+ conn.execute("CREATE INDEX v_hnsw ON v USING hnsw (e) WITH (metric = 'cosine');")
1143
+ .unwrap();
1144
+
1145
+ // Self-query for id=2's vector — expected nearest under cosine
1146
+ // distance is id=2 itself (cos distance 0).
1147
+ let rows = conn
1148
+ .prepare("SELECT id FROM v ORDER BY vec_distance_cosine(e, [0.0, 1.0, 0.0, 0.0]) ASC LIMIT 1")
1149
+ .unwrap()
1150
+ .query_with_params(&[])
1151
+ .unwrap()
1152
+ .collect_all()
1153
+ .unwrap();
1154
+ assert_eq!(rows.len(), 1);
1155
+ assert_eq!(rows[0].get::<i64>(0).unwrap(), 2);
1156
+ }
1157
+
1158
+ /// SQLR-28 — dot probe: same shape as the cosine test, but the
1159
+ /// index is built `WITH (metric = 'dot')` and the query uses
1160
+ /// `vec_distance_dot`. Confirms the third metric variant lights up
1161
+ /// the graph shortcut, not just l2 / cosine.
1162
+ #[test]
1163
+ fn dot_self_query_through_hnsw_optimizer() {
1164
+ let mut conn = Connection::open_in_memory().unwrap();
1165
+ conn.execute("CREATE TABLE v (id INTEGER PRIMARY KEY, e VECTOR(3));")
1166
+ .unwrap();
1167
+ // Data: distinguishable magnitudes so the dot metric resolves
1168
+ // a clear winner. `vec_distance_dot(a, b) = -(a·b)` — smaller
1169
+ // (more negative) is closer.
1170
+ let corpus: [(i64, [f32; 3]); 4] = [
1171
+ (1, [1.0, 0.0, 0.0]),
1172
+ (2, [2.0, 0.0, 0.0]),
1173
+ (3, [0.0, 1.0, 0.0]),
1174
+ (4, [0.0, 0.0, 1.0]),
1175
+ ];
1176
+ for (id, vec) in corpus {
1177
+ conn.execute(&format!(
1178
+ "INSERT INTO v (id, e) VALUES ({id}, [{}, {}, {}]);",
1179
+ vec[0], vec[1], vec[2]
1180
+ ))
1181
+ .unwrap();
1182
+ }
1183
+ conn.execute("CREATE INDEX v_hnsw ON v USING hnsw (e) WITH (metric = 'dot');")
1184
+ .unwrap();
1185
+
1186
+ // Query [3, 0, 0]: dot products are 3, 6, 0, 0 → distances
1187
+ // -3, -6, 0, 0. id=2 has the smallest (most negative) distance.
1188
+ let rows = conn
1189
+ .prepare("SELECT id FROM v ORDER BY vec_distance_dot(e, [3.0, 0.0, 0.0]) ASC LIMIT 1")
1190
+ .unwrap()
1191
+ .query_with_params(&[])
1192
+ .unwrap()
1193
+ .collect_all()
1194
+ .unwrap();
1195
+ assert_eq!(rows.len(), 1);
1196
+ assert_eq!(rows[0].get::<i64>(0).unwrap(), 2);
1197
+ }
1198
+
1199
+ /// SQLR-28 — metric mismatch must NOT take the graph shortcut.
1200
+ /// An L2-built index queried with `vec_distance_cosine` falls
1201
+ /// through to brute-force, which still returns the correct
1202
+ /// answer. We confirm the answer is correct; the slow-path
1203
+ /// behaviour itself is implicit (no error, no panic, no wrong
1204
+ /// result), which is the user-visible contract that matters.
1205
+ #[test]
1206
+ fn metric_mismatch_falls_back_to_brute_force() {
1207
+ let mut conn = Connection::open_in_memory().unwrap();
1208
+ conn.execute("CREATE TABLE v (id INTEGER PRIMARY KEY, e VECTOR(2));")
1209
+ .unwrap();
1210
+ let half_sqrt2 = std::f32::consts::FRAC_1_SQRT_2;
1211
+ let corpus: [(i64, [f32; 2]); 3] = [
1212
+ (1, [1.0, 0.0]),
1213
+ (2, [half_sqrt2, half_sqrt2]),
1214
+ (3, [0.0, 1.0]),
1215
+ ];
1216
+ for (id, vec) in corpus {
1217
+ conn.execute(&format!(
1218
+ "INSERT INTO v (id, e) VALUES ({id}, [{}, {}]);",
1219
+ vec[0], vec[1]
1220
+ ))
1221
+ .unwrap();
1222
+ }
1223
+ // Default L2 index — no WITH clause.
1224
+ conn.execute("CREATE INDEX v_hnsw_l2 ON v USING hnsw (e);")
1225
+ .unwrap();
1226
+
1227
+ // Query with cosine. Index can't help; brute-force still
1228
+ // returns the correct nearest by cosine: id=1 (cos dist 0).
1229
+ let rows = conn
1230
+ .prepare("SELECT id FROM v ORDER BY vec_distance_cosine(e, [1.0, 0.0]) ASC LIMIT 1")
1231
+ .unwrap()
1232
+ .query_with_params(&[])
1233
+ .unwrap()
1234
+ .collect_all()
1235
+ .unwrap();
1236
+ assert_eq!(rows.len(), 1);
1237
+ assert_eq!(rows[0].get::<i64>(0).unwrap(), 1);
1238
+ }
1239
+
1240
+ /// SQLR-28 — a typo in the metric name must error at CREATE INDEX
1241
+ /// time. Falling back to L2 silently is the bug we're fixing here,
1242
+ /// not the behaviour to preserve.
1243
+ #[test]
1244
+ fn unknown_metric_name_is_rejected() {
1245
+ let mut conn = Connection::open_in_memory().unwrap();
1246
+ conn.execute("CREATE TABLE v (id INTEGER PRIMARY KEY, e VECTOR(2));")
1247
+ .unwrap();
1248
+ let err = conn
1249
+ .execute("CREATE INDEX bad ON v USING hnsw (e) WITH (metric = 'cosin');")
1250
+ .unwrap_err();
1251
+ let msg = format!("{err}");
1252
+ assert!(msg.contains("unknown HNSW metric"), "got: {msg}");
1253
+ }
1254
+
1255
+ /// SQLR-28 — WITH options on a non-HNSW index must error rather
1256
+ /// than be silently ignored. An option that has no effect on the
1257
+ /// resulting index is a footgun.
1258
+ #[test]
1259
+ fn with_metric_on_btree_is_rejected() {
1260
+ let mut conn = Connection::open_in_memory().unwrap();
1261
+ conn.execute("CREATE TABLE t (a INTEGER PRIMARY KEY, b TEXT);")
1262
+ .unwrap();
1263
+ let err = conn
1264
+ .execute("CREATE INDEX bad ON t (b) WITH (metric = 'cosine');")
1265
+ .unwrap_err();
1266
+ let msg = format!("{err}");
1267
+ assert!(msg.contains("doesn't support any options"), "got: {msg}");
1268
+ }
1269
+
1118
1270
  #[test]
1119
1271
  fn prepare_cached_executes_the_same_as_prepare() {
1120
1272
  let mut conn = Connection::open_in_memory().unwrap();
@@ -198,8 +198,8 @@ impl Database {
198
198
  #[cfg(test)]
199
199
  mod tests {
200
200
  use super::*;
201
+ use crate::sql::dialect::SqlriteDialect;
201
202
  use crate::sql::parser::create::CreateQuery;
202
- use sqlparser::dialect::SQLiteDialect;
203
203
  use sqlparser::parser::Parser;
204
204
 
205
205
  #[test]
@@ -220,7 +220,7 @@ mod tests {
220
220
  last_name TEXT NOT NULl,
221
221
  email TEXT NOT NULL UNIQUE
222
222
  );";
223
- let dialect = SQLiteDialect {};
223
+ let dialect = SqlriteDialect::new();
224
224
  let mut ast = Parser::parse_sql(&dialect, query_statement).unwrap();
225
225
  if ast.len() > 1 {
226
226
  panic!("Expected a single query statement, but there are more then 1.")
@@ -246,7 +246,7 @@ mod tests {
246
246
  last_name TEXT NOT NULl,
247
247
  email TEXT NOT NULL UNIQUE
248
248
  );";
249
- let dialect = SQLiteDialect {};
249
+ let dialect = SqlriteDialect::new();
250
250
  let mut ast = Parser::parse_sql(&dialect, query_statement).unwrap();
251
251
  if ast.len() > 1 {
252
252
  panic!("Expected a single query statement, but there are more then 1.")
@@ -1,7 +1,7 @@
1
1
  use crate::error::{Result, SQLRiteError};
2
2
  use crate::sql::db::secondary_index::{IndexOrigin, SecondaryIndex};
3
3
  use crate::sql::fts::PostingList;
4
- use crate::sql::hnsw::HnswIndex;
4
+ use crate::sql::hnsw::{DistanceMetric, HnswIndex};
5
5
  use crate::sql::parser::create::{CreateQuery, ParsedColumn};
6
6
  use std::collections::{BTreeMap, HashMap};
7
7
  use std::fmt;
@@ -150,10 +150,11 @@ pub struct Table {
150
150
  pub primary_key: String,
151
151
  }
152
152
 
153
- /// One HNSW index attached to a table. Phase 7d.2 only supports L2
154
- /// distance; cosine and dot are 7d.x follow-ups (would require either
155
- /// distinct USING methods like `hnsw_cosine` or a `WITH (metric = …)`
156
- /// clause — see `docs/phase-7-plan.md` for the deferred decision).
153
+ /// One HNSW index attached to a table. The distance metric is fixed
154
+ /// at CREATE INDEX time via `USING hnsw (col) WITH (metric = '<m>')`
155
+ /// (`l2` / `cosine` / `dot`); omitting the WITH clause defaults to L2,
156
+ /// matching the pre-SQLR-28 behaviour for round-tripping older
157
+ /// `sqlrite_master` rows that didn't carry a metric.
157
158
  #[derive(Debug, Clone)]
158
159
  pub struct HnswIndexEntry {
159
160
  /// User-supplied name from `CREATE INDEX <name> …`. Unique across
@@ -161,6 +162,13 @@ pub struct HnswIndexEntry {
161
162
  pub name: String,
162
163
  /// The VECTOR column this index covers.
163
164
  pub column_name: String,
165
+ /// Distance metric the graph was built for. The optimizer's HNSW
166
+ /// shortcut only fires when the query's `vec_distance_*` function
167
+ /// matches this metric — picking a non-matching distance falls
168
+ /// through to brute-force, since the graph topology is metric-
169
+ /// specific (an L2-pruned graph isn't a valid cosine search graph
170
+ /// in general, and vice versa).
171
+ pub metric: DistanceMetric,
164
172
  /// The graph itself.
165
173
  pub index: HnswIndex,
166
174
  /// Phase 7d.3 — true iff a DELETE or UPDATE-on-vector-col has
@@ -1628,7 +1636,7 @@ pub fn parse_vector_literal(s: &str) -> Result<Vec<f32>> {
1628
1636
  #[cfg(test)]
1629
1637
  mod tests {
1630
1638
  use super::*;
1631
- use sqlparser::dialect::SQLiteDialect;
1639
+ use crate::sql::dialect::SqlriteDialect;
1632
1640
  use sqlparser::parser::Parser;
1633
1641
 
1634
1642
  #[test]
@@ -1766,7 +1774,7 @@ mod tests {
1766
1774
  active BOOL,
1767
1775
  score REAL
1768
1776
  );";
1769
- let dialect = SQLiteDialect {};
1777
+ let dialect = SqlriteDialect::new();
1770
1778
  let mut ast = Parser::parse_sql(&dialect, query_statement).unwrap();
1771
1779
  if ast.len() > 1 {
1772
1780
  panic!("Expected a single query statement, but there are more then 1.")
@@ -1802,7 +1810,7 @@ mod tests {
1802
1810
  first_name TEXT NOT NULL,
1803
1811
  last_name TEXT NOT NULl
1804
1812
  );";
1805
- let dialect = SQLiteDialect {};
1813
+ let dialect = SqlriteDialect::new();
1806
1814
  let mut ast = Parser::parse_sql(&dialect, query_statement).unwrap();
1807
1815
  if ast.len() > 1 {
1808
1816
  panic!("Expected a single query statement, but there are more then 1.")