sqlrite 0.9.0__tar.gz → 0.9.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sqlrite-0.9.0 → sqlrite-0.9.1}/Cargo.lock +7 -7
- {sqlrite-0.9.0 → sqlrite-0.9.1}/Cargo.toml +2 -2
- {sqlrite-0.9.0 → sqlrite-0.9.1}/PKG-INFO +1 -1
- {sqlrite-0.9.0 → sqlrite-0.9.1}/README.md +2 -1
- {sqlrite-0.9.0 → sqlrite-0.9.1}/desktop/package.json +1 -1
- {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/architecture.md +1 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/benchmarks.md +45 -21
- {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/pager.md +1 -1
- {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/phase-7-plan.md +3 -2
- {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/supported-sql.md +20 -8
- {sqlrite-0.9.0 → sqlrite-0.9.1}/pyproject.toml +1 -1
- {sqlrite-0.9.0 → sqlrite-0.9.1}/sdk/python/Cargo.toml +1 -1
- {sqlrite-0.9.0 → sqlrite-0.9.1}/sqlrite-ask/Cargo.toml +1 -1
- {sqlrite-0.9.0 → sqlrite-0.9.1}/src/connection.rs +154 -2
- {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/db/database.rs +3 -3
- {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/db/table.rs +16 -8
- sqlrite-0.9.1/src/sql/dialect.rs +100 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/executor.rs +138 -23
- {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/hnsw.rs +34 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/mod.rs +15 -4
- {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/pager/mod.rs +307 -33
- {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/params.rs +2 -2
- {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/parser/create.rs +1 -1
- sqlrite-0.9.1/src/sql/pragma.rs +514 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/.github/workflows/ci.yml +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/.github/workflows/release-pr.yml +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/.github/workflows/release.yml +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/.github/workflows/rust.yml +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/.gitignore +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/CLAUDE.md +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/CODE_OF_CONDUCT.md +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/LICENSE +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/MAINTAINERS +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/Makefile +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/desktop/index.html +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/desktop/package-lock.json +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/desktop/src/App.svelte +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/desktop/src/app.css +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/desktop/src/main.ts +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/desktop/src/vite-env.d.ts +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/desktop/svelte.config.js +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/desktop/tsconfig.json +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/desktop/vite.config.ts +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/_index.md +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/ask-backend-examples.md +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/ask.md +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/benchmarks-plan.md +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/concurrent-writes-plan.md +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/design-decisions.md +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/desktop.md +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/embedding.md +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/file-format.md +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/fts.md +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/getting-started.md +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/mcp.md +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/phase-8-plan.md +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/release-plan.md +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/release-secrets.md +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/roadmap.md +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/smoke-test.md +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/sql-engine.md +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/storage-model.md +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/docs/usage.md +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/examples/README.md +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/examples/c/Makefile +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/examples/c/hello.c +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/examples/go/go.mod +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/examples/go/hello.go +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/examples/hybrid-retrieval/README.md +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/examples/hybrid-retrieval/hybrid_retrieval.rs +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/examples/nodejs/hello.mjs +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/examples/python/hello.py +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/examples/rust/quickstart.rs +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/examples/wasm/Makefile +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/examples/wasm/index.html +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/examples/wasm/server.mjs +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/images/SQLRite - Desktop.png +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/images/SQLRite Data Structures.png +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/images/SQLRite Simple SQL Execution High Level Diagram.png +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/images/SQLRite Simple SQL INSERT Execution High Level Diagram (Insert Row).png +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/images/SQLRite Simple SQL INSERT Execution High Level Diagram.png +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/images/SQLRite_logo.png +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/images/architecture.png +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/rust-toolchain.toml +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/samples/AST.delete.example +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/samples/AST.insert.exemple +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/samples/AST.select.example +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/samples/AST.update.example +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/samples/CREATE TABLE sqlrite_schema.sql +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/samples/CREATE_TABLE with duplicate.sql +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/samples/CREATE_TABLE.sql +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/samples/INSERT.sql +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/scripts/bump-version.sh +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/sdk/go/README.md +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/sdk/go/ask.go +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/sdk/go/ask_test.go +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/sdk/go/conn.go +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/sdk/go/go.mod +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/sdk/go/rows.go +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/sdk/go/sqlrite.go +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/sdk/go/sqlrite_test.go +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/sdk/go/stmt.go +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/sdk/python/README.md +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/sdk/python/src/lib.rs +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/sdk/python/tests/test_ask.py +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/sdk/python/tests/test_sqlrite.py +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/sqlrite-ask/README.md +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/sqlrite-ask/src/lib.rs +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/sqlrite-ask/src/prompt.rs +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/sqlrite-ask/src/provider/anthropic.rs +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/sqlrite-ask/src/provider/mock.rs +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/sqlrite-ask/src/provider/mod.rs +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/sqlrite-ask/tests/anthropic_http.rs +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/src/ask/mod.rs +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/src/ask/schema.rs +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/src/error.rs +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/src/lib.rs +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/src/main.rs +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/src/meta_command/mod.rs +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/src/repl/mod.rs +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/agg.rs +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/db/mod.rs +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/db/secondary_index.rs +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/fts/bm25.rs +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/fts/mod.rs +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/fts/posting_list.rs +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/fts/tokenizer.rs +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/pager/allocator.rs +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/pager/cell.rs +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/pager/file.rs +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/pager/freelist.rs +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/pager/fts_cell.rs +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/pager/header.rs +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/pager/hnsw_cell.rs +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/pager/index_cell.rs +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/pager/interior_page.rs +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/pager/overflow.rs +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/pager/page.rs +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/pager/pager.rs +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/pager/table_page.rs +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/pager/varint.rs +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/pager/wal.rs +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/parser/insert.rs +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/parser/mod.rs +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/parser/select.rs +0 -0
- {sqlrite-0.9.0 → sqlrite-0.9.1}/src/sql/tokenizer.rs +0 -0
|
@@ -4799,7 +4799,7 @@ dependencies = [
|
|
|
4799
4799
|
|
|
4800
4800
|
[[package]]
|
|
4801
4801
|
name = "sqlrite-ask"
|
|
4802
|
-
version = "0.9.0"
|
|
4802
|
+
version = "0.9.1"
|
|
4803
4803
|
dependencies = [
|
|
4804
4804
|
"serde",
|
|
4805
4805
|
"serde_json",
|
|
@@ -4827,7 +4827,7 @@ dependencies = [
|
|
|
4827
4827
|
|
|
4828
4828
|
[[package]]
|
|
4829
4829
|
name = "sqlrite-desktop"
|
|
4830
|
-
version = "0.9.0"
|
|
4830
|
+
version = "0.9.1"
|
|
4831
4831
|
dependencies = [
|
|
4832
4832
|
"serde",
|
|
4833
4833
|
"serde_json",
|
|
@@ -4839,7 +4839,7 @@ dependencies = [
|
|
|
4839
4839
|
|
|
4840
4840
|
[[package]]
|
|
4841
4841
|
name = "sqlrite-engine"
|
|
4842
|
-
version = "0.9.0"
|
|
4842
|
+
version = "0.9.1"
|
|
4843
4843
|
dependencies = [
|
|
4844
4844
|
"clap",
|
|
4845
4845
|
"env_logger",
|
|
@@ -4856,7 +4856,7 @@ dependencies = [
|
|
|
4856
4856
|
|
|
4857
4857
|
[[package]]
|
|
4858
4858
|
name = "sqlrite-ffi"
|
|
4859
|
-
version = "0.9.0"
|
|
4859
|
+
version = "0.9.1"
|
|
4860
4860
|
dependencies = [
|
|
4861
4861
|
"cbindgen",
|
|
4862
4862
|
"serde",
|
|
@@ -4866,7 +4866,7 @@ dependencies = [
|
|
|
4866
4866
|
|
|
4867
4867
|
[[package]]
|
|
4868
4868
|
name = "sqlrite-mcp"
|
|
4869
|
-
version = "0.9.0"
|
|
4869
|
+
version = "0.9.1"
|
|
4870
4870
|
dependencies = [
|
|
4871
4871
|
"clap",
|
|
4872
4872
|
"libc",
|
|
@@ -4877,7 +4877,7 @@ dependencies = [
|
|
|
4877
4877
|
|
|
4878
4878
|
[[package]]
|
|
4879
4879
|
name = "sqlrite-nodejs"
|
|
4880
|
-
version = "0.9.0"
|
|
4880
|
+
version = "0.9.1"
|
|
4881
4881
|
dependencies = [
|
|
4882
4882
|
"napi",
|
|
4883
4883
|
"napi-build",
|
|
@@ -4887,7 +4887,7 @@ dependencies = [
|
|
|
4887
4887
|
|
|
4888
4888
|
[[package]]
|
|
4889
4889
|
name = "sqlrite-python"
|
|
4890
|
-
version = "0.9.0"
|
|
4890
|
+
version = "0.9.1"
|
|
4891
4891
|
dependencies = [
|
|
4892
4892
|
"pyo3",
|
|
4893
4893
|
"sqlrite-engine",
|
|
@@ -27,7 +27,7 @@ resolver = "3"
|
|
|
27
27
|
# `package =` key so the import name stays `sqlrite` internally:
|
|
28
28
|
# sqlrite = { package = "sqlrite-engine", path = "…" }
|
|
29
29
|
name = "sqlrite-engine"
|
|
30
|
-
version = "0.9.0"
|
|
30
|
+
version = "0.9.1"
|
|
31
31
|
authors = ["Joao Henrique Machado Silva <joaoh82@gmail.com>"]
|
|
32
32
|
edition = "2024"
|
|
33
33
|
rust-version = "1.85"
|
|
@@ -141,4 +141,4 @@ fs2 = { version = "0.4", optional = true }
|
|
|
141
141
|
# crate publishes to crates.io, and a path-only dep without a
|
|
142
142
|
# version field fails the manifest verification step. See PR #58
|
|
143
143
|
# retrospective in docs/roadmap.md.
|
|
144
|
-
sqlrite-ask = { version = "0.9.0", path = "sqlrite-ask", optional = true }
|
|
144
|
+
sqlrite-ask = { version = "0.9.1", path = "sqlrite-ask", optional = true }
|
|
@@ -167,6 +167,7 @@ sqlrite> DELETE FROM users WHERE age < 30;
|
|
|
167
167
|
| `UPDATE` | Multi-column `SET`; `WHERE`; UNIQUE + type enforcement; arithmetic in assignments (`SET age = age + 1`) |
|
|
168
168
|
| `DELETE` | `WHERE` predicate or full-table delete |
|
|
169
169
|
| `BEGIN` / `COMMIT` / `ROLLBACK` | Real transactions, snapshot-based; WAL-backed commit; single-level (no savepoints); auto-rollback if `COMMIT`'s disk write fails |
|
|
170
|
+
| `PRAGMA auto_vacuum` | Read (`PRAGMA auto_vacuum;`) returns the trigger threshold as a single-row result set; set (`PRAGMA auto_vacuum = 0.5;` / `= OFF;` / `= NONE;`) tunes or disables auto-VACUUM at the SQL layer for SDK / FFI / MCP consumers |
|
|
170
171
|
|
|
171
172
|
Expressions in `WHERE` and `UPDATE`'s `SET` RHS:
|
|
172
173
|
|
|
@@ -305,7 +306,7 @@ Lockstep versioning — one dispatch bumps every product to the same `vX.Y.Z`. T
|
|
|
305
306
|
- [x] **7a — `VECTOR(N)` column type** *(v0.1.10)*: dense f32 vectors with bracket-array literal syntax (`[0.1, 0.2, ...]`); file format bumped to v4
|
|
306
307
|
- [x] **7b — Distance functions** *(v0.1.11)*: `vec_distance_l2/cosine/dot` + `ORDER BY <expr> LIMIT k` so KNN queries work end-to-end
|
|
307
308
|
- [x] **7c — Bounded-heap top-k optimization** *(v0.1.12)*
|
|
308
|
-
- [x] **7d — HNSW ANN index** *(v0.1.13–15)*: `CREATE INDEX … USING hnsw (col)`; recall@10 ≥ 0.95 at default `M=16, ef_construction=200, ef_search=50`; persisted as a `KIND_HNSW` cell tree
|
|
309
|
+
- [x] **7d — HNSW ANN index** *(v0.1.13–15, +SQLR-28)*: `CREATE INDEX … USING hnsw (col) [WITH (metric = '<l2|cosine|dot>')]`; recall@10 ≥ 0.95 at default `M=16, ef_construction=200, ef_search=50`; persisted as a `KIND_HNSW` cell tree, with the metric round-tripping through the synthesized `sqlrite_master` SQL
|
|
309
310
|
- [x] **7e — JSON column type + path queries** *(v0.1.16)*: `JSON` / `JSONB` columns stored as canonical text; `json_extract` / `json_type` / `json_array_length` / `json_object_keys`; `$.key`, `[N]`, chained JSONPath subset
|
|
310
311
|
- [x] **7g.1 — `sqlrite-ask` crate** *(v0.1.18)*: foundational natural-language → SQL via the [Anthropic API](https://docs.anthropic.com/) (Sonnet 4.6 by default), prompt-cached schema dump, sync `ureq` HTTP.
|
|
311
312
|
- [x] **7g.2 — REPL `.ask` + dep-direction flip** *(v0.1.19)*: `.ask <question>` meta-command with `Run? [Y/n]` confirmation. The wiring required dropping the engine dep from `sqlrite-ask` (cargo cycle) — `sqlrite-ask` is now pure over `&str` schemas; the `Connection`/`Database` integration moved to the engine's new `ask` feature. Public surface for callers: `use sqlrite::{Connection, ConnectionAskExt}`.
|
|
@@ -104,6 +104,7 @@ The engine never depends on the SDK crates; the SDK crates each depend on the en
|
|
|
104
104
|
| [`src/sql/hnsw.rs`](../src/sql/hnsw.rs) | Standalone HNSW algorithm — insert / search / layer assignment / beam search. Phase 7d.1. |
|
|
105
105
|
| [`src/sql/fts/`](../src/sql/fts/) | Full-text search — standalone tokenizer, BM25 scorer, and in-memory `PostingList` inverted index. Wired into the executor via the `fts_match` / `bm25_score` scalar functions and the `try_fts_probe` optimizer hook. Phase 8a-8b; persistence in 8c. See [`docs/fts.md`](fts.md). |
|
|
106
106
|
| [`src/sql/json.rs`](../src/sql/json.rs) | JSON column type + path-extraction functions (`json_extract`, `json_type`, `json_array_length`, `json_object_keys`). Phase 7e. |
|
|
107
|
+
| [`src/sql/pragma.rs`](../src/sql/pragma.rs) | `PRAGMA` dispatcher (SQLR-13). `try_parse_pragma` peeks at the SQL token stream before sqlparser sees it and routes any `PRAGMA …` shape to `execute_pragma`. First pragma wired up: `auto_vacuum` (read + set, with `OFF` / `NONE` to disable). Add new pragmas as a single arm in `execute_pragma`. |
|
|
107
108
|
| [`src/sql/pager/`](../src/sql/pager/) | On-disk file format and I/O — see [file-format.md](file-format.md) and [pager.md](pager.md) for details. WAL + checkpointer + shared/exclusive lock modes (Phase 4a-4e) live here. |
|
|
108
109
|
|
|
109
110
|
## Flow of a SQL statement
|
|
@@ -90,7 +90,7 @@ A few methodology notes that change how you read the table.
|
|
|
90
90
|
- `Statement::query_with_params(&[Value])` / `Statement::execute_with_params(&[Value])` — bind `?` placeholders at execute time without re-running sqlparser.
|
|
91
91
|
- `Value::Vector(Vec<f32>)` as a first-class bind type — the 4 KB query vector for W10 is now bound directly instead of being re-lexed every iteration. The HNSW probe optimizer still recognizes the bound shape, so the algorithmic shortcut keeps firing.
|
|
92
92
|
|
|
93
|
-
The bench harness `Driver::query_one` / `query_all` paths route through `prepare_cached` + the bound API. Every workload's `WorkloadId.version` was bumped `v1 → v2` in lockstep — old JSON envelopes keep the v1 tag and stay readable, but cross-version comparisons require an explicit acknowledgment in the comparison script. The
|
|
93
|
+
The bench harness `Driver::query_one` / `query_all` paths route through `prepare_cached` + the bound API. Every workload's `WorkloadId.version` was bumped `v1 → v2` in lockstep — old JSON envelopes keep the v1 tag and stay readable, but cross-version comparisons require an explicit acknowledgment in the comparison script. The headline table below carries the v2 numbers from the post-SQLR-23 republished run (SQLR-25); the retired v1 baseline lives in the historical section underneath.
|
|
94
94
|
|
|
95
95
|
**Where DuckDB is misleading.** Per-PK-probe single-row OLTP queries (W9) are SQLite's home turf, not DuckDB's. The plan flags this as "apples-to-oranges"; we still publish the number because the directional comparison is informative.
|
|
96
96
|
|
|
@@ -100,29 +100,53 @@ The bench harness `Driver::query_one` / `query_all` paths route through `prepare
|
|
|
100
100
|
|
|
101
101
|
## Headline numbers
|
|
102
102
|
|
|
103
|
-
Median latency from the
|
|
103
|
+
Median latency from the post-SQLR-23 pinned-host run — [`benchmarks/results/2026-05-08-apple-ac84d560.json`](../benchmarks/results/2026-05-08-apple-ac84d560.json), Apple M1 Pro / macOS 23.5.0, criterion defaults (3 s warm-up, 5 s measurement, 100 samples on light workloads / 10 samples on heavy ones — see the JSON envelope's per-sample `samples` field). Only medians here; the JSON carries 95 % CIs, mean, std-dev, ops/s.
|
|
104
104
|
|
|
105
105
|
| Workload | SQLRite | SQLite (WAL+NORMAL) | DuckDB | Notes |
|
|
106
106
|
|---|---|---|---|---|
|
|
107
|
-
| **W1** read-by-PK |
|
|
108
|
-
| **W2** range-100 |
|
|
109
|
-
| **W2** range-1k |
|
|
110
|
-
| **W2** range-10k | 30.
|
|
111
|
-
| **W3** bulk insert (100k/txn) |
|
|
112
|
-
| **W4** single-row insert | 6.
|
|
113
|
-
| **W5** mixed OLTP |
|
|
114
|
-
| **W6** index lookup |
|
|
115
|
-
| **W7** SUM (1M rows) |
|
|
116
|
-
| **W8** GROUP BY card-10 |
|
|
117
|
-
| **W8** GROUP BY card-1k | 1.
|
|
118
|
-
| **W8** GROUP BY card-100k | _skipped_ |
|
|
119
|
-
| **W9** INNER JOIN (10k×10k) |
|
|
120
|
-
| **W10** vector top-10 (brute-force, 10k×384) |
|
|
121
|
-
| **W10** vector top-10 (HNSW) |
|
|
122
|
-
| **W11** BM25 top-10 (1k docs) |
|
|
123
|
-
| **W12** hybrid (1k docs) |
|
|
124
|
-
|
|
125
|
-
> The **canonical run** is [`benchmarks/results/2026-05-
|
|
107
|
+
| **W1** read-by-PK | 3.92 µs | 2.09 µs | — | ~1.9× — gap closed by SQLR-23 (was ~4.8× in v1) |
|
|
108
|
+
| **W2** range-100 | 24.27 ms | 66.62 µs | — | ~364× — full-scan vs index range probe |
|
|
109
|
+
| **W2** range-1k | 26.64 ms | 649.30 µs | — | ~41× |
|
|
110
|
+
| **W2** range-10k | 30.73 ms | 7.01 ms | — | ~4.4× — converges as scan dominates |
|
|
111
|
+
| **W3** bulk insert (100k/txn) | 606.20 ms | 183.96 ms | — | ~3.3× — 100k INSERT plan parsed once, not per-row (was ~6.2× in v1) |
|
|
112
|
+
| **W4** single-row insert | 6.57 ms | 11.35 µs | — | **~579× ⚠️** SQLR-18 |
|
|
113
|
+
| **W5** mixed OLTP | 58.00 ms | 9.65 µs | — | **~6,010× ⚠️** SQLR-18 |
|
|
114
|
+
| **W6** index lookup | 4.04 µs | 2.56 µs | — | ~1.6× — gap closed by SQLR-23 (was ~4.2× in v1) |
|
|
115
|
+
| **W7** SUM (1M rows) | 103.62 ms | 31.57 ms | 478.78 µs | DuckDB ~66× faster than SQLite |
|
|
116
|
+
| **W8** GROUP BY card-10 | 197.32 ms | 366.52 ms | 949.75 µs | DuckDB ~386× faster than SQLite |
|
|
117
|
+
| **W8** GROUP BY card-1k | 1.380 s | 240.64 ms | 1.039 ms | DuckDB ~232× faster than SQLite |
|
|
118
|
+
| **W8** GROUP BY card-100k | _skipped_ | 239.72 ms | 22.93 ms | **SQLRite skipped ⚠️** SQLR-19; DuckDB ~10× faster than SQLite |
|
|
119
|
+
| **W9** INNER JOIN (10k×10k) | 30.30 s | 2.16 µs | 484.97 µs | **~14M× ⚠️** SQLR-20; DuckDB ~225× slower than SQLite (analytical-engine OLTP weakness) |
|
|
120
|
+
| **W10** vector top-10 (brute-force, 10k×384) | 120.88 ms | — | — | compute-bound; modest ~13% drop vs v1 |
|
|
121
|
+
| **W10** vector top-10 (HNSW) | **2.40 ms** | — | — | **~53× faster than v1** ⭐ — SQLR-23 + SQLR-28 unmasked the index; HNSW now ~50× faster than brute-force |
|
|
122
|
+
| **W11** BM25 top-10 (1k docs) | 501.63 µs | 23.65 µs | — | ~21× — `fts_match` / `bm25_score` no longer re-parsed (was ~43× in v1) |
|
|
123
|
+
| **W12** hybrid (1k docs) | 607.90 µs | — | — | RAG headline (~15% faster than v1) |
|
|
124
|
+
|
|
125
|
+
> The **canonical v2 run** is [`benchmarks/results/2026-05-08-apple-ac84d560.json`](../benchmarks/results/2026-05-08-apple-ac84d560.json). It supersedes the v1 baseline (table below) end-to-end: every workload was rerun on the same canonical Apple M1 Pro host after [SQLR-23](https://github.com/joaoh82/rust_sqlite/pulls?q=SQLR-23) bumped `WorkloadId.version` from `v1 → v2` in lockstep (W10 → `v3` after [SQLR-28](https://github.com/joaoh82/rust_sqlite/pulls?q=SQLR-28) widened the HNSW probe to cosine + dot). The `dirty=true` flag reflects the working-tree state at run time (this doc update + the new envelope itself uncommitted); the **measurements themselves only depend on the bench binary**, which was built from the clean `ac84d560` tip. Subsequent official runs land alongside this file with their own date / host / commit.
|
|
126
|
+
|
|
127
|
+
### Historical (v1, retired)
|
|
128
|
+
|
|
129
|
+
The pre-SQLR-23 baseline from [`benchmarks/results/2026-05-07-apple-9ffd55a5.json`](../benchmarks/results/2026-05-07-apple-9ffd55a5.json), retained so the methodology shift is visible. The v1→v2 jump is not an algorithmic improvement — it's the bench-driver methodology change (per-iter `inline_params` → `prepare_cached` + bound `?` parameters; `Value::Vector` for HNSW-eligible KNN). Cross-version comparisons (`W1.v1` vs `W1.v2`) are flagged in the comparison script per Q8; the [`compare.py`](../benchmarks/scripts/compare.py) v1↔v2 report walks each one.
|
|
130
|
+
|
|
131
|
+
| Workload | SQLRite (v1) | SQLite (v1) | DuckDB (v1) |
|
|
132
|
+
|---|---|---|---|
|
|
133
|
+
| **W1** read-by-PK | 9.87 µs | 2.05 µs | — |
|
|
134
|
+
| **W2** range-100 | 23.99 ms | 60.50 µs | — |
|
|
135
|
+
| **W2** range-1k | 24.92 ms | 585.21 µs | — |
|
|
136
|
+
| **W2** range-10k | 30.15 ms | 6.24 ms | — |
|
|
137
|
+
| **W3** bulk insert (100k/txn) | 1.029 s | 166.43 ms | — |
|
|
138
|
+
| **W4** single-row insert | 6.76 ms | 9.78 µs | — |
|
|
139
|
+
| **W5** mixed OLTP | 55.63 ms | 9.96 µs | — |
|
|
140
|
+
| **W6** index lookup | 10.45 µs | 2.50 µs | — |
|
|
141
|
+
| **W7** SUM (1M rows) | 109.47 ms | 31.14 ms | 468.74 µs |
|
|
142
|
+
| **W8** GROUP BY card-10 | 201.80 ms | 438.09 ms | 761.40 µs |
|
|
143
|
+
| **W8** GROUP BY card-1k | 1.372 s | 251.13 ms | 871.80 µs |
|
|
144
|
+
| **W8** GROUP BY card-100k | _skipped_ | 238.96 ms | 19.58 ms |
|
|
145
|
+
| **W9** INNER JOIN (10k×10k) | 34.25 s | 2.23 µs | 699.23 µs |
|
|
146
|
+
| **W10** brute-force | 138.66 ms | — | — |
|
|
147
|
+
| **W10** HNSW | 126.81 ms | — | — |
|
|
148
|
+
| **W11** BM25 top-10 (1k docs) | 1.079 ms | 25.03 µs | — |
|
|
149
|
+
| **W12** hybrid (1k docs) | 713.53 µs | — | — |
|
|
126
150
|
|
|
127
151
|
---
|
|
128
152
|
|
|
@@ -205,7 +205,7 @@ Format-version side effect: a save that produces a non-empty freelist promotes t
|
|
|
205
205
|
|
|
206
206
|
After SQLR-6, the file still required a manual `VACUUM;` to actually shrink — the freelist absorbed orphan pages but the high-water mark stayed put. SQLR-10 adds a heuristic that fires `vacuum_database` automatically after a page-releasing DDL (`DROP TABLE`, `DROP INDEX`, `ALTER TABLE DROP COLUMN`) when the freelist exceeds a configurable fraction of `page_count`.
|
|
207
207
|
|
|
208
|
-
Configuration lives on `Database::auto_vacuum_threshold: Option<f32>` and is exposed at the connection level via `Connection::set_auto_vacuum_threshold` / `auto_vacuum_threshold
|
|
208
|
+
Configuration lives on `Database::auto_vacuum_threshold: Option<f32>` and is exposed at the connection level via `Connection::set_auto_vacuum_threshold` / `auto_vacuum_threshold`, and via SQL through `PRAGMA auto_vacuum` (SQLR-13 — see [`src/sql/pragma.rs`](../src/sql/pragma.rs)). Defaults: `Some(0.25)` (SQLite parity at 25%); pass `None` (or `PRAGMA auto_vacuum = OFF`) to opt out per connection. The threshold is per-`Connection` runtime state and is not persisted in the file header — every reopen starts at the default.
|
|
209
209
|
|
|
210
210
|
The trigger lives at the end of [`process_command_with_render`](../src/sql/mod.rs), immediately after the auto-save. Order matters: the freelist isn't accurate until the bottom-up rebuild runs during save, so we save first, then check the ratio. The check itself is `freelist::should_auto_vacuum(pager, threshold)`, which:
|
|
211
211
|
|
|
@@ -163,6 +163,7 @@ SELECT id, title FROM docs ORDER BY embedding <-> [0.1, ...] LIMIT 10;
|
|
|
163
163
|
> - **✅ 7d.1 — Pure HNSW algorithm** *(~700 LOC, shipped in v0.1.13).* `src/sql/hnsw.rs` standalone module: insert + search + layer assignment + beam search per layer + L2/cosine/dot distance dispatch. No SQL integration yet — vectors are passed in via a `get_vec` closure so the algorithm doesn't depend on table types. Tests verify recall@k ≥ 0.95 vs brute-force on randomly-generated vector sets; deterministic via a fixed RNG seed.
|
|
164
164
|
> - **✅ 7d.2 — SQL integration** *(~500 LOC).* `CREATE INDEX … USING hnsw (col)` parser + engine, INSERT wiring (also calls `hnsw.insert()` incrementally), query optimizer hook (recognizes `ORDER BY vec_distance_l2(col, literal) LIMIT k` and probes the HNSW instead of full-scanning). HNSW lives in memory only at this point; the **CREATE INDEX SQL persists in `sqlrite_master` and reopen rebuilds the graph from current rows** — partial persistence ahead of 7d.3. DELETE/UPDATE on HNSW-indexed tables refused with helpful error pointing at 7d.3.
|
|
165
165
|
> - **✅ 7d.3 — Persistence** *(~600 LOC).* New `KIND_HNSW` cell tag and `HnswNodeCell` encoding (varint node_id + per-layer neighbor lists). Each HNSW index gets its own page tree parallel to secondary indexes. Open path loads cells directly into `HnswIndex::from_persisted_nodes` — no algorithm runs, exact bit-for-bit reproduction. Also unblocks DELETE / UPDATE on HNSW-indexed tables: those mark the index `needs_rebuild`, save rebuilds from current rows before staging. ~2× the original 300-LOC estimate because the cell encoding + tests + rebuild path together added more than expected.
|
|
166
|
+
> - **✅ 7d.4 (SQLR-28) — Per-index distance metric.** Q2's "deferred per-index metric knob" lands as `CREATE INDEX … USING hnsw (col) WITH (metric = '<l2|cosine|dot>')`. The metric is stored on `HnswIndexEntry` and round-tripped via the synthesized CREATE INDEX SQL in `sqlrite_master` (no file-format bump — pre-SQLR-28 rows omit the WITH clause and decode as L2). The optimizer's `try_hnsw_probe` widens to all three `vec_distance_*` functions but only fires when the query function matches the index's metric; mismatches fall through to brute-force. Surfaced by the SQLR-23 v2 bench: W10 uses cosine, the optimizer was L2-only, and the HNSW variant had been silently brute-forcing the entire time. SQLR-25 (republish v2 numbers) was the gating consumer.
|
|
166
167
|
>
|
|
167
168
|
> Each 7d.x ships as its own PR + release wave. The user-facing value lands at 7d.2; 7d.3 closes the persistence loop. 7d.1 is foundational but ships a tested algorithmic primitive on its own — useful as documentation of the engine's "from scratch" theme.
|
|
168
169
|
|
|
@@ -368,12 +369,12 @@ Q1–Q10 were resolved by the project owner on 2026-04-26. Each question keeps i
|
|
|
368
369
|
|
|
369
370
|
### Q2. HNSW parameters: fixed defaults or per-index configurable?
|
|
370
371
|
|
|
371
|
-
> **Decided: fixed defaults** (`M=16, ef_construction=200, ef_search=50`).
|
|
372
|
+
> **Decided: fixed defaults** (`M=16, ef_construction=200, ef_search=50`) for the algorithmic knobs. **Distance metric** *did* land as a per-index `WITH (metric = '<l2|cosine|dot>')` clause in **SQLR-28 / sub-phase 7d.4** — see the 7d split note above. Was deferred from the original 7d.2 cut; surfaced as a gap by the SQLR-23 v2 bench, where W10's cosine query had been silently brute-forcing because the optimizer hook was L2-only.
|
|
372
373
|
|
|
373
374
|
- **Fixed:** `M=16, ef_construction=200, ef_search=50`. Simpler API, less to test. Matches sqlite-vec's defaults.
|
|
374
375
|
- **Configurable:** `CREATE INDEX … USING hnsw (col) WITH (m=32, ef_construction=400)`. Power-user knobs, more code, more test matrix.
|
|
375
376
|
|
|
376
|
-
**Recommendation:** fixed defaults for MVP. Configurable can land as a follow-up if anyone actually asks.
|
|
377
|
+
**Recommendation:** fixed defaults for MVP. Configurable can land as a follow-up if anyone actually asks. (`metric` already came back as a follow-up; `m` / `ef_*` haven't been requested yet.)
|
|
377
378
|
|
|
378
379
|
### Q3. JSON storage format
|
|
379
380
|
|
|
@@ -113,15 +113,18 @@ These are full-citizen indexes — they're visible via `.tables`-adjacent catalo
|
|
|
113
113
|
### HNSW indexes (Phase 7d)
|
|
114
114
|
|
|
115
115
|
```sql
|
|
116
|
-
CREATE INDEX <name> ON <table> USING hnsw (<vector_column>);
|
|
116
|
+
CREATE INDEX <name> ON <table> USING hnsw (<vector_column>)
|
|
117
|
+
[WITH (metric = '<l2|cosine|dot>')];
|
|
117
118
|
```
|
|
118
119
|
|
|
119
|
-
Builds an [HNSW](https://arxiv.org/abs/1603.09320) approximate-nearest-neighbor index over a `VECTOR(N)` column. The query optimizer recognizes `ORDER BY vec_distance_l2(col, literal) LIMIT k` (or the cosine / dot variants) on an HNSW-indexed column and probes the graph instead of full-scanning. SQLR-23 — the second arg can be either an inline `[...]` literal *or* a bound `Value::Vector(...)` parameter via `Statement::query_with_params`; the optimizer recognizes both, so prepared-statement KNN queries still take the graph shortcut.
|
|
120
|
+
Builds an [HNSW](https://arxiv.org/abs/1603.09320) approximate-nearest-neighbor index over a `VECTOR(N)` column. The query optimizer recognizes `ORDER BY vec_distance_l2(col, literal) LIMIT k` (or the cosine / dot variants) on an HNSW-indexed column **whose metric matches the query's distance function**, and probes the graph instead of full-scanning. SQLR-23 — the second arg can be either an inline `[...]` literal *or* a bound `Value::Vector(...)` parameter via `Statement::query_with_params`; the optimizer recognizes both, so prepared-statement KNN queries still take the graph shortcut.
|
|
120
121
|
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
-
|
|
124
|
-
-
|
|
122
|
+
The `WITH (metric = '…')` clause picks the distance the graph is built for. Three values are recognized: `'l2'` (Euclidean — the default, also accepts `'euclidean'`), `'cosine'`, and `'dot'` (negated dot-product — also accepts `'inner_product'` / `'ip'`). Omitting the clause is equivalent to `metric = 'l2'`, so pre-SQLR-28 catalogs round-trip unchanged. **The metric is not a query-time choice** — the graph topology depends on the metric used during INSERT (neighbour pruning is metric-specific), so a query whose `vec_distance_*` function doesn't match the index's metric falls through to brute-force rather than getting a wrong answer back from the graph. If you need both L2 and cosine probes on the same column, create two indexes.
|
|
123
|
+
|
|
124
|
+
- Recall@10 ≥ 0.95 at default parameters (`M=16`, `ef_construction=200`, `ef_search=50`). The `M` / `ef_*` knobs aren't tunable from SQL yet — see Q2 of [`docs/phase-7-plan.md`](phase-7-plan.md).
|
|
125
|
+
- The index is built incrementally on `INSERT`. `DELETE` / `UPDATE` mark the index `needs_rebuild`; the next save rebuilds from current rows under the same metric.
|
|
126
|
+
- Persisted as a `KIND_HNSW` cell tree alongside the regular page hierarchy — open path loads the graph bit-for-bit, no algorithm runs. The metric travels through the synthesized CREATE INDEX SQL in `sqlrite_master`; no file-format bump.
|
|
127
|
+
- Without an HNSW index — or with a metric mismatch — the same `ORDER BY vec_distance_… LIMIT k` query still works; it just brute-force-scans every row (Phase 7c's bounded-heap top-k optimization keeps the memory footprint to O(k)).
|
|
125
128
|
|
|
126
129
|
### FTS indexes (Phase 8)
|
|
127
130
|
|
|
@@ -548,7 +551,16 @@ conn.set_auto_vacuum_threshold(Some(0.5))?; // fire only when freelist > 50%
|
|
|
548
551
|
conn.set_auto_vacuum_threshold(None)?; // disable entirely (manual VACUUM only)
|
|
549
552
|
```
|
|
550
553
|
|
|
551
|
-
|
|
554
|
+
…or via SQL (SQLR-13), which is the path SDK / FFI / MCP consumers reach for since they can't call the Rust setter directly:
|
|
555
|
+
|
|
556
|
+
```sql
|
|
557
|
+
PRAGMA auto_vacuum; -- read; renders a single-row result set
|
|
558
|
+
PRAGMA auto_vacuum = 0.5; -- arm the trigger at 50%
|
|
559
|
+
PRAGMA auto_vacuum = 0; -- arm at 0% (compact on any released page)
|
|
560
|
+
PRAGMA auto_vacuum = OFF; -- disable; equivalent: NONE, 'OFF', 'NONE'
|
|
561
|
+
```
|
|
562
|
+
|
|
563
|
+
Out-of-range values (anything outside `0.0..=1.0`, `NaN`, `±∞`) and unknown identifiers like `WAL` / `FULL` are rejected with a typed error — the trigger never silently saturates or falls back to a default. The setting is per-`Connection` runtime state — it's not persisted in the file header, so every reopen starts at the default `Some(0.25)`.
|
|
552
564
|
|
|
553
565
|
---
|
|
554
566
|
|
|
@@ -618,7 +630,7 @@ For context when you hit `NotImplemented`. See [Roadmap](roadmap.md) for when th
|
|
|
618
630
|
|
|
619
631
|
### Session / schema
|
|
620
632
|
- Multiple attached databases (`ATTACH DATABASE`, `DETACH DATABASE`)
|
|
621
|
-
- `PRAGMA` statements
|
|
633
|
+
- `PRAGMA` statements other than `auto_vacuum` (SQLR-13). The dispatcher is in place — adding a pragma is a single arm in `execute_pragma`. `journal_mode`, `synchronous`, `cache_size`, etc. are not yet wired up
|
|
622
634
|
- `REPLACE INTO`, `INSERT OR IGNORE`, `INSERT OR REPLACE` (conflict-resolution clauses)
|
|
623
635
|
|
|
624
636
|
---
|
|
@@ -4,7 +4,7 @@ build-backend = "maturin"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "sqlrite"
|
|
7
|
-
version = "0.9.0"
|
|
7
|
+
version = "0.9.1"
|
|
8
8
|
description = "Python bindings for SQLRite — a small, embeddable SQLite clone written in Rust."
|
|
9
9
|
authors = [{ name = "Joao Henrique Machado Silva", email = "joaoh82@gmail.com" }]
|
|
10
10
|
license = { text = "MIT" }
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
# Published to crates.io as `sqlrite-ask`. Joins the lockstep release
|
|
11
11
|
# wave (`sqlrite-ask-vX.Y.Z` tag) — see `docs/release-plan.md`.
|
|
12
12
|
name = "sqlrite-ask"
|
|
13
|
-
version = "0.9.0"
|
|
13
|
+
version = "0.9.1"
|
|
14
14
|
authors = ["Joao Henrique Machado Silva <joaoh82@gmail.com>"]
|
|
15
15
|
edition = "2024"
|
|
16
16
|
rust-version = "1.85"
|
|
@@ -54,8 +54,8 @@ use std::collections::VecDeque;
|
|
|
54
54
|
use std::path::Path;
|
|
55
55
|
use std::sync::Arc;
|
|
56
56
|
|
|
57
|
+
use crate::sql::dialect::SqlriteDialect;
|
|
57
58
|
use sqlparser::ast::Statement as AstStatement;
|
|
58
|
-
use sqlparser::dialect::SQLiteDialect;
|
|
59
59
|
use sqlparser::parser::Parser;
|
|
60
60
|
|
|
61
61
|
use crate::error::{Result, SQLRiteError};
|
|
@@ -320,7 +320,7 @@ struct CachedPlan {
|
|
|
320
320
|
|
|
321
321
|
impl CachedPlan {
|
|
322
322
|
fn compile(sql: &str) -> Result<Self> {
|
|
323
|
-
let dialect = SQLiteDialect {};
|
|
323
|
+
let dialect = SqlriteDialect::new();
|
|
324
324
|
let mut ast = Parser::parse_sql(&dialect, sql).map_err(SQLRiteError::from)?;
|
|
325
325
|
let Some(mut stmt) = ast.pop() else {
|
|
326
326
|
return Err(SQLRiteError::General("no statement to prepare".to_string()));
|
|
@@ -1115,6 +1115,158 @@ mod tests {
|
|
|
1115
1115
|
assert_eq!(rows[0].get::<i64>(0).unwrap(), 1);
|
|
1116
1116
|
}
|
|
1117
1117
|
|
|
1118
|
+
/// SQLR-28 — cosine probe: an HNSW index built `WITH (metric =
|
|
1119
|
+
/// 'cosine')` must serve `ORDER BY vec_distance_cosine(col, [...])`
|
|
1120
|
+
/// from the graph. Self-query: querying for one of the corpus's
|
|
1121
|
+
/// own vectors must come back as the nearest under cosine
|
|
1122
|
+
/// distance.
|
|
1123
|
+
#[test]
|
|
1124
|
+
fn cosine_self_query_through_hnsw_optimizer() {
|
|
1125
|
+
let mut conn = Connection::open_in_memory().unwrap();
|
|
1126
|
+
conn.execute("CREATE TABLE v (id INTEGER PRIMARY KEY, e VECTOR(4));")
|
|
1127
|
+
.unwrap();
|
|
1128
|
+
let corpus: [(i64, [f32; 4]); 5] = [
|
|
1129
|
+
(1, [1.0, 0.0, 0.0, 0.0]),
|
|
1130
|
+
(2, [0.0, 1.0, 0.0, 0.0]),
|
|
1131
|
+
(3, [0.0, 0.0, 1.0, 0.0]),
|
|
1132
|
+
(4, [0.0, 0.0, 0.0, 1.0]),
|
|
1133
|
+
(5, [0.5, 0.5, 0.5, 0.5]),
|
|
1134
|
+
];
|
|
1135
|
+
for (id, vec) in corpus {
|
|
1136
|
+
conn.execute(&format!(
|
|
1137
|
+
"INSERT INTO v (id, e) VALUES ({id}, [{}, {}, {}, {}]);",
|
|
1138
|
+
vec[0], vec[1], vec[2], vec[3]
|
|
1139
|
+
))
|
|
1140
|
+
.unwrap();
|
|
1141
|
+
}
|
|
1142
|
+
conn.execute("CREATE INDEX v_hnsw ON v USING hnsw (e) WITH (metric = 'cosine');")
|
|
1143
|
+
.unwrap();
|
|
1144
|
+
|
|
1145
|
+
// Self-query for id=2's vector — expected nearest under cosine
|
|
1146
|
+
// distance is id=2 itself (cos distance 0).
|
|
1147
|
+
let rows = conn
|
|
1148
|
+
.prepare("SELECT id FROM v ORDER BY vec_distance_cosine(e, [0.0, 1.0, 0.0, 0.0]) ASC LIMIT 1")
|
|
1149
|
+
.unwrap()
|
|
1150
|
+
.query_with_params(&[])
|
|
1151
|
+
.unwrap()
|
|
1152
|
+
.collect_all()
|
|
1153
|
+
.unwrap();
|
|
1154
|
+
assert_eq!(rows.len(), 1);
|
|
1155
|
+
assert_eq!(rows[0].get::<i64>(0).unwrap(), 2);
|
|
1156
|
+
}
|
|
1157
|
+
|
|
1158
|
+
/// SQLR-28 — dot probe: same shape as the cosine test, but the
|
|
1159
|
+
/// index is built `WITH (metric = 'dot')` and the query uses
|
|
1160
|
+
/// `vec_distance_dot`. Confirms the third metric variant lights up
|
|
1161
|
+
/// the graph shortcut, not just l2 / cosine.
|
|
1162
|
+
#[test]
|
|
1163
|
+
fn dot_self_query_through_hnsw_optimizer() {
|
|
1164
|
+
let mut conn = Connection::open_in_memory().unwrap();
|
|
1165
|
+
conn.execute("CREATE TABLE v (id INTEGER PRIMARY KEY, e VECTOR(3));")
|
|
1166
|
+
.unwrap();
|
|
1167
|
+
// Data: distinguishable magnitudes so the dot metric resolves
|
|
1168
|
+
// a clear winner. `vec_distance_dot(a, b) = -(a·b)` — smaller
|
|
1169
|
+
// (more negative) is closer.
|
|
1170
|
+
let corpus: [(i64, [f32; 3]); 4] = [
|
|
1171
|
+
(1, [1.0, 0.0, 0.0]),
|
|
1172
|
+
(2, [2.0, 0.0, 0.0]),
|
|
1173
|
+
(3, [0.0, 1.0, 0.0]),
|
|
1174
|
+
(4, [0.0, 0.0, 1.0]),
|
|
1175
|
+
];
|
|
1176
|
+
for (id, vec) in corpus {
|
|
1177
|
+
conn.execute(&format!(
|
|
1178
|
+
"INSERT INTO v (id, e) VALUES ({id}, [{}, {}, {}]);",
|
|
1179
|
+
vec[0], vec[1], vec[2]
|
|
1180
|
+
))
|
|
1181
|
+
.unwrap();
|
|
1182
|
+
}
|
|
1183
|
+
conn.execute("CREATE INDEX v_hnsw ON v USING hnsw (e) WITH (metric = 'dot');")
|
|
1184
|
+
.unwrap();
|
|
1185
|
+
|
|
1186
|
+
// Query [3, 0, 0]: dot products are 3, 6, 0, 0 → distances
|
|
1187
|
+
// -3, -6, 0, 0. id=2 has the smallest (most negative) distance.
|
|
1188
|
+
let rows = conn
|
|
1189
|
+
.prepare("SELECT id FROM v ORDER BY vec_distance_dot(e, [3.0, 0.0, 0.0]) ASC LIMIT 1")
|
|
1190
|
+
.unwrap()
|
|
1191
|
+
.query_with_params(&[])
|
|
1192
|
+
.unwrap()
|
|
1193
|
+
.collect_all()
|
|
1194
|
+
.unwrap();
|
|
1195
|
+
assert_eq!(rows.len(), 1);
|
|
1196
|
+
assert_eq!(rows[0].get::<i64>(0).unwrap(), 2);
|
|
1197
|
+
}
|
|
1198
|
+
|
|
1199
|
+
/// SQLR-28 — metric mismatch must NOT take the graph shortcut.
|
|
1200
|
+
/// An L2-built index queried with `vec_distance_cosine` falls
|
|
1201
|
+
/// through to brute-force, which still returns the correct
|
|
1202
|
+
/// answer. We confirm the answer is correct; the slow-path
|
|
1203
|
+
/// behaviour itself is implicit (no error, no panic, no wrong
|
|
1204
|
+
/// result), which is the user-visible contract that matters.
|
|
1205
|
+
#[test]
|
|
1206
|
+
fn metric_mismatch_falls_back_to_brute_force() {
|
|
1207
|
+
let mut conn = Connection::open_in_memory().unwrap();
|
|
1208
|
+
conn.execute("CREATE TABLE v (id INTEGER PRIMARY KEY, e VECTOR(2));")
|
|
1209
|
+
.unwrap();
|
|
1210
|
+
let half_sqrt2 = std::f32::consts::FRAC_1_SQRT_2;
|
|
1211
|
+
let corpus: [(i64, [f32; 2]); 3] = [
|
|
1212
|
+
(1, [1.0, 0.0]),
|
|
1213
|
+
(2, [half_sqrt2, half_sqrt2]),
|
|
1214
|
+
(3, [0.0, 1.0]),
|
|
1215
|
+
];
|
|
1216
|
+
for (id, vec) in corpus {
|
|
1217
|
+
conn.execute(&format!(
|
|
1218
|
+
"INSERT INTO v (id, e) VALUES ({id}, [{}, {}]);",
|
|
1219
|
+
vec[0], vec[1]
|
|
1220
|
+
))
|
|
1221
|
+
.unwrap();
|
|
1222
|
+
}
|
|
1223
|
+
// Default L2 index — no WITH clause.
|
|
1224
|
+
conn.execute("CREATE INDEX v_hnsw_l2 ON v USING hnsw (e);")
|
|
1225
|
+
.unwrap();
|
|
1226
|
+
|
|
1227
|
+
// Query with cosine. Index can't help; brute-force still
|
|
1228
|
+
// returns the correct nearest by cosine: id=1 (cos dist 0).
|
|
1229
|
+
let rows = conn
|
|
1230
|
+
.prepare("SELECT id FROM v ORDER BY vec_distance_cosine(e, [1.0, 0.0]) ASC LIMIT 1")
|
|
1231
|
+
.unwrap()
|
|
1232
|
+
.query_with_params(&[])
|
|
1233
|
+
.unwrap()
|
|
1234
|
+
.collect_all()
|
|
1235
|
+
.unwrap();
|
|
1236
|
+
assert_eq!(rows.len(), 1);
|
|
1237
|
+
assert_eq!(rows[0].get::<i64>(0).unwrap(), 1);
|
|
1238
|
+
}
|
|
1239
|
+
|
|
1240
|
+
/// SQLR-28 — a typo in the metric name must error at CREATE INDEX
|
|
1241
|
+
/// time. Falling back to L2 silently is the bug we're fixing here,
|
|
1242
|
+
/// not the behaviour to preserve.
|
|
1243
|
+
#[test]
|
|
1244
|
+
fn unknown_metric_name_is_rejected() {
|
|
1245
|
+
let mut conn = Connection::open_in_memory().unwrap();
|
|
1246
|
+
conn.execute("CREATE TABLE v (id INTEGER PRIMARY KEY, e VECTOR(2));")
|
|
1247
|
+
.unwrap();
|
|
1248
|
+
let err = conn
|
|
1249
|
+
.execute("CREATE INDEX bad ON v USING hnsw (e) WITH (metric = 'cosin');")
|
|
1250
|
+
.unwrap_err();
|
|
1251
|
+
let msg = format!("{err}");
|
|
1252
|
+
assert!(msg.contains("unknown HNSW metric"), "got: {msg}");
|
|
1253
|
+
}
|
|
1254
|
+
|
|
1255
|
+
/// SQLR-28 — WITH options on a non-HNSW index must error rather
|
|
1256
|
+
/// than be silently ignored. An option that has no effect on the
|
|
1257
|
+
/// resulting index is a footgun.
|
|
1258
|
+
#[test]
|
|
1259
|
+
fn with_metric_on_btree_is_rejected() {
|
|
1260
|
+
let mut conn = Connection::open_in_memory().unwrap();
|
|
1261
|
+
conn.execute("CREATE TABLE t (a INTEGER PRIMARY KEY, b TEXT);")
|
|
1262
|
+
.unwrap();
|
|
1263
|
+
let err = conn
|
|
1264
|
+
.execute("CREATE INDEX bad ON t (b) WITH (metric = 'cosine');")
|
|
1265
|
+
.unwrap_err();
|
|
1266
|
+
let msg = format!("{err}");
|
|
1267
|
+
assert!(msg.contains("doesn't support any options"), "got: {msg}");
|
|
1268
|
+
}
|
|
1269
|
+
|
|
1118
1270
|
#[test]
|
|
1119
1271
|
fn prepare_cached_executes_the_same_as_prepare() {
|
|
1120
1272
|
let mut conn = Connection::open_in_memory().unwrap();
|
|
@@ -198,8 +198,8 @@ impl Database {
|
|
|
198
198
|
#[cfg(test)]
|
|
199
199
|
mod tests {
|
|
200
200
|
use super::*;
|
|
201
|
+
use crate::sql::dialect::SqlriteDialect;
|
|
201
202
|
use crate::sql::parser::create::CreateQuery;
|
|
202
|
-
use sqlparser::dialect::SQLiteDialect;
|
|
203
203
|
use sqlparser::parser::Parser;
|
|
204
204
|
|
|
205
205
|
#[test]
|
|
@@ -220,7 +220,7 @@ mod tests {
|
|
|
220
220
|
last_name TEXT NOT NULl,
|
|
221
221
|
email TEXT NOT NULL UNIQUE
|
|
222
222
|
);";
|
|
223
|
-
let dialect = SQLiteDialect {};
|
|
223
|
+
let dialect = SqlriteDialect::new();
|
|
224
224
|
let mut ast = Parser::parse_sql(&dialect, query_statement).unwrap();
|
|
225
225
|
if ast.len() > 1 {
|
|
226
226
|
panic!("Expected a single query statement, but there are more then 1.")
|
|
@@ -246,7 +246,7 @@ mod tests {
|
|
|
246
246
|
last_name TEXT NOT NULl,
|
|
247
247
|
email TEXT NOT NULL UNIQUE
|
|
248
248
|
);";
|
|
249
|
-
let dialect = SQLiteDialect {};
|
|
249
|
+
let dialect = SqlriteDialect::new();
|
|
250
250
|
let mut ast = Parser::parse_sql(&dialect, query_statement).unwrap();
|
|
251
251
|
if ast.len() > 1 {
|
|
252
252
|
panic!("Expected a single query statement, but there are more then 1.")
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
use crate::error::{Result, SQLRiteError};
|
|
2
2
|
use crate::sql::db::secondary_index::{IndexOrigin, SecondaryIndex};
|
|
3
3
|
use crate::sql::fts::PostingList;
|
|
4
|
-
use crate::sql::hnsw::HnswIndex;
|
|
4
|
+
use crate::sql::hnsw::{DistanceMetric, HnswIndex};
|
|
5
5
|
use crate::sql::parser::create::{CreateQuery, ParsedColumn};
|
|
6
6
|
use std::collections::{BTreeMap, HashMap};
|
|
7
7
|
use std::fmt;
|
|
@@ -150,10 +150,11 @@ pub struct Table {
|
|
|
150
150
|
pub primary_key: String,
|
|
151
151
|
}
|
|
152
152
|
|
|
153
|
-
/// One HNSW index attached to a table.
|
|
154
|
-
///
|
|
155
|
-
///
|
|
156
|
-
///
|
|
153
|
+
/// One HNSW index attached to a table. The distance metric is fixed
|
|
154
|
+
/// at CREATE INDEX time via `USING hnsw (col) WITH (metric = '<m>')`
|
|
155
|
+
/// (`l2` / `cosine` / `dot`); omitting the WITH clause defaults to L2,
|
|
156
|
+
/// matching the pre-SQLR-28 behaviour for round-tripping older
|
|
157
|
+
/// `sqlrite_master` rows that didn't carry a metric.
|
|
157
158
|
#[derive(Debug, Clone)]
|
|
158
159
|
pub struct HnswIndexEntry {
|
|
159
160
|
/// User-supplied name from `CREATE INDEX <name> …`. Unique across
|
|
@@ -161,6 +162,13 @@ pub struct HnswIndexEntry {
|
|
|
161
162
|
pub name: String,
|
|
162
163
|
/// The VECTOR column this index covers.
|
|
163
164
|
pub column_name: String,
|
|
165
|
+
/// Distance metric the graph was built for. The optimizer's HNSW
|
|
166
|
+
/// shortcut only fires when the query's `vec_distance_*` function
|
|
167
|
+
/// matches this metric — picking a non-matching distance falls
|
|
168
|
+
/// through to brute-force, since the graph topology is metric-
|
|
169
|
+
/// specific (an L2-pruned graph isn't a valid cosine search graph
|
|
170
|
+
/// in general, and vice versa).
|
|
171
|
+
pub metric: DistanceMetric,
|
|
164
172
|
/// The graph itself.
|
|
165
173
|
pub index: HnswIndex,
|
|
166
174
|
/// Phase 7d.3 — true iff a DELETE or UPDATE-on-vector-col has
|
|
@@ -1628,7 +1636,7 @@ pub fn parse_vector_literal(s: &str) -> Result<Vec<f32>> {
|
|
|
1628
1636
|
#[cfg(test)]
|
|
1629
1637
|
mod tests {
|
|
1630
1638
|
use super::*;
|
|
1631
|
-
use sqlparser::dialect::SQLiteDialect;
|
|
1639
|
+
use crate::sql::dialect::SqlriteDialect;
|
|
1632
1640
|
use sqlparser::parser::Parser;
|
|
1633
1641
|
|
|
1634
1642
|
#[test]
|
|
@@ -1766,7 +1774,7 @@ mod tests {
|
|
|
1766
1774
|
active BOOL,
|
|
1767
1775
|
score REAL
|
|
1768
1776
|
);";
|
|
1769
|
-
let dialect = SQLiteDialect {};
|
|
1777
|
+
let dialect = SqlriteDialect::new();
|
|
1770
1778
|
let mut ast = Parser::parse_sql(&dialect, query_statement).unwrap();
|
|
1771
1779
|
if ast.len() > 1 {
|
|
1772
1780
|
panic!("Expected a single query statement, but there are more then 1.")
|
|
@@ -1802,7 +1810,7 @@ mod tests {
|
|
|
1802
1810
|
first_name TEXT NOT NULL,
|
|
1803
1811
|
last_name TEXT NOT NULl
|
|
1804
1812
|
);";
|
|
1805
|
-
let dialect = SQLiteDialect {};
|
|
1813
|
+
let dialect = SqlriteDialect::new();
|
|
1806
1814
|
let mut ast = Parser::parse_sql(&dialect, query_statement).unwrap();
|
|
1807
1815
|
if ast.len() > 1 {
|
|
1808
1816
|
panic!("Expected a single query statement, but there are more then 1.")
|