sqlrite 0.1.14__tar.gz → 0.1.16__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sqlrite-0.1.14 → sqlrite-0.1.16}/Cargo.lock +7 -5
- {sqlrite-0.1.14 → sqlrite-0.1.16}/Cargo.toml +9 -1
- {sqlrite-0.1.14 → sqlrite-0.1.16}/PKG-INFO +1 -1
- {sqlrite-0.1.14 → sqlrite-0.1.16}/desktop/package.json +1 -1
- {sqlrite-0.1.14 → sqlrite-0.1.16}/docs/phase-7-plan.md +10 -6
- {sqlrite-0.1.14 → sqlrite-0.1.16}/docs/roadmap.md +2 -2
- {sqlrite-0.1.14 → sqlrite-0.1.16}/docs/supported-sql.md +33 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/pyproject.toml +1 -1
- {sqlrite-0.1.14 → sqlrite-0.1.16}/sdk/python/Cargo.toml +1 -1
- {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/db/table.rs +55 -1
- {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/executor.rs +321 -39
- {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/hnsw.rs +44 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/mod.rs +250 -11
- {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/pager/cell.rs +15 -0
- sqlrite-0.1.16/src/sql/pager/hnsw_cell.rs +258 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/pager/mod.rs +392 -27
- {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/parser/create.rs +5 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/.github/workflows/ci.yml +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/.github/workflows/release-pr.yml +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/.github/workflows/release.yml +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/.github/workflows/rust.yml +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/.gitignore +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/CODE_OF_CONDUCT.md +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/LICENSE +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/MAINTAINERS +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/Makefile +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/README.md +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/desktop/index.html +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/desktop/package-lock.json +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/desktop/src/App.svelte +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/desktop/src/app.css +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/desktop/src/main.ts +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/desktop/src/vite-env.d.ts +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/desktop/svelte.config.js +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/desktop/tsconfig.json +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/desktop/vite.config.ts +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/docs/_index.md +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/docs/architecture.md +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/docs/design-decisions.md +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/docs/desktop.md +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/docs/embedding.md +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/docs/file-format.md +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/docs/getting-started.md +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/docs/pager.md +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/docs/release-plan.md +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/docs/release-secrets.md +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/docs/smoke-test.md +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/docs/sql-engine.md +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/docs/storage-model.md +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/docs/usage.md +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/examples/README.md +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/examples/c/Makefile +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/examples/c/hello.c +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/examples/go/go.mod +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/examples/go/hello.go +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/examples/nodejs/hello.mjs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/examples/python/hello.py +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/examples/rust/quickstart.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/examples/wasm/Makefile +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/examples/wasm/index.html +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/images/SQLRite - Desktop.png +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/images/SQLRite Data Structures.png +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/images/SQLRite Simple SQL Execution High Level Diagram.png +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/images/SQLRite Simple SQL INSERT Execution High Level Diagram (Insert Row).png +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/images/SQLRite Simple SQL INSERT Execution High Level Diagram.png +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/images/SQLRite_logo.png +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/images/architecture.png +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/rust-toolchain.toml +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/samples/AST.delete.example +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/samples/AST.insert.exemple +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/samples/AST.select.example +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/samples/AST.update.example +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/samples/CREATE TABLE sqlrite_schema.sql +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/samples/CREATE_TABLE with duplicate.sql +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/samples/CREATE_TABLE.sql +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/samples/INSERT.sql +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/scripts/bump-version.sh +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/sdk/go/README.md +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/sdk/go/conn.go +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/sdk/go/go.mod +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/sdk/go/rows.go +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/sdk/go/sqlrite.go +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/sdk/go/sqlrite_test.go +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/sdk/go/stmt.go +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/sdk/python/README.md +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/sdk/python/src/lib.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/sdk/python/tests/test_sqlrite.py +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/src/connection.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/src/error.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/src/lib.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/src/main.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/src/meta_command/mod.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/src/repl/mod.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/db/database.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/db/mod.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/db/secondary_index.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/pager/file.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/pager/header.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/pager/index_cell.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/pager/interior_page.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/pager/overflow.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/pager/page.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/pager/pager.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/pager/table_page.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/pager/varint.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/pager/wal.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/parser/insert.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/parser/mod.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/parser/select.rs +0 -0
- {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/tokenizer.rs +0 -0
|
@@ -3511,6 +3511,7 @@ version = "1.0.149"
|
|
|
3511
3511
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
3512
3512
|
checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86"
|
|
3513
3513
|
dependencies = [
|
|
3514
|
+
"indexmap 2.14.0",
|
|
3514
3515
|
"itoa",
|
|
3515
3516
|
"memchr",
|
|
3516
3517
|
"serde",
|
|
@@ -3736,7 +3737,7 @@ dependencies = [
|
|
|
3736
3737
|
|
|
3737
3738
|
[[package]]
|
|
3738
3739
|
name = "sqlrite-desktop"
|
|
3739
|
-
version = "0.1.
|
|
3740
|
+
version = "0.1.16"
|
|
3740
3741
|
dependencies = [
|
|
3741
3742
|
"serde",
|
|
3742
3743
|
"serde_json",
|
|
@@ -3748,7 +3749,7 @@ dependencies = [
|
|
|
3748
3749
|
|
|
3749
3750
|
[[package]]
|
|
3750
3751
|
name = "sqlrite-engine"
|
|
3751
|
-
version = "0.1.
|
|
3752
|
+
version = "0.1.16"
|
|
3752
3753
|
dependencies = [
|
|
3753
3754
|
"clap",
|
|
3754
3755
|
"env_logger",
|
|
@@ -3757,13 +3758,14 @@ dependencies = [
|
|
|
3757
3758
|
"prettytable-rs",
|
|
3758
3759
|
"rustyline",
|
|
3759
3760
|
"rustyline-derive",
|
|
3761
|
+
"serde_json",
|
|
3760
3762
|
"sqlparser",
|
|
3761
3763
|
"thiserror 2.0.18",
|
|
3762
3764
|
]
|
|
3763
3765
|
|
|
3764
3766
|
[[package]]
|
|
3765
3767
|
name = "sqlrite-ffi"
|
|
3766
|
-
version = "0.1.
|
|
3768
|
+
version = "0.1.16"
|
|
3767
3769
|
dependencies = [
|
|
3768
3770
|
"cbindgen",
|
|
3769
3771
|
"sqlrite-engine",
|
|
@@ -3771,7 +3773,7 @@ dependencies = [
|
|
|
3771
3773
|
|
|
3772
3774
|
[[package]]
|
|
3773
3775
|
name = "sqlrite-nodejs"
|
|
3774
|
-
version = "0.1.
|
|
3776
|
+
version = "0.1.16"
|
|
3775
3777
|
dependencies = [
|
|
3776
3778
|
"napi",
|
|
3777
3779
|
"napi-build",
|
|
@@ -3781,7 +3783,7 @@ dependencies = [
|
|
|
3781
3783
|
|
|
3782
3784
|
[[package]]
|
|
3783
3785
|
name = "sqlrite-python"
|
|
3784
|
-
version = "0.1.
|
|
3786
|
+
version = "0.1.16"
|
|
3785
3787
|
dependencies = [
|
|
3786
3788
|
"pyo3",
|
|
3787
3789
|
"sqlrite-engine",
|
|
@@ -27,7 +27,7 @@ resolver = "3"
|
|
|
27
27
|
# `package =` key so the import name stays `sqlrite` internally:
|
|
28
28
|
# sqlrite = { package = "sqlrite-engine", path = "…" }
|
|
29
29
|
name = "sqlrite-engine"
|
|
30
|
-
version = "0.1.14"
|
|
30
|
+
version = "0.1.16"
|
|
31
31
|
authors = ["Joao Henrique Machado Silva <joaoh82@gmail.com>"]
|
|
32
32
|
edition = "2024"
|
|
33
33
|
rust-version = "1.85"
|
|
@@ -82,6 +82,14 @@ log = "0.4"
|
|
|
82
82
|
sqlparser = "0.61"
|
|
83
83
|
thiserror = "2.0"
|
|
84
84
|
prettytable-rs = "0.10"
|
|
85
|
+
# Phase 7e: JSON column type. `serde_json` powers both the validation
|
|
86
|
+
# step at INSERT time (parse-and-discard to confirm the text is valid
|
|
87
|
+
# JSON) and the path extraction inside the json_extract / json_type
|
|
88
|
+
# / json_array_length / json_object_keys SQL functions. `preserve_order`
|
|
89
|
+
# keeps object keys in insertion order so json_object_keys output is
|
|
90
|
+
# stable; without it, BTreeMap-backed Maps would alphabetically sort,
|
|
91
|
+
# which surprises callers re-serializing the same JSON.
|
|
92
|
+
serde_json = { version = "1", features = ["preserve_order"] }
|
|
85
93
|
|
|
86
94
|
# CLI-only deps (feature-gated). `optional = true` + the `cli`
|
|
87
95
|
# feature above means these only land in the dep graph when
|
|
@@ -162,7 +162,7 @@ SELECT id, title FROM docs ORDER BY embedding <-> [0.1, ...] LIMIT 10;
|
|
|
162
162
|
>
|
|
163
163
|
> - **✅ 7d.1 — Pure HNSW algorithm** *(~700 LOC, shipped in v0.1.13).* `src/sql/hnsw.rs` standalone module: insert + search + layer assignment + beam search per layer + L2/cosine/dot distance dispatch. No SQL integration yet — vectors are passed in via a `get_vec` closure so the algorithm doesn't depend on table types. Tests verify recall@k ≥ 0.95 vs brute-force on randomly-generated vector sets; deterministic via a fixed RNG seed.
|
|
164
164
|
> - **✅ 7d.2 — SQL integration** *(~500 LOC).* `CREATE INDEX … USING hnsw (col)` parser + engine, INSERT wiring (also calls `hnsw.insert()` incrementally), query optimizer hook (recognizes `ORDER BY vec_distance_l2(col, literal) LIMIT k` and probes the HNSW instead of full-scanning). HNSW lives in memory only at this point; the **CREATE INDEX SQL persists in `sqlrite_master` and reopen rebuilds the graph from current rows** — partial persistence ahead of 7d.3. DELETE/UPDATE on HNSW-indexed tables refused with helpful error pointing at 7d.3.
|
|
165
|
-
> -
|
|
165
|
+
> - **✅ 7d.3 — Persistence** *(~600 LOC).* New `KIND_HNSW` cell tag and `HnswNodeCell` encoding (varint node_id + per-layer neighbor lists). Each HNSW index gets its own page tree parallel to secondary indexes. Open path loads cells directly into `HnswIndex::from_persisted_nodes` — no algorithm runs, exact bit-for-bit reproduction. Also unblocks DELETE / UPDATE on HNSW-indexed tables: those mark the index `needs_rebuild`, save rebuilds from current rows before staging. ~2× the original 300-LOC estimate because the cell encoding + tests + rebuild path together added more than expected.
|
|
166
166
|
>
|
|
167
167
|
> Each 7d.x ships as its own PR + release wave. The user-facing value lands at 7d.2; 7d.3 closes the persistence loop. 7d.1 is foundational but ships a tested algorithmic primitive on its own — useful as documentation of the engine's "from scratch" theme.
|
|
168
168
|
|
|
@@ -170,14 +170,14 @@ SELECT id, title FROM docs ORDER BY embedding <-> [0.1, ...] LIMIT 10;
|
|
|
170
170
|
|
|
171
171
|
---
|
|
172
172
|
|
|
173
|
-
### 7e — JSON column type + path queries
|
|
173
|
+
### ✅ 7e — JSON column type + path queries
|
|
174
174
|
|
|
175
|
-
**What.** New `JSON` data type.
|
|
175
|
+
**What.** New `JSON` data type. Stored as canonical UTF-8 text and validated at INSERT/UPDATE time via `serde_json::from_str`. The four path-extraction functions parse on demand:
|
|
176
176
|
|
|
177
177
|
- `json_extract(col, '$.path')` — returns the value at the path, NULL if absent
|
|
178
|
-
- `json_array_length(col, '$.path')` — array length, NULL for non-array
|
|
179
|
-
- `json_object_keys(col, '$.path')` —
|
|
180
|
-
- `json_type(col, '$.path')` — `'null'
|
|
178
|
+
- `json_array_length(col, '$.path')` — array length; NULL when the path doesn't resolve, error when the resolved node is not an array
|
|
179
|
+
- `json_object_keys(col, '$.path')` — JSON-array text of keys (see scope-correction note in Q3 below; SQLite's set-returning shape requires features we don't have)
|
|
180
|
+
- `json_type(col, '$.path')` — `'null'` / `'true'` / `'false'` / `'integer'` / `'real'` / `'text'` / `'array'` / `'object'` (matches SQLite JSON1 conventions)
|
|
181
181
|
|
|
182
182
|
**Why this matters for AI-era specifically.** LLM tool-call outputs are JSON. RAG citation arrays are JSON. Agent scratchpads are JSON. Storing them as TEXT and re-parsing on every query is wasteful.
|
|
183
183
|
|
|
@@ -378,6 +378,10 @@ Q1–Q10 were resolved by the project owner on 2026-04-26. Each question keeps i
|
|
|
378
378
|
### Q3. JSON storage format
|
|
379
379
|
|
|
380
380
|
> **Decided: bincoded `serde_json::Value`** for the MVP. JSON indexing remains a future phase.
|
|
381
|
+
>
|
|
382
|
+
> **Scope correction (2026-04-28, during 7e implementation):** Q3's "bincoded `Value`" answer was settled before remembering that bincode was removed from the engine in Phase 3c (cell-based encoding replaced it). Rather than re-add bincode for one column type, **7e ships JSON-as-canonical-text** — same as SQLite's JSON1 extension. INSERT/UPDATE call `serde_json::from_str` to validate; the four `json_*` functions re-parse on demand. Trade-off: ~2× storage vs. binary, plus per-call parse overhead — both acceptable for MVP and consistent with SQLite's choice. JSONB-style binary indexing remains a future-phase optimization, but doesn't block 7e.
|
|
383
|
+
>
|
|
384
|
+
> One additional 7e divergence from the original plan: `json_object_keys` is supposed to be a *table-valued function* (one row per key, like SQLite's). We don't yet support set-returning functions in the executor, so 7e returns the keys as a JSON-array text instead. Caller can iterate via `json_array_length` + `json_extract` indexing. Documented in `docs/supported-sql.md` so users see the divergence up front.
|
|
381
385
|
|
|
382
386
|
- **bincoded `serde_json::Value`:** one-line implementation, fast read/write, opaque on disk.
|
|
383
387
|
- **Parsed AST as cell-encoded structure:** more code, but lets us index into JSON without a full deserialize.
|
|
@@ -473,8 +473,8 @@ Approved sub-phases (Q1–Q10 resolved):
|
|
|
473
473
|
- **✅ 7a — `VECTOR(N)` column type** *(v0.1.10)* — dense fixed-dimension f32 storage via the existing cell encoding; format bumped to v4. Bracket-array literal syntax `[0.1, 0.2, …]` (Q7).
|
|
474
474
|
- **✅ 7b — Distance functions** *(v0.1.11)* — `vec_distance_l2/cosine/dot`, plus the ORDER BY-expressions parser change so KNN queries work end-to-end. Operators (`<->` `<=>` `<#>`) deferred to **7b.1** — sqlparser doesn't parse them natively, contradicting Q6's "tiny parser change" assumption.
|
|
475
475
|
- **✅ 7c — Brute-force KNN executor optimization** — bounded `BinaryHeap` of size k for `ORDER BY <expr> LIMIT k`. ~1.8× faster than full-sort at N=10k for cheap keys; bigger gains on expensive keys like `vec_distance_l2`.
|
|
476
|
-
-
|
|
477
|
-
-
|
|
476
|
+
- **✅ 7d — HNSW ANN index** — three PRs: 7d.1 (algorithm w/ recall@10 ≥ 0.95), 7d.2 (SQL integration + query optimizer), 7d.3 (persistence + DELETE/UPDATE rebuild). `CREATE INDEX … USING hnsw (col)`; fixed defaults `M=16, ef_construction=200, ef_search=50` (Q2). New `KIND_HNSW` cell tag.
|
|
477
|
+
- **✅ 7e — JSON column type + path queries** — `JSON` data type stored as canonical text (validated via `serde_json::from_str` at INSERT/UPDATE time; SQLite-JSON1-style — Q3 scope correction since bincode was removed in Phase 3c). Functions: `json_extract` / `json_type` / `json_array_length` / `json_object_keys`. Path subset supports `$`, `.key`, `[N]`, chained. `json_object_keys` returns a JSON-array text rather than a table-valued result (no set-returning functions in the executor yet).
|
|
478
478
|
- **7f — ~~Full-text search with BM25~~** — **deferred to Phase 8** (Q1).
|
|
479
479
|
- **7g — `ask()` API across the product surface** — natural-language → SQL via Anthropic API (Q4), Anthropic-first then OpenAI + Ollama follow-ups. Foundational 7g.1 introduces a new `sqlrite-ask` crate (Q10 — separate crate, not a feature flag). Thin per-product adapters in 7g.2-7g.8 cover REPL, desktop, Python, Node.js, Go, WASM (JS-callback shape per Q9), and the MCP `ask` tool.
|
|
480
480
|
- **7h — MCP server adapter** — new `sqlrite-mcp` binary, hand-rolled JSON-RPC + tool framework (Q5).
|
|
@@ -35,6 +35,7 @@ CREATE TABLE <name> (<col> <type> [column_constraint]* [, ...]);
|
|
|
35
35
|
| `REAL`, `FLOAT`, `DOUBLE`, `DECIMAL` | Real (f64) | Double-precision; `DECIMAL(p,s)` precision/scale parsed and ignored |
|
|
36
36
|
| `BOOLEAN` | Boolean | Stored compactly in the null bitmap's sibling bits; accepts `TRUE` / `FALSE` |
|
|
37
37
|
| `VECTOR(N)` | Vector (Vec\<f32\>, fixed dim N) | **Phase 7a.** Dense f32 array of fixed dimension. `N` is required and must be ≥ 1. Inserted as bracket-array literals `[0.1, 0.2, ...]`. Dimension is enforced at INSERT/UPDATE; mismatched-length values are rejected. Distance functions and ANN indexing land in 7b–7d. |
|
|
38
|
+
| `JSON`, `JSONB` | Text (canonical JSON) | **Phase 7e.** JSON document stored as canonical UTF-8 text — same as SQLite's JSON1 extension (Q3 scope correction since bincode was removed in Phase 3c). INSERT/UPDATE values are validated via `serde_json::from_str`; malformed JSON is rejected with a typed error and no row is written. `JSONB` is accepted as an alias for `JSON` (PostgreSQL convention; both store as text in our case). Path-style read access via the `json_extract` / `json_type` / `json_array_length` / `json_object_keys` functions below. |
|
|
38
39
|
|
|
39
40
|
### Column constraints
|
|
40
41
|
|
|
@@ -203,6 +204,10 @@ Same set accepted by `INSERT` (see [Value literals accepted](#value-literals-acc
|
|
|
203
204
|
| `vec_distance_l2(a, b)` | Real (f64) | Euclidean distance √Σ(aᵢ−bᵢ)². Smaller is closer. *(Phase 7b)* |
|
|
204
205
|
| `vec_distance_cosine(a, b)` | Real (f64) | Cosine distance `1 − (a·b) / (‖a‖·‖b‖)`. Errors on zero-magnitude vectors (cosine is undefined). Smaller is closer; identical vectors return 0.0, orthogonal vectors return 1.0. *(Phase 7b)* |
|
|
205
206
|
| `vec_distance_dot(a, b)` | Real (f64) | Negated dot product `−(a·b)`. Negation makes "smaller is closer" consistent with the others. For unit-norm vectors equals `vec_distance_cosine(a, b) - 1`. *(Phase 7b)* |
|
|
207
|
+
| `json_extract(json, path)` | Depends on the resolved node | Walks `path` over `json` and returns the resolved value coerced to the closest SQL type — JSON strings → `TEXT`, numbers → `INTEGER` / `REAL`, booleans → `BOOLEAN`, `null` → `NULL`, and composites (`object` / `array`) → their canonical JSON-text serialization. Path defaults to `$` when only one argument is supplied. A path that doesn't resolve returns `NULL`. *(Phase 7e)* |
|
|
208
|
+
| `json_type(json[, path])` | Text | One of `'object'`, `'array'`, `'string'`, `'integer'`, `'real'`, `'true'`, `'false'`, `'null'`. Path defaults to `$`. *(Phase 7e)* |
|
|
209
|
+
| `json_array_length(json[, path])` | Integer | Number of elements in the JSON array at `path`. Errors if the resolved node is not an array. Path defaults to `$`. *(Phase 7e)* |
|
|
210
|
+
| `json_object_keys(json[, path])` | Text (JSON-array string) | Returns the object's keys as a JSON-array text in insertion order — e.g. `'["a","b","c"]'`. Path defaults to `$`. **Diverges from SQLite**, which exposes keys as a *table-valued* function (one row per key). SQLRite has no set-returning functions yet, so we return the keys as a JSON array and let callers parse if needed. *(Phase 7e)* |
|
|
206
211
|
|
|
207
212
|
All three vector-distance functions take exactly two arguments, both of which must be vectors of the same dimension. Either argument can be a column reference (`embedding`), a bracket-array literal (`[0.1, 0.2, 0.3]`), or any sub-expression that evaluates to a vector. Mismatched dimensions error with `vector dimensions don't match (lhs=N, rhs=M)`.
|
|
208
213
|
|
|
@@ -216,6 +221,34 @@ LIMIT 10;
|
|
|
216
221
|
|
|
217
222
|
> **Operator forms (`<->` `<=>` `<#>`) are not supported yet.** They're the de facto pgvector convention but blocked on a sqlparser limitation — will land as a Phase 7b.1 follow-up. Use the function-call form for now.
|
|
218
223
|
|
|
224
|
+
#### JSON path syntax
|
|
225
|
+
|
|
226
|
+
The `json_*` functions accept a string path argument with a small subset of JSONPath:
|
|
227
|
+
|
|
228
|
+
| Token | Meaning |
|
|
229
|
+
|---|---|
|
|
230
|
+
| `$` | Root of the document (default if path is omitted). |
|
|
231
|
+
| `.key` | Object member access. Bare keys only — no quoted-string variant yet. |
|
|
232
|
+
| `[N]` | Array index (0-based). Negative indices are not supported. |
|
|
233
|
+
|
|
234
|
+
Tokens chain naturally: `$.user.tags[0]`, `$[2].name`, `$.matrix[1][0]`. A malformed path (unbalanced brackets, missing `$`) errors at runtime with a typed message; a well-formed path that simply doesn't resolve returns `NULL`.
|
|
235
|
+
|
|
236
|
+
```sql
|
|
237
|
+
CREATE TABLE events (id INTEGER PRIMARY KEY, payload JSON);
|
|
238
|
+
|
|
239
|
+
INSERT INTO events (payload) VALUES
|
|
240
|
+
('{"user": {"name": "alice", "tags": ["admin", "ops"]}, "score": 42}'),
|
|
241
|
+
('{"user": {"name": "bob", "tags": []}, "score": 7}');
|
|
242
|
+
|
|
243
|
+
SELECT id,
|
|
244
|
+
json_extract(payload, '$.user.name') AS name,
|
|
245
|
+
json_extract(payload, '$.user.tags[0]') AS first_tag,
|
|
246
|
+
json_array_length(payload, '$.user.tags') AS tag_count,
|
|
247
|
+
json_type(payload, '$.score') AS score_type
|
|
248
|
+
FROM events
|
|
249
|
+
WHERE json_extract(payload, '$.user.name') = 'alice';
|
|
250
|
+
```
|
|
251
|
+
|
|
219
252
|
### Type coercion in arithmetic
|
|
220
253
|
|
|
221
254
|
- **Integer-only ops stay integer.** `1 + 2` → `3` (Integer).
|
|
@@ -4,7 +4,7 @@ build-backend = "maturin"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "sqlrite"
|
|
7
|
-
version = "0.1.14"
|
|
7
|
+
version = "0.1.16"
|
|
8
8
|
description = "Python bindings for SQLRite — a small, embeddable SQLite clone written in Rust."
|
|
9
9
|
authors = [{ name = "Joao Henrique Machado Silva", email = "joaoh82@gmail.com" }]
|
|
10
10
|
license = { text = "MIT" }
|
|
@@ -26,6 +26,16 @@ pub enum DataType {
|
|
|
26
26
|
/// declared dimension; every value stored in the column must have
|
|
27
27
|
/// exactly that many elements.
|
|
28
28
|
Vector(usize),
|
|
29
|
+
/// Phase 7e — JSON column. Stored as canonical UTF-8 text (matches
|
|
30
|
+
/// SQLite's JSON1 extension), validated at INSERT/UPDATE time. The
|
|
31
|
+
/// `json_extract` family of functions parses on demand and returns
|
|
32
|
+
/// either a primitive `Value` (Integer / Real / Text / Bool / Null)
|
|
33
|
+
/// or a Text value carrying the JSON-encoded sub-object/array.
|
|
34
|
+
/// Q3 originally specified `bincoded serde_json::Value`, but bincode
|
|
35
|
+
/// was removed from the engine in Phase 3c — see the scope-correction
|
|
36
|
+
/// note in `docs/phase-7-plan.md` for the rationale on switching to
|
|
37
|
+
/// text storage.
|
|
38
|
+
Json,
|
|
29
39
|
None,
|
|
30
40
|
Invalid,
|
|
31
41
|
}
|
|
@@ -44,6 +54,7 @@ impl DataType {
|
|
|
44
54
|
"text" => DataType::Text,
|
|
45
55
|
"real" => DataType::Real,
|
|
46
56
|
"bool" => DataType::Bool,
|
|
57
|
+
"json" => DataType::Json,
|
|
47
58
|
"none" => DataType::None,
|
|
48
59
|
other if other.starts_with("vector(") && other.ends_with(')') => {
|
|
49
60
|
// Strip the `vector(` prefix and trailing `)`, parse what's
|
|
@@ -77,6 +88,7 @@ impl DataType {
|
|
|
77
88
|
DataType::Real => "Real".to_string(),
|
|
78
89
|
DataType::Bool => "Bool".to_string(),
|
|
79
90
|
DataType::Vector(dim) => format!("vector({dim})"),
|
|
91
|
+
DataType::Json => "Json".to_string(),
|
|
80
92
|
DataType::None => "None".to_string(),
|
|
81
93
|
DataType::Invalid => "Invalid".to_string(),
|
|
82
94
|
}
|
|
@@ -91,6 +103,7 @@ impl fmt::Display for DataType {
|
|
|
91
103
|
DataType::Real => f.write_str("Real"),
|
|
92
104
|
DataType::Bool => f.write_str("Boolean"),
|
|
93
105
|
DataType::Vector(dim) => write!(f, "Vector({dim})"),
|
|
106
|
+
DataType::Json => f.write_str("Json"),
|
|
94
107
|
DataType::None => f.write_str("None"),
|
|
95
108
|
DataType::Invalid => f.write_str("Invalid"),
|
|
96
109
|
}
|
|
@@ -143,6 +156,11 @@ pub struct HnswIndexEntry {
|
|
|
143
156
|
pub column_name: String,
|
|
144
157
|
/// The graph itself.
|
|
145
158
|
pub index: HnswIndex,
|
|
159
|
+
/// Phase 7d.3 — true iff a DELETE or UPDATE-on-vector-col has
|
|
160
|
+
/// invalidated the graph since the last rebuild. INSERT maintains
|
|
161
|
+
/// the graph incrementally and leaves this false. The next save
|
|
162
|
+
/// rebuilds dirty indexes from current rows before serializing.
|
|
163
|
+
pub needs_rebuild: bool,
|
|
146
164
|
}
|
|
147
165
|
|
|
148
166
|
impl Table {
|
|
@@ -178,6 +196,12 @@ impl Table {
|
|
|
178
196
|
// itself doesn't carry the dim — every stored Vec<f32>
|
|
179
197
|
// already has it via .len().
|
|
180
198
|
DataType::Vector(_dim) => Row::Vector(BTreeMap::new()),
|
|
199
|
+
// Phase 7e — JSON columns reuse Text storage (with
|
|
200
|
+
// INSERT-time validation that the bytes parse as JSON).
|
|
201
|
+
// No new Row variant; json_extract / json_type / etc.
|
|
202
|
+
// re-parse from text on demand. See `docs/phase-7-plan.md`
|
|
203
|
+
// Q3's scope-correction note for the storage choice.
|
|
204
|
+
DataType::Json => Row::Text(BTreeMap::new()),
|
|
181
205
|
DataType::Invalid | DataType::None => Row::None,
|
|
182
206
|
};
|
|
183
207
|
table_rows
|
|
@@ -535,7 +559,16 @@ impl Table {
|
|
|
535
559
|
(Row::Real(m), Value::Integer(v), _) => {
|
|
536
560
|
m.insert(rowid, *v as f32);
|
|
537
561
|
}
|
|
538
|
-
(Row::Text(m), Value::Text(v), _) => {
|
|
562
|
+
(Row::Text(m), Value::Text(v), dt) => {
|
|
563
|
+
// Phase 7e — UPDATE on a JSON column also validates
|
|
564
|
+
// the new text is well-formed JSON, mirroring INSERT.
|
|
565
|
+
if matches!(dt, DataType::Json) {
|
|
566
|
+
if let Err(e) = serde_json::from_str::<serde_json::Value>(v) {
|
|
567
|
+
return Err(SQLRiteError::General(format!(
|
|
568
|
+
"Type mismatch: expected JSON for column '{column}', got '{v}': {e}"
|
|
569
|
+
)));
|
|
570
|
+
}
|
|
571
|
+
}
|
|
539
572
|
m.insert(rowid, v.clone());
|
|
540
573
|
}
|
|
541
574
|
(Row::Bool(m), Value::Bool(v), _) => {
|
|
@@ -650,6 +683,14 @@ impl Table {
|
|
|
650
683
|
}
|
|
651
684
|
Value::Vector(parsed_vec)
|
|
652
685
|
}
|
|
686
|
+
DataType::Json => {
|
|
687
|
+
// JSON values stored as Text. UNIQUE on a JSON column
|
|
688
|
+
// compares the stored text representation
|
|
689
|
+
// verbatim — `{"a": 1}` and `{"a":1}` are distinct.
|
|
690
|
+
// Document this if anyone actually requests UNIQUE
|
|
691
|
+
// JSON; for MVP, treat-as-text is fine.
|
|
692
|
+
Value::Text(val.clone())
|
|
693
|
+
}
|
|
653
694
|
DataType::None | DataType::Invalid => {
|
|
654
695
|
return Err(SQLRiteError::Internal(format!(
|
|
655
696
|
"column '{name}' has an unsupported datatype"
|
|
@@ -779,6 +820,19 @@ impl Table {
|
|
|
779
820
|
Some(Value::Integer(parsed as i64))
|
|
780
821
|
}
|
|
781
822
|
Row::Text(tree) => {
|
|
823
|
+
// Phase 7e — JSON columns also reach here (they
|
|
824
|
+
// share Row::Text storage with TEXT columns).
|
|
825
|
+
// Validate the value parses as JSON before
|
|
826
|
+
// storing; otherwise we'd happily write
|
|
827
|
+
// `not-json-at-all` and only fail when
|
|
828
|
+
// json_extract tried to parse it later.
|
|
829
|
+
if matches!(self.columns[i].datatype, DataType::Json) && val != "Null" {
|
|
830
|
+
if let Err(e) = serde_json::from_str::<serde_json::Value>(&val) {
|
|
831
|
+
return Err(SQLRiteError::General(format!(
|
|
832
|
+
"Type mismatch: expected JSON for column '{key}', got '{val}': {e}"
|
|
833
|
+
)));
|
|
834
|
+
}
|
|
835
|
+
}
|
|
782
836
|
tree.insert(next_rowid, val.to_string());
|
|
783
837
|
// "Null" sentinel stays out of the index — it isn't a
|
|
784
838
|
// real user value.
|