sqlrite 0.1.14__tar.gz → 0.1.16__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. {sqlrite-0.1.14 → sqlrite-0.1.16}/Cargo.lock +7 -5
  2. {sqlrite-0.1.14 → sqlrite-0.1.16}/Cargo.toml +9 -1
  3. {sqlrite-0.1.14 → sqlrite-0.1.16}/PKG-INFO +1 -1
  4. {sqlrite-0.1.14 → sqlrite-0.1.16}/desktop/package.json +1 -1
  5. {sqlrite-0.1.14 → sqlrite-0.1.16}/docs/phase-7-plan.md +10 -6
  6. {sqlrite-0.1.14 → sqlrite-0.1.16}/docs/roadmap.md +2 -2
  7. {sqlrite-0.1.14 → sqlrite-0.1.16}/docs/supported-sql.md +33 -0
  8. {sqlrite-0.1.14 → sqlrite-0.1.16}/pyproject.toml +1 -1
  9. {sqlrite-0.1.14 → sqlrite-0.1.16}/sdk/python/Cargo.toml +1 -1
  10. {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/db/table.rs +55 -1
  11. {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/executor.rs +321 -39
  12. {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/hnsw.rs +44 -0
  13. {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/mod.rs +250 -11
  14. {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/pager/cell.rs +15 -0
  15. sqlrite-0.1.16/src/sql/pager/hnsw_cell.rs +258 -0
  16. {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/pager/mod.rs +392 -27
  17. {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/parser/create.rs +5 -0
  18. {sqlrite-0.1.14 → sqlrite-0.1.16}/.github/workflows/ci.yml +0 -0
  19. {sqlrite-0.1.14 → sqlrite-0.1.16}/.github/workflows/release-pr.yml +0 -0
  20. {sqlrite-0.1.14 → sqlrite-0.1.16}/.github/workflows/release.yml +0 -0
  21. {sqlrite-0.1.14 → sqlrite-0.1.16}/.github/workflows/rust.yml +0 -0
  22. {sqlrite-0.1.14 → sqlrite-0.1.16}/.gitignore +0 -0
  23. {sqlrite-0.1.14 → sqlrite-0.1.16}/CODE_OF_CONDUCT.md +0 -0
  24. {sqlrite-0.1.14 → sqlrite-0.1.16}/LICENSE +0 -0
  25. {sqlrite-0.1.14 → sqlrite-0.1.16}/MAINTAINERS +0 -0
  26. {sqlrite-0.1.14 → sqlrite-0.1.16}/Makefile +0 -0
  27. {sqlrite-0.1.14 → sqlrite-0.1.16}/README.md +0 -0
  28. {sqlrite-0.1.14 → sqlrite-0.1.16}/desktop/index.html +0 -0
  29. {sqlrite-0.1.14 → sqlrite-0.1.16}/desktop/package-lock.json +0 -0
  30. {sqlrite-0.1.14 → sqlrite-0.1.16}/desktop/src/App.svelte +0 -0
  31. {sqlrite-0.1.14 → sqlrite-0.1.16}/desktop/src/app.css +0 -0
  32. {sqlrite-0.1.14 → sqlrite-0.1.16}/desktop/src/main.ts +0 -0
  33. {sqlrite-0.1.14 → sqlrite-0.1.16}/desktop/src/vite-env.d.ts +0 -0
  34. {sqlrite-0.1.14 → sqlrite-0.1.16}/desktop/svelte.config.js +0 -0
  35. {sqlrite-0.1.14 → sqlrite-0.1.16}/desktop/tsconfig.json +0 -0
  36. {sqlrite-0.1.14 → sqlrite-0.1.16}/desktop/vite.config.ts +0 -0
  37. {sqlrite-0.1.14 → sqlrite-0.1.16}/docs/_index.md +0 -0
  38. {sqlrite-0.1.14 → sqlrite-0.1.16}/docs/architecture.md +0 -0
  39. {sqlrite-0.1.14 → sqlrite-0.1.16}/docs/design-decisions.md +0 -0
  40. {sqlrite-0.1.14 → sqlrite-0.1.16}/docs/desktop.md +0 -0
  41. {sqlrite-0.1.14 → sqlrite-0.1.16}/docs/embedding.md +0 -0
  42. {sqlrite-0.1.14 → sqlrite-0.1.16}/docs/file-format.md +0 -0
  43. {sqlrite-0.1.14 → sqlrite-0.1.16}/docs/getting-started.md +0 -0
  44. {sqlrite-0.1.14 → sqlrite-0.1.16}/docs/pager.md +0 -0
  45. {sqlrite-0.1.14 → sqlrite-0.1.16}/docs/release-plan.md +0 -0
  46. {sqlrite-0.1.14 → sqlrite-0.1.16}/docs/release-secrets.md +0 -0
  47. {sqlrite-0.1.14 → sqlrite-0.1.16}/docs/smoke-test.md +0 -0
  48. {sqlrite-0.1.14 → sqlrite-0.1.16}/docs/sql-engine.md +0 -0
  49. {sqlrite-0.1.14 → sqlrite-0.1.16}/docs/storage-model.md +0 -0
  50. {sqlrite-0.1.14 → sqlrite-0.1.16}/docs/usage.md +0 -0
  51. {sqlrite-0.1.14 → sqlrite-0.1.16}/examples/README.md +0 -0
  52. {sqlrite-0.1.14 → sqlrite-0.1.16}/examples/c/Makefile +0 -0
  53. {sqlrite-0.1.14 → sqlrite-0.1.16}/examples/c/hello.c +0 -0
  54. {sqlrite-0.1.14 → sqlrite-0.1.16}/examples/go/go.mod +0 -0
  55. {sqlrite-0.1.14 → sqlrite-0.1.16}/examples/go/hello.go +0 -0
  56. {sqlrite-0.1.14 → sqlrite-0.1.16}/examples/nodejs/hello.mjs +0 -0
  57. {sqlrite-0.1.14 → sqlrite-0.1.16}/examples/python/hello.py +0 -0
  58. {sqlrite-0.1.14 → sqlrite-0.1.16}/examples/rust/quickstart.rs +0 -0
  59. {sqlrite-0.1.14 → sqlrite-0.1.16}/examples/wasm/Makefile +0 -0
  60. {sqlrite-0.1.14 → sqlrite-0.1.16}/examples/wasm/index.html +0 -0
  61. {sqlrite-0.1.14 → sqlrite-0.1.16}/images/SQLRite - Desktop.png +0 -0
  62. {sqlrite-0.1.14 → sqlrite-0.1.16}/images/SQLRite Data Structures.png +0 -0
  63. {sqlrite-0.1.14 → sqlrite-0.1.16}/images/SQLRite Simple SQL Execution High Level Diagram.png +0 -0
  64. {sqlrite-0.1.14 → sqlrite-0.1.16}/images/SQLRite Simple SQL INSERT Execution High Level Diagram (Insert Row).png +0 -0
  65. {sqlrite-0.1.14 → sqlrite-0.1.16}/images/SQLRite Simple SQL INSERT Execution High Level Diagram.png +0 -0
  66. {sqlrite-0.1.14 → sqlrite-0.1.16}/images/SQLRite_logo.png +0 -0
  67. {sqlrite-0.1.14 → sqlrite-0.1.16}/images/architecture.png +0 -0
  68. {sqlrite-0.1.14 → sqlrite-0.1.16}/rust-toolchain.toml +0 -0
  69. {sqlrite-0.1.14 → sqlrite-0.1.16}/samples/AST.delete.example +0 -0
  70. {sqlrite-0.1.14 → sqlrite-0.1.16}/samples/AST.insert.exemple +0 -0
  71. {sqlrite-0.1.14 → sqlrite-0.1.16}/samples/AST.select.example +0 -0
  72. {sqlrite-0.1.14 → sqlrite-0.1.16}/samples/AST.update.example +0 -0
  73. {sqlrite-0.1.14 → sqlrite-0.1.16}/samples/CREATE TABLE sqlrite_schema.sql +0 -0
  74. {sqlrite-0.1.14 → sqlrite-0.1.16}/samples/CREATE_TABLE with duplicate.sql +0 -0
  75. {sqlrite-0.1.14 → sqlrite-0.1.16}/samples/CREATE_TABLE.sql +0 -0
  76. {sqlrite-0.1.14 → sqlrite-0.1.16}/samples/INSERT.sql +0 -0
  77. {sqlrite-0.1.14 → sqlrite-0.1.16}/scripts/bump-version.sh +0 -0
  78. {sqlrite-0.1.14 → sqlrite-0.1.16}/sdk/go/README.md +0 -0
  79. {sqlrite-0.1.14 → sqlrite-0.1.16}/sdk/go/conn.go +0 -0
  80. {sqlrite-0.1.14 → sqlrite-0.1.16}/sdk/go/go.mod +0 -0
  81. {sqlrite-0.1.14 → sqlrite-0.1.16}/sdk/go/rows.go +0 -0
  82. {sqlrite-0.1.14 → sqlrite-0.1.16}/sdk/go/sqlrite.go +0 -0
  83. {sqlrite-0.1.14 → sqlrite-0.1.16}/sdk/go/sqlrite_test.go +0 -0
  84. {sqlrite-0.1.14 → sqlrite-0.1.16}/sdk/go/stmt.go +0 -0
  85. {sqlrite-0.1.14 → sqlrite-0.1.16}/sdk/python/README.md +0 -0
  86. {sqlrite-0.1.14 → sqlrite-0.1.16}/sdk/python/src/lib.rs +0 -0
  87. {sqlrite-0.1.14 → sqlrite-0.1.16}/sdk/python/tests/test_sqlrite.py +0 -0
  88. {sqlrite-0.1.14 → sqlrite-0.1.16}/src/connection.rs +0 -0
  89. {sqlrite-0.1.14 → sqlrite-0.1.16}/src/error.rs +0 -0
  90. {sqlrite-0.1.14 → sqlrite-0.1.16}/src/lib.rs +0 -0
  91. {sqlrite-0.1.14 → sqlrite-0.1.16}/src/main.rs +0 -0
  92. {sqlrite-0.1.14 → sqlrite-0.1.16}/src/meta_command/mod.rs +0 -0
  93. {sqlrite-0.1.14 → sqlrite-0.1.16}/src/repl/mod.rs +0 -0
  94. {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/db/database.rs +0 -0
  95. {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/db/mod.rs +0 -0
  96. {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/db/secondary_index.rs +0 -0
  97. {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/pager/file.rs +0 -0
  98. {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/pager/header.rs +0 -0
  99. {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/pager/index_cell.rs +0 -0
  100. {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/pager/interior_page.rs +0 -0
  101. {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/pager/overflow.rs +0 -0
  102. {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/pager/page.rs +0 -0
  103. {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/pager/pager.rs +0 -0
  104. {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/pager/table_page.rs +0 -0
  105. {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/pager/varint.rs +0 -0
  106. {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/pager/wal.rs +0 -0
  107. {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/parser/insert.rs +0 -0
  108. {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/parser/mod.rs +0 -0
  109. {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/parser/select.rs +0 -0
  110. {sqlrite-0.1.14 → sqlrite-0.1.16}/src/sql/tokenizer.rs +0 -0
@@ -3511,6 +3511,7 @@ version = "1.0.149"
3511
3511
  source = "registry+https://github.com/rust-lang/crates.io-index"
3512
3512
  checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86"
3513
3513
  dependencies = [
3514
+ "indexmap 2.14.0",
3514
3515
  "itoa",
3515
3516
  "memchr",
3516
3517
  "serde",
@@ -3736,7 +3737,7 @@ dependencies = [
3736
3737
 
3737
3738
  [[package]]
3738
3739
  name = "sqlrite-desktop"
3739
- version = "0.1.14"
3740
+ version = "0.1.16"
3740
3741
  dependencies = [
3741
3742
  "serde",
3742
3743
  "serde_json",
@@ -3748,7 +3749,7 @@ dependencies = [
3748
3749
 
3749
3750
  [[package]]
3750
3751
  name = "sqlrite-engine"
3751
- version = "0.1.14"
3752
+ version = "0.1.16"
3752
3753
  dependencies = [
3753
3754
  "clap",
3754
3755
  "env_logger",
@@ -3757,13 +3758,14 @@ dependencies = [
3757
3758
  "prettytable-rs",
3758
3759
  "rustyline",
3759
3760
  "rustyline-derive",
3761
+ "serde_json",
3760
3762
  "sqlparser",
3761
3763
  "thiserror 2.0.18",
3762
3764
  ]
3763
3765
 
3764
3766
  [[package]]
3765
3767
  name = "sqlrite-ffi"
3766
- version = "0.1.14"
3768
+ version = "0.1.16"
3767
3769
  dependencies = [
3768
3770
  "cbindgen",
3769
3771
  "sqlrite-engine",
@@ -3771,7 +3773,7 @@ dependencies = [
3771
3773
 
3772
3774
  [[package]]
3773
3775
  name = "sqlrite-nodejs"
3774
- version = "0.1.14"
3776
+ version = "0.1.16"
3775
3777
  dependencies = [
3776
3778
  "napi",
3777
3779
  "napi-build",
@@ -3781,7 +3783,7 @@ dependencies = [
3781
3783
 
3782
3784
  [[package]]
3783
3785
  name = "sqlrite-python"
3784
- version = "0.1.14"
3786
+ version = "0.1.16"
3785
3787
  dependencies = [
3786
3788
  "pyo3",
3787
3789
  "sqlrite-engine",
@@ -27,7 +27,7 @@ resolver = "3"
27
27
  # `package =` key so the import name stays `sqlrite` internally:
28
28
  # sqlrite = { package = "sqlrite-engine", path = "…" }
29
29
  name = "sqlrite-engine"
30
- version = "0.1.14"
30
+ version = "0.1.16"
31
31
  authors = ["Joao Henrique Machado Silva <joaoh82@gmail.com>"]
32
32
  edition = "2024"
33
33
  rust-version = "1.85"
@@ -82,6 +82,14 @@ log = "0.4"
82
82
  sqlparser = "0.61"
83
83
  thiserror = "2.0"
84
84
  prettytable-rs = "0.10"
85
+ # Phase 7e: JSON column type. `serde_json` powers both the validation
86
+ # step at INSERT time (parse-and-discard to confirm the text is valid
87
+ # JSON) and the path extraction inside the json_extract / json_type
88
+ # / json_array_length / json_object_keys SQL functions. `preserve_order`
89
+ # keeps object keys in insertion order so json_object_keys output is
90
+ # stable; without it, BTreeMap-backed Maps would alphabetically sort,
91
+ # which surprises callers re-serializing the same JSON.
92
+ serde_json = { version = "1", features = ["preserve_order"] }
85
93
 
86
94
  # CLI-only deps (feature-gated). `optional = true` + the `cli`
87
95
  # feature above means these only land in the dep graph when
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sqlrite
3
- Version: 0.1.14
3
+ Version: 0.1.16
4
4
  Classifier: Development Status :: 3 - Alpha
5
5
  Classifier: Intended Audience :: Developers
6
6
  Classifier: License :: OSI Approved :: MIT License
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "sqlrite-desktop-frontend",
3
3
  "private": true,
4
- "version": "0.1.14",
4
+ "version": "0.1.16",
5
5
  "type": "module",
6
6
  "scripts": {
7
7
  "dev": "vite",
@@ -162,7 +162,7 @@ SELECT id, title FROM docs ORDER BY embedding <-> [0.1, ...] LIMIT 10;
162
162
  >
163
163
  > - **✅ 7d.1 — Pure HNSW algorithm** *(~700 LOC, shipped in v0.1.13).* `src/sql/hnsw.rs` standalone module: insert + search + layer assignment + beam search per layer + L2/cosine/dot distance dispatch. No SQL integration yet — vectors are passed in via a `get_vec` closure so the algorithm doesn't depend on table types. Tests verify recall@k ≥ 0.95 vs brute-force on randomly-generated vector sets; deterministic via a fixed RNG seed.
164
164
  > - **✅ 7d.2 — SQL integration** *(~500 LOC).* `CREATE INDEX … USING hnsw (col)` parser + engine, INSERT wiring (also calls `hnsw.insert()` incrementally), query optimizer hook (recognizes `ORDER BY vec_distance_l2(col, literal) LIMIT k` and probes the HNSW instead of full-scanning). HNSW lives in memory only at this point; the **CREATE INDEX SQL persists in `sqlrite_master` and reopen rebuilds the graph from current rows** — partial persistence ahead of 7d.3. DELETE/UPDATE on HNSW-indexed tables refused with helpful error pointing at 7d.3.
165
- > - **7d.3 — Persistence** *(~300 LOC).* Wire HNSW into the cell format: new `KIND_HNSW` cell tag, page-tree storage parallel to secondary indexes, save/reopen round-trip without rebuild. Also adds DELETE/UPDATE support since the persisted form gives us a natural rebuild trigger.
165
+ > - **✅ 7d.3 — Persistence** *(~600 LOC).* New `KIND_HNSW` cell tag and `HnswNodeCell` encoding (varint node_id + per-layer neighbor lists). Each HNSW index gets its own page tree parallel to secondary indexes. Open path loads cells directly into `HnswIndex::from_persisted_nodes` — no algorithm runs, exact bit-for-bit reproduction. Also unblocks DELETE / UPDATE on HNSW-indexed tables: those mark the index `needs_rebuild`, save rebuilds from current rows before staging. ~2× the original 300-LOC estimate because the cell encoding + tests + rebuild path together added more than expected.
166
166
  >
167
167
  > Each 7d.x ships as its own PR + release wave. The user-facing value lands at 7d.2; 7d.3 closes the persistence loop. 7d.1 is foundational but ships a tested algorithmic primitive on its own — useful as documentation of the engine's "from scratch" theme.
168
168
 
@@ -170,14 +170,14 @@ SELECT id, title FROM docs ORDER BY embedding <-> [0.1, ...] LIMIT 10;
170
170
 
171
171
  ---
172
172
 
173
- ### 7e — JSON column type + path queries
173
+ ### 7e — JSON column type + path queries
174
174
 
175
- **What.** New `JSON` data type. Store as bincoded `serde_json::Value` (or as a parsed AST see open questions). Support a small set of extraction functions:
175
+ **What.** New `JSON` data type. Stored as canonical UTF-8 text and validated at INSERT/UPDATE time via `serde_json::from_str`. The four path-extraction functions parse on demand:
176
176
 
177
177
  - `json_extract(col, '$.path')` — returns the value at the path, NULL if absent
178
- - `json_array_length(col, '$.path')` — array length, NULL for non-array
179
- - `json_object_keys(col, '$.path')` — TEXT array of keys, NULL for non-object
180
- - `json_type(col, '$.path')` — `'null'`, `'bool'`, `'number'`, `'string'`, `'array'`, `'object'`
178
+ - `json_array_length(col, '$.path')` — array length; NULL when the path does not resolve, error when the path resolves to a non-array value
179
+ - `json_object_keys(col, '$.path')` — JSON-array text of keys (see scope-correction note in Q3 below; SQLite's set-returning shape requires features we don't have)
180
+ - `json_type(col, '$.path')` — `'null'` / `'true'` / `'false'` / `'integer'` / `'real'` / `'text'` / `'array'` / `'object'` (matches SQLite JSON1 conventions)
181
181
 
182
182
  **Why this matters for AI-era specifically.** LLM tool-call outputs are JSON. RAG citation arrays are JSON. Agent scratchpads are JSON. Storing them as TEXT and re-parsing on every query is wasteful.
183
183
 
@@ -378,6 +378,10 @@ Q1–Q10 were resolved by the project owner on 2026-04-26. Each question keeps i
378
378
  ### Q3. JSON storage format
379
379
 
380
380
  > **Decided: bincoded `serde_json::Value`** for the MVP. JSON indexing remains a future phase.
381
+ >
382
+ > **Scope correction (2026-04-28, during 7e implementation):** Q3's "bincoded `Value`" answer was settled before we remembered that bincode was removed from the engine in Phase 3c (cell-based encoding replaced it). Rather than re-add bincode for one column type, **7e ships JSON-as-canonical-text** — same as SQLite's JSON1 extension. INSERT/UPDATE call `serde_json::from_str` to validate; the four `json_*` functions re-parse on demand. Trade-off: ~2× storage vs. binary, plus per-call parse overhead — both acceptable for MVP and consistent with SQLite's choice. JSONB-style binary indexing remains a future-phase optimization, but doesn't block 7e.
383
+ >
384
+ > One additional 7e divergence from the original plan: `json_object_keys` is supposed to be a *table-valued function* (one row per key, like SQLite's). We don't yet support set-returning functions in the executor, so 7e returns the keys as a JSON-array text instead. Caller can iterate via `json_array_length` + `json_extract` indexing. Documented in `docs/supported-sql.md` so users see the divergence up front.
381
385
 
382
386
  - **bincoded `serde_json::Value`:** one-line implementation, fast read/write, opaque on disk.
383
387
  - **Parsed AST as cell-encoded structure:** more code, but lets us index into JSON without a full deserialize.
@@ -473,8 +473,8 @@ Approved sub-phases (Q1–Q10 resolved):
473
473
  - **✅ 7a — `VECTOR(N)` column type** *(v0.1.10)* — dense fixed-dimension f32 storage via the existing cell encoding; format bumped to v4. Bracket-array literal syntax `[0.1, 0.2, …]` (Q7).
474
474
  - **✅ 7b — Distance functions** *(v0.1.11)* — `vec_distance_l2/cosine/dot`, plus the ORDER BY-expressions parser change so KNN queries work end-to-end. Operators (`<->` `<=>` `<#>`) deferred to **7b.1** — sqlparser doesn't parse them natively, contradicting Q6's "tiny parser change" assumption.
475
475
  - **✅ 7c — Brute-force KNN executor optimization** — bounded `BinaryHeap` of size k for `ORDER BY <expr> LIMIT k`. ~1.8× faster than full-sort at N=10k for cheap keys; bigger gains on expensive keys like `vec_distance_l2`.
476
- - **7d — HNSW ANN index** — split into 7d.1 ( algorithm), 7d.2 (SQL integration), 7d.3 (persistence). `CREATE INDEX … USING hnsw (col)`; fixed defaults `M=16, ef_construction=200, ef_search=50` (Q2).
477
- - **7e — JSON column type + path queries** — `JSON` data type stored as bincoded `serde_json::Value` (Q3); `json_extract` / `json_array_length` / `json_object_keys` / `json_type`.
476
+ - **✅ 7d — HNSW ANN index** — three PRs: 7d.1 (algorithm w/ recall@10 ≥ 0.95), 7d.2 (SQL integration + query optimizer), 7d.3 (persistence + DELETE/UPDATE rebuild). `CREATE INDEX … USING hnsw (col)`; fixed defaults `M=16, ef_construction=200, ef_search=50` (Q2). New `KIND_HNSW` cell tag.
477
+ - **✅ 7e — JSON column type + path queries** — `JSON` data type stored as canonical text (validated via `serde_json::from_str` at INSERT/UPDATE time; SQLite-JSON1-style — Q3 scope correction since bincode was removed in Phase 3c). Functions: `json_extract` / `json_type` / `json_array_length` / `json_object_keys`. Path subset supports `$`, `.key`, `[N]`, chained. `json_object_keys` returns a JSON-array text rather than a table-valued result (no set-returning functions in the executor yet).
478
478
  - **7f — ~~Full-text search with BM25~~** — **deferred to Phase 8** (Q1).
479
479
  - **7g — `ask()` API across the product surface** — natural-language → SQL via Anthropic API (Q4), Anthropic-first then OpenAI + Ollama follow-ups. Foundational 7g.1 introduces a new `sqlrite-ask` crate (Q10 — separate crate, not a feature flag). Thin per-product adapters in 7g.2-7g.8 cover REPL, desktop, Python, Node.js, Go, WASM (JS-callback shape per Q9), and the MCP `ask` tool.
480
480
  - **7h — MCP server adapter** — new `sqlrite-mcp` binary, hand-rolled JSON-RPC + tool framework (Q5).
@@ -35,6 +35,7 @@ CREATE TABLE <name> (<col> <type> [column_constraint]* [, ...]);
35
35
  | `REAL`, `FLOAT`, `DOUBLE`, `DECIMAL` | Real (f64) | Double-precision; `DECIMAL(p,s)` precision/scale parsed and ignored |
36
36
  | `BOOLEAN` | Boolean | Stored compactly in the null bitmap's sibling bits; accepts `TRUE` / `FALSE` |
37
37
  | `VECTOR(N)` | Vector (Vec\<f32\>, fixed dim N) | **Phase 7a.** Dense f32 array of fixed dimension. `N` is required and must be ≥ 1. Inserted as bracket-array literals `[0.1, 0.2, ...]`. Dimension is enforced at INSERT/UPDATE; mismatched-length values are rejected. Distance functions and ANN indexing land in 7b–7d. |
38
+ | `JSON`, `JSONB` | Text (canonical JSON) | **Phase 7e.** JSON document stored as canonical UTF-8 text — same as SQLite's JSON1 extension (Q3 scope correction since bincode was removed in Phase 3c). INSERT/UPDATE values are validated via `serde_json::from_str`; malformed JSON is rejected with a typed error and no row is written. `JSONB` is accepted as an alias for `JSON` (PostgreSQL convention; both store as text in our case). Path-style read access via the `json_extract` / `json_type` / `json_array_length` / `json_object_keys` functions below. |
38
39
 
39
40
  ### Column constraints
40
41
 
@@ -203,6 +204,10 @@ Same set accepted by `INSERT` (see [Value literals accepted](#value-literals-acc
203
204
  | `vec_distance_l2(a, b)` | Real (f64) | Euclidean distance √Σ(aᵢ−bᵢ)². Smaller is closer. *(Phase 7b)* |
204
205
  | `vec_distance_cosine(a, b)` | Real (f64) | Cosine distance `1 − (a·b) / (‖a‖·‖b‖)`. Errors on zero-magnitude vectors (cosine is undefined). Smaller is closer; identical vectors return 0.0, orthogonal vectors return 1.0. *(Phase 7b)* |
205
206
  | `vec_distance_dot(a, b)` | Real (f64) | Negated dot product `−(a·b)`. Negation makes "smaller is closer" consistent with the others. For unit-norm vectors equals `vec_distance_cosine(a, b) - 1`. *(Phase 7b)* |
207
+ | `json_extract(json[, path])` | Depends on the resolved node | Walks `path` over `json` and returns the resolved value coerced to the closest SQL type — JSON strings → `TEXT`, numbers → `INTEGER` / `REAL`, booleans → `BOOLEAN`, `null` → `NULL`, and composites (`object` / `array`) → their canonical JSON-text serialization. Path defaults to `$` when only one argument is supplied. A path that doesn't resolve returns `NULL`. *(Phase 7e)* |
208
+ | `json_type(json[, path])` | Text | One of `'object'`, `'array'`, `'string'`, `'integer'`, `'real'`, `'true'`, `'false'`, `'null'`. Path defaults to `$`. *(Phase 7e)* |
209
+ | `json_array_length(json[, path])` | Integer | Number of elements in the JSON array at `path`. Errors if the resolved node is not an array. Path defaults to `$`. *(Phase 7e)* |
210
+ | `json_object_keys(json[, path])` | Text (JSON-array string) | Returns the object's keys as a JSON-array text in insertion order — e.g. `'["a","b","c"]'`. Path defaults to `$`. **Diverges from SQLite**, which exposes keys as a *table-valued* function (one row per key). SQLRite has no set-returning functions yet, so we return the keys as a JSON array and let callers parse if needed. *(Phase 7e)* |
206
211
 
207
212
  All three vector-distance functions take exactly two arguments, both of which must be vectors of the same dimension. Either argument can be a column reference (`embedding`), a bracket-array literal (`[0.1, 0.2, 0.3]`), or any sub-expression that evaluates to a vector. Mismatched dimensions error with `vector dimensions don't match (lhs=N, rhs=M)`.
208
213
 
@@ -216,6 +221,34 @@ LIMIT 10;
216
221
 
217
222
  > **Operator forms (`<->` `<=>` `<#>`) are not supported yet.** They're the de facto pgvector convention but blocked on a sqlparser limitation — will land as a Phase 7b.1 follow-up. Use the function-call form for now.
218
223
 
224
+ #### JSON path syntax
225
+
226
+ The `json_*` functions accept a string path argument with a small subset of JSONPath:
227
+
228
+ | Token | Meaning |
229
+ |---|---|
230
+ | `$` | Root of the document (default if path is omitted). |
231
+ | `.key` | Object member access. Bare keys only — no quoted-string variant yet. |
232
+ | `[N]` | Array index (0-based). Negative indices are not supported. |
233
+
234
+ Tokens chain naturally: `$.user.tags[0]`, `$[2].name`, `$.matrix[1][0]`. A malformed path (unbalanced brackets, missing `$`) errors at runtime with a typed message; a well-formed path that simply doesn't resolve returns `NULL`.
235
+
236
+ ```sql
237
+ CREATE TABLE events (id INTEGER PRIMARY KEY, payload JSON);
238
+
239
+ INSERT INTO events (payload) VALUES
240
+ ('{"user": {"name": "alice", "tags": ["admin", "ops"]}, "score": 42}'),
241
+ ('{"user": {"name": "bob", "tags": []}, "score": 7}');
242
+
243
+ SELECT id,
244
+ json_extract(payload, '$.user.name') AS name,
245
+ json_extract(payload, '$.user.tags[0]') AS first_tag,
246
+ json_array_length(payload, '$.user.tags') AS tag_count,
247
+ json_type(payload, '$.score') AS score_type
248
+ FROM events
249
+ WHERE json_extract(payload, '$.user.name') = 'alice';
250
+ ```
251
+
219
252
  ### Type coercion in arithmetic
220
253
 
221
254
  - **Integer-only ops stay integer.** `1 + 2` → `3` (Integer).
@@ -4,7 +4,7 @@ build-backend = "maturin"
4
4
 
5
5
  [project]
6
6
  name = "sqlrite"
7
- version = "0.1.14"
7
+ version = "0.1.16"
8
8
  description = "Python bindings for SQLRite — a small, embeddable SQLite clone written in Rust."
9
9
  authors = [{ name = "Joao Henrique Machado Silva", email = "joaoh82@gmail.com" }]
10
10
  license = { text = "MIT" }
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "sqlrite-python"
3
- version = "0.1.14"
3
+ version = "0.1.16"
4
4
  authors = ["Joao Henrique Machado Silva <joaoh82@gmail.com>"]
5
5
  edition = "2024"
6
6
  rust-version = "1.85"
@@ -26,6 +26,16 @@ pub enum DataType {
26
26
  /// declared dimension; every value stored in the column must have
27
27
  /// exactly that many elements.
28
28
  Vector(usize),
29
+ /// Phase 7e — JSON column. Stored as canonical UTF-8 text (matches
30
+ /// SQLite's JSON1 extension), validated at INSERT/UPDATE time. The
31
+ /// `json_extract` family of functions parses on demand and returns
32
+ /// either a primitive `Value` (Integer / Real / Text / Bool / Null)
33
+ /// or a Text value carrying the JSON-encoded sub-object/array.
34
+ /// Q3 originally specified `bincoded serde_json::Value`, but bincode
35
+ /// was removed from the engine in Phase 3c — see the scope-correction
36
+ /// note in `docs/phase-7-plan.md` for the rationale on switching to
37
+ /// text storage.
38
+ Json,
29
39
  None,
30
40
  Invalid,
31
41
  }
@@ -44,6 +54,7 @@ impl DataType {
44
54
  "text" => DataType::Text,
45
55
  "real" => DataType::Real,
46
56
  "bool" => DataType::Bool,
57
+ "json" => DataType::Json,
47
58
  "none" => DataType::None,
48
59
  other if other.starts_with("vector(") && other.ends_with(')') => {
49
60
  // Strip the `vector(` prefix and trailing `)`, parse what's
@@ -77,6 +88,7 @@ impl DataType {
77
88
  DataType::Real => "Real".to_string(),
78
89
  DataType::Bool => "Bool".to_string(),
79
90
  DataType::Vector(dim) => format!("vector({dim})"),
91
+ DataType::Json => "Json".to_string(),
80
92
  DataType::None => "None".to_string(),
81
93
  DataType::Invalid => "Invalid".to_string(),
82
94
  }
@@ -91,6 +103,7 @@ impl fmt::Display for DataType {
91
103
  DataType::Real => f.write_str("Real"),
92
104
  DataType::Bool => f.write_str("Boolean"),
93
105
  DataType::Vector(dim) => write!(f, "Vector({dim})"),
106
+ DataType::Json => f.write_str("Json"),
94
107
  DataType::None => f.write_str("None"),
95
108
  DataType::Invalid => f.write_str("Invalid"),
96
109
  }
@@ -143,6 +156,11 @@ pub struct HnswIndexEntry {
143
156
  pub column_name: String,
144
157
  /// The graph itself.
145
158
  pub index: HnswIndex,
159
+ /// Phase 7d.3 — true iff a DELETE or UPDATE-on-vector-col has
160
+ /// invalidated the graph since the last rebuild. INSERT maintains
161
+ /// the graph incrementally and leaves this false. The next save
162
+ /// rebuilds dirty indexes from current rows before serializing.
163
+ pub needs_rebuild: bool,
146
164
  }
147
165
 
148
166
  impl Table {
@@ -178,6 +196,12 @@ impl Table {
178
196
  // itself doesn't carry the dim — every stored Vec<f32>
179
197
  // already has it via .len().
180
198
  DataType::Vector(_dim) => Row::Vector(BTreeMap::new()),
199
+ // Phase 7e — JSON columns reuse Text storage (with
200
+ // INSERT/UPDATE-time validation that the bytes parse as JSON).
201
+ // No new Row variant; json_extract / json_type / etc.
202
+ // re-parse from text on demand. See `docs/phase-7-plan.md`
203
+ // Q3's scope-correction note for the storage choice.
204
+ DataType::Json => Row::Text(BTreeMap::new()),
181
205
  DataType::Invalid | DataType::None => Row::None,
182
206
  };
183
207
  table_rows
@@ -535,7 +559,16 @@ impl Table {
535
559
  (Row::Real(m), Value::Integer(v), _) => {
536
560
  m.insert(rowid, *v as f32);
537
561
  }
538
- (Row::Text(m), Value::Text(v), _) => {
562
+ (Row::Text(m), Value::Text(v), dt) => {
563
+ // Phase 7e — UPDATE on a JSON column also validates
564
+ // the new text is well-formed JSON, mirroring INSERT.
565
+ if matches!(dt, DataType::Json) {
566
+ if let Err(e) = serde_json::from_str::<serde_json::Value>(v) {
567
+ return Err(SQLRiteError::General(format!(
568
+ "Type mismatch: expected JSON for column '{column}', got '{v}': {e}"
569
+ )));
570
+ }
571
+ }
539
572
  m.insert(rowid, v.clone());
540
573
  }
541
574
  (Row::Bool(m), Value::Bool(v), _) => {
@@ -650,6 +683,14 @@ impl Table {
650
683
  }
651
684
  Value::Vector(parsed_vec)
652
685
  }
686
+ DataType::Json => {
687
+ // JSON values stored as Text. UNIQUE on a JSON column
688
+ // compares the canonical text representation
689
+ // verbatim — `{"a": 1}` and `{"a":1}` are distinct.
690
+ // Document this if anyone actually requests UNIQUE
691
+ // JSON; for MVP, treat-as-text is fine.
692
+ Value::Text(val.clone())
693
+ }
653
694
  DataType::None | DataType::Invalid => {
654
695
  return Err(SQLRiteError::Internal(format!(
655
696
  "column '{name}' has an unsupported datatype"
@@ -779,6 +820,19 @@ impl Table {
779
820
  Some(Value::Integer(parsed as i64))
780
821
  }
781
822
  Row::Text(tree) => {
823
+ // Phase 7e — JSON columns also reach here (they
824
+ // share Row::Text storage with TEXT columns).
825
+ // Validate the value parses as JSON before
826
+ // storing; otherwise we'd happily write
827
+ // `not-json-at-all` and only fail when
828
+ // json_extract tried to parse it later.
829
+ if matches!(self.columns[i].datatype, DataType::Json) && val != "Null" {
830
+ if let Err(e) = serde_json::from_str::<serde_json::Value>(&val) {
831
+ return Err(SQLRiteError::General(format!(
832
+ "Type mismatch: expected JSON for column '{key}', got '{val}': {e}"
833
+ )));
834
+ }
835
+ }
782
836
  tree.insert(next_rowid, val.to_string());
783
837
  // "Null" sentinel stays out of the index — it isn't a
784
838
  // real user value.