sqlrite 0.1.15__tar.gz → 0.1.16__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. {sqlrite-0.1.15 → sqlrite-0.1.16}/Cargo.lock +7 -5
  2. {sqlrite-0.1.15 → sqlrite-0.1.16}/Cargo.toml +9 -1
  3. {sqlrite-0.1.15 → sqlrite-0.1.16}/PKG-INFO +1 -1
  4. {sqlrite-0.1.15 → sqlrite-0.1.16}/desktop/package.json +1 -1
  5. {sqlrite-0.1.15 → sqlrite-0.1.16}/docs/phase-7-plan.md +9 -5
  6. {sqlrite-0.1.15 → sqlrite-0.1.16}/docs/roadmap.md +1 -1
  7. {sqlrite-0.1.15 → sqlrite-0.1.16}/docs/supported-sql.md +33 -0
  8. {sqlrite-0.1.15 → sqlrite-0.1.16}/pyproject.toml +1 -1
  9. {sqlrite-0.1.15 → sqlrite-0.1.16}/sdk/python/Cargo.toml +1 -1
  10. {sqlrite-0.1.15 → sqlrite-0.1.16}/src/sql/db/table.rs +50 -1
  11. {sqlrite-0.1.15 → sqlrite-0.1.16}/src/sql/executor.rs +292 -0
  12. {sqlrite-0.1.15 → sqlrite-0.1.16}/src/sql/mod.rs +228 -0
  13. {sqlrite-0.1.15 → sqlrite-0.1.16}/src/sql/pager/mod.rs +55 -0
  14. {sqlrite-0.1.15 → sqlrite-0.1.16}/src/sql/parser/create.rs +5 -0
  15. {sqlrite-0.1.15 → sqlrite-0.1.16}/.github/workflows/ci.yml +0 -0
  16. {sqlrite-0.1.15 → sqlrite-0.1.16}/.github/workflows/release-pr.yml +0 -0
  17. {sqlrite-0.1.15 → sqlrite-0.1.16}/.github/workflows/release.yml +0 -0
  18. {sqlrite-0.1.15 → sqlrite-0.1.16}/.github/workflows/rust.yml +0 -0
  19. {sqlrite-0.1.15 → sqlrite-0.1.16}/.gitignore +0 -0
  20. {sqlrite-0.1.15 → sqlrite-0.1.16}/CODE_OF_CONDUCT.md +0 -0
  21. {sqlrite-0.1.15 → sqlrite-0.1.16}/LICENSE +0 -0
  22. {sqlrite-0.1.15 → sqlrite-0.1.16}/MAINTAINERS +0 -0
  23. {sqlrite-0.1.15 → sqlrite-0.1.16}/Makefile +0 -0
  24. {sqlrite-0.1.15 → sqlrite-0.1.16}/README.md +0 -0
  25. {sqlrite-0.1.15 → sqlrite-0.1.16}/desktop/index.html +0 -0
  26. {sqlrite-0.1.15 → sqlrite-0.1.16}/desktop/package-lock.json +0 -0
  27. {sqlrite-0.1.15 → sqlrite-0.1.16}/desktop/src/App.svelte +0 -0
  28. {sqlrite-0.1.15 → sqlrite-0.1.16}/desktop/src/app.css +0 -0
  29. {sqlrite-0.1.15 → sqlrite-0.1.16}/desktop/src/main.ts +0 -0
  30. {sqlrite-0.1.15 → sqlrite-0.1.16}/desktop/src/vite-env.d.ts +0 -0
  31. {sqlrite-0.1.15 → sqlrite-0.1.16}/desktop/svelte.config.js +0 -0
  32. {sqlrite-0.1.15 → sqlrite-0.1.16}/desktop/tsconfig.json +0 -0
  33. {sqlrite-0.1.15 → sqlrite-0.1.16}/desktop/vite.config.ts +0 -0
  34. {sqlrite-0.1.15 → sqlrite-0.1.16}/docs/_index.md +0 -0
  35. {sqlrite-0.1.15 → sqlrite-0.1.16}/docs/architecture.md +0 -0
  36. {sqlrite-0.1.15 → sqlrite-0.1.16}/docs/design-decisions.md +0 -0
  37. {sqlrite-0.1.15 → sqlrite-0.1.16}/docs/desktop.md +0 -0
  38. {sqlrite-0.1.15 → sqlrite-0.1.16}/docs/embedding.md +0 -0
  39. {sqlrite-0.1.15 → sqlrite-0.1.16}/docs/file-format.md +0 -0
  40. {sqlrite-0.1.15 → sqlrite-0.1.16}/docs/getting-started.md +0 -0
  41. {sqlrite-0.1.15 → sqlrite-0.1.16}/docs/pager.md +0 -0
  42. {sqlrite-0.1.15 → sqlrite-0.1.16}/docs/release-plan.md +0 -0
  43. {sqlrite-0.1.15 → sqlrite-0.1.16}/docs/release-secrets.md +0 -0
  44. {sqlrite-0.1.15 → sqlrite-0.1.16}/docs/smoke-test.md +0 -0
  45. {sqlrite-0.1.15 → sqlrite-0.1.16}/docs/sql-engine.md +0 -0
  46. {sqlrite-0.1.15 → sqlrite-0.1.16}/docs/storage-model.md +0 -0
  47. {sqlrite-0.1.15 → sqlrite-0.1.16}/docs/usage.md +0 -0
  48. {sqlrite-0.1.15 → sqlrite-0.1.16}/examples/README.md +0 -0
  49. {sqlrite-0.1.15 → sqlrite-0.1.16}/examples/c/Makefile +0 -0
  50. {sqlrite-0.1.15 → sqlrite-0.1.16}/examples/c/hello.c +0 -0
  51. {sqlrite-0.1.15 → sqlrite-0.1.16}/examples/go/go.mod +0 -0
  52. {sqlrite-0.1.15 → sqlrite-0.1.16}/examples/go/hello.go +0 -0
  53. {sqlrite-0.1.15 → sqlrite-0.1.16}/examples/nodejs/hello.mjs +0 -0
  54. {sqlrite-0.1.15 → sqlrite-0.1.16}/examples/python/hello.py +0 -0
  55. {sqlrite-0.1.15 → sqlrite-0.1.16}/examples/rust/quickstart.rs +0 -0
  56. {sqlrite-0.1.15 → sqlrite-0.1.16}/examples/wasm/Makefile +0 -0
  57. {sqlrite-0.1.15 → sqlrite-0.1.16}/examples/wasm/index.html +0 -0
  58. {sqlrite-0.1.15 → sqlrite-0.1.16}/images/SQLRite - Desktop.png +0 -0
  59. {sqlrite-0.1.15 → sqlrite-0.1.16}/images/SQLRite Data Structures.png +0 -0
  60. {sqlrite-0.1.15 → sqlrite-0.1.16}/images/SQLRite Simple SQL Execution High Level Diagram.png +0 -0
  61. {sqlrite-0.1.15 → sqlrite-0.1.16}/images/SQLRite Simple SQL INSERT Execution High Level Diagram (Insert Row).png +0 -0
  62. {sqlrite-0.1.15 → sqlrite-0.1.16}/images/SQLRite Simple SQL INSERT Execution High Level Diagram.png +0 -0
  63. {sqlrite-0.1.15 → sqlrite-0.1.16}/images/SQLRite_logo.png +0 -0
  64. {sqlrite-0.1.15 → sqlrite-0.1.16}/images/architecture.png +0 -0
  65. {sqlrite-0.1.15 → sqlrite-0.1.16}/rust-toolchain.toml +0 -0
  66. {sqlrite-0.1.15 → sqlrite-0.1.16}/samples/AST.delete.example +0 -0
  67. {sqlrite-0.1.15 → sqlrite-0.1.16}/samples/AST.insert.exemple +0 -0
  68. {sqlrite-0.1.15 → sqlrite-0.1.16}/samples/AST.select.example +0 -0
  69. {sqlrite-0.1.15 → sqlrite-0.1.16}/samples/AST.update.example +0 -0
  70. {sqlrite-0.1.15 → sqlrite-0.1.16}/samples/CREATE TABLE sqlrite_schema.sql +0 -0
  71. {sqlrite-0.1.15 → sqlrite-0.1.16}/samples/CREATE_TABLE with duplicate.sql +0 -0
  72. {sqlrite-0.1.15 → sqlrite-0.1.16}/samples/CREATE_TABLE.sql +0 -0
  73. {sqlrite-0.1.15 → sqlrite-0.1.16}/samples/INSERT.sql +0 -0
  74. {sqlrite-0.1.15 → sqlrite-0.1.16}/scripts/bump-version.sh +0 -0
  75. {sqlrite-0.1.15 → sqlrite-0.1.16}/sdk/go/README.md +0 -0
  76. {sqlrite-0.1.15 → sqlrite-0.1.16}/sdk/go/conn.go +0 -0
  77. {sqlrite-0.1.15 → sqlrite-0.1.16}/sdk/go/go.mod +0 -0
  78. {sqlrite-0.1.15 → sqlrite-0.1.16}/sdk/go/rows.go +0 -0
  79. {sqlrite-0.1.15 → sqlrite-0.1.16}/sdk/go/sqlrite.go +0 -0
  80. {sqlrite-0.1.15 → sqlrite-0.1.16}/sdk/go/sqlrite_test.go +0 -0
  81. {sqlrite-0.1.15 → sqlrite-0.1.16}/sdk/go/stmt.go +0 -0
  82. {sqlrite-0.1.15 → sqlrite-0.1.16}/sdk/python/README.md +0 -0
  83. {sqlrite-0.1.15 → sqlrite-0.1.16}/sdk/python/src/lib.rs +0 -0
  84. {sqlrite-0.1.15 → sqlrite-0.1.16}/sdk/python/tests/test_sqlrite.py +0 -0
  85. {sqlrite-0.1.15 → sqlrite-0.1.16}/src/connection.rs +0 -0
  86. {sqlrite-0.1.15 → sqlrite-0.1.16}/src/error.rs +0 -0
  87. {sqlrite-0.1.15 → sqlrite-0.1.16}/src/lib.rs +0 -0
  88. {sqlrite-0.1.15 → sqlrite-0.1.16}/src/main.rs +0 -0
  89. {sqlrite-0.1.15 → sqlrite-0.1.16}/src/meta_command/mod.rs +0 -0
  90. {sqlrite-0.1.15 → sqlrite-0.1.16}/src/repl/mod.rs +0 -0
  91. {sqlrite-0.1.15 → sqlrite-0.1.16}/src/sql/db/database.rs +0 -0
  92. {sqlrite-0.1.15 → sqlrite-0.1.16}/src/sql/db/mod.rs +0 -0
  93. {sqlrite-0.1.15 → sqlrite-0.1.16}/src/sql/db/secondary_index.rs +0 -0
  94. {sqlrite-0.1.15 → sqlrite-0.1.16}/src/sql/hnsw.rs +0 -0
  95. {sqlrite-0.1.15 → sqlrite-0.1.16}/src/sql/pager/cell.rs +0 -0
  96. {sqlrite-0.1.15 → sqlrite-0.1.16}/src/sql/pager/file.rs +0 -0
  97. {sqlrite-0.1.15 → sqlrite-0.1.16}/src/sql/pager/header.rs +0 -0
  98. {sqlrite-0.1.15 → sqlrite-0.1.16}/src/sql/pager/hnsw_cell.rs +0 -0
  99. {sqlrite-0.1.15 → sqlrite-0.1.16}/src/sql/pager/index_cell.rs +0 -0
  100. {sqlrite-0.1.15 → sqlrite-0.1.16}/src/sql/pager/interior_page.rs +0 -0
  101. {sqlrite-0.1.15 → sqlrite-0.1.16}/src/sql/pager/overflow.rs +0 -0
  102. {sqlrite-0.1.15 → sqlrite-0.1.16}/src/sql/pager/page.rs +0 -0
  103. {sqlrite-0.1.15 → sqlrite-0.1.16}/src/sql/pager/pager.rs +0 -0
  104. {sqlrite-0.1.15 → sqlrite-0.1.16}/src/sql/pager/table_page.rs +0 -0
  105. {sqlrite-0.1.15 → sqlrite-0.1.16}/src/sql/pager/varint.rs +0 -0
  106. {sqlrite-0.1.15 → sqlrite-0.1.16}/src/sql/pager/wal.rs +0 -0
  107. {sqlrite-0.1.15 → sqlrite-0.1.16}/src/sql/parser/insert.rs +0 -0
  108. {sqlrite-0.1.15 → sqlrite-0.1.16}/src/sql/parser/mod.rs +0 -0
  109. {sqlrite-0.1.15 → sqlrite-0.1.16}/src/sql/parser/select.rs +0 -0
  110. {sqlrite-0.1.15 → sqlrite-0.1.16}/src/sql/tokenizer.rs +0 -0
@@ -3511,6 +3511,7 @@ version = "1.0.149"
3511
3511
  source = "registry+https://github.com/rust-lang/crates.io-index"
3512
3512
  checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86"
3513
3513
  dependencies = [
3514
+ "indexmap 2.14.0",
3514
3515
  "itoa",
3515
3516
  "memchr",
3516
3517
  "serde",
@@ -3736,7 +3737,7 @@ dependencies = [
3736
3737
 
3737
3738
  [[package]]
3738
3739
  name = "sqlrite-desktop"
3739
- version = "0.1.15"
3740
+ version = "0.1.16"
3740
3741
  dependencies = [
3741
3742
  "serde",
3742
3743
  "serde_json",
@@ -3748,7 +3749,7 @@ dependencies = [
3748
3749
 
3749
3750
  [[package]]
3750
3751
  name = "sqlrite-engine"
3751
- version = "0.1.15"
3752
+ version = "0.1.16"
3752
3753
  dependencies = [
3753
3754
  "clap",
3754
3755
  "env_logger",
@@ -3757,13 +3758,14 @@ dependencies = [
3757
3758
  "prettytable-rs",
3758
3759
  "rustyline",
3759
3760
  "rustyline-derive",
3761
+ "serde_json",
3760
3762
  "sqlparser",
3761
3763
  "thiserror 2.0.18",
3762
3764
  ]
3763
3765
 
3764
3766
  [[package]]
3765
3767
  name = "sqlrite-ffi"
3766
- version = "0.1.15"
3768
+ version = "0.1.16"
3767
3769
  dependencies = [
3768
3770
  "cbindgen",
3769
3771
  "sqlrite-engine",
@@ -3771,7 +3773,7 @@ dependencies = [
3771
3773
 
3772
3774
  [[package]]
3773
3775
  name = "sqlrite-nodejs"
3774
- version = "0.1.15"
3776
+ version = "0.1.16"
3775
3777
  dependencies = [
3776
3778
  "napi",
3777
3779
  "napi-build",
@@ -3781,7 +3783,7 @@ dependencies = [
3781
3783
 
3782
3784
  [[package]]
3783
3785
  name = "sqlrite-python"
3784
- version = "0.1.15"
3786
+ version = "0.1.16"
3785
3787
  dependencies = [
3786
3788
  "pyo3",
3787
3789
  "sqlrite-engine",
@@ -27,7 +27,7 @@ resolver = "3"
27
27
  # `package =` key so the import name stays `sqlrite` internally:
28
28
  # sqlrite = { package = "sqlrite-engine", path = "…" }
29
29
  name = "sqlrite-engine"
30
- version = "0.1.15"
30
+ version = "0.1.16"
31
31
  authors = ["Joao Henrique Machado Silva <joaoh82@gmail.com>"]
32
32
  edition = "2024"
33
33
  rust-version = "1.85"
@@ -82,6 +82,14 @@ log = "0.4"
82
82
  sqlparser = "0.61"
83
83
  thiserror = "2.0"
84
84
  prettytable-rs = "0.10"
85
+ # Phase 7e: JSON column type. `serde_json` powers both the validation
86
+ # step at INSERT/UPDATE time (parse-and-discard to confirm the text is valid
87
+ # JSON) and the path extraction inside the json_extract / json_type
88
+ # / json_array_length / json_object_keys SQL functions. `preserve_order`
89
+ # keeps object keys in insertion order so json_object_keys output is
90
+ # stable; without it, BTreeMap-backed Maps would alphabetically sort,
91
+ # which surprises callers re-serializing the same JSON.
92
+ serde_json = { version = "1", features = ["preserve_order"] }
85
93
 
86
94
  # CLI-only deps (feature-gated). `optional = true` + the `cli`
87
95
  # feature above means these only land in the dep graph when
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sqlrite
3
- Version: 0.1.15
3
+ Version: 0.1.16
4
4
  Classifier: Development Status :: 3 - Alpha
5
5
  Classifier: Intended Audience :: Developers
6
6
  Classifier: License :: OSI Approved :: MIT License
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "sqlrite-desktop-frontend",
3
3
  "private": true,
4
- "version": "0.1.15",
4
+ "version": "0.1.16",
5
5
  "type": "module",
6
6
  "scripts": {
7
7
  "dev": "vite",
@@ -170,14 +170,14 @@ SELECT id, title FROM docs ORDER BY embedding <-> [0.1, ...] LIMIT 10;
170
170
 
171
171
  ---
172
172
 
173
- ### 7e — JSON column type + path queries
173
+ ### 7e — JSON column type + path queries
174
174
 
175
- **What.** New `JSON` data type. Store as bincoded `serde_json::Value` (or as a parsed AST see open questions). Support a small set of extraction functions:
175
+ **What.** New `JSON` data type. Stored as canonical UTF-8 text and validated at INSERT/UPDATE time via `serde_json::from_str`. The four path-extraction functions parse on demand:
176
176
 
177
177
  - `json_extract(col, '$.path')` — returns the value at the path, NULL if absent
178
- - `json_array_length(col, '$.path')` — array length, NULL for non-array
179
- - `json_object_keys(col, '$.path')` — TEXT array of keys, NULL for non-object
180
- - `json_type(col, '$.path')` — `'null'`, `'bool'`, `'number'`, `'string'`, `'array'`, `'object'`
178
+ - `json_array_length(col, '$.path')` — array length; NULL when the path does not resolve, error when the path resolves to a non-array node
179
+ - `json_object_keys(col, '$.path')` — JSON-array text of keys (see scope-correction note in Q3 below; SQLite's set-returning shape requires features we don't have)
180
+ - `json_type(col, '$.path')` — `'null'` / `'true'` / `'false'` / `'integer'` / `'real'` / `'text'` / `'array'` / `'object'` (matches SQLite JSON1 conventions)
181
181
 
182
182
  **Why this matters for AI-era specifically.** LLM tool-call outputs are JSON. RAG citation arrays are JSON. Agent scratchpads are JSON. Storing them as TEXT and re-parsing on every query is wasteful.
183
183
 
@@ -378,6 +378,10 @@ Q1–Q10 were resolved by the project owner on 2026-04-26. Each question keeps i
378
378
  ### Q3. JSON storage format
379
379
 
380
380
  > **Decided: bincoded `serde_json::Value`** for the MVP. JSON indexing remains a future phase.
381
+ >
382
+ > **Scope correction (2026-04-28, during 7e implementation):** Q3's "bincoded `Value`" answer was settled before remembering that bincode was removed from the engine in Phase 3c (cell-based encoding replaced it). Rather than re-add bincode for one column type, **7e ships JSON-as-canonical-text** — same as SQLite's JSON1 extension. INSERT/UPDATE call `serde_json::from_str` to validate; the four `json_*` functions re-parse on demand. Trade-off: ~2× storage vs. binary, plus per-call parse overhead — both acceptable for MVP and consistent with SQLite's choice. JSONB-style binary indexing remains a future-phase optimization, but doesn't block 7e.
383
+ >
384
+ > One additional 7e divergence from the original plan: `json_object_keys` is supposed to be a *table-valued function* (one row per key, like SQLite's). We don't yet support set-returning functions in the executor, so 7e returns the keys as a JSON-array text instead. Caller can iterate via `json_array_length` + `json_extract` indexing. Documented in `docs/supported-sql.md` so users see the divergence up front.
381
385
 
382
386
  - **bincoded `serde_json::Value`:** one-line implementation, fast read/write, opaque on disk.
383
387
  - **Parsed AST as cell-encoded structure:** more code, but lets us index into JSON without a full deserialize.
@@ -474,7 +474,7 @@ Approved sub-phases (Q1–Q10 resolved):
474
474
  - **✅ 7b — Distance functions** *(v0.1.11)* — `vec_distance_l2/cosine/dot`, plus the ORDER BY-expressions parser change so KNN queries work end-to-end. Operators (`<->` `<=>` `<#>`) deferred to **7b.1** — sqlparser doesn't parse them natively, contradicting Q6's "tiny parser change" assumption.
475
475
  - **✅ 7c — Brute-force KNN executor optimization** — bounded `BinaryHeap` of size k for `ORDER BY <expr> LIMIT k`. ~1.8× faster than full-sort at N=10k for cheap keys; bigger gains on expensive keys like `vec_distance_l2`.
476
476
  - **✅ 7d — HNSW ANN index** — three PRs: 7d.1 (algorithm w/ recall@10 ≥ 0.95), 7d.2 (SQL integration + query optimizer), 7d.3 (persistence + DELETE/UPDATE rebuild). `CREATE INDEX … USING hnsw (col)`; fixed defaults `M=16, ef_construction=200, ef_search=50` (Q2). New `KIND_HNSW` cell tag.
477
- - **7e — JSON column type + path queries** — `JSON` data type stored as bincoded `serde_json::Value` (Q3); `json_extract` / `json_array_length` / `json_object_keys` / `json_type`.
477
+ - **✅ 7e — JSON column type + path queries** — `JSON` data type stored as canonical text (validated via `serde_json::from_str` at INSERT/UPDATE time; SQLite-JSON1-style — Q3 scope correction since bincode was removed in Phase 3c). Functions: `json_extract` / `json_type` / `json_array_length` / `json_object_keys`. Path subset supports `$`, `.key`, `[N]`, chained. `json_object_keys` returns a JSON-array text rather than a table-valued result (no set-returning functions in the executor yet).
478
478
  - **7f — ~~Full-text search with BM25~~** — **deferred to Phase 8** (Q1).
479
479
  - **7g — `ask()` API across the product surface** — natural-language → SQL via Anthropic API (Q4), Anthropic-first then OpenAI + Ollama follow-ups. Foundational 7g.1 introduces a new `sqlrite-ask` crate (Q10 — separate crate, not a feature flag). Thin per-product adapters in 7g.2-7g.8 cover REPL, desktop, Python, Node.js, Go, WASM (JS-callback shape per Q9), and the MCP `ask` tool.
480
480
  - **7h — MCP server adapter** — new `sqlrite-mcp` binary, hand-rolled JSON-RPC + tool framework (Q5).
@@ -35,6 +35,7 @@ CREATE TABLE <name> (<col> <type> [column_constraint]* [, ...]);
35
35
  | `REAL`, `FLOAT`, `DOUBLE`, `DECIMAL` | Real (f64) | Double-precision; `DECIMAL(p,s)` precision/scale parsed and ignored |
36
36
  | `BOOLEAN` | Boolean | Stored compactly in the null bitmap's sibling bits; accepts `TRUE` / `FALSE` |
37
37
  | `VECTOR(N)` | Vector (Vec\<f32\>, fixed dim N) | **Phase 7a.** Dense f32 array of fixed dimension. `N` is required and must be ≥ 1. Inserted as bracket-array literals `[0.1, 0.2, ...]`. Dimension is enforced at INSERT/UPDATE; mismatched-length values are rejected. Distance functions and ANN indexing land in 7b–7d. |
38
+ | `JSON`, `JSONB` | Text (canonical JSON) | **Phase 7e.** JSON document stored as canonical UTF-8 text — same as SQLite's JSON1 extension (Q3 scope correction since bincode was removed in Phase 3c). INSERT/UPDATE values are validated via `serde_json::from_str`; malformed JSON is rejected with a typed error and no row is written. `JSONB` is accepted as an alias for `JSON` (PostgreSQL convention; both store as text in our case). Path-style read access via the `json_extract` / `json_type` / `json_array_length` / `json_object_keys` functions below. |
38
39
 
39
40
  ### Column constraints
40
41
 
@@ -203,6 +204,10 @@ Same set accepted by `INSERT` (see [Value literals accepted](#value-literals-acc
203
204
  | `vec_distance_l2(a, b)` | Real (f64) | Euclidean distance √Σ(aᵢ−bᵢ)². Smaller is closer. *(Phase 7b)* |
204
205
  | `vec_distance_cosine(a, b)` | Real (f64) | Cosine distance `1 − (a·b) / (‖a‖·‖b‖)`. Errors on zero-magnitude vectors (cosine is undefined). Smaller is closer; identical vectors return 0.0, orthogonal vectors return 1.0. *(Phase 7b)* |
205
206
  | `vec_distance_dot(a, b)` | Real (f64) | Negated dot product `−(a·b)`. Negation makes "smaller is closer" consistent with the others. For unit-norm vectors equals `vec_distance_cosine(a, b) - 1`. *(Phase 7b)* |
207
+ | `json_extract(json[, path])` | Depends on the resolved node | Walks `path` over `json` and returns the resolved value coerced to the closest SQL type — JSON strings → `TEXT`, numbers → `INTEGER` / `REAL`, booleans → `BOOLEAN`, `null` → `NULL`, and composites (`object` / `array`) → their canonical JSON-text serialization. Path defaults to `$` when only one argument is supplied. A path that doesn't resolve returns `NULL`. *(Phase 7e)* |
208
+ | `json_type(json[, path])` | Text | One of `'object'`, `'array'`, `'string'`, `'integer'`, `'real'`, `'true'`, `'false'`, `'null'`. Path defaults to `$`. *(Phase 7e)* |
209
+ | `json_array_length(json[, path])` | Integer | Number of elements in the JSON array at `path`. Errors if the resolved node is not an array. Path defaults to `$`. *(Phase 7e)* |
210
+ | `json_object_keys(json[, path])` | Text (JSON-array string) | Returns the object's keys as a JSON-array text in insertion order — e.g. `'["a","b","c"]'`. Path defaults to `$`. **Diverges from SQLite**, which exposes keys as a *table-valued* function (one row per key). SQLRite has no set-returning functions yet, so we return the keys as a JSON array and let callers parse if needed. *(Phase 7e)* |
206
211
 
207
212
  All three vector-distance functions take exactly two arguments, both of which must be vectors of the same dimension. Either argument can be a column reference (`embedding`), a bracket-array literal (`[0.1, 0.2, 0.3]`), or any sub-expression that evaluates to a vector. Mismatched dimensions error with `vector dimensions don't match (lhs=N, rhs=M)`.
208
213
 
@@ -216,6 +221,34 @@ LIMIT 10;
216
221
 
217
222
  > **Operator forms (`<->` `<=>` `<#>`) are not supported yet.** They're the de facto pgvector convention but blocked on a sqlparser limitation — will land as a Phase 7b.1 follow-up. Use the function-call form for now.
218
223
 
224
+ #### JSON path syntax
225
+
226
+ The `json_*` functions accept a string path argument with a small subset of JSONPath:
227
+
228
+ | Token | Meaning |
229
+ |---|---|
230
+ | `$` | Root of the document (default if path is omitted). |
231
+ | `.key` | Object member access. Bare keys only — no quoted-string variant yet. |
232
+ | `[N]` | Array index (0-based). Negative indices are not supported. |
233
+
234
+ Tokens chain naturally: `$.user.tags[0]`, `$[2].name`, `$.matrix[1][0]`. A malformed path (unbalanced brackets, missing `$`) errors at runtime with a typed message; a well-formed path that simply doesn't resolve returns `NULL`.
235
+
236
+ ```sql
237
+ CREATE TABLE events (id INTEGER PRIMARY KEY, payload JSON);
238
+
239
+ INSERT INTO events (payload) VALUES
240
+ ('{"user": {"name": "alice", "tags": ["admin", "ops"]}, "score": 42}'),
241
+ ('{"user": {"name": "bob", "tags": []}, "score": 7}');
242
+
243
+ SELECT id,
244
+ json_extract(payload, '$.user.name') AS name,
245
+ json_extract(payload, '$.user.tags[0]') AS first_tag,
246
+ json_array_length(payload, '$.user.tags') AS tag_count,
247
+ json_type(payload, '$.score') AS score_type
248
+ FROM events
249
+ WHERE json_extract(payload, '$.user.name') = 'alice';
250
+ ```
251
+
219
252
  ### Type coercion in arithmetic
220
253
 
221
254
  - **Integer-only ops stay integer.** `1 + 2` → `3` (Integer).
@@ -4,7 +4,7 @@ build-backend = "maturin"
4
4
 
5
5
  [project]
6
6
  name = "sqlrite"
7
- version = "0.1.15"
7
+ version = "0.1.16"
8
8
  description = "Python bindings for SQLRite — a small, embeddable SQLite clone written in Rust."
9
9
  authors = [{ name = "Joao Henrique Machado Silva", email = "joaoh82@gmail.com" }]
10
10
  license = { text = "MIT" }
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "sqlrite-python"
3
- version = "0.1.15"
3
+ version = "0.1.16"
4
4
  authors = ["Joao Henrique Machado Silva <joaoh82@gmail.com>"]
5
5
  edition = "2024"
6
6
  rust-version = "1.85"
@@ -26,6 +26,16 @@ pub enum DataType {
26
26
  /// declared dimension; every value stored in the column must have
27
27
  /// exactly that many elements.
28
28
  Vector(usize),
29
+ /// Phase 7e — JSON column. Stored as canonical UTF-8 text (matches
30
+ /// SQLite's JSON1 extension), validated at INSERT/UPDATE time. The
31
+ /// `json_extract` family of functions parses on demand and returns
32
+ /// either a primitive `Value` (Integer / Real / Text / Bool / Null)
33
+ /// or a Text value carrying the JSON-encoded sub-object/array.
34
+ /// Q3 originally specified `bincoded serde_json::Value`, but bincode
35
+ /// was removed from the engine in Phase 3c — see the scope-correction
36
+ /// note in `docs/phase-7-plan.md` for the rationale on switching to
37
+ /// text storage.
38
+ Json,
29
39
  None,
30
40
  Invalid,
31
41
  }
@@ -44,6 +54,7 @@ impl DataType {
44
54
  "text" => DataType::Text,
45
55
  "real" => DataType::Real,
46
56
  "bool" => DataType::Bool,
57
+ "json" => DataType::Json,
47
58
  "none" => DataType::None,
48
59
  other if other.starts_with("vector(") && other.ends_with(')') => {
49
60
  // Strip the `vector(` prefix and trailing `)`, parse what's
@@ -77,6 +88,7 @@ impl DataType {
77
88
  DataType::Real => "Real".to_string(),
78
89
  DataType::Bool => "Bool".to_string(),
79
90
  DataType::Vector(dim) => format!("vector({dim})"),
91
+ DataType::Json => "Json".to_string(),
80
92
  DataType::None => "None".to_string(),
81
93
  DataType::Invalid => "Invalid".to_string(),
82
94
  }
@@ -91,6 +103,7 @@ impl fmt::Display for DataType {
91
103
  DataType::Real => f.write_str("Real"),
92
104
  DataType::Bool => f.write_str("Boolean"),
93
105
  DataType::Vector(dim) => write!(f, "Vector({dim})"),
106
+ DataType::Json => f.write_str("Json"),
94
107
  DataType::None => f.write_str("None"),
95
108
  DataType::Invalid => f.write_str("Invalid"),
96
109
  }
@@ -183,6 +196,12 @@ impl Table {
183
196
  // itself doesn't carry the dim — every stored Vec<f32>
184
197
  // already has it via .len().
185
198
  DataType::Vector(_dim) => Row::Vector(BTreeMap::new()),
199
+ // Phase 7e — JSON columns reuse Text storage (with
200
+ // INSERT/UPDATE-time validation that the bytes parse as JSON).
201
+ // No new Row variant; json_extract / json_type / etc.
202
+ // re-parse from text on demand. See `docs/phase-7-plan.md`
203
+ // Q3's scope-correction note for the storage choice.
204
+ DataType::Json => Row::Text(BTreeMap::new()),
186
205
  DataType::Invalid | DataType::None => Row::None,
187
206
  };
188
207
  table_rows
@@ -540,7 +559,16 @@ impl Table {
540
559
  (Row::Real(m), Value::Integer(v), _) => {
541
560
  m.insert(rowid, *v as f32);
542
561
  }
543
- (Row::Text(m), Value::Text(v), _) => {
562
+ (Row::Text(m), Value::Text(v), dt) => {
563
+ // Phase 7e — UPDATE on a JSON column also validates
564
+ // the new text is well-formed JSON, mirroring INSERT.
565
+ if matches!(dt, DataType::Json) {
566
+ if let Err(e) = serde_json::from_str::<serde_json::Value>(v) {
567
+ return Err(SQLRiteError::General(format!(
568
+ "Type mismatch: expected JSON for column '{column}', got '{v}': {e}"
569
+ )));
570
+ }
571
+ }
544
572
  m.insert(rowid, v.clone());
545
573
  }
546
574
  (Row::Bool(m), Value::Bool(v), _) => {
@@ -655,6 +683,14 @@ impl Table {
655
683
  }
656
684
  Value::Vector(parsed_vec)
657
685
  }
686
+ DataType::Json => {
687
+ // JSON values stored as Text. UNIQUE on a JSON column
688
+ // compares the canonical text representation
689
+ // verbatim — `{"a": 1}` and `{"a":1}` are distinct.
690
+ // Document this if anyone actually requests UNIQUE
691
+ // JSON; for MVP, treat-as-text is fine.
692
+ Value::Text(val.clone())
693
+ }
658
694
  DataType::None | DataType::Invalid => {
659
695
  return Err(SQLRiteError::Internal(format!(
660
696
  "column '{name}' has an unsupported datatype"
@@ -784,6 +820,19 @@ impl Table {
784
820
  Some(Value::Integer(parsed as i64))
785
821
  }
786
822
  Row::Text(tree) => {
823
+ // Phase 7e — JSON columns also reach here (they
824
+ // share Row::Text storage with TEXT columns).
825
+ // Validate the value parses as JSON before
826
+ // storing; otherwise we'd happily write
827
+ // `not-json-at-all` and only fail when
828
+ // json_extract tried to parse it later.
829
+ if matches!(self.columns[i].datatype, DataType::Json) && val != "Null" {
830
+ if let Err(e) = serde_json::from_str::<serde_json::Value>(&val) {
831
+ return Err(SQLRiteError::General(format!(
832
+ "Type mismatch: expected JSON for column '{key}', got '{val}': {e}"
833
+ )));
834
+ }
835
+ }
787
836
  tree.insert(next_rowid, val.to_string());
788
837
  // "Null" sentinel stays out of the index — it isn't a
789
838
  // real user value.
@@ -624,6 +624,7 @@ fn clone_datatype(dt: &DataType) -> DataType {
624
624
  DataType::Real => DataType::Real,
625
625
  DataType::Bool => DataType::Bool,
626
626
  DataType::Vector(dim) => DataType::Vector(*dim),
627
+ DataType::Json => DataType::Json,
627
628
  DataType::None => DataType::None,
628
629
  DataType::Invalid => DataType::Invalid,
629
630
  }
@@ -1201,12 +1202,303 @@ fn eval_function(func: &sqlparser::ast::Function, table: &Table, rowid: i64) ->
1201
1202
  // other reals via the existing arithmetic + comparison paths.
1202
1203
  Ok(Value::Real(dist as f64))
1203
1204
  }
1205
+ // Phase 7e — JSON functions. All four parse the JSON text on
1206
+ // demand (we don't cache parsed values), then resolve a path
1207
+ // (default `$` = root). The path resolver handles `.key` for
1208
+ // object access and `[N]` for array index. SQLite-style.
1209
+ "json_extract" => json_fn_extract(&name, &func.args, table, rowid),
1210
+ "json_type" => json_fn_type(&name, &func.args, table, rowid),
1211
+ "json_array_length" => json_fn_array_length(&name, &func.args, table, rowid),
1212
+ "json_object_keys" => json_fn_object_keys(&name, &func.args, table, rowid),
1204
1213
  other => Err(SQLRiteError::NotImplemented(format!(
1205
1214
  "unknown function: {other}(...)"
1206
1215
  ))),
1207
1216
  }
1208
1217
  }
1209
1218
 
1219
+ // -----------------------------------------------------------------
1220
+ // Phase 7e — JSON path-extraction functions
1221
+ // -----------------------------------------------------------------
1222
+
1223
+ /// Extracts the JSON-typed text + optional path string out of a
1224
+ /// function call's args. Used by all four json_* functions.
1225
+ ///
1226
+ /// Arity rules (matching SQLite JSON1):
1227
+ /// - 1 arg → JSON value, path defaults to `$` (root)
1228
+ /// - 2 args → (JSON value, path text)
1229
+ ///
1230
+ /// Returns `(json_text, path)` so caller can serde_json::from_str
1231
+ /// + walk_json_path on it.
1232
+ fn extract_json_and_path(
1233
+ fn_name: &str,
1234
+ args: &FunctionArguments,
1235
+ table: &Table,
1236
+ rowid: i64,
1237
+ ) -> Result<(String, String)> {
1238
+ let arg_list = match args {
1239
+ FunctionArguments::List(l) => &l.args,
1240
+ _ => {
1241
+ return Err(SQLRiteError::General(format!(
1242
+ "{fn_name}() expects 1 or 2 arguments"
1243
+ )));
1244
+ }
1245
+ };
1246
+ if !(arg_list.len() == 1 || arg_list.len() == 2) {
1247
+ return Err(SQLRiteError::General(format!(
1248
+ "{fn_name}() expects 1 or 2 arguments, got {}",
1249
+ arg_list.len()
1250
+ )));
1251
+ }
1252
+ // Evaluate first arg → must produce text.
1253
+ let first_expr = match &arg_list[0] {
1254
+ FunctionArg::Unnamed(FunctionArgExpr::Expr(e)) => e,
1255
+ other => {
1256
+ return Err(SQLRiteError::NotImplemented(format!(
1257
+ "{fn_name}() argument 0 has unsupported shape: {other:?}"
1258
+ )));
1259
+ }
1260
+ };
1261
+ let json_text = match eval_expr(first_expr, table, rowid)? {
1262
+ Value::Text(s) => s,
1263
+ Value::Null => {
1264
+ return Err(SQLRiteError::General(format!(
1265
+ "{fn_name}() called on NULL — JSON column has no value for this row"
1266
+ )));
1267
+ }
1268
+ other => {
1269
+ return Err(SQLRiteError::General(format!(
1270
+ "{fn_name}() argument 0 is not JSON-typed: got {}",
1271
+ other.to_display_string()
1272
+ )));
1273
+ }
1274
+ };
1275
+
1276
+ // Path defaults to root `$` when omitted.
1277
+ let path = if arg_list.len() == 2 {
1278
+ let path_expr = match &arg_list[1] {
1279
+ FunctionArg::Unnamed(FunctionArgExpr::Expr(e)) => e,
1280
+ other => {
1281
+ return Err(SQLRiteError::NotImplemented(format!(
1282
+ "{fn_name}() argument 1 has unsupported shape: {other:?}"
1283
+ )));
1284
+ }
1285
+ };
1286
+ match eval_expr(path_expr, table, rowid)? {
1287
+ Value::Text(s) => s,
1288
+ other => {
1289
+ return Err(SQLRiteError::General(format!(
1290
+ "{fn_name}() path argument must be a string literal, got {}",
1291
+ other.to_display_string()
1292
+ )));
1293
+ }
1294
+ }
1295
+ } else {
1296
+ "$".to_string()
1297
+ };
1298
+
1299
+ Ok((json_text, path))
1300
+ }
1301
+
1302
+ /// Walks a `serde_json::Value` along a JSONPath subset:
1303
+ /// - `$` is the root
1304
+ /// - `.key` for object access (key may not contain `.` or `[`)
1305
+ /// - `[N]` for array index (N a non-negative integer)
1306
+ /// - chains arbitrarily: `$.foo.bar[0].baz`
1307
+ ///
1308
+ /// Returns `Ok(None)` for "path didn't match anything" (NULL in SQL),
1309
+ /// `Err` for malformed paths. Matches SQLite JSON1's semantic
1310
+ /// distinction: missing-key = NULL, malformed-path = error.
1311
+ fn walk_json_path<'a>(
1312
+ value: &'a serde_json::Value,
1313
+ path: &str,
1314
+ ) -> Result<Option<&'a serde_json::Value>> {
1315
+ let mut chars = path.chars().peekable();
1316
+ if chars.next() != Some('$') {
1317
+ return Err(SQLRiteError::General(format!(
1318
+ "JSON path must start with '$', got `{path}`"
1319
+ )));
1320
+ }
1321
+ let mut current = value;
1322
+ while let Some(&c) = chars.peek() {
1323
+ match c {
1324
+ '.' => {
1325
+ chars.next();
1326
+ let mut key = String::new();
1327
+ while let Some(&c) = chars.peek() {
1328
+ if c == '.' || c == '[' {
1329
+ break;
1330
+ }
1331
+ key.push(c);
1332
+ chars.next();
1333
+ }
1334
+ if key.is_empty() {
1335
+ return Err(SQLRiteError::General(format!(
1336
+ "JSON path has empty key after '.' in `{path}`"
1337
+ )));
1338
+ }
1339
+ match current.get(&key) {
1340
+ Some(v) => current = v,
1341
+ None => return Ok(None),
1342
+ }
1343
+ }
1344
+ '[' => {
1345
+ chars.next();
1346
+ let mut idx_str = String::new();
1347
+ while let Some(&c) = chars.peek() {
1348
+ if c == ']' {
1349
+ break;
1350
+ }
1351
+ idx_str.push(c);
1352
+ chars.next();
1353
+ }
1354
+ if chars.next() != Some(']') {
1355
+ return Err(SQLRiteError::General(format!(
1356
+ "JSON path has unclosed `[` in `{path}`"
1357
+ )));
1358
+ }
1359
+ let idx: usize = idx_str.trim().parse().map_err(|_| {
1360
+ SQLRiteError::General(format!(
1361
+ "JSON path has non-integer index `[{idx_str}]` in `{path}`"
1362
+ ))
1363
+ })?;
1364
+ match current.get(idx) {
1365
+ Some(v) => current = v,
1366
+ None => return Ok(None),
1367
+ }
1368
+ }
1369
+ other => {
1370
+ return Err(SQLRiteError::General(format!(
1371
+ "JSON path has unexpected character `{other}` in `{path}` \
1372
+ (expected `.`, `[`, or end-of-path)"
1373
+ )));
1374
+ }
1375
+ }
1376
+ }
1377
+ Ok(Some(current))
1378
+ }
1379
+
1380
+ /// Converts a serde_json scalar to a SQLRite Value. For composite
1381
+ /// types (object, array) returns the JSON-encoded text — callers
1382
+ /// pattern-match on shape from the calling json_* function.
1383
+ fn json_value_to_sql(v: &serde_json::Value) -> Value {
1384
+ match v {
1385
+ serde_json::Value::Null => Value::Null,
1386
+ serde_json::Value::Bool(b) => Value::Bool(*b),
1387
+ serde_json::Value::Number(n) => {
1388
+ // Match SQLite: integer if it fits an i64, else f64.
1389
+ if let Some(i) = n.as_i64() {
1390
+ Value::Integer(i)
1391
+ } else if let Some(f) = n.as_f64() {
1392
+ Value::Real(f)
1393
+ } else {
1394
+ Value::Null
1395
+ }
1396
+ }
1397
+ serde_json::Value::String(s) => Value::Text(s.clone()),
1398
+ // Objects + arrays come out as JSON-encoded text. Same as
1399
+ // SQLite's json_extract: composite results round-trip through
1400
+ // text rather than being modeled as a richer Value type.
1401
+ composite => Value::Text(composite.to_string()),
1402
+ }
1403
+ }
1404
+
1405
+ fn json_fn_extract(
1406
+ name: &str,
1407
+ args: &FunctionArguments,
1408
+ table: &Table,
1409
+ rowid: i64,
1410
+ ) -> Result<Value> {
1411
+ let (json_text, path) = extract_json_and_path(name, args, table, rowid)?;
1412
+ let parsed: serde_json::Value = serde_json::from_str(&json_text).map_err(|e| {
1413
+ SQLRiteError::General(format!("{name}() got invalid JSON `{json_text}`: {e}"))
1414
+ })?;
1415
+ match walk_json_path(&parsed, &path)? {
1416
+ Some(v) => Ok(json_value_to_sql(v)),
1417
+ None => Ok(Value::Null),
1418
+ }
1419
+ }
1420
+
1421
+ fn json_fn_type(name: &str, args: &FunctionArguments, table: &Table, rowid: i64) -> Result<Value> {
1422
+ let (json_text, path) = extract_json_and_path(name, args, table, rowid)?;
1423
+ let parsed: serde_json::Value = serde_json::from_str(&json_text).map_err(|e| {
1424
+ SQLRiteError::General(format!("{name}() got invalid JSON `{json_text}`: {e}"))
1425
+ })?;
1426
+ let resolved = match walk_json_path(&parsed, &path)? {
1427
+ Some(v) => v,
1428
+ None => return Ok(Value::Null),
1429
+ };
1430
+ let ty = match resolved {
1431
+ serde_json::Value::Null => "null",
1432
+ serde_json::Value::Bool(true) => "true",
1433
+ serde_json::Value::Bool(false) => "false",
1434
+ serde_json::Value::Number(n) => {
1435
+ if n.is_i64() || n.is_u64() {
1436
+ "integer"
1437
+ } else {
1438
+ "real"
1439
+ }
1440
+ }
1441
+ serde_json::Value::String(_) => "text",
1442
+ serde_json::Value::Array(_) => "array",
1443
+ serde_json::Value::Object(_) => "object",
1444
+ };
1445
+ Ok(Value::Text(ty.to_string()))
1446
+ }
1447
+
1448
+ fn json_fn_array_length(
1449
+ name: &str,
1450
+ args: &FunctionArguments,
1451
+ table: &Table,
1452
+ rowid: i64,
1453
+ ) -> Result<Value> {
1454
+ let (json_text, path) = extract_json_and_path(name, args, table, rowid)?;
1455
+ let parsed: serde_json::Value = serde_json::from_str(&json_text).map_err(|e| {
1456
+ SQLRiteError::General(format!("{name}() got invalid JSON `{json_text}`: {e}"))
1457
+ })?;
1458
+ let resolved = match walk_json_path(&parsed, &path)? {
1459
+ Some(v) => v,
1460
+ None => return Ok(Value::Null),
1461
+ };
1462
+ match resolved.as_array() {
1463
+ Some(arr) => Ok(Value::Integer(arr.len() as i64)),
1464
+ None => Err(SQLRiteError::General(format!(
1465
+ "{name}() resolved to a non-array value at path `{path}`"
1466
+ ))),
1467
+ }
1468
+ }
1469
+
1470
+ fn json_fn_object_keys(
1471
+ name: &str,
1472
+ args: &FunctionArguments,
1473
+ table: &Table,
1474
+ rowid: i64,
1475
+ ) -> Result<Value> {
1476
+ let (json_text, path) = extract_json_and_path(name, args, table, rowid)?;
1477
+ let parsed: serde_json::Value = serde_json::from_str(&json_text).map_err(|e| {
1478
+ SQLRiteError::General(format!("{name}() got invalid JSON `{json_text}`: {e}"))
1479
+ })?;
1480
+ let resolved = match walk_json_path(&parsed, &path)? {
1481
+ Some(v) => v,
1482
+ None => return Ok(Value::Null),
1483
+ };
1484
+ let obj = resolved.as_object().ok_or_else(|| {
1485
+ SQLRiteError::General(format!(
1486
+ "{name}() resolved to a non-object value at path `{path}`"
1487
+ ))
1488
+ })?;
1489
+ // SQLite's json_object_keys is a table-valued function (one row
1490
+ // per key). Without set-returning function support we can't
1491
+ // reproduce that shape; instead return the keys as a JSON array
1492
+ // text. Caller can iterate via json_array_length + json_extract,
1493
+ // or just treat it as a serialized list. Document this divergence
1494
+ // in supported-sql.md.
1495
+ let keys: Vec<serde_json::Value> = obj
1496
+ .keys()
1497
+ .map(|k| serde_json::Value::String(k.clone()))
1498
+ .collect();
1499
+ Ok(Value::Text(serde_json::Value::Array(keys).to_string()))
1500
+ }
1501
+
1210
1502
  /// Extracts exactly two `Vec<f32>` arguments from a function call,
1211
1503
  /// validating arity and that both sides are Vector-typed with matching
1212
1504
  /// dimensions. Used by all three vec_distance_* functions.