sqlrite 0.1.10__tar.gz → 0.1.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sqlrite-0.1.10 → sqlrite-0.1.11}/Cargo.lock +5 -5
- {sqlrite-0.1.10 → sqlrite-0.1.11}/Cargo.toml +1 -1
- {sqlrite-0.1.10 → sqlrite-0.1.11}/PKG-INFO +1 -1
- {sqlrite-0.1.10 → sqlrite-0.1.11}/desktop/package.json +1 -1
- {sqlrite-0.1.10 → sqlrite-0.1.11}/docs/phase-7-plan.md +12 -6
- {sqlrite-0.1.10 → sqlrite-0.1.11}/docs/supported-sql.md +22 -2
- {sqlrite-0.1.10 → sqlrite-0.1.11}/pyproject.toml +1 -1
- {sqlrite-0.1.10 → sqlrite-0.1.11}/sdk/python/Cargo.toml +1 -1
- {sqlrite-0.1.10 → sqlrite-0.1.11}/src/sql/executor.rs +311 -13
- {sqlrite-0.1.10 → sqlrite-0.1.11}/src/sql/mod.rs +130 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/src/sql/parser/select.rs +15 -14
- {sqlrite-0.1.10 → sqlrite-0.1.11}/.github/workflows/ci.yml +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/.github/workflows/release-pr.yml +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/.github/workflows/release.yml +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/.github/workflows/rust.yml +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/.gitignore +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/CODE_OF_CONDUCT.md +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/LICENSE +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/MAINTAINERS +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/Makefile +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/README.md +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/desktop/index.html +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/desktop/package-lock.json +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/desktop/src/App.svelte +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/desktop/src/app.css +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/desktop/src/main.ts +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/desktop/src/vite-env.d.ts +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/desktop/svelte.config.js +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/desktop/tsconfig.json +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/desktop/vite.config.ts +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/docs/_index.md +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/docs/architecture.md +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/docs/design-decisions.md +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/docs/desktop.md +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/docs/embedding.md +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/docs/file-format.md +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/docs/getting-started.md +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/docs/pager.md +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/docs/release-plan.md +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/docs/release-secrets.md +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/docs/roadmap.md +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/docs/smoke-test.md +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/docs/sql-engine.md +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/docs/storage-model.md +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/docs/usage.md +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/examples/README.md +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/examples/c/Makefile +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/examples/c/hello.c +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/examples/go/go.mod +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/examples/go/hello.go +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/examples/nodejs/hello.mjs +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/examples/python/hello.py +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/examples/rust/quickstart.rs +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/examples/wasm/Makefile +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/examples/wasm/index.html +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/images/SQLRite - Desktop.png +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/images/SQLRite Data Structures.png +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/images/SQLRite Simple SQL Execution High Level Diagram.png +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/images/SQLRite Simple SQL INSERT Execution High Level Diagram (Insert Row).png +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/images/SQLRite Simple SQL INSERT Execution High Level Diagram.png +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/images/SQLRite_logo.png +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/images/architecture.png +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/rust-toolchain.toml +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/samples/AST.delete.example +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/samples/AST.insert.exemple +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/samples/AST.select.example +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/samples/AST.update.example +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/samples/CREATE TABLE sqlrite_schema.sql +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/samples/CREATE_TABLE with duplicate.sql +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/samples/CREATE_TABLE.sql +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/samples/INSERT.sql +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/scripts/bump-version.sh +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/sdk/go/README.md +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/sdk/go/conn.go +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/sdk/go/go.mod +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/sdk/go/rows.go +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/sdk/go/sqlrite.go +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/sdk/go/sqlrite_test.go +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/sdk/go/stmt.go +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/sdk/python/README.md +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/sdk/python/src/lib.rs +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/sdk/python/tests/test_sqlrite.py +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/src/connection.rs +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/src/error.rs +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/src/lib.rs +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/src/main.rs +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/src/meta_command/mod.rs +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/src/repl/mod.rs +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/src/sql/db/database.rs +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/src/sql/db/mod.rs +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/src/sql/db/secondary_index.rs +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/src/sql/db/table.rs +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/src/sql/pager/cell.rs +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/src/sql/pager/file.rs +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/src/sql/pager/header.rs +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/src/sql/pager/index_cell.rs +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/src/sql/pager/interior_page.rs +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/src/sql/pager/mod.rs +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/src/sql/pager/overflow.rs +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/src/sql/pager/page.rs +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/src/sql/pager/pager.rs +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/src/sql/pager/table_page.rs +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/src/sql/pager/varint.rs +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/src/sql/pager/wal.rs +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/src/sql/parser/create.rs +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/src/sql/parser/insert.rs +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/src/sql/parser/mod.rs +0 -0
- {sqlrite-0.1.10 → sqlrite-0.1.11}/src/sql/tokenizer.rs +0 -0
|
@@ -3736,7 +3736,7 @@ dependencies = [
|
|
|
3736
3736
|
|
|
3737
3737
|
[[package]]
|
|
3738
3738
|
name = "sqlrite-desktop"
|
|
3739
|
-
version = "0.1.10"
|
|
3739
|
+
version = "0.1.11"
|
|
3740
3740
|
dependencies = [
|
|
3741
3741
|
"serde",
|
|
3742
3742
|
"serde_json",
|
|
@@ -3748,7 +3748,7 @@ dependencies = [
|
|
|
3748
3748
|
|
|
3749
3749
|
[[package]]
|
|
3750
3750
|
name = "sqlrite-engine"
|
|
3751
|
-
version = "0.1.10"
|
|
3751
|
+
version = "0.1.11"
|
|
3752
3752
|
dependencies = [
|
|
3753
3753
|
"clap",
|
|
3754
3754
|
"env_logger",
|
|
@@ -3763,7 +3763,7 @@ dependencies = [
|
|
|
3763
3763
|
|
|
3764
3764
|
[[package]]
|
|
3765
3765
|
name = "sqlrite-ffi"
|
|
3766
|
-
version = "0.1.10"
|
|
3766
|
+
version = "0.1.11"
|
|
3767
3767
|
dependencies = [
|
|
3768
3768
|
"cbindgen",
|
|
3769
3769
|
"sqlrite-engine",
|
|
@@ -3771,7 +3771,7 @@ dependencies = [
|
|
|
3771
3771
|
|
|
3772
3772
|
[[package]]
|
|
3773
3773
|
name = "sqlrite-nodejs"
|
|
3774
|
-
version = "0.1.10"
|
|
3774
|
+
version = "0.1.11"
|
|
3775
3775
|
dependencies = [
|
|
3776
3776
|
"napi",
|
|
3777
3777
|
"napi-build",
|
|
@@ -3781,7 +3781,7 @@ dependencies = [
|
|
|
3781
3781
|
|
|
3782
3782
|
[[package]]
|
|
3783
3783
|
name = "sqlrite-python"
|
|
3784
|
-
version = "0.1.10"
|
|
3784
|
+
version = "0.1.11"
|
|
3785
3785
|
dependencies = [
|
|
3786
3786
|
"pyo3",
|
|
3787
3787
|
"sqlrite-engine",
|
|
@@ -27,7 +27,7 @@ resolver = "3"
|
|
|
27
27
|
# `package =` key so the import name stays `sqlrite` internally:
|
|
28
28
|
# sqlrite = { package = "sqlrite-engine", path = "…" }
|
|
29
29
|
name = "sqlrite-engine"
|
|
30
|
-
version = "0.1.10"
|
|
30
|
+
version = "0.1.11"
|
|
31
31
|
authors = ["Joao Henrique Machado Silva <joaoh82@gmail.com>"]
|
|
32
32
|
edition = "2024"
|
|
33
33
|
rust-version = "1.85"
|
|
@@ -106,12 +106,17 @@ SELECT id, title FROM docs ORDER BY embedding <-> [0.1, ...] LIMIT 10;
|
|
|
106
106
|
- `<=>` → `vec_distance_cosine`
|
|
107
107
|
- `<#>` → `vec_distance_dot`
|
|
108
108
|
|
|
109
|
+
> **Scope correction (2026-04-27, during 7b implementation):** Operators turned out to be a much bigger parser change than Q6 anticipated. sqlparser-rs (current pinned version) **fails outright** on `<->` and `<#>` ("Expected: an expression, found: ->"). Only `<=>` parses, as MySQL's `Spaceship` (null-safe equality). Supporting all three operators requires either a fork of sqlparser to extend the SQLite dialect, or a string-preprocessing pass that rewrites operators to function calls before handing SQL to the parser — neither is the "tiny parser change" Q6 estimated.
|
|
110
|
+
>
|
|
111
|
+
> **Decision:** ship 7b with **functions only**. Operators are deferred to a follow-up sub-phase **7b.1**. The KNN use case (`ORDER BY vec_distance_l2(col, [...]) LIMIT k`) still works — just verbose. When 7b.1 lands, queries can switch from function-call form to operator form without any other behavior change.
|
|
112
|
+
|
|
109
113
|
**Decisions:**
|
|
110
114
|
|
|
111
115
|
- **Dispatch in the existing expression evaluator.** No new function-registration framework — these are built-in functions like `||` is.
|
|
112
|
-
- **Operators land in
|
|
116
|
+
- **Operators land in 7b.1, not 7b.** See scope-correction note above.
|
|
117
|
+
- **`ORDER BY` widened to accept arbitrary expressions** as part of 7b. Pre-7b, the parser restricted ORDER BY to bare column refs; without expression support, KNN queries would have been impossible. New shape: `eval_expr` is called per-row to produce sort keys. This is a strict superset — `ORDER BY col` still works because `Expr::Identifier` takes the same path.
|
|
113
118
|
|
|
114
|
-
**LOC estimate:** ~250 lines.
|
|
119
|
+
**LOC estimate:** ~250 lines for the functions; another ~50 for the ORDER BY parser extension. Total ~300 LOC, slightly over Q-time estimate.
|
|
115
120
|
|
|
116
121
|
**Tests:** all three distance metrics against hand-computed values; operator parsing; KNN result ordering.
|
|
117
122
|
|
|
@@ -296,10 +301,11 @@ let rows = conn.execute(&resp.sql)?;
|
|
|
296
301
|
## Implementation order + dependencies
|
|
297
302
|
|
|
298
303
|
```
|
|
299
|
-
7a (VECTOR type)
|
|
300
|
-
└── 7b (
|
|
301
|
-
└──
|
|
302
|
-
|
|
304
|
+
7a (VECTOR type) — independent, foundational
|
|
305
|
+
└── 7b (distance functions) — needs 7a
|
|
306
|
+
└── 7b.1 (operators) — sugar over 7b; deferred from 7b per scope correction
|
|
307
|
+
└── 7c (KNN exec opt) — needs 7b (operators not required)
|
|
308
|
+
└── 7d (HNSW) — needs 7b/7c
|
|
303
309
|
|
|
304
310
|
7e (JSON) — independent, can interleave anywhere
|
|
305
311
|
|
|
@@ -125,7 +125,7 @@ FROM <table>
|
|
|
125
125
|
|
|
126
126
|
- **Projection**: `*` (all columns in declaration order) or a bare column list. Columns not declared on the table are rejected.
|
|
127
127
|
- **`WHERE`**: any [expression](#expressions). Evaluated per row; NULL-as-false in WHERE context (three-valued logic collapsed to two-valued for filtering).
|
|
128
|
-
- **`ORDER BY`**: single
|
|
128
|
+
- **`ORDER BY`**: single sort key, `ASC` (default) or `DESC`. The sort key can be a bare column reference OR any expression — including function calls — so KNN queries like `ORDER BY vec_distance_l2(embedding, [...]) LIMIT k` work end-to-end *(Phase 7b)*. Sort key types must match; mixing `INTEGER` and `TEXT` across rows under a single `ORDER BY` is a runtime error.
|
|
129
129
|
- **`LIMIT`**: non-negative integer literal. `LIMIT 0` is valid (returns zero rows).
|
|
130
130
|
|
|
131
131
|
### Index probing
|
|
@@ -140,7 +140,7 @@ The executor includes a tiny optimizer: if the `WHERE` is exactly `<indexed_col>
|
|
|
140
140
|
- **`DISTINCT`**
|
|
141
141
|
- **`LIKE`**, **`IN`**, **`IS NULL`** / **`IS NOT NULL`**, `BETWEEN`
|
|
142
142
|
- **Expressions in the projection list** (`SELECT age + 1 FROM users`) — projection is bare column references only
|
|
143
|
-
- **Multi-column `ORDER BY`**, `NULLS FIRST/LAST`
|
|
143
|
+
- **Multi-column `ORDER BY`**, `NULLS FIRST/LAST` (single sort key only; the sort key itself can be an expression as of Phase 7b)
|
|
144
144
|
- **`OFFSET`**
|
|
145
145
|
- **Column aliases** (`SELECT name AS n FROM users`)
|
|
146
146
|
|
|
@@ -196,6 +196,26 @@ Expressions work inside `WHERE` (both in `SELECT`, `UPDATE`, `DELETE`) and on th
|
|
|
196
196
|
|
|
197
197
|
Same set accepted by `INSERT` (see [Value literals accepted](#value-literals-accepted)).
|
|
198
198
|
|
|
199
|
+
### Built-in functions
|
|
200
|
+
|
|
201
|
+
| Function | Returns | Notes |
|
|
202
|
+
|---|---|---|
|
|
203
|
+
| `vec_distance_l2(a, b)` | Real (f64) | Euclidean distance √Σ(aᵢ−bᵢ)². Smaller is closer. *(Phase 7b)* |
|
|
204
|
+
| `vec_distance_cosine(a, b)` | Real (f64) | Cosine distance `1 − (a·b) / (‖a‖·‖b‖)`. Errors on zero-magnitude vectors (cosine is undefined). Smaller is closer; identical vectors return 0.0, orthogonal vectors return 1.0. *(Phase 7b)* |
|
|
205
|
+
| `vec_distance_dot(a, b)` | Real (f64) | Negated dot product `−(a·b)`. Negation makes "smaller is closer" consistent with the others. For unit-norm vectors equals `vec_distance_cosine(a, b) - 1`. *(Phase 7b)* |
|
|
206
|
+
|
|
207
|
+
All three vector-distance functions take exactly two arguments, both of which must be vectors of the same dimension. Either argument can be a column reference (`embedding`), a bracket-array literal (`[0.1, 0.2, 0.3]`), or any sub-expression that evaluates to a vector. Mismatched dimensions error with `vector dimensions don't match (lhs=N, rhs=M)`.
|
|
208
|
+
|
|
209
|
+
The KNN ranking pattern that motivates this set:
|
|
210
|
+
|
|
211
|
+
```sql
|
|
212
|
+
SELECT id, title FROM docs
|
|
213
|
+
ORDER BY vec_distance_l2(embedding, [0.1, 0.2, ..., 0.0])
|
|
214
|
+
LIMIT 10;
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
> **Operator forms (`<->` `<=>` `<#>`) are not supported yet.** They're the de facto pgvector convention but blocked on a sqlparser limitation — will land as a Phase 7b.1 follow-up. Use the function-call form for now.
|
|
218
|
+
|
|
199
219
|
### Type coercion in arithmetic
|
|
200
220
|
|
|
201
221
|
- **Integer-only ops stay integer.** `1 + 2` → `3` (Integer).
|
|
@@ -4,7 +4,7 @@ build-backend = "maturin"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "sqlrite"
|
|
7
|
-
version = "0.1.10"
|
|
7
|
+
version = "0.1.11"
|
|
8
8
|
description = "Python bindings for SQLRite — a small, embeddable SQLite clone written in Rust."
|
|
9
9
|
authors = [{ name = "Joao Henrique Machado Silva", email = "joaoh82@gmail.com" }]
|
|
10
10
|
license = { text = "MIT" }
|
|
@@ -5,14 +5,15 @@ use std::cmp::Ordering;
|
|
|
5
5
|
|
|
6
6
|
use prettytable::{Cell as PrintCell, Row as PrintRow, Table as PrintTable};
|
|
7
7
|
use sqlparser::ast::{
|
|
8
|
-
AssignmentTarget, BinaryOperator, CreateIndex, Delete, Expr, FromTable,
|
|
9
|
-
|
|
8
|
+
AssignmentTarget, BinaryOperator, CreateIndex, Delete, Expr, FromTable, FunctionArg,
|
|
9
|
+
FunctionArgExpr, FunctionArguments, ObjectNamePart, Statement, TableFactor, TableWithJoins,
|
|
10
|
+
UnaryOperator, Update,
|
|
10
11
|
};
|
|
11
12
|
|
|
12
13
|
use crate::error::{Result, SQLRiteError};
|
|
13
14
|
use crate::sql::db::database::Database;
|
|
14
15
|
use crate::sql::db::secondary_index::{IndexOrigin, SecondaryIndex};
|
|
15
|
-
use crate::sql::db::table::{DataType, Table, Value};
|
|
16
|
+
use crate::sql::db::table::{DataType, Table, Value, parse_vector_literal};
|
|
16
17
|
use crate::sql::parser::select::{OrderByClause, Projection, SelectQuery};
|
|
17
18
|
|
|
18
19
|
/// Executes a parsed `SelectQuery` against the database and returns a
|
|
@@ -500,18 +501,42 @@ fn try_extract_equality(expr: &Expr) -> Option<(String, sqlparser::ast::Value)>
|
|
|
500
501
|
}
|
|
501
502
|
|
|
502
503
|
fn sort_rowids(rowids: &mut [i64], table: &Table, order: &OrderByClause) -> Result<()> {
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
504
|
+
// Phase 7b: ORDER BY now accepts any expression (column ref,
|
|
505
|
+
// arithmetic, function call, …). Pre-compute the sort key for
|
|
506
|
+
// every rowid up front so the comparator is called O(N log N)
|
|
507
|
+
// times against pre-evaluated Values rather than re-evaluating
|
|
508
|
+
// the expression O(N log N) times. Not strictly necessary today,
|
|
509
|
+
// but vital once 7d's HNSW index lands and this same code path
|
|
510
|
+
// could be running tens of millions of distance computations.
|
|
511
|
+
let mut keys: Vec<(i64, Result<Value>)> = rowids
|
|
512
|
+
.iter()
|
|
513
|
+
.map(|r| (*r, eval_expr(&order.expr, table, *r)))
|
|
514
|
+
.collect();
|
|
515
|
+
|
|
516
|
+
// Surface the FIRST evaluation error if any. We could be lazy
|
|
517
|
+
// and let sort_by encounter it, but `Ord::cmp` can't return a
|
|
518
|
+
// Result and we'd have to swallow errors silently.
|
|
519
|
+
for (_, k) in &keys {
|
|
520
|
+
if let Err(e) = k {
|
|
521
|
+
return Err(SQLRiteError::General(format!(
|
|
522
|
+
"ORDER BY expression failed: {e}"
|
|
523
|
+
)));
|
|
524
|
+
}
|
|
508
525
|
}
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
526
|
+
|
|
527
|
+
keys.sort_by(|(_, ka), (_, kb)| {
|
|
528
|
+
// Both unwrap()s are safe — we just verified above that
|
|
529
|
+
// every key Result is Ok.
|
|
530
|
+
let va = ka.as_ref().unwrap();
|
|
531
|
+
let vb = kb.as_ref().unwrap();
|
|
532
|
+
let ord = compare_values(Some(va), Some(vb));
|
|
513
533
|
if order.ascending { ord } else { ord.reverse() }
|
|
514
534
|
});
|
|
535
|
+
|
|
536
|
+
// Write the sorted rowids back into the caller's slice.
|
|
537
|
+
for (i, (rowid, _)) in keys.into_iter().enumerate() {
|
|
538
|
+
rowids[i] = rowid;
|
|
539
|
+
}
|
|
515
540
|
Ok(())
|
|
516
541
|
}
|
|
517
542
|
|
|
@@ -558,7 +583,23 @@ fn eval_expr(expr: &Expr, table: &Table, rowid: i64) -> Result<Value> {
|
|
|
558
583
|
match expr {
|
|
559
584
|
Expr::Nested(inner) => eval_expr(inner, table, rowid),
|
|
560
585
|
|
|
561
|
-
Expr::Identifier(ident) =>
|
|
586
|
+
Expr::Identifier(ident) => {
|
|
587
|
+
// Phase 7b — sqlparser parses bracket-array literals like
|
|
588
|
+
// `[0.1, 0.2, 0.3]` as bracket-quoted identifiers (it inherits
|
|
589
|
+
// MSSQL `[name]` syntax). When we see `quote_style == Some('[')`
|
|
590
|
+
// in expression-evaluation position (SELECT projection, WHERE,
|
|
591
|
+
// ORDER BY, function args), parse the bracketed content as a
|
|
592
|
+
// vector literal so the rest of the executor can compare /
|
|
593
|
+
// distance-compute against it. Same trick the INSERT parser
|
|
594
|
+
// uses; the executor needed its own copy because expression
|
|
595
|
+
// eval runs on a different code path.
|
|
596
|
+
if ident.quote_style == Some('[') {
|
|
597
|
+
let raw = format!("[{}]", ident.value);
|
|
598
|
+
let v = parse_vector_literal(&raw)?;
|
|
599
|
+
return Ok(Value::Vector(v));
|
|
600
|
+
}
|
|
601
|
+
Ok(table.get_value(&ident.value, rowid).unwrap_or(Value::Null))
|
|
602
|
+
}
|
|
562
603
|
|
|
563
604
|
Expr::CompoundIdentifier(parts) => {
|
|
564
605
|
// Accept `table.col` — we only have one table in scope, so ignore the qualifier.
|
|
@@ -659,12 +700,171 @@ fn eval_expr(expr: &Expr, table: &Table, rowid: i64) -> Result<Value> {
|
|
|
659
700
|
))),
|
|
660
701
|
},
|
|
661
702
|
|
|
703
|
+
// Phase 7b — function-call dispatch. Currently only the three
|
|
704
|
+
// vector-distance functions; this match arm becomes the single
|
|
705
|
+
// place to register more SQL functions later (e.g. abs(),
|
|
706
|
+
// length(), …) without re-touching the rest of the executor.
|
|
707
|
+
//
|
|
708
|
+
// Operator forms (`<->` `<=>` `<#>`) are NOT plumbed here: two
|
|
709
|
+
// of three don't parse natively in sqlparser (we'd need a
|
|
710
|
+
// string-preprocessing pass or a sqlparser fork). Deferred to
|
|
711
|
+
// a follow-up sub-phase; see docs/phase-7-plan.md's "Scope
|
|
712
|
+
// corrections" note.
|
|
713
|
+
Expr::Function(func) => eval_function(func, table, rowid),
|
|
714
|
+
|
|
662
715
|
other => Err(SQLRiteError::NotImplemented(format!(
|
|
663
716
|
"unsupported expression in WHERE/projection: {other:?}"
|
|
664
717
|
))),
|
|
665
718
|
}
|
|
666
719
|
}
|
|
667
720
|
|
|
721
|
+
/// Dispatches an `Expr::Function` to its built-in implementation.
|
|
722
|
+
/// Currently only the three vec_distance_* functions; other functions
|
|
723
|
+
/// surface as `NotImplemented` errors with the function name in the
|
|
724
|
+
/// message so users see what they tried.
|
|
725
|
+
fn eval_function(func: &sqlparser::ast::Function, table: &Table, rowid: i64) -> Result<Value> {
|
|
726
|
+
// Function name lives in `name.0[0]` for unqualified calls. Anything
|
|
727
|
+
// qualified (e.g. `pkg.fn(...)`) falls through to NotImplemented.
|
|
728
|
+
let name = match func.name.0.as_slice() {
|
|
729
|
+
[ObjectNamePart::Identifier(ident)] => ident.value.to_lowercase(),
|
|
730
|
+
_ => {
|
|
731
|
+
return Err(SQLRiteError::NotImplemented(format!(
|
|
732
|
+
"qualified function names not supported: {:?}",
|
|
733
|
+
func.name
|
|
734
|
+
)));
|
|
735
|
+
}
|
|
736
|
+
};
|
|
737
|
+
|
|
738
|
+
match name.as_str() {
|
|
739
|
+
"vec_distance_l2" | "vec_distance_cosine" | "vec_distance_dot" => {
|
|
740
|
+
let (a, b) = extract_two_vector_args(&name, &func.args, table, rowid)?;
|
|
741
|
+
let dist = match name.as_str() {
|
|
742
|
+
"vec_distance_l2" => vec_distance_l2(&a, &b),
|
|
743
|
+
"vec_distance_cosine" => vec_distance_cosine(&a, &b)?,
|
|
744
|
+
"vec_distance_dot" => vec_distance_dot(&a, &b),
|
|
745
|
+
_ => unreachable!(),
|
|
746
|
+
};
|
|
747
|
+
// Widen f32 → f64 for the runtime Value. Vectors are stored
|
|
748
|
+
// as f32 (consistent with industry convention for embeddings),
|
|
749
|
+
// but the executor's numeric type is f64 so distances slot
|
|
750
|
+
// into Value::Real cleanly and can be compared / ordered with
|
|
751
|
+
// other reals via the existing arithmetic + comparison paths.
|
|
752
|
+
Ok(Value::Real(dist as f64))
|
|
753
|
+
}
|
|
754
|
+
other => Err(SQLRiteError::NotImplemented(format!(
|
|
755
|
+
"unknown function: {other}(...)"
|
|
756
|
+
))),
|
|
757
|
+
}
|
|
758
|
+
}
|
|
759
|
+
|
|
760
|
+
/// Extracts exactly two `Vec<f32>` arguments from a function call,
|
|
761
|
+
/// validating arity and that both sides are Vector-typed with matching
|
|
762
|
+
/// dimensions. Used by all three vec_distance_* functions.
|
|
763
|
+
fn extract_two_vector_args(
|
|
764
|
+
fn_name: &str,
|
|
765
|
+
args: &FunctionArguments,
|
|
766
|
+
table: &Table,
|
|
767
|
+
rowid: i64,
|
|
768
|
+
) -> Result<(Vec<f32>, Vec<f32>)> {
|
|
769
|
+
let arg_list = match args {
|
|
770
|
+
FunctionArguments::List(l) => &l.args,
|
|
771
|
+
_ => {
|
|
772
|
+
return Err(SQLRiteError::General(format!(
|
|
773
|
+
"{fn_name}() expects exactly two vector arguments"
|
|
774
|
+
)));
|
|
775
|
+
}
|
|
776
|
+
};
|
|
777
|
+
if arg_list.len() != 2 {
|
|
778
|
+
return Err(SQLRiteError::General(format!(
|
|
779
|
+
"{fn_name}() expects exactly 2 arguments, got {}",
|
|
780
|
+
arg_list.len()
|
|
781
|
+
)));
|
|
782
|
+
}
|
|
783
|
+
let mut out: Vec<Vec<f32>> = Vec::with_capacity(2);
|
|
784
|
+
for (i, arg) in arg_list.iter().enumerate() {
|
|
785
|
+
let expr = match arg {
|
|
786
|
+
FunctionArg::Unnamed(FunctionArgExpr::Expr(e)) => e,
|
|
787
|
+
other => {
|
|
788
|
+
return Err(SQLRiteError::NotImplemented(format!(
|
|
789
|
+
"{fn_name}() argument {i} has unsupported shape: {other:?}"
|
|
790
|
+
)));
|
|
791
|
+
}
|
|
792
|
+
};
|
|
793
|
+
let val = eval_expr(expr, table, rowid)?;
|
|
794
|
+
match val {
|
|
795
|
+
Value::Vector(v) => out.push(v),
|
|
796
|
+
other => {
|
|
797
|
+
return Err(SQLRiteError::General(format!(
|
|
798
|
+
"{fn_name}() argument {i} is not a vector: got {}",
|
|
799
|
+
other.to_display_string()
|
|
800
|
+
)));
|
|
801
|
+
}
|
|
802
|
+
}
|
|
803
|
+
}
|
|
804
|
+
let b = out.pop().unwrap();
|
|
805
|
+
let a = out.pop().unwrap();
|
|
806
|
+
if a.len() != b.len() {
|
|
807
|
+
return Err(SQLRiteError::General(format!(
|
|
808
|
+
"{fn_name}(): vector dimensions don't match (lhs={}, rhs={})",
|
|
809
|
+
a.len(),
|
|
810
|
+
b.len()
|
|
811
|
+
)));
|
|
812
|
+
}
|
|
813
|
+
Ok((a, b))
|
|
814
|
+
}
|
|
815
|
+
|
|
816
|
+
/// Euclidean (L2) distance: √Σ(aᵢ − bᵢ)².
|
|
817
|
+
/// Smaller-is-closer; identical vectors return 0.0.
|
|
818
|
+
pub(crate) fn vec_distance_l2(a: &[f32], b: &[f32]) -> f32 {
|
|
819
|
+
debug_assert_eq!(a.len(), b.len());
|
|
820
|
+
let mut sum = 0.0f32;
|
|
821
|
+
for i in 0..a.len() {
|
|
822
|
+
let d = a[i] - b[i];
|
|
823
|
+
sum += d * d;
|
|
824
|
+
}
|
|
825
|
+
sum.sqrt()
|
|
826
|
+
}
|
|
827
|
+
|
|
828
|
+
/// Cosine distance: 1 − (a·b) / (‖a‖·‖b‖).
|
|
829
|
+
/// Smaller-is-closer; identical (non-zero) vectors return 0.0,
|
|
830
|
+
/// orthogonal vectors return 1.0, opposite-direction vectors return 2.0.
|
|
831
|
+
///
|
|
832
|
+
/// Errors if either vector has zero magnitude — cosine similarity is
|
|
833
|
+
/// undefined for the zero vector and silently returning NaN would
|
|
834
|
+
/// poison `ORDER BY` ranking. Callers who want the silent-NaN
|
|
835
|
+
/// behavior can compute `vec_distance_dot(a, b) / (norm(a) * norm(b))`
|
|
836
|
+
/// themselves.
|
|
837
|
+
pub(crate) fn vec_distance_cosine(a: &[f32], b: &[f32]) -> Result<f32> {
|
|
838
|
+
debug_assert_eq!(a.len(), b.len());
|
|
839
|
+
let mut dot = 0.0f32;
|
|
840
|
+
let mut norm_a_sq = 0.0f32;
|
|
841
|
+
let mut norm_b_sq = 0.0f32;
|
|
842
|
+
for i in 0..a.len() {
|
|
843
|
+
dot += a[i] * b[i];
|
|
844
|
+
norm_a_sq += a[i] * a[i];
|
|
845
|
+
norm_b_sq += b[i] * b[i];
|
|
846
|
+
}
|
|
847
|
+
let denom = (norm_a_sq * norm_b_sq).sqrt();
|
|
848
|
+
if denom == 0.0 {
|
|
849
|
+
return Err(SQLRiteError::General(
|
|
850
|
+
"vec_distance_cosine() is undefined for zero-magnitude vectors".to_string(),
|
|
851
|
+
));
|
|
852
|
+
}
|
|
853
|
+
Ok(1.0 - dot / denom)
|
|
854
|
+
}
|
|
855
|
+
|
|
856
|
+
/// Negated dot product: −(a·b).
|
|
857
|
+
/// pgvector convention — negated so smaller-is-closer like L2 / cosine.
|
|
858
|
+
/// For unit-norm vectors `vec_distance_dot(a, b) == vec_distance_cosine(a, b) - 1`.
|
|
859
|
+
pub(crate) fn vec_distance_dot(a: &[f32], b: &[f32]) -> f32 {
|
|
860
|
+
debug_assert_eq!(a.len(), b.len());
|
|
861
|
+
let mut dot = 0.0f32;
|
|
862
|
+
for i in 0..a.len() {
|
|
863
|
+
dot += a[i] * b[i];
|
|
864
|
+
}
|
|
865
|
+
-dot
|
|
866
|
+
}
|
|
867
|
+
|
|
668
868
|
/// Evaluates an integer/real arithmetic op. NULL on either side propagates.
|
|
669
869
|
/// Mixed Integer/Real promotes to Real. Divide/Modulo by zero → error.
|
|
670
870
|
fn eval_arith(op: &BinaryOperator, l: &Value, r: &Value) -> Result<Value> {
|
|
@@ -766,3 +966,101 @@ fn convert_literal(v: &sqlparser::ast::Value) -> Result<Value> {
|
|
|
766
966
|
))),
|
|
767
967
|
}
|
|
768
968
|
}
|
|
969
|
+
|
|
970
|
+
#[cfg(test)]
|
|
971
|
+
mod tests {
|
|
972
|
+
use super::*;
|
|
973
|
+
|
|
974
|
+
// -----------------------------------------------------------------
|
|
975
|
+
// Phase 7b — Vector distance function math
|
|
976
|
+
// -----------------------------------------------------------------
|
|
977
|
+
|
|
978
|
+
/// Float comparison helper — distance results need a small epsilon
|
|
979
|
+
/// because we accumulate sums across many f32 multiplies.
|
|
980
|
+
fn approx_eq(a: f32, b: f32, eps: f32) -> bool {
|
|
981
|
+
(a - b).abs() < eps
|
|
982
|
+
}
|
|
983
|
+
|
|
984
|
+
#[test]
|
|
985
|
+
fn vec_distance_l2_identical_is_zero() {
|
|
986
|
+
let v = vec![0.1, 0.2, 0.3];
|
|
987
|
+
assert_eq!(vec_distance_l2(&v, &v), 0.0);
|
|
988
|
+
}
|
|
989
|
+
|
|
990
|
+
#[test]
|
|
991
|
+
fn vec_distance_l2_unit_basis_is_sqrt2() {
|
|
992
|
+
// [1, 0] vs [0, 1]: distance = √((1-0)² + (0-1)²) = √2 ≈ 1.414
|
|
993
|
+
let a = vec![1.0, 0.0];
|
|
994
|
+
let b = vec![0.0, 1.0];
|
|
995
|
+
assert!(approx_eq(vec_distance_l2(&a, &b), 2.0_f32.sqrt(), 1e-6));
|
|
996
|
+
}
|
|
997
|
+
|
|
998
|
+
#[test]
|
|
999
|
+
fn vec_distance_l2_known_value() {
|
|
1000
|
+
// [0, 0, 0] vs [3, 4, 0]: √(9 + 16 + 0) = 5 (the classic 3-4-5 triangle).
|
|
1001
|
+
let a = vec![0.0, 0.0, 0.0];
|
|
1002
|
+
let b = vec![3.0, 4.0, 0.0];
|
|
1003
|
+
assert!(approx_eq(vec_distance_l2(&a, &b), 5.0, 1e-6));
|
|
1004
|
+
}
|
|
1005
|
+
|
|
1006
|
+
#[test]
|
|
1007
|
+
fn vec_distance_cosine_identical_is_zero() {
|
|
1008
|
+
let v = vec![0.1, 0.2, 0.3];
|
|
1009
|
+
let d = vec_distance_cosine(&v, &v).unwrap();
|
|
1010
|
+
assert!(approx_eq(d, 0.0, 1e-6), "cos(v,v) = {d}, expected ≈ 0");
|
|
1011
|
+
}
|
|
1012
|
+
|
|
1013
|
+
#[test]
|
|
1014
|
+
fn vec_distance_cosine_orthogonal_is_one() {
|
|
1015
|
+
// Two orthogonal unit vectors should have cosine distance = 1.0
|
|
1016
|
+
// (cosine similarity = 0 → distance = 1 - 0 = 1).
|
|
1017
|
+
let a = vec![1.0, 0.0];
|
|
1018
|
+
let b = vec![0.0, 1.0];
|
|
1019
|
+
assert!(approx_eq(vec_distance_cosine(&a, &b).unwrap(), 1.0, 1e-6));
|
|
1020
|
+
}
|
|
1021
|
+
|
|
1022
|
+
#[test]
|
|
1023
|
+
fn vec_distance_cosine_opposite_is_two() {
|
|
1024
|
+
// a and -a have cosine similarity = -1 → distance = 1 - (-1) = 2.
|
|
1025
|
+
let a = vec![1.0, 0.0, 0.0];
|
|
1026
|
+
let b = vec![-1.0, 0.0, 0.0];
|
|
1027
|
+
assert!(approx_eq(vec_distance_cosine(&a, &b).unwrap(), 2.0, 1e-6));
|
|
1028
|
+
}
|
|
1029
|
+
|
|
1030
|
+
#[test]
|
|
1031
|
+
fn vec_distance_cosine_zero_magnitude_errors() {
|
|
1032
|
+
// Cosine is undefined for the zero vector — error rather than NaN.
|
|
1033
|
+
let a = vec![0.0, 0.0];
|
|
1034
|
+
let b = vec![1.0, 0.0];
|
|
1035
|
+
let err = vec_distance_cosine(&a, &b).unwrap_err();
|
|
1036
|
+
assert!(format!("{err}").contains("zero-magnitude"));
|
|
1037
|
+
}
|
|
1038
|
+
|
|
1039
|
+
#[test]
|
|
1040
|
+
fn vec_distance_dot_negates() {
|
|
1041
|
+
// a·b = 1*4 + 2*5 + 3*6 = 32. Negated → -32.
|
|
1042
|
+
let a = vec![1.0, 2.0, 3.0];
|
|
1043
|
+
let b = vec![4.0, 5.0, 6.0];
|
|
1044
|
+
assert!(approx_eq(vec_distance_dot(&a, &b), -32.0, 1e-6));
|
|
1045
|
+
}
|
|
1046
|
+
|
|
1047
|
+
#[test]
|
|
1048
|
+
fn vec_distance_dot_orthogonal_is_zero() {
|
|
1049
|
+
// Orthogonal vectors have dot product 0 → negated is also 0.
|
|
1050
|
+
let a = vec![1.0, 0.0];
|
|
1051
|
+
let b = vec![0.0, 1.0];
|
|
1052
|
+
assert_eq!(vec_distance_dot(&a, &b), 0.0);
|
|
1053
|
+
}
|
|
1054
|
+
|
|
1055
|
+
#[test]
|
|
1056
|
+
fn vec_distance_dot_unit_norm_matches_cosine_minus_one() {
|
|
1057
|
+
// For unit-norm vectors: dot(a,b) = cos(a,b)
|
|
1058
|
+
// → -dot(a,b) = -cos(a,b) = (1 - cos(a,b)) - 1 = vec_distance_cosine(a,b) - 1.
|
|
1059
|
+
// Sanity check that the two functions agree on unit vectors. Uses a 1e-5 bound, looser than the 1e-6 in the neighbouring tests (presumably to absorb f32 rounding — confirm).
|
|
1060
|
+
let a = vec![0.6f32, 0.8]; // unit norm: √(0.36+0.64) = 1
|
|
1061
|
+
let b = vec![0.8f32, 0.6]; // unit norm too
|
|
1062
|
+
let dot = vec_distance_dot(&a, &b);
|
|
1063
|
+
let cos = vec_distance_cosine(&a, &b).unwrap();
|
|
1064
|
+
assert!(approx_eq(dot, cos - 1.0, 1e-5));
|
|
1065
|
+
}
|
|
1066
|
+
}
|
|
@@ -1269,4 +1269,134 @@ mod tests {
|
|
|
1269
1269
|
assert_eq!(vectors[1], vec![1.0f32, 2.0]);
|
|
1270
1270
|
assert_eq!(vectors[2], vec![2.0f32, 3.0]);
|
|
1271
1271
|
}
|
|
1272
|
+
|
|
1273
|
+
// -----------------------------------------------------------------
|
|
1274
|
+
// Phase 7b — vector distance functions through process_command
|
|
1275
|
+
// -----------------------------------------------------------------
|
|
1276
|
+
|
|
1277
|
+
/// Test fixture: creates table `docs (id INTEGER PRIMARY KEY, e VECTOR(2))` and inserts
|
|
1278
|
+
/// three 2-dim vectors chosen so the expected distances are easy to work out by hand:
|
|
1279
|
+
/// id=1: [1, 0]
|
|
1280
|
+
/// id=2: [0, 1]
|
|
1281
|
+
/// id=3: [1, 1] (NOTE(review): the INSERTs omit `id`; the 1/2/3 labels assume the primary key is auto-assigned in insertion order — confirm)
|
|
1282
|
+
fn seed_vector_docs() -> Database {
|
|
1283
|
+
let mut db = Database::new("tempdb".to_string());
|
|
1284
|
+
process_command(
|
|
1285
|
+
"CREATE TABLE docs (id INTEGER PRIMARY KEY, e VECTOR(2));",
|
|
1286
|
+
&mut db,
|
|
1287
|
+
)
|
|
1288
|
+
.expect("create");
|
|
1289
|
+
process_command("INSERT INTO docs (e) VALUES ([1.0, 0.0]);", &mut db).expect("insert 1");
|
|
1290
|
+
process_command("INSERT INTO docs (e) VALUES ([0.0, 1.0]);", &mut db).expect("insert 2");
|
|
1291
|
+
process_command("INSERT INTO docs (e) VALUES ([1.0, 1.0]);", &mut db).expect("insert 3");
|
|
1292
|
+
db
|
|
1293
|
+
}
|
|
1294
|
+
|
|
1295
|
+
#[test]
|
|
1296
|
+
fn vec_distance_l2_in_where_filters_correctly() {
|
|
1297
|
+
// L2 distance of each seeded row from the probe [1,0]:
|
|
1298
|
+
// id=1 [1,0]: 0
|
|
1299
|
+
// id=2 [0,1]: √2 ≈ 1.414
|
|
1300
|
+
// id=3 [1,1]: 1
|
|
1301
|
+
// WHERE distance < 1.1 should match id=1 and id=3 (2 rows). Only the row count in the response text is asserted, not which ids matched.
|
|
1302
|
+
let mut db = seed_vector_docs();
|
|
1303
|
+
let resp = process_command(
|
|
1304
|
+
"SELECT * FROM docs WHERE vec_distance_l2(e, [1.0, 0.0]) < 1.1;",
|
|
1305
|
+
&mut db,
|
|
1306
|
+
)
|
|
1307
|
+
.expect("select");
|
|
1308
|
+
assert!(
|
|
1309
|
+
resp.contains("2 rows returned"),
|
|
1310
|
+
"expected 2 rows, got: {resp}"
|
|
1311
|
+
);
|
|
1312
|
+
}
|
|
1313
|
+
|
|
1314
|
+
#[test]
|
|
1315
|
+
fn vec_distance_cosine_in_where() {
|
|
1316
|
+
// Cosine distance of each seeded row from the probe [1,0]:
|
|
1317
|
+
// [1,0] vs [0,1]: cosine distance = 1 (orthogonal); [1,0] vs [1,0]: cosine distance = 0
|
|
1318
|
+
// [1,0] vs [1,1]: cosine distance = 1 - 1/√2 ≈ 0.293
|
|
1319
|
+
// WHERE distance < 0.5 → id=1 and id=3 (2 rows). Only the row count in the response text is asserted, not which ids matched.
|
|
1320
|
+
let mut db = seed_vector_docs();
|
|
1321
|
+
let resp = process_command(
|
|
1322
|
+
"SELECT * FROM docs WHERE vec_distance_cosine(e, [1.0, 0.0]) < 0.5;",
|
|
1323
|
+
&mut db,
|
|
1324
|
+
)
|
|
1325
|
+
.expect("select");
|
|
1326
|
+
assert!(
|
|
1327
|
+
resp.contains("2 rows returned"),
|
|
1328
|
+
"expected 2 rows, got: {resp}"
|
|
1329
|
+
);
|
|
1330
|
+
}
|
|
1331
|
+
|
|
1332
|
+
#[test]
|
|
1333
|
+
fn vec_distance_dot_negated() {
|
|
1334
|
+
// Dot product with the probe [1,0], then the negated value the function should return: [1,0]·[1,0] = 1 → -1
|
|
1335
|
+
// [1,0]·[0,1] = 0 → 0
|
|
1336
|
+
// [1,0]·[1,1] = 1 → -1
|
|
1337
|
+
// WHERE -dot < 0 (i.e. dot > 0) → id=1 and id=3 (2 rows). Only the row count in the response text is asserted, not which ids matched.
|
|
1338
|
+
let mut db = seed_vector_docs();
|
|
1339
|
+
let resp = process_command(
|
|
1340
|
+
"SELECT * FROM docs WHERE vec_distance_dot(e, [1.0, 0.0]) < 0.0;",
|
|
1341
|
+
&mut db,
|
|
1342
|
+
)
|
|
1343
|
+
.expect("select");
|
|
1344
|
+
assert!(
|
|
1345
|
+
resp.contains("2 rows returned"),
|
|
1346
|
+
"expected 2 rows, got: {resp}"
|
|
1347
|
+
);
|
|
1348
|
+
}
|
|
1349
|
+
|
|
1350
|
+
#[test]
|
|
1351
|
+
fn knn_via_order_by_distance_limit() {
|
|
1352
|
+
// Classic KNN shape: ORDER BY distance LIMIT k.
|
|
1353
|
+
// Distances from [1,0]: id=1=0, id=3=1, id=2=√2.
|
|
1354
|
+
// LIMIT 2 should return id=1 then id=3 in that order. NOTE(review): the assertion below only checks the "2 rows returned" count, so neither the ordering nor the returned ids are actually verified here.
|
|
1355
|
+
let mut db = seed_vector_docs();
|
|
1356
|
+
let resp = process_command(
|
|
1357
|
+
"SELECT id FROM docs ORDER BY vec_distance_l2(e, [1.0, 0.0]) ASC LIMIT 2;",
|
|
1358
|
+
&mut db,
|
|
1359
|
+
)
|
|
1360
|
+
.expect("select");
|
|
1361
|
+
assert!(
|
|
1362
|
+
resp.contains("2 rows returned"),
|
|
1363
|
+
"expected 2 rows, got: {resp}"
|
|
1364
|
+
);
|
|
1365
|
+
}
|
|
1366
|
+
|
|
1367
|
+
#[test]
|
|
1368
|
+
fn distance_function_dim_mismatch_errors() {
|
|
1369
|
+
// 2-dim column queried with a 3-dim probe → must return Err whose message mentions "dimension" (case-insensitive) and contains both "lhs=2" and "rhs=3".
|
|
1370
|
+
let mut db = seed_vector_docs();
|
|
1371
|
+
let err = process_command(
|
|
1372
|
+
"SELECT * FROM docs WHERE vec_distance_l2(e, [1.0, 0.0, 0.0]) < 1.0;",
|
|
1373
|
+
&mut db,
|
|
1374
|
+
)
|
|
1375
|
+
.unwrap_err();
|
|
1376
|
+
let msg = format!("{err}");
|
|
1377
|
+
assert!(
|
|
1378
|
+
msg.to_lowercase().contains("dimension")
|
|
1379
|
+
&& msg.contains("lhs=2")
|
|
1380
|
+
&& msg.contains("rhs=3"),
|
|
1381
|
+
"expected dim mismatch error, got: {msg}"
|
|
1382
|
+
);
|
|
1383
|
+
}
|
|
1384
|
+
|
|
1385
|
+
#[test]
|
|
1386
|
+
fn unknown_function_errors_with_name() {
|
|
1387
|
+
// Calling an unregistered function must return Err whose message names it.
|
|
1388
|
+
// The call is placed in WHERE, not the projection: the projection parser still requires
|
|
1389
|
+
// bare column references; supporting function calls there (with `AS alias`) is a future enhancement.
|
|
1390
|
+
let mut db = seed_vector_docs();
|
|
1391
|
+
let err = process_command(
|
|
1392
|
+
"SELECT * FROM docs WHERE vec_does_not_exist(e, [1.0, 0.0]) < 1.0;",
|
|
1393
|
+
&mut db,
|
|
1394
|
+
)
|
|
1395
|
+
.unwrap_err();
|
|
1396
|
+
let msg = format!("{err}");
|
|
1397
|
+
assert!(
|
|
1398
|
+
msg.contains("vec_does_not_exist"),
|
|
1399
|
+
"expected error mentioning function name, got: {msg}"
|
|
1400
|
+
);
|
|
1401
|
+
}
|
|
1272
1402
|
}
|
|
@@ -14,10 +14,16 @@ pub enum Projection {
|
|
|
14
14
|
Columns(Vec<String>),
|
|
15
15
|
}
|
|
16
16
|
|
|
17
|
-
///
|
|
17
|
+
/// A parsed `ORDER BY` clause: a single sort key (`expr`) plus its
|
|
18
|
+
/// direction (`ascending`; the parser sets `true` when the query gives
|
|
19
|
+
/// no explicit ASC/DESC). Phase 7b widened the key from "bare column
|
|
20
|
+
/// name" to "arbitrary expression" so KNN queries of the form
|
|
21
|
+
/// `ORDER BY vec_distance_l2(col, [...]) LIMIT k` work end-to-end. The
|
|
22
|
+
/// expression is evaluated per-row at execution time via `eval_expr`;
|
|
23
|
+
/// plain `ORDER BY col` still works: it is just an `Expr::Identifier` taking the same path.
|
|
|
18
24
|
#[derive(Debug, Clone)]
|
|
19
25
|
pub struct OrderByClause {
|
|
20
|
-
pub
|
|
|
26
|
+
pub expr: Expr,
|
|
|
21
27
|
pub ascending: bool,
|
|
|
22
28
|
}
|
|
23
29
|
|
|
@@ -174,20 +180,15 @@ fn parse_order_by(order_by: Option<&sqlparser::ast::OrderBy>) -> Result<Option<O
|
|
|
174
180
|
));
|
|
175
181
|
}
|
|
176
182
|
let obe = &exprs[0];
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
return Err(SQLRiteError::NotImplemented(
|
|
184
|
-
"ORDER BY only supports a bare column name for now".to_string(),
|
|
185
|
-
));
|
|
186
|
-
}
|
|
187
|
-
};
|
|
183
|
+
// Phase 7b: accept arbitrary expressions, not just bare column refs.
|
|
184
|
+
// The executor's `sort_rowids` evaluates this expression per row via
|
|
185
|
+
// `eval_expr`, which handles Identifier (column lookup), Function
|
|
186
|
+
// (vec_distance_*), arithmetic, etc. uniformly. The previous
|
|
187
|
+
// column-name-only restriction has been lifted.
|
|
188
|
+
let expr = obe.expr.clone();
|
|
188
189
|
// `asc == None` is the dialect default (ASC).
|
|
189
190
|
let ascending = obe.options.asc.unwrap_or(true);
|
|
190
|
-
Ok(Some(OrderByClause {
|
|
191
|
+
Ok(Some(OrderByClause { expr, ascending }))
|
|
191
192
|
}
|
|
192
193
|
|
|
193
194
|
fn parse_limit(limit: Option<&LimitClause>) -> Result<Option<usize>> {
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sqlrite-0.1.10 → sqlrite-0.1.11}/images/SQLRite Simple SQL Execution High Level Diagram.png
RENAMED
|
File without changes
|
|
File without changes
|
{sqlrite-0.1.10 → sqlrite-0.1.11}/images/SQLRite Simple SQL INSERT Execution High Level Diagram.png
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|