ydb-sqlglot-plugin 0.1.0__tar.gz → 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ydb_sqlglot_plugin-0.1.1/PKG-INFO +230 -0
- ydb_sqlglot_plugin-0.1.1/README.md +204 -0
- {ydb_sqlglot_plugin-0.1.0 → ydb_sqlglot_plugin-0.1.1}/pyproject.toml +2 -1
- ydb_sqlglot_plugin-0.1.1/ydb_sqlglot/version.py +1 -0
- {ydb_sqlglot_plugin-0.1.0 → ydb_sqlglot_plugin-0.1.1}/ydb_sqlglot/ydb.py +655 -56
- ydb_sqlglot_plugin-0.1.1/ydb_sqlglot_plugin.egg-info/PKG-INFO +230 -0
- {ydb_sqlglot_plugin-0.1.0 → ydb_sqlglot_plugin-0.1.1}/ydb_sqlglot_plugin.egg-info/SOURCES.txt +0 -1
- {ydb_sqlglot_plugin-0.1.0 → ydb_sqlglot_plugin-0.1.1}/ydb_sqlglot_plugin.egg-info/requires.txt +1 -0
- ydb_sqlglot_plugin-0.1.0/PKG-INFO +0 -92
- ydb_sqlglot_plugin-0.1.0/README.md +0 -67
- ydb_sqlglot_plugin-0.1.0/tests/test_ydb.py +0 -709
- ydb_sqlglot_plugin-0.1.0/ydb_sqlglot/version.py +0 -1
- ydb_sqlglot_plugin-0.1.0/ydb_sqlglot_plugin.egg-info/PKG-INFO +0 -92
- {ydb_sqlglot_plugin-0.1.0 → ydb_sqlglot_plugin-0.1.1}/LICENSE +0 -0
- {ydb_sqlglot_plugin-0.1.0 → ydb_sqlglot_plugin-0.1.1}/setup.cfg +0 -0
- {ydb_sqlglot_plugin-0.1.0 → ydb_sqlglot_plugin-0.1.1}/ydb_sqlglot/__init__.py +0 -0
- {ydb_sqlglot_plugin-0.1.0 → ydb_sqlglot_plugin-0.1.1}/ydb_sqlglot_plugin.egg-info/dependency_links.txt +0 -0
- {ydb_sqlglot_plugin-0.1.0 → ydb_sqlglot_plugin-0.1.1}/ydb_sqlglot_plugin.egg-info/entry_points.txt +0 -0
- {ydb_sqlglot_plugin-0.1.0 → ydb_sqlglot_plugin-0.1.1}/ydb_sqlglot_plugin.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ydb-sqlglot-plugin
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: YDB dialect plugin for sqlglot
|
|
5
|
+
Author: YDB Team
|
|
6
|
+
License: Apache-2.0
|
|
7
|
+
Project-URL: Homepage, https://github.com/ydb-platform/ydb-sqlglot-plugin
|
|
8
|
+
Project-URL: Repository, https://github.com/ydb-platform/ydb-sqlglot-plugin
|
|
9
|
+
Keywords: sql,sqlglot,ydb,dialect,parser,transpiler
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Topic :: Database
|
|
16
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
17
|
+
Requires-Python: >=3.9
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
License-File: LICENSE
|
|
20
|
+
Requires-Dist: sqlglot>=28.6.0
|
|
21
|
+
Provides-Extra: dev
|
|
22
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
23
|
+
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
24
|
+
Requires-Dist: ydb<4,>=3.28.0; extra == "dev"
|
|
25
|
+
Dynamic: license-file
|
|
26
|
+
|
|
27
|
+
# ydb-sqlglot-plugin
|
|
28
|
+
|
|
29
|
+
YDB dialect plugin for [sqlglot](https://github.com/tobymao/sqlglot) — transpiles SQL from any dialect into YDB/YQL.
|
|
30
|
+
|
|
31
|
+
## Installation
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
pip install ydb-sqlglot-plugin
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## Usage
|
|
38
|
+
|
|
39
|
+
After installing the package, the `ydb` dialect is available in sqlglot automatically — no extra imports needed:
|
|
40
|
+
|
|
41
|
+
```python
|
|
42
|
+
import sqlglot
|
|
43
|
+
|
|
44
|
+
# Transpile from any dialect
|
|
45
|
+
result = sqlglot.transpile("SELECT * FROM users WHERE id = 1", read="mysql", write="ydb")[0]
|
|
46
|
+
# → SELECT * FROM `users` WHERE id = 1
|
|
47
|
+
|
|
48
|
+
# Or parse first, then generate
|
|
49
|
+
query = "SELECT * FROM orders WHERE user_id = 1"
|
|
50
|
+
parsed = sqlglot.parse_one(query, dialect="postgres")
|
|
51
|
+
yql = parsed.sql(dialect="ydb")
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## What the plugin does
|
|
55
|
+
|
|
56
|
+
### Table names
|
|
57
|
+
|
|
58
|
+
Database-qualified names are rewritten to the YDB path format and wrapped in backticks:
|
|
59
|
+
|
|
60
|
+
```sql
|
|
61
|
+
-- input
|
|
62
|
+
SELECT * FROM analytics.events
|
|
63
|
+
|
|
64
|
+
-- output
|
|
65
|
+
SELECT * FROM `analytics/events`
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
### CTEs → YDB variables
|
|
69
|
+
|
|
70
|
+
```sql
|
|
71
|
+
-- input
|
|
72
|
+
WITH active AS (SELECT * FROM users WHERE status = 'active')
|
|
73
|
+
SELECT * FROM active
|
|
74
|
+
|
|
75
|
+
-- output
|
|
76
|
+
$active = (SELECT * FROM `users` WHERE status = 'active');
|
|
77
|
+
|
|
78
|
+
SELECT * FROM $active AS active
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
### Subquery decorrelation
|
|
82
|
+
|
|
83
|
+
Correlated subqueries (which YQL does not support) are rewritten as JOINs:
|
|
84
|
+
|
|
85
|
+
```sql
|
|
86
|
+
-- input
|
|
87
|
+
SELECT id, (SELECT MAX(amount) FROM orders WHERE orders.user_id = users.id) AS max_order
|
|
88
|
+
FROM users
|
|
89
|
+
|
|
90
|
+
-- output
|
|
91
|
+
SELECT users.id AS id, _u_0._u_2 AS max_order
|
|
92
|
+
FROM `users`
|
|
93
|
+
LEFT JOIN (
|
|
94
|
+
SELECT MAX(amount) AS _u_2, user_id AS _u_1
|
|
95
|
+
FROM `orders`
|
|
96
|
+
WHERE TRUE
|
|
97
|
+
GROUP BY user_id AS _u_1
|
|
98
|
+
) AS _u_0 ON users.id = _u_0._u_1
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
The same rewriting applies to `EXISTS`, `IN (subquery)`, and `ANY/ALL` subqueries.
|
|
102
|
+
|
|
103
|
+
---
|
|
104
|
+
|
|
105
|
+
## Function reference
|
|
106
|
+
|
|
107
|
+
Functions below are recognized by sqlglot as standard SQL expressions and translated to their YQL equivalents. Dialect-specific functions that sqlglot does not parse into typed AST nodes are **passed through unchanged** — see [Limitations](#limitations).
|
|
108
|
+
|
|
109
|
+
### Date / time
|
|
110
|
+
|
|
111
|
+
| Input | YQL output |
|
|
112
|
+
|---|---|
|
|
113
|
+
| `DATE_TRUNC('day', x)` | `DATE(x)` |
|
|
114
|
+
| `DATE_TRUNC('week', x)` | `DateTime::MakeDate(DateTime::StartOfWeek(x))` |
|
|
115
|
+
| `DATE_TRUNC('month', x)` | `DateTime::MakeDate(DateTime::StartOfMonth(x))` |
|
|
116
|
+
| `DATE_TRUNC('quarter', x)` | `DateTime::MakeDate(DateTime::StartOfQuarter(x))` |
|
|
117
|
+
| `DATE_TRUNC('year', x)` | `DateTime::MakeDate(DateTime::StartOfYear(x))` |
|
|
118
|
+
| `EXTRACT(WEEK FROM x)` | `DateTime::GetWeekOfYear(x)` |
|
|
119
|
+
| `EXTRACT(MONTH FROM x)` | `DateTime::GetMonth(x)` |
|
|
120
|
+
| `EXTRACT(YEAR FROM x)` | `DateTime::GetYear(x)` |
|
|
121
|
+
| `CURRENT_TIMESTAMP` | `CurrentUtcTimestamp()` |
|
|
122
|
+
| `STR_TO_DATE(str, fmt)` / `TO_DATE(str, fmt)` | `DateTime::MakeTimestamp(DateTime::Parse(fmt)(str))` |
|
|
123
|
+
| `DATE_ADD(x, INTERVAL n MONTH)` | `DateTime::MakeDate(DateTime::ShiftMonths(x, n))` |
|
|
124
|
+
| `DATE_ADD(x, INTERVAL n YEAR)` | `DateTime::MakeDate(DateTime::ShiftYears(x, n))` |
|
|
125
|
+
| `DATE_ADD(x, INTERVAL n DAY)` | `x + DateTime::IntervalFromDays(n)` |
|
|
126
|
+
| `DATE_ADD(x, INTERVAL n HOUR)` | `x + DateTime::IntervalFromHours(n)` |
|
|
127
|
+
| `DATE_ADD(x, INTERVAL n MINUTE)` | `x + DateTime::IntervalFromMinutes(n)` |
|
|
128
|
+
| `DATE_ADD(x, INTERVAL n SECOND)` | `x + DateTime::IntervalFromSeconds(n)` |
|
|
129
|
+
| `DATE_SUB(x, INTERVAL n ...)` | same as `DATE_ADD`, with the interval subtracted (`-`) instead of added |
|
|
130
|
+
| `INTERVAL n DAY` (literal) | `DateTime::IntervalFromDays(n)` |
|
|
131
|
+
| `INTERVAL n HOUR` (literal) | `DateTime::IntervalFromHours(n)` |
|
|
132
|
+
| `INTERVAL n MINUTE` (literal) | `DateTime::IntervalFromMinutes(n)` |
|
|
133
|
+
| `INTERVAL n SECOND` (literal) | `DateTime::IntervalFromSeconds(n)` |
|
|
134
|
+
| `dateDiff('minute', a, b)` | `(CAST(b AS Int64) - CAST(a AS Int64)) / 60000000` |
|
|
135
|
+
| `dateDiff('hour', a, b)` | `(CAST(b AS Int64) - CAST(a AS Int64)) / 3600000000` |
|
|
136
|
+
| `dateDiff('day', a, b)` | `(CAST(b AS Int64) - CAST(a AS Int64)) / 86400000000` |
|
|
137
|
+
| `dateDiff('week', a, b)` | `(CAST(b AS Int64) - CAST(a AS Int64)) / 604800000000` |
|
|
138
|
+
|
|
139
|
+
> **Note on `dateDiff`:** YDB stores `Timestamp` as microseconds since epoch, so each formula above divides the microsecond difference by the number of microseconds in the unit. This yields exact integer units only when both arguments are `Timestamp`; results for `Date`-typed columns will differ.
|
|
140
|
+
|
|
141
|
+
### Strings
|
|
142
|
+
|
|
143
|
+
| Input | YQL output |
|
|
144
|
+
|---|---|
|
|
145
|
+
| `CONCAT(a, b, ...)` | `a \|\| b \|\| ...` |
|
|
146
|
+
| `UPPER(x)` | `Unicode::ToUpper(x)` |
|
|
147
|
+
| `LOWER(x)` | `Unicode::ToLower(x)` |
|
|
148
|
+
| `LENGTH(x)` / `CHAR_LENGTH(x)` | `Unicode::GetLength(x)` |
|
|
149
|
+
| `POSITION(sub IN x)` / `STRPOS(x, sub)` | `Find(x, sub)` |
|
|
150
|
+
| `STRING_TO_ARRAY(x, delim)` | `String::SplitToList(x, delim)` |
|
|
151
|
+
| `ARRAY_TO_STRING(arr, delim)` | `String::JoinFromList(arr, delim)` |
|
|
152
|
+
|
|
153
|
+
### Arrays / collections
|
|
154
|
+
|
|
155
|
+
| Input | YQL output |
|
|
156
|
+
|---|---|
|
|
157
|
+
| `ARRAY(v1, v2, ...)` | `AsList(v1, v2, ...)` |
|
|
158
|
+
| `ARRAY_LENGTH(x)` / `ARRAY_SIZE(x)` | `ListLength(x)` |
|
|
159
|
+
| `ARRAY_FILTER(arr, x -> cond)` | `ListFilter(arr, ($x) -> {RETURN cond})` |
|
|
160
|
+
| `ARRAY_ANY(arr, x -> cond)` | `ListHasItems(ListFilter(arr, ($x) -> {RETURN cond}))` |
|
|
161
|
+
| `ARRAY_AGG(x)` | `AGGREGATE_LIST(x)` |
|
|
162
|
+
| `UNNEST(x)` | `FLATTEN BY x` |
|
|
163
|
+
|
|
164
|
+
### Conditional / math
|
|
165
|
+
|
|
166
|
+
| Input | YQL output |
|
|
167
|
+
|---|---|
|
|
168
|
+
| `NULLIF(x, y)` | `IF(x = y, NULL, x)` |
|
|
169
|
+
| `ROUND(x, n)` | `Math::Round(x, -n)` |
|
|
170
|
+
| `COUNT()` *(zero-argument form)* | `COUNT(*)` |
|
|
171
|
+
|
|
172
|
+
### JSON
|
|
173
|
+
|
|
174
|
+
| Input | YQL output |
|
|
175
|
+
|---|---|
|
|
176
|
+
| `jsonb_col @> value` (PostgreSQL) | `Yson::Contains(jsonb_col, value)` |
|
|
177
|
+
|
|
178
|
+
---
|
|
179
|
+
|
|
180
|
+
## Type mapping
|
|
181
|
+
|
|
182
|
+
| SQL type | YQL type |
|
|
183
|
+
|---|---|
|
|
184
|
+
| `TINYINT` | `INT8` |
|
|
185
|
+
| `SMALLINT` | `INT16` |
|
|
186
|
+
| `INT` / `INTEGER` | `INT32` |
|
|
187
|
+
| `BIGINT` | `INT64` |
|
|
188
|
+
| `FLOAT` | `Float` |
|
|
189
|
+
| `DOUBLE` / `DOUBLE PRECISION` | `Double` |
|
|
190
|
+
| `DECIMAL(p, s)` | `Decimal(p, s)` |
|
|
191
|
+
| `BOOLEAN` / `BIT` | `Uint8` |
|
|
192
|
+
| `TIMESTAMP` | `Timestamp` |
|
|
193
|
+
| `VARCHAR` / `NVARCHAR` / `CHAR` | `Utf8` |
|
|
194
|
+
| `TEXT` / `TINYTEXT` / `MEDIUMTEXT` / `LONGTEXT` | `Utf8` |
|
|
195
|
+
| `BLOB` / `TINYBLOB` / `MEDIUMBLOB` / `LONGBLOB` / `VARBINARY` | `String` |
|
|
196
|
+
|
|
197
|
+
---
|
|
198
|
+
|
|
199
|
+
## Limitations
|
|
200
|
+
|
|
201
|
+
### Correlated subqueries in DML
|
|
202
|
+
|
|
203
|
+
Correlated subqueries inside `UPDATE` or `INSERT` statements cannot be automatically decorrelated — YDB does not support them natively, and rewriting requires knowledge of the table's primary key. Rewrite manually using a `$variable`:
|
|
204
|
+
|
|
205
|
+
```sql
|
|
206
|
+
-- not supported (will raise an error)
|
|
207
|
+
UPDATE t SET col = (SELECT val FROM other WHERE other.id = t.id)
|
|
208
|
+
|
|
209
|
+
-- workaround
|
|
210
|
+
$vals = (SELECT id, val FROM other);
|
|
211
|
+
UPDATE t SET col = (SELECT val FROM $vals WHERE id = t.id)
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
Correlated subqueries inside `SELECT` are handled automatically via JOIN rewriting.
|
|
215
|
+
|
|
216
|
+
### `dateDiff` with month granularity
|
|
217
|
+
|
|
218
|
+
`dateDiff('month', a, b)` has no exact equivalent in YDB because months have variable length. Use `DateTime::ShiftMonths` for date arithmetic instead.
|
|
219
|
+
|
|
220
|
+
---
|
|
221
|
+
|
|
222
|
+
## Development
|
|
223
|
+
|
|
224
|
+
```bash
|
|
225
|
+
git clone https://github.com/ydb-platform/ydb-sqlglot-plugin.git
|
|
226
|
+
cd ydb-sqlglot-plugin
|
|
227
|
+
python -m venv .venv && source .venv/bin/activate
|
|
228
|
+
pip install -e ".[dev]"
|
|
229
|
+
python -m pytest tests/
|
|
230
|
+
```
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
# ydb-sqlglot-plugin
|
|
2
|
+
|
|
3
|
+
YDB dialect plugin for [sqlglot](https://github.com/tobymao/sqlglot) — transpiles SQL from any dialect into YDB/YQL.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install ydb-sqlglot-plugin
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Usage
|
|
12
|
+
|
|
13
|
+
After installing the package, the `ydb` dialect is available in sqlglot automatically — no extra imports needed:
|
|
14
|
+
|
|
15
|
+
```python
|
|
16
|
+
import sqlglot
|
|
17
|
+
|
|
18
|
+
# Transpile from any dialect
|
|
19
|
+
result = sqlglot.transpile("SELECT * FROM users WHERE id = 1", read="mysql", write="ydb")[0]
|
|
20
|
+
# → SELECT * FROM `users` WHERE id = 1
|
|
21
|
+
|
|
22
|
+
# Or parse first, then generate
|
|
23
|
+
query = "SELECT * FROM orders WHERE user_id = 1"
|
|
24
|
+
parsed = sqlglot.parse_one(query, dialect="postgres")
|
|
25
|
+
yql = parsed.sql(dialect="ydb")
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## What the plugin does
|
|
29
|
+
|
|
30
|
+
### Table names
|
|
31
|
+
|
|
32
|
+
Database-qualified names are rewritten to the YDB path format and wrapped in backticks:
|
|
33
|
+
|
|
34
|
+
```sql
|
|
35
|
+
-- input
|
|
36
|
+
SELECT * FROM analytics.events
|
|
37
|
+
|
|
38
|
+
-- output
|
|
39
|
+
SELECT * FROM `analytics/events`
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
### CTEs → YDB variables
|
|
43
|
+
|
|
44
|
+
```sql
|
|
45
|
+
-- input
|
|
46
|
+
WITH active AS (SELECT * FROM users WHERE status = 'active')
|
|
47
|
+
SELECT * FROM active
|
|
48
|
+
|
|
49
|
+
-- output
|
|
50
|
+
$active = (SELECT * FROM `users` WHERE status = 'active');
|
|
51
|
+
|
|
52
|
+
SELECT * FROM $active AS active
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
### Subquery decorrelation
|
|
56
|
+
|
|
57
|
+
Correlated subqueries (which YQL does not support) are rewritten as JOINs:
|
|
58
|
+
|
|
59
|
+
```sql
|
|
60
|
+
-- input
|
|
61
|
+
SELECT id, (SELECT MAX(amount) FROM orders WHERE orders.user_id = users.id) AS max_order
|
|
62
|
+
FROM users
|
|
63
|
+
|
|
64
|
+
-- output
|
|
65
|
+
SELECT users.id AS id, _u_0._u_2 AS max_order
|
|
66
|
+
FROM `users`
|
|
67
|
+
LEFT JOIN (
|
|
68
|
+
SELECT MAX(amount) AS _u_2, user_id AS _u_1
|
|
69
|
+
FROM `orders`
|
|
70
|
+
WHERE TRUE
|
|
71
|
+
GROUP BY user_id AS _u_1
|
|
72
|
+
) AS _u_0 ON users.id = _u_0._u_1
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
The same rewriting applies to `EXISTS`, `IN (subquery)`, and `ANY/ALL` subqueries.
|
|
76
|
+
|
|
77
|
+
---
|
|
78
|
+
|
|
79
|
+
## Function reference
|
|
80
|
+
|
|
81
|
+
Functions below are recognized by sqlglot as standard SQL expressions and translated to their YQL equivalents. Dialect-specific functions that sqlglot does not parse into typed AST nodes are **passed through unchanged** — see [Limitations](#limitations).
|
|
82
|
+
|
|
83
|
+
### Date / time
|
|
84
|
+
|
|
85
|
+
| Input | YQL output |
|
|
86
|
+
|---|---|
|
|
87
|
+
| `DATE_TRUNC('day', x)` | `DATE(x)` |
|
|
88
|
+
| `DATE_TRUNC('week', x)` | `DateTime::MakeDate(DateTime::StartOfWeek(x))` |
|
|
89
|
+
| `DATE_TRUNC('month', x)` | `DateTime::MakeDate(DateTime::StartOfMonth(x))` |
|
|
90
|
+
| `DATE_TRUNC('quarter', x)` | `DateTime::MakeDate(DateTime::StartOfQuarter(x))` |
|
|
91
|
+
| `DATE_TRUNC('year', x)` | `DateTime::MakeDate(DateTime::StartOfYear(x))` |
|
|
92
|
+
| `EXTRACT(WEEK FROM x)` | `DateTime::GetWeekOfYear(x)` |
|
|
93
|
+
| `EXTRACT(MONTH FROM x)` | `DateTime::GetMonth(x)` |
|
|
94
|
+
| `EXTRACT(YEAR FROM x)` | `DateTime::GetYear(x)` |
|
|
95
|
+
| `CURRENT_TIMESTAMP` | `CurrentUtcTimestamp()` |
|
|
96
|
+
| `STR_TO_DATE(str, fmt)` / `TO_DATE(str, fmt)` | `DateTime::MakeTimestamp(DateTime::Parse(fmt)(str))` |
|
|
97
|
+
| `DATE_ADD(x, INTERVAL n MONTH)` | `DateTime::MakeDate(DateTime::ShiftMonths(x, n))` |
|
|
98
|
+
| `DATE_ADD(x, INTERVAL n YEAR)` | `DateTime::MakeDate(DateTime::ShiftYears(x, n))` |
|
|
99
|
+
| `DATE_ADD(x, INTERVAL n DAY)` | `x + DateTime::IntervalFromDays(n)` |
|
|
100
|
+
| `DATE_ADD(x, INTERVAL n HOUR)` | `x + DateTime::IntervalFromHours(n)` |
|
|
101
|
+
| `DATE_ADD(x, INTERVAL n MINUTE)` | `x + DateTime::IntervalFromMinutes(n)` |
|
|
102
|
+
| `DATE_ADD(x, INTERVAL n SECOND)` | `x + DateTime::IntervalFromSeconds(n)` |
|
|
103
|
+
| `DATE_SUB(x, INTERVAL n ...)` | same as `DATE_ADD`, with the interval subtracted (`-`) instead of added |
|
|
104
|
+
| `INTERVAL n DAY` (literal) | `DateTime::IntervalFromDays(n)` |
|
|
105
|
+
| `INTERVAL n HOUR` (literal) | `DateTime::IntervalFromHours(n)` |
|
|
106
|
+
| `INTERVAL n MINUTE` (literal) | `DateTime::IntervalFromMinutes(n)` |
|
|
107
|
+
| `INTERVAL n SECOND` (literal) | `DateTime::IntervalFromSeconds(n)` |
|
|
108
|
+
| `dateDiff('minute', a, b)` | `(CAST(b AS Int64) - CAST(a AS Int64)) / 60000000` |
|
|
109
|
+
| `dateDiff('hour', a, b)` | `(CAST(b AS Int64) - CAST(a AS Int64)) / 3600000000` |
|
|
110
|
+
| `dateDiff('day', a, b)` | `(CAST(b AS Int64) - CAST(a AS Int64)) / 86400000000` |
|
|
111
|
+
| `dateDiff('week', a, b)` | `(CAST(b AS Int64) - CAST(a AS Int64)) / 604800000000` |
|
|
112
|
+
|
|
113
|
+
> **Note on `dateDiff`:** YDB stores `Timestamp` as microseconds since epoch, so each formula above divides the microsecond difference by the number of microseconds in the unit. This yields exact integer units only when both arguments are `Timestamp`; results for `Date`-typed columns will differ.
|
|
114
|
+
|
|
115
|
+
### Strings
|
|
116
|
+
|
|
117
|
+
| Input | YQL output |
|
|
118
|
+
|---|---|
|
|
119
|
+
| `CONCAT(a, b, ...)` | `a \|\| b \|\| ...` |
|
|
120
|
+
| `UPPER(x)` | `Unicode::ToUpper(x)` |
|
|
121
|
+
| `LOWER(x)` | `Unicode::ToLower(x)` |
|
|
122
|
+
| `LENGTH(x)` / `CHAR_LENGTH(x)` | `Unicode::GetLength(x)` |
|
|
123
|
+
| `POSITION(sub IN x)` / `STRPOS(x, sub)` | `Find(x, sub)` |
|
|
124
|
+
| `STRING_TO_ARRAY(x, delim)` | `String::SplitToList(x, delim)` |
|
|
125
|
+
| `ARRAY_TO_STRING(arr, delim)` | `String::JoinFromList(arr, delim)` |
|
|
126
|
+
|
|
127
|
+
### Arrays / collections
|
|
128
|
+
|
|
129
|
+
| Input | YQL output |
|
|
130
|
+
|---|---|
|
|
131
|
+
| `ARRAY(v1, v2, ...)` | `AsList(v1, v2, ...)` |
|
|
132
|
+
| `ARRAY_LENGTH(x)` / `ARRAY_SIZE(x)` | `ListLength(x)` |
|
|
133
|
+
| `ARRAY_FILTER(arr, x -> cond)` | `ListFilter(arr, ($x) -> {RETURN cond})` |
|
|
134
|
+
| `ARRAY_ANY(arr, x -> cond)` | `ListHasItems(ListFilter(arr, ($x) -> {RETURN cond}))` |
|
|
135
|
+
| `ARRAY_AGG(x)` | `AGGREGATE_LIST(x)` |
|
|
136
|
+
| `UNNEST(x)` | `FLATTEN BY x` |
|
|
137
|
+
|
|
138
|
+
### Conditional / math
|
|
139
|
+
|
|
140
|
+
| Input | YQL output |
|
|
141
|
+
|---|---|
|
|
142
|
+
| `NULLIF(x, y)` | `IF(x = y, NULL, x)` |
|
|
143
|
+
| `ROUND(x, n)` | `Math::Round(x, -n)` |
|
|
144
|
+
| `COUNT()` *(zero-argument form)* | `COUNT(*)` |
|
|
145
|
+
|
|
146
|
+
### JSON
|
|
147
|
+
|
|
148
|
+
| Input | YQL output |
|
|
149
|
+
|---|---|
|
|
150
|
+
| `jsonb_col @> value` (PostgreSQL) | `Yson::Contains(jsonb_col, value)` |
|
|
151
|
+
|
|
152
|
+
---
|
|
153
|
+
|
|
154
|
+
## Type mapping
|
|
155
|
+
|
|
156
|
+
| SQL type | YQL type |
|
|
157
|
+
|---|---|
|
|
158
|
+
| `TINYINT` | `INT8` |
|
|
159
|
+
| `SMALLINT` | `INT16` |
|
|
160
|
+
| `INT` / `INTEGER` | `INT32` |
|
|
161
|
+
| `BIGINT` | `INT64` |
|
|
162
|
+
| `FLOAT` | `Float` |
|
|
163
|
+
| `DOUBLE` / `DOUBLE PRECISION` | `Double` |
|
|
164
|
+
| `DECIMAL(p, s)` | `Decimal(p, s)` |
|
|
165
|
+
| `BOOLEAN` / `BIT` | `Uint8` |
|
|
166
|
+
| `TIMESTAMP` | `Timestamp` |
|
|
167
|
+
| `VARCHAR` / `NVARCHAR` / `CHAR` | `Utf8` |
|
|
168
|
+
| `TEXT` / `TINYTEXT` / `MEDIUMTEXT` / `LONGTEXT` | `Utf8` |
|
|
169
|
+
| `BLOB` / `TINYBLOB` / `MEDIUMBLOB` / `LONGBLOB` / `VARBINARY` | `String` |
|
|
170
|
+
|
|
171
|
+
---
|
|
172
|
+
|
|
173
|
+
## Limitations
|
|
174
|
+
|
|
175
|
+
### Correlated subqueries in DML
|
|
176
|
+
|
|
177
|
+
Correlated subqueries inside `UPDATE` or `INSERT` statements cannot be automatically decorrelated — YDB does not support them natively, and rewriting requires knowledge of the table's primary key. Rewrite manually using a `$variable`:
|
|
178
|
+
|
|
179
|
+
```sql
|
|
180
|
+
-- not supported (will raise an error)
|
|
181
|
+
UPDATE t SET col = (SELECT val FROM other WHERE other.id = t.id)
|
|
182
|
+
|
|
183
|
+
-- workaround
|
|
184
|
+
$vals = (SELECT id, val FROM other);
|
|
185
|
+
UPDATE t SET col = (SELECT val FROM $vals WHERE id = t.id)
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
Correlated subqueries inside `SELECT` are handled automatically via JOIN rewriting.
|
|
189
|
+
|
|
190
|
+
### `dateDiff` with month granularity
|
|
191
|
+
|
|
192
|
+
`dateDiff('month', a, b)` has no exact equivalent in YDB because months have variable length. Use `DateTime::ShiftMonths` for date arithmetic instead.
|
|
193
|
+
|
|
194
|
+
---
|
|
195
|
+
|
|
196
|
+
## Development
|
|
197
|
+
|
|
198
|
+
```bash
|
|
199
|
+
git clone https://github.com/ydb-platform/ydb-sqlglot-plugin.git
|
|
200
|
+
cd ydb-sqlglot-plugin
|
|
201
|
+
python -m venv .venv && source .venv/bin/activate
|
|
202
|
+
pip install -e ".[dev]"
|
|
203
|
+
python -m pytest tests/
|
|
204
|
+
```
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "ydb-sqlglot-plugin"
|
|
7
|
-
version = "0.1.0"
|
|
7
|
+
version = "0.1.1" # AUTOVERSION
|
|
8
8
|
description = "YDB dialect plugin for sqlglot"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = {text = "Apache-2.0"}
|
|
@@ -30,6 +30,7 @@ dependencies = [
|
|
|
30
30
|
dev = [
|
|
31
31
|
"pytest>=7.0",
|
|
32
32
|
"pytest-cov>=4.0",
|
|
33
|
+
"ydb>=3.28.0,<4",
|
|
33
34
|
]
|
|
34
35
|
|
|
35
36
|
[project.urls]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
VERSION = "0.1.1"
|