ydb-sqlglot-plugin 0.1.0__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ydb_sqlglot_plugin-0.2.0/PKG-INFO +300 -0
- ydb_sqlglot_plugin-0.2.0/README.md +273 -0
- {ydb_sqlglot_plugin-0.1.0 → ydb_sqlglot_plugin-0.2.0}/pyproject.toml +11 -1
- {ydb_sqlglot_plugin-0.1.0 → ydb_sqlglot_plugin-0.2.0}/ydb_sqlglot/__init__.py +1 -2
- ydb_sqlglot_plugin-0.2.0/ydb_sqlglot/version.py +1 -0
- {ydb_sqlglot_plugin-0.1.0 → ydb_sqlglot_plugin-0.2.0}/ydb_sqlglot/ydb.py +999 -83
- ydb_sqlglot_plugin-0.2.0/ydb_sqlglot_plugin.egg-info/PKG-INFO +300 -0
- {ydb_sqlglot_plugin-0.1.0 → ydb_sqlglot_plugin-0.2.0}/ydb_sqlglot_plugin.egg-info/SOURCES.txt +0 -1
- {ydb_sqlglot_plugin-0.1.0 → ydb_sqlglot_plugin-0.2.0}/ydb_sqlglot_plugin.egg-info/requires.txt +2 -0
- ydb_sqlglot_plugin-0.1.0/PKG-INFO +0 -92
- ydb_sqlglot_plugin-0.1.0/README.md +0 -67
- ydb_sqlglot_plugin-0.1.0/tests/test_ydb.py +0 -709
- ydb_sqlglot_plugin-0.1.0/ydb_sqlglot/version.py +0 -1
- ydb_sqlglot_plugin-0.1.0/ydb_sqlglot_plugin.egg-info/PKG-INFO +0 -92
- {ydb_sqlglot_plugin-0.1.0 → ydb_sqlglot_plugin-0.2.0}/LICENSE +0 -0
- {ydb_sqlglot_plugin-0.1.0 → ydb_sqlglot_plugin-0.2.0}/setup.cfg +0 -0
- {ydb_sqlglot_plugin-0.1.0 → ydb_sqlglot_plugin-0.2.0}/ydb_sqlglot_plugin.egg-info/dependency_links.txt +0 -0
- {ydb_sqlglot_plugin-0.1.0 → ydb_sqlglot_plugin-0.2.0}/ydb_sqlglot_plugin.egg-info/entry_points.txt +0 -0
- {ydb_sqlglot_plugin-0.1.0 → ydb_sqlglot_plugin-0.2.0}/ydb_sqlglot_plugin.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1,300 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ydb-sqlglot-plugin
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: YDB dialect plugin for sqlglot
|
|
5
|
+
Author: YDB Team
|
|
6
|
+
License: Apache-2.0
|
|
7
|
+
Project-URL: Homepage, https://github.com/ydb-platform/ydb-sqlglot-plugin
|
|
8
|
+
Project-URL: Repository, https://github.com/ydb-platform/ydb-sqlglot-plugin
|
|
9
|
+
Keywords: sql,sqlglot,ydb,dialect,parser,transpiler
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Topic :: Database
|
|
16
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
17
|
+
Requires-Python: >=3.9
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
License-File: LICENSE
|
|
20
|
+
Requires-Dist: sqlglot>=28.6.0
|
|
21
|
+
Provides-Extra: dev
|
|
22
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
23
|
+
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
24
|
+
Requires-Dist: ydb<4,>=3.28.0; extra == "dev"
|
|
25
|
+
Requires-Dist: ruff>=0.9.0; extra == "dev"
|
|
26
|
+
Dynamic: license-file
|
|
27
|
+
|
|
28
|
+
# ydb-sqlglot-plugin
|
|
29
|
+
|
|
30
|
+
YDB dialect plugin for [sqlglot](https://github.com/tobymao/sqlglot) — bidirectional transpilation between YDB/YQL and any SQL dialect.
|
|
31
|
+
|
|
32
|
+
## Installation
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
pip install ydb-sqlglot-plugin
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Usage
|
|
39
|
+
|
|
40
|
+
After installing the package, the `ydb` dialect is available in sqlglot automatically — no extra imports needed:
|
|
41
|
+
|
|
42
|
+
```python
|
|
43
|
+
import sqlglot
|
|
44
|
+
|
|
45
|
+
# Any dialect → YDB
|
|
46
|
+
result = sqlglot.transpile("SELECT * FROM users WHERE id = 1", read="mysql", write="ydb")[0]
|
|
47
|
+
# → SELECT * FROM `users` WHERE id = 1
|
|
48
|
+
|
|
49
|
+
# YDB → any dialect
|
|
50
|
+
result = sqlglot.transpile("$t = (SELECT id FROM users); SELECT * FROM $t AS t", read="ydb", write="postgres")[0]
|
|
51
|
+
# → WITH t AS (SELECT id FROM users) SELECT * FROM t AS t
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## What the plugin does
|
|
55
|
+
|
|
56
|
+
### Any SQL → YDB
|
|
57
|
+
|
|
58
|
+
#### Table names
|
|
59
|
+
|
|
60
|
+
Database-qualified names are rewritten to the YDB path format and wrapped in backticks:
|
|
61
|
+
|
|
62
|
+
```sql
|
|
63
|
+
-- input
|
|
64
|
+
SELECT * FROM analytics.events
|
|
65
|
+
|
|
66
|
+
-- output
|
|
67
|
+
SELECT * FROM `analytics/events`
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
#### CTEs → YDB variables
|
|
71
|
+
|
|
72
|
+
```sql
|
|
73
|
+
-- input
|
|
74
|
+
WITH active AS (SELECT * FROM users WHERE status = 'active')
|
|
75
|
+
SELECT * FROM active
|
|
76
|
+
|
|
77
|
+
-- output
|
|
78
|
+
$active = (SELECT * FROM `users` WHERE status = 'active');
|
|
79
|
+
|
|
80
|
+
SELECT * FROM $active AS active
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
#### Subquery decorrelation
|
|
84
|
+
|
|
85
|
+
Correlated subqueries (which YQL does not support) are rewritten as JOINs:
|
|
86
|
+
|
|
87
|
+
```sql
|
|
88
|
+
-- input
|
|
89
|
+
SELECT id, (SELECT MAX(amount) FROM orders WHERE orders.user_id = users.id) AS max_order
|
|
90
|
+
FROM users
|
|
91
|
+
|
|
92
|
+
-- output
|
|
93
|
+
SELECT users.id AS id, _u_0._u_2 AS max_order
|
|
94
|
+
FROM `users`
|
|
95
|
+
LEFT JOIN (
|
|
96
|
+
SELECT MAX(amount) AS _u_2, user_id AS _u_1
|
|
97
|
+
FROM `orders`
|
|
98
|
+
WHERE TRUE
|
|
99
|
+
GROUP BY user_id AS _u_1
|
|
100
|
+
) AS _u_0 ON users.id = _u_0._u_1
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
The same rewriting applies to `EXISTS`, `IN (subquery)`, and `ANY/ALL` subqueries.
|
|
104
|
+
|
|
105
|
+
---
|
|
106
|
+
|
|
107
|
+
### YDB → any SQL
|
|
108
|
+
|
|
109
|
+
The plugin parses YDB/YQL back into sqlglot's AST, enabling round-trips, YDB-to-YDB transformations, and transpilation to other dialects.
|
|
110
|
+
|
|
111
|
+
#### Supported YQL constructs
|
|
112
|
+
|
|
113
|
+
| Construct | Example |
|
|
114
|
+
|---|---|
|
|
115
|
+
| `$variable` references | `SELECT * FROM $t AS t` |
|
|
116
|
+
| `Module::Function()` | `DateTime::GetYear(ts)` |
|
|
117
|
+
| `DECLARE $p AS Type` | `DECLARE $p AS Int32` |
|
|
118
|
+
| `FLATTEN [LIST\|DICT] BY col` | `FROM t FLATTEN LIST BY col` |
|
|
119
|
+
| `Optional<T>` / `T?` | `CAST(x AS Optional<Utf8>)` |
|
|
120
|
+
| Container types | `CAST(x AS List<Int32>)`, `Dict<Utf8, Int64>`, `Set<Utf8>`, `Tuple<Int32, Utf8>` |
|
|
121
|
+
| `ASSUME ORDER BY` | `SELECT * FROM t ASSUME ORDER BY id` |
|
|
122
|
+
| Named expressions | `$t = (SELECT 1 AS x)` |
|
|
123
|
+
| `PRAGMA` | `PRAGMA AnsiImplicitCrossJoin` |
|
|
124
|
+
|
|
125
|
+
Table names without backticks are accepted on input; the generator always produces backtick-quoted output.
|
|
126
|
+
|
|
127
|
+
#### CTE reassembly
|
|
128
|
+
|
|
129
|
+
YDB-style named expressions are automatically reassembled into standard `WITH` CTEs when targeting other dialects:
|
|
130
|
+
|
|
131
|
+
```python
|
|
132
|
+
ydb_sql = "$t = (SELECT 1 AS x); SELECT * FROM $t AS t"
|
|
133
|
+
sqlglot.parse_one(ydb_sql, dialect="ydb").sql(dialect="postgres")
|
|
134
|
+
# → WITH t AS (SELECT 1 AS x) SELECT * FROM t AS t
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
---
|
|
138
|
+
|
|
139
|
+
### Column lineage
|
|
140
|
+
|
|
141
|
+
Because YDB SQL is fully parsed into sqlglot's AST, column-level lineage works out of the box:
|
|
142
|
+
|
|
143
|
+
```python
|
|
144
|
+
from sqlglot.lineage import lineage
|
|
145
|
+
|
|
146
|
+
node = lineage("total", "$orders = (SELECT user_id, amount FROM orders); SELECT SUM(amount) AS total FROM $orders AS o", dialect="ydb")
|
|
147
|
+
for dep in node.walk():
|
|
148
|
+
    print(dep.name, "→", dep.source)
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
---
|
|
152
|
+
|
|
153
|
+
## Function reference
|
|
154
|
+
|
|
155
|
+
Functions below are recognized by sqlglot as standard SQL expressions and translated to their YQL equivalents. Dialect-specific functions that sqlglot does not parse into typed AST nodes are **passed through unchanged** — see [Limitations](#limitations).
|
|
156
|
+
|
|
157
|
+
### Date / time
|
|
158
|
+
|
|
159
|
+
| Input | YQL output |
|
|
160
|
+
|---|---|
|
|
161
|
+
| `DATE_TRUNC('day', x)` | `DATE(x)` |
|
|
162
|
+
| `DATE_TRUNC('week', x)` | `DateTime::MakeDate(DateTime::StartOfWeek(x))` |
|
|
163
|
+
| `DATE_TRUNC('month', x)` | `DateTime::MakeDate(DateTime::StartOfMonth(x))` |
|
|
164
|
+
| `DATE_TRUNC('quarter', x)` | `DateTime::MakeDate(DateTime::StartOfQuarter(x))` |
|
|
165
|
+
| `DATE_TRUNC('year', x)` | `DateTime::MakeDate(DateTime::StartOfYear(x))` |
|
|
166
|
+
| `EXTRACT(WEEK FROM x)` | `DateTime::GetWeekOfYear(x)` |
|
|
167
|
+
| `EXTRACT(MONTH FROM x)` | `DateTime::GetMonth(x)` |
|
|
168
|
+
| `EXTRACT(YEAR FROM x)` | `DateTime::GetYear(x)` |
|
|
169
|
+
| `CURRENT_TIMESTAMP` | `CurrentUtcTimestamp()` |
|
|
170
|
+
| `STR_TO_DATE(str, fmt)` / `TO_DATE(str, fmt)` | `DateTime::MakeTimestamp(DateTime::Parse(fmt)(str))` |
|
|
171
|
+
| `DATE_ADD(x, INTERVAL n MONTH)` | `DateTime::MakeDate(DateTime::ShiftMonths(x, n))` |
|
|
172
|
+
| `DATE_ADD(x, INTERVAL n YEAR)` | `DateTime::MakeDate(DateTime::ShiftYears(x, n))` |
|
|
173
|
+
| `DATE_ADD(x, INTERVAL n DAY)` | `x + DateTime::IntervalFromDays(n)` |
|
|
174
|
+
| `DATE_ADD(x, INTERVAL n HOUR)` | `x + DateTime::IntervalFromHours(n)` |
|
|
175
|
+
| `DATE_ADD(x, INTERVAL n MINUTE)` | `x + DateTime::IntervalFromMinutes(n)` |
|
|
176
|
+
| `DATE_ADD(x, INTERVAL n SECOND)` | `x + DateTime::IntervalFromSeconds(n)` |
|
|
177
|
+
| `DATE_SUB(x, INTERVAL n ...)` | same as `DATE_ADD` with `−` |
|
|
178
|
+
| `INTERVAL n DAY` (literal) | `DateTime::IntervalFromDays(n)` |
|
|
179
|
+
| `INTERVAL n HOUR` (literal) | `DateTime::IntervalFromHours(n)` |
|
|
180
|
+
| `INTERVAL n MINUTE` (literal) | `DateTime::IntervalFromMinutes(n)` |
|
|
181
|
+
| `INTERVAL n SECOND` (literal) | `DateTime::IntervalFromSeconds(n)` |
|
|
182
|
+
| `dateDiff('minute', a, b)` | `(CAST(b AS Int64) - CAST(a AS Int64)) / 60000000` |
|
|
183
|
+
| `dateDiff('hour', a, b)` | `(CAST(b AS Int64) - CAST(a AS Int64)) / 3600000000` |
|
|
184
|
+
| `dateDiff('day', a, b)` | `(CAST(b AS Int64) - CAST(a AS Int64)) / 86400000000` |
|
|
185
|
+
| `dateDiff('week', a, b)` | `(CAST(b AS Int64) - CAST(a AS Int64)) / 604800000000` |
|
|
186
|
+
|
|
187
|
+
> **Note on `dateDiff`:** YDB stores `Timestamp` as microseconds since epoch. The formulas above give exact integer units assuming both arguments are `Timestamp`. Results for `Date`-typed columns will differ.
|
|
188
|
+
|
|
189
|
+
### Strings
|
|
190
|
+
|
|
191
|
+
| Input | YQL output |
|
|
192
|
+
|---|---|
|
|
193
|
+
| `CONCAT(a, b, ...)` | `a \|\| b \|\| ...` |
|
|
194
|
+
| `UPPER(x)` | `Unicode::ToUpper(x)` |
|
|
195
|
+
| `LOWER(x)` | `Unicode::ToLower(x)` |
|
|
196
|
+
| `LENGTH(x)` / `CHAR_LENGTH(x)` | `Unicode::GetLength(x)` |
|
|
197
|
+
| `POSITION(sub IN x)` / `STRPOS(x, sub)` | `Find(x, sub)` |
|
|
198
|
+
| `STRING_TO_ARRAY(x, delim)` | `String::SplitToList(x, delim)` |
|
|
199
|
+
| `ARRAY_TO_STRING(arr, delim)` | `String::JoinFromList(arr, delim)` |
|
|
200
|
+
|
|
201
|
+
### Arrays / collections
|
|
202
|
+
|
|
203
|
+
| Input | YQL output |
|
|
204
|
+
|---|---|
|
|
205
|
+
| `ARRAY(v1, v2, ...)` | `AsList(v1, v2, ...)` |
|
|
206
|
+
| `ARRAY_LENGTH(x)` / `ARRAY_SIZE(x)` | `ListLength(x)` |
|
|
207
|
+
| `ARRAY_FILTER(arr, x -> cond)` | `ListFilter(arr, ($x) -> {RETURN cond})` |
|
|
208
|
+
| `ARRAY_ANY(arr, x -> cond)` | `ListHasItems(ListFilter(arr, ($x) -> {RETURN cond}))` |
|
|
209
|
+
| `ARRAY_AGG(x)` | `AGGREGATE_LIST(x)` |
|
|
210
|
+
| `UNNEST(x)` | `FLATTEN BY x` |
|
|
211
|
+
|
|
212
|
+
### Conditional / math
|
|
213
|
+
|
|
214
|
+
| Input | YQL output |
|
|
215
|
+
|---|---|
|
|
216
|
+
| `NULLIF(x, y)` | `IF(x = y, NULL, x)` |
|
|
217
|
+
| `ROUND(x, n)` | `Math::Round(x, -n)` |
|
|
218
|
+
| `COUNT()` *(zero-argument form)* | `COUNT(*)` |
|
|
219
|
+
|
|
220
|
+
### JSON
|
|
221
|
+
|
|
222
|
+
| Input | YQL output |
|
|
223
|
+
|---|---|
|
|
224
|
+
| `jsonb_col @> value` (PostgreSQL) | `Yson::Contains(jsonb_col, value)` |
|
|
225
|
+
|
|
226
|
+
---
|
|
227
|
+
|
|
228
|
+
## Type mapping
|
|
229
|
+
|
|
230
|
+
### Standard SQL → YDB
|
|
231
|
+
|
|
232
|
+
| SQL type | YDB type |
|
|
233
|
+
|---|---|
|
|
234
|
+
| `TINYINT` | `Int8` |
|
|
235
|
+
| `SMALLINT` | `Int16` |
|
|
236
|
+
| `INT` / `INTEGER` | `Int32` |
|
|
237
|
+
| `BIGINT` | `Int64` |
|
|
238
|
+
| `FLOAT` | `Float` |
|
|
239
|
+
| `DOUBLE` / `DOUBLE PRECISION` | `Double` |
|
|
240
|
+
| `DECIMAL(p, s)` | `Decimal(p, s)` |
|
|
241
|
+
| `BOOLEAN` / `BIT` | `Uint8` |
|
|
242
|
+
| `TIMESTAMP` | `Timestamp` |
|
|
243
|
+
| `VARCHAR` / `NVARCHAR` / `CHAR` / `TEXT` | `Utf8` |
|
|
244
|
+
| `BLOB` / `BINARY` / `VARBINARY` | `String` |
|
|
245
|
+
|
|
246
|
+
### YDB types → standard SQL
|
|
247
|
+
|
|
248
|
+
| YDB type | Standard SQL | Postgres | ClickHouse |
|
|
249
|
+
|---|---|---|---|
|
|
250
|
+
| `Utf8` | `TEXT` | `TEXT` | `String` |
|
|
251
|
+
| `String` | `BLOB` | `BYTEA` | `String` |
|
|
252
|
+
| `Int32` | `INT` | `INT` | `Int32` |
|
|
253
|
+
| `Int64` | `BIGINT` | `BIGINT` | `Int64` |
|
|
254
|
+
| `Optional<T>` | `T` (nullable) | `T` | `Nullable(T)` |
|
|
255
|
+
| `List<T>` | `LIST<T>` | `LIST<T>` | `Array(T)` |
|
|
256
|
+
| `Dict<K,V>` | `MAP<K,V>` | `MAP<K,V>` | `Map(K,V)` |
|
|
257
|
+
| `Tuple<T1,T2>` | `STRUCT<...>` | `STRUCT<...>` | `Tuple(T1,T2)` |
|
|
258
|
+
|
|
259
|
+
---
|
|
260
|
+
|
|
261
|
+
## Limitations
|
|
262
|
+
|
|
263
|
+
### Dialect-specific functions
|
|
264
|
+
|
|
265
|
+
Functions that sqlglot does not parse into typed AST nodes are passed through unchanged and must be replaced manually. Common examples from ClickHouse: `now()`, `today()`, `parseDateTimeBestEffort()`, `toDate()`, `toFloat64()`, `toString()`, `countDistinct()`, `groupArray()`.
|
|
266
|
+
|
|
267
|
+
### Correlated subqueries in DML
|
|
268
|
+
|
|
269
|
+
Correlated subqueries inside `UPDATE` or `INSERT` statements cannot be automatically decorrelated — YDB does not support them natively, and rewriting requires knowledge of the table's primary key. Rewrite manually using a `$variable`:
|
|
270
|
+
|
|
271
|
+
```sql
|
|
272
|
+
-- not supported (will raise an error)
|
|
273
|
+
UPDATE t SET col = (SELECT val FROM other WHERE other.id = t.id)
|
|
274
|
+
|
|
275
|
+
-- workaround
|
|
276
|
+
$vals = (SELECT id, val FROM other);
|
|
277
|
+
UPDATE t SET col = (SELECT val FROM $vals WHERE id = t.id)
|
|
278
|
+
```
|
|
279
|
+
|
|
280
|
+
Correlated subqueries inside `SELECT` are handled automatically via JOIN rewriting.
|
|
281
|
+
|
|
282
|
+
### `dateDiff` with month granularity
|
|
283
|
+
|
|
284
|
+
`dateDiff('month', a, b)` has no exact equivalent in YDB because months have variable length. Use `DateTime::ShiftMonths` for date arithmetic instead.
|
|
285
|
+
|
|
286
|
+
### YDB container types in other dialects
|
|
287
|
+
|
|
288
|
+
`Uint8`/`Uint16`/`Uint32`/`Uint64` and YDB-specific container types (`Struct<...>`, `Variant<...>`, `Enum<...>`) do not have direct equivalents in standard SQL and are passed through as-is when targeting other dialects.
|
|
289
|
+
|
|
290
|
+
---
|
|
291
|
+
|
|
292
|
+
## Development
|
|
293
|
+
|
|
294
|
+
```bash
|
|
295
|
+
git clone https://github.com/ydb-platform/ydb-sqlglot-plugin.git
|
|
296
|
+
cd ydb-sqlglot-plugin
|
|
297
|
+
python -m venv .venv && source .venv/bin/activate
|
|
298
|
+
pip install -e ".[dev]"
|
|
299
|
+
python -m pytest tests/
|
|
300
|
+
```
|
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
# ydb-sqlglot-plugin
|
|
2
|
+
|
|
3
|
+
YDB dialect plugin for [sqlglot](https://github.com/tobymao/sqlglot) — bidirectional transpilation between YDB/YQL and any SQL dialect.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install ydb-sqlglot-plugin
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Usage
|
|
12
|
+
|
|
13
|
+
After installing the package, the `ydb` dialect is available in sqlglot automatically — no extra imports needed:
|
|
14
|
+
|
|
15
|
+
```python
|
|
16
|
+
import sqlglot
|
|
17
|
+
|
|
18
|
+
# Any dialect → YDB
|
|
19
|
+
result = sqlglot.transpile("SELECT * FROM users WHERE id = 1", read="mysql", write="ydb")[0]
|
|
20
|
+
# → SELECT * FROM `users` WHERE id = 1
|
|
21
|
+
|
|
22
|
+
# YDB → any dialect
|
|
23
|
+
result = sqlglot.transpile("$t = (SELECT id FROM users); SELECT * FROM $t AS t", read="ydb", write="postgres")[0]
|
|
24
|
+
# → WITH t AS (SELECT id FROM users) SELECT * FROM t AS t
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## What the plugin does
|
|
28
|
+
|
|
29
|
+
### Any SQL → YDB
|
|
30
|
+
|
|
31
|
+
#### Table names
|
|
32
|
+
|
|
33
|
+
Database-qualified names are rewritten to the YDB path format and wrapped in backticks:
|
|
34
|
+
|
|
35
|
+
```sql
|
|
36
|
+
-- input
|
|
37
|
+
SELECT * FROM analytics.events
|
|
38
|
+
|
|
39
|
+
-- output
|
|
40
|
+
SELECT * FROM `analytics/events`
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
#### CTEs → YDB variables
|
|
44
|
+
|
|
45
|
+
```sql
|
|
46
|
+
-- input
|
|
47
|
+
WITH active AS (SELECT * FROM users WHERE status = 'active')
|
|
48
|
+
SELECT * FROM active
|
|
49
|
+
|
|
50
|
+
-- output
|
|
51
|
+
$active = (SELECT * FROM `users` WHERE status = 'active');
|
|
52
|
+
|
|
53
|
+
SELECT * FROM $active AS active
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
#### Subquery decorrelation
|
|
57
|
+
|
|
58
|
+
Correlated subqueries (which YQL does not support) are rewritten as JOINs:
|
|
59
|
+
|
|
60
|
+
```sql
|
|
61
|
+
-- input
|
|
62
|
+
SELECT id, (SELECT MAX(amount) FROM orders WHERE orders.user_id = users.id) AS max_order
|
|
63
|
+
FROM users
|
|
64
|
+
|
|
65
|
+
-- output
|
|
66
|
+
SELECT users.id AS id, _u_0._u_2 AS max_order
|
|
67
|
+
FROM `users`
|
|
68
|
+
LEFT JOIN (
|
|
69
|
+
SELECT MAX(amount) AS _u_2, user_id AS _u_1
|
|
70
|
+
FROM `orders`
|
|
71
|
+
WHERE TRUE
|
|
72
|
+
GROUP BY user_id AS _u_1
|
|
73
|
+
) AS _u_0 ON users.id = _u_0._u_1
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
The same rewriting applies to `EXISTS`, `IN (subquery)`, and `ANY/ALL` subqueries.
|
|
77
|
+
|
|
78
|
+
---
|
|
79
|
+
|
|
80
|
+
### YDB → any SQL
|
|
81
|
+
|
|
82
|
+
The plugin parses YDB/YQL back into sqlglot's AST, enabling round-trips, YDB-to-YDB transformations, and transpilation to other dialects.
|
|
83
|
+
|
|
84
|
+
#### Supported YQL constructs
|
|
85
|
+
|
|
86
|
+
| Construct | Example |
|
|
87
|
+
|---|---|
|
|
88
|
+
| `$variable` references | `SELECT * FROM $t AS t` |
|
|
89
|
+
| `Module::Function()` | `DateTime::GetYear(ts)` |
|
|
90
|
+
| `DECLARE $p AS Type` | `DECLARE $p AS Int32` |
|
|
91
|
+
| `FLATTEN [LIST\|DICT] BY col` | `FROM t FLATTEN LIST BY col` |
|
|
92
|
+
| `Optional<T>` / `T?` | `CAST(x AS Optional<Utf8>)` |
|
|
93
|
+
| Container types | `CAST(x AS List<Int32>)`, `Dict<Utf8, Int64>`, `Set<Utf8>`, `Tuple<Int32, Utf8>` |
|
|
94
|
+
| `ASSUME ORDER BY` | `SELECT * FROM t ASSUME ORDER BY id` |
|
|
95
|
+
| Named expressions | `$t = (SELECT 1 AS x)` |
|
|
96
|
+
| `PRAGMA` | `PRAGMA AnsiImplicitCrossJoin` |
|
|
97
|
+
|
|
98
|
+
Table names without backticks are accepted on input; the generator always produces backtick-quoted output.
|
|
99
|
+
|
|
100
|
+
#### CTE reassembly
|
|
101
|
+
|
|
102
|
+
YDB-style named expressions are automatically reassembled into standard `WITH` CTEs when targeting other dialects:
|
|
103
|
+
|
|
104
|
+
```python
|
|
105
|
+
ydb_sql = "$t = (SELECT 1 AS x); SELECT * FROM $t AS t"
|
|
106
|
+
sqlglot.parse_one(ydb_sql, dialect="ydb").sql(dialect="postgres")
|
|
107
|
+
# → WITH t AS (SELECT 1 AS x) SELECT * FROM t AS t
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
---
|
|
111
|
+
|
|
112
|
+
### Column lineage
|
|
113
|
+
|
|
114
|
+
Because YDB SQL is fully parsed into sqlglot's AST, column-level lineage works out of the box:
|
|
115
|
+
|
|
116
|
+
```python
|
|
117
|
+
from sqlglot.lineage import lineage
|
|
118
|
+
|
|
119
|
+
node = lineage("total", "$orders = (SELECT user_id, amount FROM orders); SELECT SUM(amount) AS total FROM $orders AS o", dialect="ydb")
|
|
120
|
+
for dep in node.walk():
|
|
121
|
+
    print(dep.name, "→", dep.source)
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
---
|
|
125
|
+
|
|
126
|
+
## Function reference
|
|
127
|
+
|
|
128
|
+
Functions below are recognized by sqlglot as standard SQL expressions and translated to their YQL equivalents. Dialect-specific functions that sqlglot does not parse into typed AST nodes are **passed through unchanged** — see [Limitations](#limitations).
|
|
129
|
+
|
|
130
|
+
### Date / time
|
|
131
|
+
|
|
132
|
+
| Input | YQL output |
|
|
133
|
+
|---|---|
|
|
134
|
+
| `DATE_TRUNC('day', x)` | `DATE(x)` |
|
|
135
|
+
| `DATE_TRUNC('week', x)` | `DateTime::MakeDate(DateTime::StartOfWeek(x))` |
|
|
136
|
+
| `DATE_TRUNC('month', x)` | `DateTime::MakeDate(DateTime::StartOfMonth(x))` |
|
|
137
|
+
| `DATE_TRUNC('quarter', x)` | `DateTime::MakeDate(DateTime::StartOfQuarter(x))` |
|
|
138
|
+
| `DATE_TRUNC('year', x)` | `DateTime::MakeDate(DateTime::StartOfYear(x))` |
|
|
139
|
+
| `EXTRACT(WEEK FROM x)` | `DateTime::GetWeekOfYear(x)` |
|
|
140
|
+
| `EXTRACT(MONTH FROM x)` | `DateTime::GetMonth(x)` |
|
|
141
|
+
| `EXTRACT(YEAR FROM x)` | `DateTime::GetYear(x)` |
|
|
142
|
+
| `CURRENT_TIMESTAMP` | `CurrentUtcTimestamp()` |
|
|
143
|
+
| `STR_TO_DATE(str, fmt)` / `TO_DATE(str, fmt)` | `DateTime::MakeTimestamp(DateTime::Parse(fmt)(str))` |
|
|
144
|
+
| `DATE_ADD(x, INTERVAL n MONTH)` | `DateTime::MakeDate(DateTime::ShiftMonths(x, n))` |
|
|
145
|
+
| `DATE_ADD(x, INTERVAL n YEAR)` | `DateTime::MakeDate(DateTime::ShiftYears(x, n))` |
|
|
146
|
+
| `DATE_ADD(x, INTERVAL n DAY)` | `x + DateTime::IntervalFromDays(n)` |
|
|
147
|
+
| `DATE_ADD(x, INTERVAL n HOUR)` | `x + DateTime::IntervalFromHours(n)` |
|
|
148
|
+
| `DATE_ADD(x, INTERVAL n MINUTE)` | `x + DateTime::IntervalFromMinutes(n)` |
|
|
149
|
+
| `DATE_ADD(x, INTERVAL n SECOND)` | `x + DateTime::IntervalFromSeconds(n)` |
|
|
150
|
+
| `DATE_SUB(x, INTERVAL n ...)` | same as `DATE_ADD` with `−` |
|
|
151
|
+
| `INTERVAL n DAY` (literal) | `DateTime::IntervalFromDays(n)` |
|
|
152
|
+
| `INTERVAL n HOUR` (literal) | `DateTime::IntervalFromHours(n)` |
|
|
153
|
+
| `INTERVAL n MINUTE` (literal) | `DateTime::IntervalFromMinutes(n)` |
|
|
154
|
+
| `INTERVAL n SECOND` (literal) | `DateTime::IntervalFromSeconds(n)` |
|
|
155
|
+
| `dateDiff('minute', a, b)` | `(CAST(b AS Int64) - CAST(a AS Int64)) / 60000000` |
|
|
156
|
+
| `dateDiff('hour', a, b)` | `(CAST(b AS Int64) - CAST(a AS Int64)) / 3600000000` |
|
|
157
|
+
| `dateDiff('day', a, b)` | `(CAST(b AS Int64) - CAST(a AS Int64)) / 86400000000` |
|
|
158
|
+
| `dateDiff('week', a, b)` | `(CAST(b AS Int64) - CAST(a AS Int64)) / 604800000000` |
|
|
159
|
+
|
|
160
|
+
> **Note on `dateDiff`:** YDB stores `Timestamp` as microseconds since epoch. The formulas above give exact integer units assuming both arguments are `Timestamp`. Results for `Date`-typed columns will differ.
|
|
161
|
+
|
|
162
|
+
### Strings
|
|
163
|
+
|
|
164
|
+
| Input | YQL output |
|
|
165
|
+
|---|---|
|
|
166
|
+
| `CONCAT(a, b, ...)` | `a \|\| b \|\| ...` |
|
|
167
|
+
| `UPPER(x)` | `Unicode::ToUpper(x)` |
|
|
168
|
+
| `LOWER(x)` | `Unicode::ToLower(x)` |
|
|
169
|
+
| `LENGTH(x)` / `CHAR_LENGTH(x)` | `Unicode::GetLength(x)` |
|
|
170
|
+
| `POSITION(sub IN x)` / `STRPOS(x, sub)` | `Find(x, sub)` |
|
|
171
|
+
| `STRING_TO_ARRAY(x, delim)` | `String::SplitToList(x, delim)` |
|
|
172
|
+
| `ARRAY_TO_STRING(arr, delim)` | `String::JoinFromList(arr, delim)` |
|
|
173
|
+
|
|
174
|
+
### Arrays / collections
|
|
175
|
+
|
|
176
|
+
| Input | YQL output |
|
|
177
|
+
|---|---|
|
|
178
|
+
| `ARRAY(v1, v2, ...)` | `AsList(v1, v2, ...)` |
|
|
179
|
+
| `ARRAY_LENGTH(x)` / `ARRAY_SIZE(x)` | `ListLength(x)` |
|
|
180
|
+
| `ARRAY_FILTER(arr, x -> cond)` | `ListFilter(arr, ($x) -> {RETURN cond})` |
|
|
181
|
+
| `ARRAY_ANY(arr, x -> cond)` | `ListHasItems(ListFilter(arr, ($x) -> {RETURN cond}))` |
|
|
182
|
+
| `ARRAY_AGG(x)` | `AGGREGATE_LIST(x)` |
|
|
183
|
+
| `UNNEST(x)` | `FLATTEN BY x` |
|
|
184
|
+
|
|
185
|
+
### Conditional / math
|
|
186
|
+
|
|
187
|
+
| Input | YQL output |
|
|
188
|
+
|---|---|
|
|
189
|
+
| `NULLIF(x, y)` | `IF(x = y, NULL, x)` |
|
|
190
|
+
| `ROUND(x, n)` | `Math::Round(x, -n)` |
|
|
191
|
+
| `COUNT()` *(zero-argument form)* | `COUNT(*)` |
|
|
192
|
+
|
|
193
|
+
### JSON
|
|
194
|
+
|
|
195
|
+
| Input | YQL output |
|
|
196
|
+
|---|---|
|
|
197
|
+
| `jsonb_col @> value` (PostgreSQL) | `Yson::Contains(jsonb_col, value)` |
|
|
198
|
+
|
|
199
|
+
---
|
|
200
|
+
|
|
201
|
+
## Type mapping
|
|
202
|
+
|
|
203
|
+
### Standard SQL → YDB
|
|
204
|
+
|
|
205
|
+
| SQL type | YDB type |
|
|
206
|
+
|---|---|
|
|
207
|
+
| `TINYINT` | `Int8` |
|
|
208
|
+
| `SMALLINT` | `Int16` |
|
|
209
|
+
| `INT` / `INTEGER` | `Int32` |
|
|
210
|
+
| `BIGINT` | `Int64` |
|
|
211
|
+
| `FLOAT` | `Float` |
|
|
212
|
+
| `DOUBLE` / `DOUBLE PRECISION` | `Double` |
|
|
213
|
+
| `DECIMAL(p, s)` | `Decimal(p, s)` |
|
|
214
|
+
| `BOOLEAN` / `BIT` | `Uint8` |
|
|
215
|
+
| `TIMESTAMP` | `Timestamp` |
|
|
216
|
+
| `VARCHAR` / `NVARCHAR` / `CHAR` / `TEXT` | `Utf8` |
|
|
217
|
+
| `BLOB` / `BINARY` / `VARBINARY` | `String` |
|
|
218
|
+
|
|
219
|
+
### YDB types → standard SQL
|
|
220
|
+
|
|
221
|
+
| YDB type | Standard SQL | Postgres | ClickHouse |
|
|
222
|
+
|---|---|---|---|
|
|
223
|
+
| `Utf8` | `TEXT` | `TEXT` | `String` |
|
|
224
|
+
| `String` | `BLOB` | `BYTEA` | `String` |
|
|
225
|
+
| `Int32` | `INT` | `INT` | `Int32` |
|
|
226
|
+
| `Int64` | `BIGINT` | `BIGINT` | `Int64` |
|
|
227
|
+
| `Optional<T>` | `T` (nullable) | `T` | `Nullable(T)` |
|
|
228
|
+
| `List<T>` | `LIST<T>` | `LIST<T>` | `Array(T)` |
|
|
229
|
+
| `Dict<K,V>` | `MAP<K,V>` | `MAP<K,V>` | `Map(K,V)` |
|
|
230
|
+
| `Tuple<T1,T2>` | `STRUCT<...>` | `STRUCT<...>` | `Tuple(T1,T2)` |
|
|
231
|
+
|
|
232
|
+
---
|
|
233
|
+
|
|
234
|
+
## Limitations
|
|
235
|
+
|
|
236
|
+
### Dialect-specific functions
|
|
237
|
+
|
|
238
|
+
Functions that sqlglot does not parse into typed AST nodes are passed through unchanged and must be replaced manually. Common examples from ClickHouse: `now()`, `today()`, `parseDateTimeBestEffort()`, `toDate()`, `toFloat64()`, `toString()`, `countDistinct()`, `groupArray()`.
|
|
239
|
+
|
|
240
|
+
### Correlated subqueries in DML
|
|
241
|
+
|
|
242
|
+
Correlated subqueries inside `UPDATE` or `INSERT` statements cannot be automatically decorrelated — YDB does not support them natively, and rewriting requires knowledge of the table's primary key. Rewrite manually using a `$variable`:
|
|
243
|
+
|
|
244
|
+
```sql
|
|
245
|
+
-- not supported (will raise an error)
|
|
246
|
+
UPDATE t SET col = (SELECT val FROM other WHERE other.id = t.id)
|
|
247
|
+
|
|
248
|
+
-- workaround
|
|
249
|
+
$vals = (SELECT id, val FROM other);
|
|
250
|
+
UPDATE t SET col = (SELECT val FROM $vals WHERE id = t.id)
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
Correlated subqueries inside `SELECT` are handled automatically via JOIN rewriting.
|
|
254
|
+
|
|
255
|
+
### `dateDiff` with month granularity
|
|
256
|
+
|
|
257
|
+
`dateDiff('month', a, b)` has no exact equivalent in YDB because months have variable length. Use `DateTime::ShiftMonths` for date arithmetic instead.
|
|
258
|
+
|
|
259
|
+
### YDB container types in other dialects
|
|
260
|
+
|
|
261
|
+
`Uint8`/`Uint16`/`Uint32`/`Uint64` and YDB-specific container types (`Struct<...>`, `Variant<...>`, `Enum<...>`) do not have direct equivalents in standard SQL and are passed through as-is when targeting other dialects.
|
|
262
|
+
|
|
263
|
+
---
|
|
264
|
+
|
|
265
|
+
## Development
|
|
266
|
+
|
|
267
|
+
```bash
|
|
268
|
+
git clone https://github.com/ydb-platform/ydb-sqlglot-plugin.git
|
|
269
|
+
cd ydb-sqlglot-plugin
|
|
270
|
+
python -m venv .venv && source .venv/bin/activate
|
|
271
|
+
pip install -e ".[dev]"
|
|
272
|
+
python -m pytest tests/
|
|
273
|
+
```
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "ydb-sqlglot-plugin"
|
|
7
|
-
version = "0.1.0"
|
|
7
|
+
version = "0.2.0" # AUTOVERSION
|
|
8
8
|
description = "YDB dialect plugin for sqlglot"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = {text = "Apache-2.0"}
|
|
@@ -30,6 +30,8 @@ dependencies = [
|
|
|
30
30
|
dev = [
|
|
31
31
|
"pytest>=7.0",
|
|
32
32
|
"pytest-cov>=4.0",
|
|
33
|
+
"ydb>=3.28.0,<4",
|
|
34
|
+
"ruff>=0.9.0",
|
|
33
35
|
]
|
|
34
36
|
|
|
35
37
|
[project.urls]
|
|
@@ -47,3 +49,11 @@ include = ["ydb_sqlglot*"]
|
|
|
47
49
|
testpaths = ["tests"]
|
|
48
50
|
python_files = ["test_*.py"]
|
|
49
51
|
python_functions = ["test_*"]
|
|
52
|
+
|
|
53
|
+
[tool.ruff]
|
|
54
|
+
target-version = "py39"
|
|
55
|
+
line-length = 120
|
|
56
|
+
|
|
57
|
+
[tool.ruff.lint]
|
|
58
|
+
select = ["E", "F", "I", "W"]
|
|
59
|
+
ignore = ["E501"]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
VERSION = "0.2.0"
|