ydb-sqlglot-plugin 0.1.1__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ydb_sqlglot_plugin-0.1.1 → ydb_sqlglot_plugin-0.2.0}/PKG-INFO +88 -18
- {ydb_sqlglot_plugin-0.1.1 → ydb_sqlglot_plugin-0.2.0}/README.md +86 -17
- {ydb_sqlglot_plugin-0.1.1 → ydb_sqlglot_plugin-0.2.0}/pyproject.toml +10 -1
- {ydb_sqlglot_plugin-0.1.1 → ydb_sqlglot_plugin-0.2.0}/ydb_sqlglot/__init__.py +1 -2
- ydb_sqlglot_plugin-0.2.0/ydb_sqlglot/version.py +1 -0
- {ydb_sqlglot_plugin-0.1.1 → ydb_sqlglot_plugin-0.2.0}/ydb_sqlglot/ydb.py +345 -28
- {ydb_sqlglot_plugin-0.1.1 → ydb_sqlglot_plugin-0.2.0}/ydb_sqlglot_plugin.egg-info/PKG-INFO +88 -18
- {ydb_sqlglot_plugin-0.1.1 → ydb_sqlglot_plugin-0.2.0}/ydb_sqlglot_plugin.egg-info/requires.txt +1 -0
- ydb_sqlglot_plugin-0.1.1/ydb_sqlglot/version.py +0 -1
- {ydb_sqlglot_plugin-0.1.1 → ydb_sqlglot_plugin-0.2.0}/LICENSE +0 -0
- {ydb_sqlglot_plugin-0.1.1 → ydb_sqlglot_plugin-0.2.0}/setup.cfg +0 -0
- {ydb_sqlglot_plugin-0.1.1 → ydb_sqlglot_plugin-0.2.0}/ydb_sqlglot_plugin.egg-info/SOURCES.txt +0 -0
- {ydb_sqlglot_plugin-0.1.1 → ydb_sqlglot_plugin-0.2.0}/ydb_sqlglot_plugin.egg-info/dependency_links.txt +0 -0
- {ydb_sqlglot_plugin-0.1.1 → ydb_sqlglot_plugin-0.2.0}/ydb_sqlglot_plugin.egg-info/entry_points.txt +0 -0
- {ydb_sqlglot_plugin-0.1.1 → ydb_sqlglot_plugin-0.2.0}/ydb_sqlglot_plugin.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ydb-sqlglot-plugin
|
|
3
|
-
Version: 0.1.1
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: YDB dialect plugin for sqlglot
|
|
5
5
|
Author: YDB Team
|
|
6
6
|
License: Apache-2.0
|
|
@@ -22,11 +22,12 @@ Provides-Extra: dev
|
|
|
22
22
|
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
23
23
|
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
24
24
|
Requires-Dist: ydb<4,>=3.28.0; extra == "dev"
|
|
25
|
+
Requires-Dist: ruff>=0.9.0; extra == "dev"
|
|
25
26
|
Dynamic: license-file
|
|
26
27
|
|
|
27
28
|
# ydb-sqlglot-plugin
|
|
28
29
|
|
|
29
|
-
YDB dialect plugin for [sqlglot](https://github.com/tobymao/sqlglot) —
|
|
30
|
+
YDB dialect plugin for [sqlglot](https://github.com/tobymao/sqlglot) — bidirectional transpilation between YDB/YQL and any SQL dialect.
|
|
30
31
|
|
|
31
32
|
## Installation
|
|
32
33
|
|
|
@@ -41,19 +42,20 @@ After installing the package, the `ydb` dialect is available in sqlglot automati
|
|
|
41
42
|
```python
|
|
42
43
|
import sqlglot
|
|
43
44
|
|
|
44
|
-
#
|
|
45
|
+
# Any dialect → YDB
|
|
45
46
|
result = sqlglot.transpile("SELECT * FROM users WHERE id = 1", read="mysql", write="ydb")[0]
|
|
46
47
|
# → SELECT * FROM `users` WHERE id = 1
|
|
47
48
|
|
|
48
|
-
#
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
yql = parsed.sql(dialect="ydb")
|
|
49
|
+
# YDB → any dialect
|
|
50
|
+
result = sqlglot.transpile("$t = (SELECT id FROM users); SELECT * FROM $t AS t", read="ydb", write="postgres")[0]
|
|
51
|
+
# → WITH t AS (SELECT id FROM users) SELECT * FROM t AS t
|
|
52
52
|
```
|
|
53
53
|
|
|
54
54
|
## What the plugin does
|
|
55
55
|
|
|
56
|
-
###
|
|
56
|
+
### Any SQL → YDB
|
|
57
|
+
|
|
58
|
+
#### Table names
|
|
57
59
|
|
|
58
60
|
Database-qualified names are rewritten to the YDB path format and wrapped in backticks:
|
|
59
61
|
|
|
@@ -65,7 +67,7 @@ SELECT * FROM analytics.events
|
|
|
65
67
|
SELECT * FROM `analytics/events`
|
|
66
68
|
```
|
|
67
69
|
|
|
68
|
-
|
|
70
|
+
#### CTEs → YDB variables
|
|
69
71
|
|
|
70
72
|
```sql
|
|
71
73
|
-- input
|
|
@@ -78,7 +80,7 @@ $active = (SELECT * FROM `users` WHERE status = 'active');
|
|
|
78
80
|
SELECT * FROM $active AS active
|
|
79
81
|
```
|
|
80
82
|
|
|
81
|
-
|
|
83
|
+
#### Subquery decorrelation
|
|
82
84
|
|
|
83
85
|
Correlated subqueries (which YQL does not support) are rewritten as JOINs:
|
|
84
86
|
|
|
@@ -102,6 +104,52 @@ The same rewriting applies to `EXISTS`, `IN (subquery)`, and `ANY/ALL` subquerie
|
|
|
102
104
|
|
|
103
105
|
---
|
|
104
106
|
|
|
107
|
+
### YDB → any SQL
|
|
108
|
+
|
|
109
|
+
The plugin parses YDB/YQL back into sqlglot's AST, enabling round-trips, YDB-to-YDB transformations, and transpilation to other dialects.
|
|
110
|
+
|
|
111
|
+
#### Supported YQL constructs
|
|
112
|
+
|
|
113
|
+
| Construct | Example |
|
|
114
|
+
|---|---|
|
|
115
|
+
| `$variable` references | `SELECT * FROM $t AS t` |
|
|
116
|
+
| `Module::Function()` | `DateTime::GetYear(ts)` |
|
|
117
|
+
| `DECLARE $p AS Type` | `DECLARE $p AS Int32` |
|
|
118
|
+
| `FLATTEN [LIST\|DICT] BY col` | `FROM t FLATTEN LIST BY col` |
|
|
119
|
+
| `Optional<T>` / `T?` | `CAST(x AS Optional<Utf8>)` |
|
|
120
|
+
| Container types | `CAST(x AS List<Int32>)`, `Dict<Utf8, Int64>`, `Set<Utf8>`, `Tuple<Int32, Utf8>` |
|
|
121
|
+
| `ASSUME ORDER BY` | `SELECT * FROM t ASSUME ORDER BY id` |
|
|
122
|
+
| Named expressions | `$t = (SELECT 1 AS x)` |
|
|
123
|
+
| `PRAGMA` | `PRAGMA AnsiImplicitCrossJoin` |
|
|
124
|
+
|
|
125
|
+
Table names without backticks are accepted on input; the generator always produces backtick-quoted output.
|
|
126
|
+
|
|
127
|
+
#### CTEs reassembly
|
|
128
|
+
|
|
129
|
+
YDB-style named expressions are automatically reassembled into standard `WITH` CTEs when targeting other dialects:
|
|
130
|
+
|
|
131
|
+
```python
|
|
132
|
+
ydb_sql = "$t = (SELECT 1 AS x); SELECT * FROM $t AS t"
|
|
133
|
+
parse_one(ydb_sql, dialect="ydb").sql(dialect="postgres")
|
|
134
|
+
# → WITH t AS (SELECT 1 AS x) SELECT * FROM t AS t
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
---
|
|
138
|
+
|
|
139
|
+
### Column lineage
|
|
140
|
+
|
|
141
|
+
Because YDB SQL is fully parsed into sqlglot's AST, column-level lineage works out of the box:
|
|
142
|
+
|
|
143
|
+
```python
|
|
144
|
+
from sqlglot.lineage import lineage
|
|
145
|
+
|
|
146
|
+
node = lineage("total", "$orders = (SELECT user_id, amount FROM orders); SELECT SUM(amount) AS total FROM $orders AS o", dialect="ydb")
|
|
147
|
+
for dep in node.walk():
|
|
148
|
+
print(dep.name, "→", dep.source)
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
---
|
|
152
|
+
|
|
105
153
|
## Function reference
|
|
106
154
|
|
|
107
155
|
Functions below are recognized by sqlglot as standard SQL expressions and translated to their YQL equivalents. Dialect-specific functions that sqlglot does not parse into typed AST nodes are **passed through unchanged** — see [Limitations](#limitations).
|
|
@@ -179,25 +227,43 @@ Functions below are recognized by sqlglot as standard SQL expressions and transl
|
|
|
179
227
|
|
|
180
228
|
## Type mapping
|
|
181
229
|
|
|
182
|
-
|
|
230
|
+
### Standard SQL → YDB
|
|
231
|
+
|
|
232
|
+
| SQL type | YDB type |
|
|
183
233
|
|---|---|
|
|
184
|
-
| `TINYINT` | `
|
|
185
|
-
| `SMALLINT` | `
|
|
186
|
-
| `INT` / `INTEGER` | `
|
|
187
|
-
| `BIGINT` | `
|
|
234
|
+
| `TINYINT` | `Int8` |
|
|
235
|
+
| `SMALLINT` | `Int16` |
|
|
236
|
+
| `INT` / `INTEGER` | `Int32` |
|
|
237
|
+
| `BIGINT` | `Int64` |
|
|
188
238
|
| `FLOAT` | `Float` |
|
|
189
239
|
| `DOUBLE` / `DOUBLE PRECISION` | `Double` |
|
|
190
240
|
| `DECIMAL(p, s)` | `Decimal(p, s)` |
|
|
191
241
|
| `BOOLEAN` / `BIT` | `Uint8` |
|
|
192
242
|
| `TIMESTAMP` | `Timestamp` |
|
|
193
|
-
| `VARCHAR` / `NVARCHAR` / `CHAR` | `Utf8` |
|
|
194
|
-
| `
|
|
195
|
-
|
|
243
|
+
| `VARCHAR` / `NVARCHAR` / `CHAR` / `TEXT` | `Utf8` |
|
|
244
|
+
| `BLOB` / `BINARY` / `VARBINARY` | `String` |
|
|
245
|
+
|
|
246
|
+
### YDB types → standard SQL
|
|
247
|
+
|
|
248
|
+
| YDB type | Standard SQL | Postgres | ClickHouse |
|
|
249
|
+
|---|---|---|---|
|
|
250
|
+
| `Utf8` | `TEXT` | `TEXT` | `String` |
|
|
251
|
+
| `String` | `BLOB` | `BYTEA` | `String` |
|
|
252
|
+
| `Int32` | `INT` | `INT` | `Int32` |
|
|
253
|
+
| `Int64` | `BIGINT` | `BIGINT` | `Int64` |
|
|
254
|
+
| `Optional<T>` | `T` (nullable) | `T` | `Nullable(T)` |
|
|
255
|
+
| `List<T>` | `LIST<T>` | `LIST<T>` | `Array(T)` |
|
|
256
|
+
| `Dict<K,V>` | `MAP<K,V>` | `MAP<K,V>` | `Map(K,V)` |
|
|
257
|
+
| `Tuple<T1,T2>` | `STRUCT<...>` | `STRUCT<...>` | `Tuple(T1,T2)` |
|
|
196
258
|
|
|
197
259
|
---
|
|
198
260
|
|
|
199
261
|
## Limitations
|
|
200
262
|
|
|
263
|
+
### Dialect-specific functions
|
|
264
|
+
|
|
265
|
+
Functions that sqlglot does not parse into typed AST nodes are passed through unchanged and must be replaced manually. Common examples from ClickHouse: `now()`, `today()`, `parseDateTimeBestEffort()`, `toDate()`, `toFloat64()`, `toString()`, `countDistinct()`, `groupArray()`.
|
|
266
|
+
|
|
201
267
|
### Correlated subqueries in DML
|
|
202
268
|
|
|
203
269
|
Correlated subqueries inside `UPDATE` or `INSERT` statements cannot be automatically decorrelated — YDB does not support them natively, and rewriting requires knowledge of the table's primary key. Rewrite manually using a `$variable`:
|
|
@@ -217,6 +283,10 @@ Correlated subqueries inside `SELECT` are handled automatically via JOIN rewriti
|
|
|
217
283
|
|
|
218
284
|
`dateDiff('month', a, b)` has no exact equivalent in YDB because months have variable length. Use `DateTime::ShiftMonths` for date arithmetic instead.
|
|
219
285
|
|
|
286
|
+
### YDB container types in other dialects
|
|
287
|
+
|
|
288
|
+
`Uint8`/`Uint16`/`Uint32`/`Uint64` and YDB-specific container types (`Struct<...>`, `Variant<...>`, `Enum<...>`) do not have direct equivalents in standard SQL and are passed through as-is when targeting other dialects.
|
|
289
|
+
|
|
220
290
|
---
|
|
221
291
|
|
|
222
292
|
## Development
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# ydb-sqlglot-plugin
|
|
2
2
|
|
|
3
|
-
YDB dialect plugin for [sqlglot](https://github.com/tobymao/sqlglot) —
|
|
3
|
+
YDB dialect plugin for [sqlglot](https://github.com/tobymao/sqlglot) — bidirectional transpilation between YDB/YQL and any SQL dialect.
|
|
4
4
|
|
|
5
5
|
## Installation
|
|
6
6
|
|
|
@@ -15,19 +15,20 @@ After installing the package, the `ydb` dialect is available in sqlglot automati
|
|
|
15
15
|
```python
|
|
16
16
|
import sqlglot
|
|
17
17
|
|
|
18
|
-
#
|
|
18
|
+
# Any dialect → YDB
|
|
19
19
|
result = sqlglot.transpile("SELECT * FROM users WHERE id = 1", read="mysql", write="ydb")[0]
|
|
20
20
|
# → SELECT * FROM `users` WHERE id = 1
|
|
21
21
|
|
|
22
|
-
#
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
yql = parsed.sql(dialect="ydb")
|
|
22
|
+
# YDB → any dialect
|
|
23
|
+
result = sqlglot.transpile("$t = (SELECT id FROM users); SELECT * FROM $t AS t", read="ydb", write="postgres")[0]
|
|
24
|
+
# → WITH t AS (SELECT id FROM users) SELECT * FROM t AS t
|
|
26
25
|
```
|
|
27
26
|
|
|
28
27
|
## What the plugin does
|
|
29
28
|
|
|
30
|
-
###
|
|
29
|
+
### Any SQL → YDB
|
|
30
|
+
|
|
31
|
+
#### Table names
|
|
31
32
|
|
|
32
33
|
Database-qualified names are rewritten to the YDB path format and wrapped in backticks:
|
|
33
34
|
|
|
@@ -39,7 +40,7 @@ SELECT * FROM analytics.events
|
|
|
39
40
|
SELECT * FROM `analytics/events`
|
|
40
41
|
```
|
|
41
42
|
|
|
42
|
-
|
|
43
|
+
#### CTEs → YDB variables
|
|
43
44
|
|
|
44
45
|
```sql
|
|
45
46
|
-- input
|
|
@@ -52,7 +53,7 @@ $active = (SELECT * FROM `users` WHERE status = 'active');
|
|
|
52
53
|
SELECT * FROM $active AS active
|
|
53
54
|
```
|
|
54
55
|
|
|
55
|
-
|
|
56
|
+
#### Subquery decorrelation
|
|
56
57
|
|
|
57
58
|
Correlated subqueries (which YQL does not support) are rewritten as JOINs:
|
|
58
59
|
|
|
@@ -76,6 +77,52 @@ The same rewriting applies to `EXISTS`, `IN (subquery)`, and `ANY/ALL` subquerie
|
|
|
76
77
|
|
|
77
78
|
---
|
|
78
79
|
|
|
80
|
+
### YDB → any SQL
|
|
81
|
+
|
|
82
|
+
The plugin parses YDB/YQL back into sqlglot's AST, enabling round-trips, YDB-to-YDB transformations, and transpilation to other dialects.
|
|
83
|
+
|
|
84
|
+
#### Supported YQL constructs
|
|
85
|
+
|
|
86
|
+
| Construct | Example |
|
|
87
|
+
|---|---|
|
|
88
|
+
| `$variable` references | `SELECT * FROM $t AS t` |
|
|
89
|
+
| `Module::Function()` | `DateTime::GetYear(ts)` |
|
|
90
|
+
| `DECLARE $p AS Type` | `DECLARE $p AS Int32` |
|
|
91
|
+
| `FLATTEN [LIST\|DICT] BY col` | `FROM t FLATTEN LIST BY col` |
|
|
92
|
+
| `Optional<T>` / `T?` | `CAST(x AS Optional<Utf8>)` |
|
|
93
|
+
| Container types | `CAST(x AS List<Int32>)`, `Dict<Utf8, Int64>`, `Set<Utf8>`, `Tuple<Int32, Utf8>` |
|
|
94
|
+
| `ASSUME ORDER BY` | `SELECT * FROM t ASSUME ORDER BY id` |
|
|
95
|
+
| Named expressions | `$t = (SELECT 1 AS x)` |
|
|
96
|
+
| `PRAGMA` | `PRAGMA AnsiImplicitCrossJoin` |
|
|
97
|
+
|
|
98
|
+
Table names without backticks are accepted on input; the generator always produces backtick-quoted output.
|
|
99
|
+
|
|
100
|
+
#### CTEs reassembly
|
|
101
|
+
|
|
102
|
+
YDB-style named expressions are automatically reassembled into standard `WITH` CTEs when targeting other dialects:
|
|
103
|
+
|
|
104
|
+
```python
|
|
105
|
+
ydb_sql = "$t = (SELECT 1 AS x); SELECT * FROM $t AS t"
|
|
106
|
+
parse_one(ydb_sql, dialect="ydb").sql(dialect="postgres")
|
|
107
|
+
# → WITH t AS (SELECT 1 AS x) SELECT * FROM t AS t
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
---
|
|
111
|
+
|
|
112
|
+
### Column lineage
|
|
113
|
+
|
|
114
|
+
Because YDB SQL is fully parsed into sqlglot's AST, column-level lineage works out of the box:
|
|
115
|
+
|
|
116
|
+
```python
|
|
117
|
+
from sqlglot.lineage import lineage
|
|
118
|
+
|
|
119
|
+
node = lineage("total", "$orders = (SELECT user_id, amount FROM orders); SELECT SUM(amount) AS total FROM $orders AS o", dialect="ydb")
|
|
120
|
+
for dep in node.walk():
|
|
121
|
+
print(dep.name, "→", dep.source)
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
---
|
|
125
|
+
|
|
79
126
|
## Function reference
|
|
80
127
|
|
|
81
128
|
Functions below are recognized by sqlglot as standard SQL expressions and translated to their YQL equivalents. Dialect-specific functions that sqlglot does not parse into typed AST nodes are **passed through unchanged** — see [Limitations](#limitations).
|
|
@@ -153,25 +200,43 @@ Functions below are recognized by sqlglot as standard SQL expressions and transl
|
|
|
153
200
|
|
|
154
201
|
## Type mapping
|
|
155
202
|
|
|
156
|
-
|
|
203
|
+
### Standard SQL → YDB
|
|
204
|
+
|
|
205
|
+
| SQL type | YDB type |
|
|
157
206
|
|---|---|
|
|
158
|
-
| `TINYINT` | `
|
|
159
|
-
| `SMALLINT` | `
|
|
160
|
-
| `INT` / `INTEGER` | `
|
|
161
|
-
| `BIGINT` | `
|
|
207
|
+
| `TINYINT` | `Int8` |
|
|
208
|
+
| `SMALLINT` | `Int16` |
|
|
209
|
+
| `INT` / `INTEGER` | `Int32` |
|
|
210
|
+
| `BIGINT` | `Int64` |
|
|
162
211
|
| `FLOAT` | `Float` |
|
|
163
212
|
| `DOUBLE` / `DOUBLE PRECISION` | `Double` |
|
|
164
213
|
| `DECIMAL(p, s)` | `Decimal(p, s)` |
|
|
165
214
|
| `BOOLEAN` / `BIT` | `Uint8` |
|
|
166
215
|
| `TIMESTAMP` | `Timestamp` |
|
|
167
|
-
| `VARCHAR` / `NVARCHAR` / `CHAR` | `Utf8` |
|
|
168
|
-
| `
|
|
169
|
-
|
|
216
|
+
| `VARCHAR` / `NVARCHAR` / `CHAR` / `TEXT` | `Utf8` |
|
|
217
|
+
| `BLOB` / `BINARY` / `VARBINARY` | `String` |
|
|
218
|
+
|
|
219
|
+
### YDB types → standard SQL
|
|
220
|
+
|
|
221
|
+
| YDB type | Standard SQL | Postgres | ClickHouse |
|
|
222
|
+
|---|---|---|---|
|
|
223
|
+
| `Utf8` | `TEXT` | `TEXT` | `String` |
|
|
224
|
+
| `String` | `BLOB` | `BYTEA` | `String` |
|
|
225
|
+
| `Int32` | `INT` | `INT` | `Int32` |
|
|
226
|
+
| `Int64` | `BIGINT` | `BIGINT` | `Int64` |
|
|
227
|
+
| `Optional<T>` | `T` (nullable) | `T` | `Nullable(T)` |
|
|
228
|
+
| `List<T>` | `LIST<T>` | `LIST<T>` | `Array(T)` |
|
|
229
|
+
| `Dict<K,V>` | `MAP<K,V>` | `MAP<K,V>` | `Map(K,V)` |
|
|
230
|
+
| `Tuple<T1,T2>` | `STRUCT<...>` | `STRUCT<...>` | `Tuple(T1,T2)` |
|
|
170
231
|
|
|
171
232
|
---
|
|
172
233
|
|
|
173
234
|
## Limitations
|
|
174
235
|
|
|
236
|
+
### Dialect-specific functions
|
|
237
|
+
|
|
238
|
+
Functions that sqlglot does not parse into typed AST nodes are passed through unchanged and must be replaced manually. Common examples from ClickHouse: `now()`, `today()`, `parseDateTimeBestEffort()`, `toDate()`, `toFloat64()`, `toString()`, `countDistinct()`, `groupArray()`.
|
|
239
|
+
|
|
175
240
|
### Correlated subqueries in DML
|
|
176
241
|
|
|
177
242
|
Correlated subqueries inside `UPDATE` or `INSERT` statements cannot be automatically decorrelated — YDB does not support them natively, and rewriting requires knowledge of the table's primary key. Rewrite manually using a `$variable`:
|
|
@@ -191,6 +256,10 @@ Correlated subqueries inside `SELECT` are handled automatically via JOIN rewriti
|
|
|
191
256
|
|
|
192
257
|
`dateDiff('month', a, b)` has no exact equivalent in YDB because months have variable length. Use `DateTime::ShiftMonths` for date arithmetic instead.
|
|
193
258
|
|
|
259
|
+
### YDB container types in other dialects
|
|
260
|
+
|
|
261
|
+
`Uint8`/`Uint16`/`Uint32`/`Uint64` and YDB-specific container types (`Struct<...>`, `Variant<...>`, `Enum<...>`) do not have direct equivalents in standard SQL and are passed through as-is when targeting other dialects.
|
|
262
|
+
|
|
194
263
|
---
|
|
195
264
|
|
|
196
265
|
## Development
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "ydb-sqlglot-plugin"
|
|
7
|
-
version = "0.1.1"
|
|
7
|
+
version = "0.2.0" # AUTOVERSION
|
|
8
8
|
description = "YDB dialect plugin for sqlglot"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = {text = "Apache-2.0"}
|
|
@@ -31,6 +31,7 @@ dev = [
|
|
|
31
31
|
"pytest>=7.0",
|
|
32
32
|
"pytest-cov>=4.0",
|
|
33
33
|
"ydb>=3.28.0,<4",
|
|
34
|
+
"ruff>=0.9.0",
|
|
34
35
|
]
|
|
35
36
|
|
|
36
37
|
[project.urls]
|
|
@@ -48,3 +49,11 @@ include = ["ydb_sqlglot*"]
|
|
|
48
49
|
testpaths = ["tests"]
|
|
49
50
|
python_files = ["test_*.py"]
|
|
50
51
|
python_functions = ["test_*"]
|
|
52
|
+
|
|
53
|
+
[tool.ruff]
|
|
54
|
+
target-version = "py39"
|
|
55
|
+
line-length = 120
|
|
56
|
+
|
|
57
|
+
[tool.ruff.lint]
|
|
58
|
+
select = ["E", "F", "I", "W"]
|
|
59
|
+
ignore = ["E501"]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
VERSION = "0.2.0"
|
|
@@ -1,16 +1,14 @@
|
|
|
1
1
|
import re
|
|
2
2
|
import typing as t
|
|
3
3
|
|
|
4
|
-
from sqlglot import
|
|
4
|
+
from sqlglot import Generator, TokenType, exp, generator, parser, tokens, transforms
|
|
5
|
+
from sqlglot.dialects.dialect import Dialect, NormalizationStrategy, concat_to_dpipe_sql, unit_to_var
|
|
5
6
|
from sqlglot.errors import UnsupportedError
|
|
6
7
|
from sqlglot.expressions import Expression
|
|
7
|
-
from sqlglot.
|
|
8
|
-
from sqlglot.
|
|
9
|
-
from sqlglot.helper import name_sequence, seq_get, flatten
|
|
8
|
+
from sqlglot.helper import flatten, name_sequence, seq_get
|
|
9
|
+
from sqlglot.optimizer.scope import ScopeType, find_in_scope, traverse_scope
|
|
10
10
|
from sqlglot.optimizer.simplify import simplify
|
|
11
|
-
from sqlglot.transforms import move_ctes_to_top_level
|
|
12
|
-
from sqlglot.optimizer.scope import find_in_scope, ScopeType, traverse_scope
|
|
13
|
-
from sqlglot.transforms import eliminate_join_marks
|
|
11
|
+
from sqlglot.transforms import eliminate_join_marks, move_ctes_to_top_level
|
|
14
12
|
|
|
15
13
|
JOIN_ATTRS = ("on", "side", "kind", "using", "method")
|
|
16
14
|
|
|
@@ -29,21 +27,6 @@ def table_names_to_lower_case(expression: exp.Expression) -> exp.Expression:
|
|
|
29
27
|
return expression
|
|
30
28
|
|
|
31
29
|
|
|
32
|
-
def make_db_name_lower(expression: exp.Expression) -> exp.Expression:
|
|
33
|
-
"""
|
|
34
|
-
Converts all database names to uppercase
|
|
35
|
-
Args:
|
|
36
|
-
expression: The SQL expression to modify
|
|
37
|
-
Returns:
|
|
38
|
-
Modified expression with uppercase database names
|
|
39
|
-
"""
|
|
40
|
-
for table in expression.find_all(exp.Table):
|
|
41
|
-
if table.db:
|
|
42
|
-
table.set("db", table.db.lower())
|
|
43
|
-
|
|
44
|
-
return expression
|
|
45
|
-
|
|
46
|
-
|
|
47
30
|
def make_db_name_lower(expression: exp.Expression) -> exp.Expression:
|
|
48
31
|
"""
|
|
49
32
|
Converts all database names to lowercase
|
|
@@ -440,6 +423,110 @@ def _has_implicit_cross_join(expression: exp.Expression) -> bool:
|
|
|
440
423
|
return False
|
|
441
424
|
|
|
442
425
|
|
|
426
|
+
class FlattenBy(exp.Expression):
|
|
427
|
+
"""YDB-specific FLATTEN [LIST|DICT] BY clause on a table reference."""
|
|
428
|
+
arg_types = {"this": True, "expressions": True, "kind": False}
|
|
429
|
+
|
|
430
|
+
|
|
431
|
+
class AssumeOrderBy(exp.Expression):
|
|
432
|
+
"""YDB-specific ASSUME ORDER BY hint (data is pre-sorted, skip sort)."""
|
|
433
|
+
arg_types = {"this": True}
|
|
434
|
+
|
|
435
|
+
|
|
436
|
+
class YdbTuple(exp.Expression):
|
|
437
|
+
"""YDB Tuple<T1, T2, ...> type — positional unnamed fields."""
|
|
438
|
+
arg_types = {"expressions": True, "nullable": False}
|
|
439
|
+
|
|
440
|
+
|
|
441
|
+
# Container types that use Generic<T, ...> syntax in YDB
|
|
442
|
+
_YDB_GENERIC_TYPES = {
|
|
443
|
+
"List": exp.DataType.Type.LIST,
|
|
444
|
+
"Dict": exp.DataType.Type.MAP,
|
|
445
|
+
"Set": exp.DataType.Type.SET,
|
|
446
|
+
}
|
|
447
|
+
|
|
448
|
+
|
|
449
|
+
def _reassemble_ctes(
|
|
450
|
+
statements: t.List[t.Optional[exp.Expression]],
|
|
451
|
+
) -> t.List[t.Optional[exp.Expression]]:
|
|
452
|
+
"""Convert sequences of YDB named-expression statements into standard WITH CTEs.
|
|
453
|
+
|
|
454
|
+
YDB generator emits: $t = (SELECT ...); SELECT * FROM $t AS t
|
|
455
|
+
This function rebuilds: WITH t AS (SELECT ...) SELECT * FROM t
|
|
456
|
+
|
|
457
|
+
so that transpiling YDB output to other dialects produces valid SQL.
|
|
458
|
+
"""
|
|
459
|
+
result: t.List[t.Optional[exp.Expression]] = []
|
|
460
|
+
# Keep both the original Alias nodes and the converted CTE nodes
|
|
461
|
+
pending_aliases: t.List[exp.Alias] = []
|
|
462
|
+
pending_ctes: t.List[exp.CTE] = []
|
|
463
|
+
pending_names: t.Set[str] = set()
|
|
464
|
+
|
|
465
|
+
def _flush_as_aliases() -> None:
|
|
466
|
+
result.extend(pending_aliases)
|
|
467
|
+
pending_aliases.clear()
|
|
468
|
+
pending_ctes.clear()
|
|
469
|
+
pending_names.clear()
|
|
470
|
+
|
|
471
|
+
for stmt in statements:
|
|
472
|
+
if (
|
|
473
|
+
isinstance(stmt, exp.Alias)
|
|
474
|
+
and isinstance(stmt.args.get("alias"), exp.Identifier)
|
|
475
|
+
and stmt.alias.startswith("$")
|
|
476
|
+
):
|
|
477
|
+
name = stmt.alias[1:]
|
|
478
|
+
inner = stmt.this
|
|
479
|
+
# Unwrap Subquery — CTE.this must be Select, not Subquery
|
|
480
|
+
if isinstance(inner, exp.Subquery):
|
|
481
|
+
inner = inner.this
|
|
482
|
+
# Replace any $prev_cte refs inside this CTE body
|
|
483
|
+
inner = _replace_param_table_refs(inner, pending_names)
|
|
484
|
+
pending_aliases.append(stmt)
|
|
485
|
+
pending_ctes.append(
|
|
486
|
+
exp.CTE(
|
|
487
|
+
this=inner,
|
|
488
|
+
alias=exp.TableAlias(this=exp.to_identifier(name)),
|
|
489
|
+
)
|
|
490
|
+
)
|
|
491
|
+
pending_names.add(name)
|
|
492
|
+
elif pending_ctes and isinstance(stmt, exp.Select):
|
|
493
|
+
stmt = _replace_param_table_refs(stmt, pending_names)
|
|
494
|
+
stmt.set("with_", exp.With(expressions=list(pending_ctes)))
|
|
495
|
+
result.append(stmt)
|
|
496
|
+
pending_aliases.clear()
|
|
497
|
+
pending_ctes.clear()
|
|
498
|
+
pending_names.clear()
|
|
499
|
+
else:
|
|
500
|
+
# No following SELECT — keep original Alias form
|
|
501
|
+
_flush_as_aliases()
|
|
502
|
+
result.append(stmt)
|
|
503
|
+
|
|
504
|
+
# Trailing named exprs without a SELECT — keep as-is
|
|
505
|
+
_flush_as_aliases()
|
|
506
|
+
return result
|
|
507
|
+
|
|
508
|
+
|
|
509
|
+
def _replace_param_table_refs(
|
|
510
|
+
tree: exp.Expression, names: t.Set[str]
|
|
511
|
+
) -> exp.Expression:
|
|
512
|
+
"""Replace Table(Parameter(Var("t"))) with Table(Identifier("t")) for CTE names."""
|
|
513
|
+
|
|
514
|
+
def _transform(node: exp.Expression) -> exp.Expression:
|
|
515
|
+
if (
|
|
516
|
+
isinstance(node, exp.Table)
|
|
517
|
+
and isinstance(node.this, exp.Parameter)
|
|
518
|
+
and isinstance(node.this.this, exp.Var)
|
|
519
|
+
and node.this.this.name in names
|
|
520
|
+
):
|
|
521
|
+
return exp.Table(
|
|
522
|
+
this=exp.to_identifier(node.this.this.name),
|
|
523
|
+
alias=node.args.get("alias"),
|
|
524
|
+
)
|
|
525
|
+
return node
|
|
526
|
+
|
|
527
|
+
return tree.transform(_transform)
|
|
528
|
+
|
|
529
|
+
|
|
443
530
|
class YDB(Dialect):
|
|
444
531
|
"""
|
|
445
532
|
YDB SQL dialect implementation for sqlglot.
|
|
@@ -470,9 +557,18 @@ class YDB(Dialect):
|
|
|
470
557
|
Defines how the SQL text is broken into tokens.
|
|
471
558
|
"""
|
|
472
559
|
|
|
560
|
+
KEYWORDS = {
|
|
561
|
+
**tokens.Tokenizer.KEYWORDS,
|
|
562
|
+
"DECLARE": TokenType.DECLARE,
|
|
563
|
+
"UTF8": TokenType.TEXT, # YDB Utf8 = unicode text = SQL TEXT
|
|
564
|
+
"STRING": TokenType.BLOB, # YDB String = bytes = SQL BLOB
|
|
565
|
+
}
|
|
566
|
+
|
|
473
567
|
SINGLE_TOKENS = {
|
|
474
568
|
**tokens.Tokenizer.SINGLE_TOKENS,
|
|
569
|
+
"$": TokenType.PARAMETER,
|
|
475
570
|
}
|
|
571
|
+
VAR_SINGLE_TOKENS = {"$"}
|
|
476
572
|
|
|
477
573
|
SUPPORTS_VALUES_DEFAULT = False
|
|
478
574
|
QUOTES = ["'", '"']
|
|
@@ -480,6 +576,154 @@ class YDB(Dialect):
|
|
|
480
576
|
IDENTIFIERS = ["`"]
|
|
481
577
|
|
|
482
578
|
class Parser(parser.Parser):
|
|
579
|
+
COLUMN_OPERATORS = {
|
|
580
|
+
**parser.Parser.COLUMN_OPERATORS,
|
|
581
|
+
# In YDB :: is a module namespace separator (e.g. DateTime::GetYear),
|
|
582
|
+
# not a Postgres-style cast. Reparse the right side as a function call.
|
|
583
|
+
TokenType.DCOLON: lambda self, this, field: (
|
|
584
|
+
self.expression(
|
|
585
|
+
exp.Anonymous(
|
|
586
|
+
this=f"{this.name}::{field.name}",
|
|
587
|
+
expressions=field.expressions,
|
|
588
|
+
)
|
|
589
|
+
)
|
|
590
|
+
if isinstance(field, exp.Func)
|
|
591
|
+
else self.expression(exp.ScopeResolution(this=this, expression=field))
|
|
592
|
+
),
|
|
593
|
+
}
|
|
594
|
+
|
|
595
|
+
STATEMENT_PARSERS = {
|
|
596
|
+
**parser.Parser.STATEMENT_PARSERS,
|
|
597
|
+
TokenType.DECLARE: lambda self: self._parse_ydb_declare(),
|
|
598
|
+
TokenType.PARAMETER: lambda self: self._parse_ydb_named_expr(),
|
|
599
|
+
}
|
|
600
|
+
|
|
601
|
+
def parse(self, raw_tokens, sql=None):
|
|
602
|
+
statements = super().parse(raw_tokens, sql)
|
|
603
|
+
return _reassemble_ctes(statements)
|
|
604
|
+
|
|
605
|
+
def _parse_dcolon(self) -> t.Optional[exp.Expression]:
|
|
606
|
+
return self._parse_function(anonymous=True) or self._parse_var(any_token=True)
|
|
607
|
+
|
|
608
|
+
def _parse_ydb_named_expr(self) -> t.Optional[exp.Expression]:
|
|
609
|
+
# _match_set already consumed '$', so _index points to the var name.
|
|
610
|
+
# Retreat one extra step to include '$' when falling back to expression parsing.
|
|
611
|
+
index = self._index - 1
|
|
612
|
+
name_var = self._parse_var(any_token=True)
|
|
613
|
+
if not self._match(TokenType.EQ):
|
|
614
|
+
# Not an assignment — retreat (including '$') and parse as expression.
|
|
615
|
+
self._retreat(index)
|
|
616
|
+
return self._parse_expression()
|
|
617
|
+
value = self._parse_select() or self._parse_expression()
|
|
618
|
+
return self.expression(
|
|
619
|
+
exp.Alias(
|
|
620
|
+
this=value,
|
|
621
|
+
alias=exp.Identifier(this=f"${name_var.name}"),
|
|
622
|
+
)
|
|
623
|
+
)
|
|
624
|
+
|
|
625
|
+
def _parse_ydb_declare(self) -> exp.Declare:
|
|
626
|
+
items = self._parse_csv(self._parse_ydb_declareitem)
|
|
627
|
+
return self.expression(exp.Declare(expressions=items))
|
|
628
|
+
|
|
629
|
+
def _parse_ydb_declareitem(self) -> t.Optional[exp.DeclareItem]:
|
|
630
|
+
if not self._match(TokenType.PARAMETER):
|
|
631
|
+
return None
|
|
632
|
+
name = self._parse_var(any_token=True)
|
|
633
|
+
if not name:
|
|
634
|
+
return None
|
|
635
|
+
self._match(TokenType.ALIAS)
|
|
636
|
+
kind = self._parse_types()
|
|
637
|
+
return self.expression(exp.DeclareItem(this=name, kind=kind))
|
|
638
|
+
|
|
639
|
+
def _parse_types(self, *args, **kwargs) -> t.Optional[exp.Expression]:
|
|
640
|
+
# YDB generic types use Name<...> syntax; token type varies by keyword status
|
|
641
|
+
if self._curr and self._next and self._next.token_type == TokenType.LT:
|
|
642
|
+
name = self._curr.text
|
|
643
|
+
|
|
644
|
+
if name == "Optional":
|
|
645
|
+
self._advance() # consume 'Optional'
|
|
646
|
+
self._advance() # consume '<'
|
|
647
|
+
inner = self._parse_types(*args, **kwargs)
|
|
648
|
+
self._match(TokenType.GT)
|
|
649
|
+
if inner:
|
|
650
|
+
inner.set("nullable", True)
|
|
651
|
+
return inner
|
|
652
|
+
|
|
653
|
+
if name in _YDB_GENERIC_TYPES:
|
|
654
|
+
self._advance() # consume type name
|
|
655
|
+
self._advance() # consume '<'
|
|
656
|
+
type_args = self._parse_csv(
|
|
657
|
+
lambda: self._parse_types(*args, **kwargs)
|
|
658
|
+
)
|
|
659
|
+
self._match(TokenType.GT)
|
|
660
|
+
return exp.DataType(
|
|
661
|
+
this=_YDB_GENERIC_TYPES[name],
|
|
662
|
+
expressions=[a for a in type_args if a],
|
|
663
|
+
nested=True,
|
|
664
|
+
)
|
|
665
|
+
|
|
666
|
+
if name == "Tuple":
|
|
667
|
+
self._advance() # consume 'Tuple'
|
|
668
|
+
self._advance() # consume '<'
|
|
669
|
+
type_args = self._parse_csv(
|
|
670
|
+
lambda: self._parse_types(*args, **kwargs)
|
|
671
|
+
)
|
|
672
|
+
self._match(TokenType.GT)
|
|
673
|
+
# Represent as STRUCT so other dialects can serialize it.
|
|
674
|
+
# kind="tuple" is a YDB-specific marker for the generator to emit Tuple<...>.
|
|
675
|
+
return exp.DataType(
|
|
676
|
+
this=exp.DataType.Type.STRUCT,
|
|
677
|
+
expressions=[
|
|
678
|
+
exp.ColumnDef(this=exp.to_identifier(f"_{i}"), kind=a)
|
|
679
|
+
for i, a in enumerate(type_args) if a
|
|
680
|
+
],
|
|
681
|
+
nested=True,
|
|
682
|
+
kind=exp.Var(this="tuple"),
|
|
683
|
+
)
|
|
684
|
+
|
|
685
|
+
dtype = super()._parse_types(*args, **kwargs)
|
|
686
|
+
if dtype and self._match(TokenType.PLACEHOLDER): # T?
|
|
687
|
+
dtype.set("nullable", True)
|
|
688
|
+
return dtype
|
|
689
|
+
|
|
690
|
+
def _parse_table_alias(self, alias_tokens=None):
|
|
691
|
+
# Prevent YDB-specific keywords from being consumed as table aliases
|
|
692
|
+
if self._curr and self._curr.text.upper() in ("FLATTEN", "ASSUME"):
|
|
693
|
+
# Also check that what follows is a YDB construct, not a regular alias
|
|
694
|
+
if self._next and (
|
|
695
|
+
self._next.text.upper() in ("BY", "LIST", "DICT")
|
|
696
|
+
or self._next.token_type == TokenType.ORDER_BY
|
|
697
|
+
):
|
|
698
|
+
return None
|
|
699
|
+
return super()._parse_table_alias(alias_tokens=alias_tokens)
|
|
700
|
+
|
|
701
|
+
def _parse_query_modifiers(self, this):
|
|
702
|
+
if (
|
|
703
|
+
self._curr
|
|
704
|
+
and self._curr.text.upper() == "ASSUME"
|
|
705
|
+
and self._next
|
|
706
|
+
and self._next.token_type == TokenType.ORDER_BY
|
|
707
|
+
):
|
|
708
|
+
self._advance() # consume ASSUME
|
|
709
|
+
_, order = self.QUERY_MODIFIER_PARSERS[TokenType.ORDER_BY](self)
|
|
710
|
+
if order and this:
|
|
711
|
+
this.set("order", self.expression(AssumeOrderBy(this=order)))
|
|
712
|
+
return super()._parse_query_modifiers(this)
|
|
713
|
+
|
|
714
|
+
def _parse_table(self, *args, **kwargs) -> t.Optional[exp.Expression]:
|
|
715
|
+
table = super()._parse_table(*args, **kwargs)
|
|
716
|
+
if table and self._curr and self._curr.text.upper() == "FLATTEN":
|
|
717
|
+
self._advance()
|
|
718
|
+
kind: t.Optional[str] = None
|
|
719
|
+
if self._curr and self._curr.text.upper() in ("LIST", "DICT"):
|
|
720
|
+
kind = self._curr.text.upper()
|
|
721
|
+
self._advance()
|
|
722
|
+
self._match_text_seq("BY")
|
|
723
|
+
cols = self._parse_csv(self._parse_column)
|
|
724
|
+
return self.expression(FlattenBy(this=table, expressions=cols, kind=kind))
|
|
725
|
+
return table
|
|
726
|
+
|
|
483
727
|
def _parse_struct_types(self, type_required=True) -> t.Optional[exp.Expression]:
|
|
484
728
|
if not self._curr:
|
|
485
729
|
return None
|
|
@@ -559,6 +803,8 @@ class YDB(Dialect):
|
|
|
559
803
|
Responsible for translating SQL AST back to SQL text with YDB-specific syntax.
|
|
560
804
|
"""
|
|
561
805
|
|
|
806
|
+
PARAMETER_TOKEN = "$"
|
|
807
|
+
|
|
562
808
|
SUPPORTS_VALUES_DEFAULT = False
|
|
563
809
|
NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE
|
|
564
810
|
JOIN_HINTS = False
|
|
@@ -574,7 +820,7 @@ class YDB(Dialect):
|
|
|
574
820
|
JSON_KEY_VALUE_PAIR_SEP = ","
|
|
575
821
|
VARCHAR_REQUIRES_SIZE = False
|
|
576
822
|
CAN_IMPLEMENT_ARRAY_ANY = True
|
|
577
|
-
STRUCT_DELIMITER = ("
|
|
823
|
+
STRUCT_DELIMITER = ("<", ">")
|
|
578
824
|
NULL_ORDERING_SUPPORTED: t.Optional[bool] = False
|
|
579
825
|
NULL_ORDERING = None
|
|
580
826
|
MATCHED_BY_SOURCE = False
|
|
@@ -637,6 +883,10 @@ class YDB(Dialect):
|
|
|
637
883
|
Returns:
|
|
638
884
|
Generated SQL string for the table reference
|
|
639
885
|
"""
|
|
886
|
+
if isinstance(expression.this, exp.Parameter):
|
|
887
|
+
var = self.sql(expression, "this")
|
|
888
|
+
alias = f" AS {expression.alias}" if expression.alias else ""
|
|
889
|
+
return f"{var}{alias}"
|
|
640
890
|
prefix = f"{expression.db}/" if expression.db else ""
|
|
641
891
|
sql = f"`{prefix}{expression.name}`"
|
|
642
892
|
|
|
@@ -663,6 +913,38 @@ class YDB(Dialect):
|
|
|
663
913
|
|
|
664
914
|
return is_sql
|
|
665
915
|
|
|
916
|
+
def scoperesolution_sql(self, expression: exp.ScopeResolution) -> str:
    """Render a scope-resolution node as ``<this>::<expression>``.

    Args:
        expression: Node whose ``this`` and ``expression`` args are rendered.

    Returns:
        Both rendered parts joined by ``::`` (e.g. ``Module::Function()``).
    """
    rendered = [self.sql(expression, key) for key in ("this", "expression")]
    return "::".join(rendered)
|
|
920
|
+
|
|
921
|
+
def declareitem_sql(self, expression: exp.DeclareItem) -> str:
    """Render one DECLARE item as ``$<name> AS <type>``.

    Args:
        expression: Node whose ``this`` (name) and ``kind`` (type) args are
            rendered.

    Returns:
        The rendered name prefixed with ``$``, followed by ``AS`` and the
        rendered type.
    """
    rendered_name = self.sql(expression, "this")
    rendered_type = self.sql(expression, "kind")
    return "${} AS {}".format(rendered_name, rendered_type)
|
|
925
|
+
|
|
926
|
+
def flattenby_sql(self, expression: FlattenBy) -> str:
    """Render ``<table> FLATTEN [<kind>] BY <columns>``.

    Args:
        expression: ``FlattenBy`` node; ``this`` is the table, ``expressions``
            the flattened columns and the optional ``kind`` arg the
            ``LIST``/``DICT`` qualifier.

    Returns:
        The generated clause; the kind is omitted when it is not set.
    """
    source = self.sql(expression, "this")
    kind = expression.args.get("kind")
    columns = self.expressions(expression, flat=True)
    keyword = f"FLATTEN {kind}" if kind else "FLATTEN"
    return f"{source} {keyword} BY {columns}"
|
|
932
|
+
|
|
933
|
+
def assumeorderby_sql(self, expression: AssumeOrderBy) -> str:
    """Render an ``ASSUME ORDER BY`` clause as its own segment.

    Args:
        expression: ``AssumeOrderBy`` node whose ``this`` arg is the wrapped
            ordering clause.

    Returns:
        ``ASSUME`` followed by the rendered ordering clause, passed through
        ``self.seg``.
    """
    order_clause = self.sql(expression, "this")
    # The rendered clause may start with whitespace; strip it so exactly one
    # space separates ASSUME from what follows.
    return self.seg("ASSUME " + order_clause.lstrip())
|
|
936
|
+
|
|
937
|
+
def ydbtuple_sql(self, expression: YdbTuple) -> str:
    """Render a ``Tuple<...>`` type, wrapped in ``Optional<...>`` if nullable.

    Args:
        expression: ``YdbTuple`` node; ``expressions`` holds the member types
            and the ``nullable`` arg marks the tuple as optional.

    Returns:
        ``Tuple<T1, T2, ...>`` or ``Optional<Tuple<T1, T2, ...>>``.
    """
    members = [self.sql(member) for member in expression.expressions]
    rendered = "Tuple<" + ", ".join(members) + ">"
    if expression.args.get("nullable"):
        return f"Optional<{rendered}>"
    return rendered
|
|
941
|
+
|
|
942
|
+
def alias_sql(self, expression: exp.Alias) -> str:
    """Render an alias, using ``$name = <expr>`` form for ``$``-prefixed names.

    Args:
        expression: Alias node; its ``alias`` arg supplies the name and its
            ``this`` arg the aliased expression.

    Returns:
        ``<alias name> = <rendered this>`` when the alias name starts with
        ``$``; otherwise the base generator's alias rendering.
    """
    alias = expression.args.get("alias")
    if not alias or not alias.name.startswith("$"):
        return super().alias_sql(expression)
    value = self.sql(expression, "this")
    return f"{alias.name} = {value}"
|
|
947
|
+
|
|
666
948
|
def anonymous_sql(self, expression: exp.Anonymous) -> str:
|
|
667
949
|
"""
|
|
668
950
|
Generate SQL for Anonymous functions, with special handling for YQL lambda variables.
|
|
@@ -799,6 +1081,35 @@ class YDB(Dialect):
|
|
|
799
1081
|
Returns:
|
|
800
1082
|
Generated SQL string for the data type
|
|
801
1083
|
"""
|
|
1084
|
+
nullable = expression.args.get("nullable")
|
|
1085
|
+
|
|
1086
|
+
# YDB generic container types rendered with <> syntax and correct casing
|
|
1087
|
+
if expression.args.get("nested"):
|
|
1088
|
+
type_value = expression.this
|
|
1089
|
+
# Tuple<...>: STRUCT with kind="tuple" marker
|
|
1090
|
+
if (
|
|
1091
|
+
type_value == exp.DataType.Type.STRUCT
|
|
1092
|
+
and isinstance(expression.args.get("kind"), exp.Var)
|
|
1093
|
+
and expression.args["kind"].name == "tuple"
|
|
1094
|
+
):
|
|
1095
|
+
inner = ", ".join(
|
|
1096
|
+
self.sql(col.args["kind"])
|
|
1097
|
+
for col in expression.expressions
|
|
1098
|
+
if isinstance(col, exp.ColumnDef)
|
|
1099
|
+
)
|
|
1100
|
+
sql = f"Tuple<{inner}>"
|
|
1101
|
+
return f"Optional<{sql}>" if nullable else sql
|
|
1102
|
+
|
|
1103
|
+
inner = ", ".join(self.sql(e) for e in expression.expressions)
|
|
1104
|
+
name = {
|
|
1105
|
+
exp.DataType.Type.LIST: "List",
|
|
1106
|
+
exp.DataType.Type.MAP: "Dict",
|
|
1107
|
+
exp.DataType.Type.SET: "Set",
|
|
1108
|
+
}.get(type_value)
|
|
1109
|
+
if name:
|
|
1110
|
+
sql = f"{name}<{inner}>"
|
|
1111
|
+
return f"Optional<{sql}>" if nullable else sql
|
|
1112
|
+
|
|
802
1113
|
if (
|
|
803
1114
|
expression.is_type(exp.DataType.Type.NVARCHAR)
|
|
804
1115
|
or expression.is_type(exp.DataType.Type.VARCHAR)
|
|
@@ -845,7 +1156,10 @@ class YDB(Dialect):
|
|
|
845
1156
|
exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
|
|
846
1157
|
)
|
|
847
1158
|
|
|
848
|
-
|
|
1159
|
+
sql = super().datatype_sql(expression)
|
|
1160
|
+
if nullable:
|
|
1161
|
+
sql = f"Optional<{sql}>"
|
|
1162
|
+
return sql
|
|
849
1163
|
|
|
850
1164
|
def primarykeycolumnconstraint_sql(self, expression: exp.PrimaryKeyColumnConstraint) -> str:
|
|
851
1165
|
"""
|
|
@@ -2358,10 +2672,10 @@ class YDB(Dialect):
|
|
|
2358
2672
|
TYPE_MAPPING = {
|
|
2359
2673
|
**generator.Generator.TYPE_MAPPING,
|
|
2360
2674
|
**STRING_TYPE_MAPPING,
|
|
2361
|
-
exp.DataType.Type.TINYINT: "
|
|
2362
|
-
exp.DataType.Type.SMALLINT: "
|
|
2363
|
-
exp.DataType.Type.INT: "
|
|
2364
|
-
exp.DataType.Type.BIGINT: "
|
|
2675
|
+
exp.DataType.Type.TINYINT: "Int8",
|
|
2676
|
+
exp.DataType.Type.SMALLINT: "Int16",
|
|
2677
|
+
exp.DataType.Type.INT: "Int32",
|
|
2678
|
+
exp.DataType.Type.BIGINT: "Int64",
|
|
2365
2679
|
exp.DataType.Type.DECIMAL: "Decimal",
|
|
2366
2680
|
exp.DataType.Type.FLOAT: "Float",
|
|
2367
2681
|
exp.DataType.Type.DOUBLE: "Double",
|
|
@@ -2373,6 +2687,9 @@ class YDB(Dialect):
|
|
|
2373
2687
|
|
|
2374
2688
|
TRANSFORMS = {
|
|
2375
2689
|
**generator.Generator.TRANSFORMS,
|
|
2690
|
+
FlattenBy: lambda self, e: self.flattenby_sql(e),
|
|
2691
|
+
AssumeOrderBy: lambda self, e: self.assumeorderby_sql(e),
|
|
2692
|
+
YdbTuple: lambda self, e: self.ydbtuple_sql(e),
|
|
2376
2693
|
exp.Create: create_sql,
|
|
2377
2694
|
exp.DefaultColumnConstraint: lambda self, e: "",
|
|
2378
2695
|
exp.DateTrunc: _date_trunc_sql,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ydb-sqlglot-plugin
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: YDB dialect plugin for sqlglot
|
|
5
5
|
Author: YDB Team
|
|
6
6
|
License: Apache-2.0
|
|
@@ -22,11 +22,12 @@ Provides-Extra: dev
|
|
|
22
22
|
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
23
23
|
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
24
24
|
Requires-Dist: ydb<4,>=3.28.0; extra == "dev"
|
|
25
|
+
Requires-Dist: ruff>=0.9.0; extra == "dev"
|
|
25
26
|
Dynamic: license-file
|
|
26
27
|
|
|
27
28
|
# ydb-sqlglot-plugin
|
|
28
29
|
|
|
29
|
-
YDB dialect plugin for [sqlglot](https://github.com/tobymao/sqlglot) —
|
|
30
|
+
YDB dialect plugin for [sqlglot](https://github.com/tobymao/sqlglot) — bidirectional transpilation between YDB/YQL and any SQL dialect.
|
|
30
31
|
|
|
31
32
|
## Installation
|
|
32
33
|
|
|
@@ -41,19 +42,20 @@ After installing the package, the `ydb` dialect is available in sqlglot automati
|
|
|
41
42
|
```python
|
|
42
43
|
import sqlglot
|
|
43
44
|
|
|
44
|
-
#
|
|
45
|
+
# Any dialect → YDB
|
|
45
46
|
result = sqlglot.transpile("SELECT * FROM users WHERE id = 1", read="mysql", write="ydb")[0]
|
|
46
47
|
# → SELECT * FROM `users` WHERE id = 1
|
|
47
48
|
|
|
48
|
-
#
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
yql = parsed.sql(dialect="ydb")
|
|
49
|
+
# YDB → any dialect
|
|
50
|
+
result = sqlglot.transpile("$t = (SELECT id FROM users); SELECT * FROM $t AS t", read="ydb", write="postgres")[0]
|
|
51
|
+
# → WITH t AS (SELECT id FROM users) SELECT * FROM t AS t
|
|
52
52
|
```
|
|
53
53
|
|
|
54
54
|
## What the plugin does
|
|
55
55
|
|
|
56
|
-
###
|
|
56
|
+
### Any SQL → YDB
|
|
57
|
+
|
|
58
|
+
#### Table names
|
|
57
59
|
|
|
58
60
|
Database-qualified names are rewritten to the YDB path format and wrapped in backticks:
|
|
59
61
|
|
|
@@ -65,7 +67,7 @@ SELECT * FROM analytics.events
|
|
|
65
67
|
SELECT * FROM `analytics/events`
|
|
66
68
|
```
|
|
67
69
|
|
|
68
|
-
|
|
70
|
+
#### CTEs → YDB variables
|
|
69
71
|
|
|
70
72
|
```sql
|
|
71
73
|
-- input
|
|
@@ -78,7 +80,7 @@ $active = (SELECT * FROM `users` WHERE status = 'active');
|
|
|
78
80
|
SELECT * FROM $active AS active
|
|
79
81
|
```
|
|
80
82
|
|
|
81
|
-
|
|
83
|
+
#### Subquery decorrelation
|
|
82
84
|
|
|
83
85
|
Correlated subqueries (which YQL does not support) are rewritten as JOINs:
|
|
84
86
|
|
|
@@ -102,6 +104,52 @@ The same rewriting applies to `EXISTS`, `IN (subquery)`, and `ANY/ALL` subquerie
|
|
|
102
104
|
|
|
103
105
|
---
|
|
104
106
|
|
|
107
|
+
### YDB → any SQL
|
|
108
|
+
|
|
109
|
+
The plugin parses YDB/YQL back into sqlglot's AST, enabling round-trips, YDB-to-YDB transformations, and transpilation to other dialects.
|
|
110
|
+
|
|
111
|
+
#### Supported YQL constructs
|
|
112
|
+
|
|
113
|
+
| Construct | Example |
|
|
114
|
+
|---|---|
|
|
115
|
+
| `$variable` references | `SELECT * FROM $t AS t` |
|
|
116
|
+
| `Module::Function()` | `DateTime::GetYear(ts)` |
|
|
117
|
+
| `DECLARE $p AS Type` | `DECLARE $p AS Int32` |
|
|
118
|
+
| `FLATTEN [LIST\|DICT] BY col` | `FROM t FLATTEN LIST BY col` |
|
|
119
|
+
| `Optional<T>` / `T?` | `CAST(x AS Optional<Utf8>)` |
|
|
120
|
+
| Container types | `CAST(x AS List<Int32>)`, `Dict<Utf8, Int64>`, `Set<Utf8>`, `Tuple<Int32, Utf8>` |
|
|
121
|
+
| `ASSUME ORDER BY` | `SELECT * FROM t ASSUME ORDER BY id` |
|
|
122
|
+
| Named expressions | `$t = (SELECT 1 AS x)` |
|
|
123
|
+
| `PRAGMA` | `PRAGMA AnsiImplicitCrossJoin` |
|
|
124
|
+
|
|
125
|
+
Table names without backticks are accepted on input; the generator always produces backtick-quoted output.
|
|
126
|
+
|
|
127
|
+
#### CTE reassembly
|
|
128
|
+
|
|
129
|
+
YDB-style named expressions are automatically reassembled into standard `WITH` CTEs when targeting other dialects:
|
|
130
|
+
|
|
131
|
+
```python
|
|
132
|
+
ydb_sql = "$t = (SELECT 1 AS x); SELECT * FROM $t AS t"
|
|
133
|
+
parse_one(ydb_sql, dialect="ydb").sql(dialect="postgres")
|
|
134
|
+
# → WITH t AS (SELECT 1 AS x) SELECT * FROM t AS t
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
---
|
|
138
|
+
|
|
139
|
+
### Column lineage
|
|
140
|
+
|
|
141
|
+
Because YDB SQL is fully parsed into sqlglot's AST, column-level lineage works out of the box:
|
|
142
|
+
|
|
143
|
+
```python
|
|
144
|
+
from sqlglot.lineage import lineage
|
|
145
|
+
|
|
146
|
+
node = lineage("total", "$orders = (SELECT user_id, amount FROM orders); SELECT SUM(amount) AS total FROM $orders AS o", dialect="ydb")
|
|
147
|
+
for dep in node.walk():
|
|
148
|
+
print(dep.name, "→", dep.source)
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
---
|
|
152
|
+
|
|
105
153
|
## Function reference
|
|
106
154
|
|
|
107
155
|
Functions below are recognized by sqlglot as standard SQL expressions and translated to their YQL equivalents. Dialect-specific functions that sqlglot does not parse into typed AST nodes are **passed through unchanged** — see [Limitations](#limitations).
|
|
@@ -179,25 +227,43 @@ Functions below are recognized by sqlglot as standard SQL expressions and transl
|
|
|
179
227
|
|
|
180
228
|
## Type mapping
|
|
181
229
|
|
|
182
|
-
|
|
230
|
+
### Standard SQL → YDB
|
|
231
|
+
|
|
232
|
+
| SQL type | YDB type |
|
|
183
233
|
|---|---|
|
|
184
|
-
| `TINYINT` | `
|
|
185
|
-
| `SMALLINT` | `
|
|
186
|
-
| `INT` / `INTEGER` | `
|
|
187
|
-
| `BIGINT` | `
|
|
234
|
+
| `TINYINT` | `Int8` |
|
|
235
|
+
| `SMALLINT` | `Int16` |
|
|
236
|
+
| `INT` / `INTEGER` | `Int32` |
|
|
237
|
+
| `BIGINT` | `Int64` |
|
|
188
238
|
| `FLOAT` | `Float` |
|
|
189
239
|
| `DOUBLE` / `DOUBLE PRECISION` | `Double` |
|
|
190
240
|
| `DECIMAL(p, s)` | `Decimal(p, s)` |
|
|
191
241
|
| `BOOLEAN` / `BIT` | `Uint8` |
|
|
192
242
|
| `TIMESTAMP` | `Timestamp` |
|
|
193
|
-
| `VARCHAR` / `NVARCHAR` / `CHAR` | `Utf8` |
|
|
194
|
-
| `
|
|
195
|
-
|
|
243
|
+
| `VARCHAR` / `NVARCHAR` / `CHAR` / `TEXT` | `Utf8` |
|
|
244
|
+
| `BLOB` / `BINARY` / `VARBINARY` | `String` |
|
|
245
|
+
|
|
246
|
+
### YDB types → standard SQL
|
|
247
|
+
|
|
248
|
+
| YDB type | Standard SQL | Postgres | ClickHouse |
|
|
249
|
+
|---|---|---|---|
|
|
250
|
+
| `Utf8` | `TEXT` | `TEXT` | `String` |
|
|
251
|
+
| `String` | `BLOB` | `BYTEA` | `String` |
|
|
252
|
+
| `Int32` | `INT` | `INT` | `Int32` |
|
|
253
|
+
| `Int64` | `BIGINT` | `BIGINT` | `Int64` |
|
|
254
|
+
| `Optional<T>` | `T` (nullable) | `T` | `Nullable(T)` |
|
|
255
|
+
| `List<T>` | `LIST<T>` | `LIST<T>` | `Array(T)` |
|
|
256
|
+
| `Dict<K,V>` | `MAP<K,V>` | `MAP<K,V>` | `Map(K,V)` |
|
|
257
|
+
| `Tuple<T1,T2>` | `STRUCT<...>` | `STRUCT<...>` | `Tuple(T1,T2)` |
|
|
196
258
|
|
|
197
259
|
---
|
|
198
260
|
|
|
199
261
|
## Limitations
|
|
200
262
|
|
|
263
|
+
### Dialect-specific functions
|
|
264
|
+
|
|
265
|
+
Functions that sqlglot does not parse into typed AST nodes are passed through unchanged and must be replaced manually. Common examples from ClickHouse: `now()`, `today()`, `parseDateTimeBestEffort()`, `toDate()`, `toFloat64()`, `toString()`, `countDistinct()`, `groupArray()`.
|
|
266
|
+
|
|
201
267
|
### Correlated subqueries in DML
|
|
202
268
|
|
|
203
269
|
Correlated subqueries inside `UPDATE` or `INSERT` statements cannot be automatically decorrelated — YDB does not support them natively, and rewriting requires knowledge of the table's primary key. Rewrite manually using a `$variable`:
|
|
@@ -217,6 +283,10 @@ Correlated subqueries inside `SELECT` are handled automatically via JOIN rewriti
|
|
|
217
283
|
|
|
218
284
|
`dateDiff('month', a, b)` has no exact equivalent in YDB because months have variable length. Use `DateTime::ShiftMonths` for date arithmetic instead.
|
|
219
285
|
|
|
286
|
+
### YDB-specific types in other dialects
|
|
287
|
+
|
|
288
|
+
`Uint8`/`Uint16`/`Uint32`/`Uint64` and YDB-specific container types (`Struct<...>`, `Variant<...>`, `Enum<...>`) do not have direct equivalents in standard SQL and are passed through as-is when targeting other dialects.
|
|
289
|
+
|
|
220
290
|
---
|
|
221
291
|
|
|
222
292
|
## Development
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
VERSION = "0.1.1"
|
|
File without changes
|
|
File without changes
|
{ydb_sqlglot_plugin-0.1.1 → ydb_sqlglot_plugin-0.2.0}/ydb_sqlglot_plugin.egg-info/SOURCES.txt
RENAMED
|
File without changes
|
|
File without changes
|
{ydb_sqlglot_plugin-0.1.1 → ydb_sqlglot_plugin-0.2.0}/ydb_sqlglot_plugin.egg-info/entry_points.txt
RENAMED
|
File without changes
|
{ydb_sqlglot_plugin-0.1.1 → ydb_sqlglot_plugin-0.2.0}/ydb_sqlglot_plugin.egg-info/top_level.txt
RENAMED
|
File without changes
|