ydb-sqlglot-plugin 0.1.1__tar.gz → 0.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ydb_sqlglot_plugin-0.1.1 → ydb_sqlglot_plugin-0.2.1}/PKG-INFO +88 -18
- {ydb_sqlglot_plugin-0.1.1 → ydb_sqlglot_plugin-0.2.1}/README.md +86 -17
- {ydb_sqlglot_plugin-0.1.1 → ydb_sqlglot_plugin-0.2.1}/pyproject.toml +10 -1
- {ydb_sqlglot_plugin-0.1.1 → ydb_sqlglot_plugin-0.2.1}/ydb_sqlglot/__init__.py +1 -2
- ydb_sqlglot_plugin-0.2.1/ydb_sqlglot/version.py +1 -0
- {ydb_sqlglot_plugin-0.1.1 → ydb_sqlglot_plugin-0.2.1}/ydb_sqlglot/ydb.py +355 -71
- {ydb_sqlglot_plugin-0.1.1 → ydb_sqlglot_plugin-0.2.1}/ydb_sqlglot_plugin.egg-info/PKG-INFO +88 -18
- {ydb_sqlglot_plugin-0.1.1 → ydb_sqlglot_plugin-0.2.1}/ydb_sqlglot_plugin.egg-info/requires.txt +1 -0
- ydb_sqlglot_plugin-0.1.1/ydb_sqlglot/version.py +0 -1
- {ydb_sqlglot_plugin-0.1.1 → ydb_sqlglot_plugin-0.2.1}/LICENSE +0 -0
- {ydb_sqlglot_plugin-0.1.1 → ydb_sqlglot_plugin-0.2.1}/setup.cfg +0 -0
- {ydb_sqlglot_plugin-0.1.1 → ydb_sqlglot_plugin-0.2.1}/ydb_sqlglot_plugin.egg-info/SOURCES.txt +0 -0
- {ydb_sqlglot_plugin-0.1.1 → ydb_sqlglot_plugin-0.2.1}/ydb_sqlglot_plugin.egg-info/dependency_links.txt +0 -0
- {ydb_sqlglot_plugin-0.1.1 → ydb_sqlglot_plugin-0.2.1}/ydb_sqlglot_plugin.egg-info/entry_points.txt +0 -0
- {ydb_sqlglot_plugin-0.1.1 → ydb_sqlglot_plugin-0.2.1}/ydb_sqlglot_plugin.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ydb-sqlglot-plugin
|
|
3
|
-
Version: 0.1.1
|
|
3
|
+
Version: 0.2.1
|
|
4
4
|
Summary: YDB dialect plugin for sqlglot
|
|
5
5
|
Author: YDB Team
|
|
6
6
|
License: Apache-2.0
|
|
@@ -22,11 +22,12 @@ Provides-Extra: dev
|
|
|
22
22
|
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
23
23
|
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
24
24
|
Requires-Dist: ydb<4,>=3.28.0; extra == "dev"
|
|
25
|
+
Requires-Dist: ruff>=0.9.0; extra == "dev"
|
|
25
26
|
Dynamic: license-file
|
|
26
27
|
|
|
27
28
|
# ydb-sqlglot-plugin
|
|
28
29
|
|
|
29
|
-
YDB dialect plugin for [sqlglot](https://github.com/tobymao/sqlglot) —
|
|
30
|
+
YDB dialect plugin for [sqlglot](https://github.com/tobymao/sqlglot) — bidirectional transpilation between YDB/YQL and any SQL dialect.
|
|
30
31
|
|
|
31
32
|
## Installation
|
|
32
33
|
|
|
@@ -41,19 +42,20 @@ After installing the package, the `ydb` dialect is available in sqlglot automati
|
|
|
41
42
|
```python
|
|
42
43
|
import sqlglot
|
|
43
44
|
|
|
44
|
-
#
|
|
45
|
+
# Any dialect → YDB
|
|
45
46
|
result = sqlglot.transpile("SELECT * FROM users WHERE id = 1", read="mysql", write="ydb")[0]
|
|
46
47
|
# → SELECT * FROM `users` WHERE id = 1
|
|
47
48
|
|
|
48
|
-
#
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
yql = parsed.sql(dialect="ydb")
|
|
49
|
+
# YDB → any dialect
|
|
50
|
+
result = sqlglot.transpile("$t = (SELECT id FROM users); SELECT * FROM $t AS t", read="ydb", write="postgres")[0]
|
|
51
|
+
# → WITH t AS (SELECT id FROM users) SELECT * FROM t AS t
|
|
52
52
|
```
|
|
53
53
|
|
|
54
54
|
## What the plugin does
|
|
55
55
|
|
|
56
|
-
###
|
|
56
|
+
### Any SQL → YDB
|
|
57
|
+
|
|
58
|
+
#### Table names
|
|
57
59
|
|
|
58
60
|
Database-qualified names are rewritten to the YDB path format and wrapped in backticks:
|
|
59
61
|
|
|
@@ -65,7 +67,7 @@ SELECT * FROM analytics.events
|
|
|
65
67
|
SELECT * FROM `analytics/events`
|
|
66
68
|
```
|
|
67
69
|
|
|
68
|
-
|
|
70
|
+
#### CTEs → YDB variables
|
|
69
71
|
|
|
70
72
|
```sql
|
|
71
73
|
-- input
|
|
@@ -78,7 +80,7 @@ $active = (SELECT * FROM `users` WHERE status = 'active');
|
|
|
78
80
|
SELECT * FROM $active AS active
|
|
79
81
|
```
|
|
80
82
|
|
|
81
|
-
|
|
83
|
+
#### Subquery decorrelation
|
|
82
84
|
|
|
83
85
|
Correlated subqueries (which YQL does not support) are rewritten as JOINs:
|
|
84
86
|
|
|
@@ -102,6 +104,52 @@ The same rewriting applies to `EXISTS`, `IN (subquery)`, and `ANY/ALL` subquerie
|
|
|
102
104
|
|
|
103
105
|
---
|
|
104
106
|
|
|
107
|
+
### YDB → any SQL
|
|
108
|
+
|
|
109
|
+
The plugin parses YDB/YQL back into sqlglot's AST, enabling round-trips, YDB-to-YDB transformations, and transpilation to other dialects.
|
|
110
|
+
|
|
111
|
+
#### Supported YQL constructs
|
|
112
|
+
|
|
113
|
+
| Construct | Example |
|
|
114
|
+
|---|---|
|
|
115
|
+
| `$variable` references | `SELECT * FROM $t AS t` |
|
|
116
|
+
| `Module::Function()` | `DateTime::GetYear(ts)` |
|
|
117
|
+
| `DECLARE $p AS Type` | `DECLARE $p AS Int32` |
|
|
118
|
+
| `FLATTEN [LIST\|DICT] BY col` | `FROM t FLATTEN LIST BY col` |
|
|
119
|
+
| `Optional<T>` / `T?` | `CAST(x AS Optional<Utf8>)` |
|
|
120
|
+
| Container types | `CAST(x AS List<Int32>)`, `Dict<Utf8, Int64>`, `Set<Utf8>`, `Tuple<Int32, Utf8>` |
|
|
121
|
+
| `ASSUME ORDER BY` | `SELECT * FROM t ASSUME ORDER BY id` |
|
|
122
|
+
| Named expressions | `$t = (SELECT 1 AS x)` |
|
|
123
|
+
| `PRAGMA` | `PRAGMA AnsiImplicitCrossJoin` |
|
|
124
|
+
|
|
125
|
+
Table names without backticks are accepted on input; the generator always produces backtick-quoted output.
|
|
126
|
+
|
|
127
|
+
#### CTE reassembly
|
|
128
|
+
|
|
129
|
+
YDB-style named expressions are automatically reassembled into standard `WITH` CTEs when targeting other dialects:
|
|
130
|
+
|
|
131
|
+
```python
|
|
132
|
+
ydb_sql = "$t = (SELECT 1 AS x); SELECT * FROM $t AS t"
|
|
133
|
+
parse_one(ydb_sql, dialect="ydb").sql(dialect="postgres")
|
|
134
|
+
# → WITH t AS (SELECT 1 AS x) SELECT * FROM t AS t
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
---
|
|
138
|
+
|
|
139
|
+
### Column lineage
|
|
140
|
+
|
|
141
|
+
Because YDB SQL is fully parsed into sqlglot's AST, column-level lineage works out of the box:
|
|
142
|
+
|
|
143
|
+
```python
|
|
144
|
+
from sqlglot.lineage import lineage
|
|
145
|
+
|
|
146
|
+
node = lineage("total", "$orders = (SELECT user_id, amount FROM orders); SELECT SUM(amount) AS total FROM $orders AS o", dialect="ydb")
|
|
147
|
+
for dep in node.walk():
|
|
148
|
+
print(dep.name, "→", dep.source)
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
---
|
|
152
|
+
|
|
105
153
|
## Function reference
|
|
106
154
|
|
|
107
155
|
Functions below are recognized by sqlglot as standard SQL expressions and translated to their YQL equivalents. Dialect-specific functions that sqlglot does not parse into typed AST nodes are **passed through unchanged** — see [Limitations](#limitations).
|
|
@@ -179,25 +227,43 @@ Functions below are recognized by sqlglot as standard SQL expressions and transl
|
|
|
179
227
|
|
|
180
228
|
## Type mapping
|
|
181
229
|
|
|
182
|
-
|
|
230
|
+
### Standard SQL → YDB
|
|
231
|
+
|
|
232
|
+
| SQL type | YDB type |
|
|
183
233
|
|---|---|
|
|
184
|
-
| `TINYINT` | `
|
|
185
|
-
| `SMALLINT` | `
|
|
186
|
-
| `INT` / `INTEGER` | `
|
|
187
|
-
| `BIGINT` | `
|
|
234
|
+
| `TINYINT` | `Int8` |
|
|
235
|
+
| `SMALLINT` | `Int16` |
|
|
236
|
+
| `INT` / `INTEGER` | `Int32` |
|
|
237
|
+
| `BIGINT` | `Int64` |
|
|
188
238
|
| `FLOAT` | `Float` |
|
|
189
239
|
| `DOUBLE` / `DOUBLE PRECISION` | `Double` |
|
|
190
240
|
| `DECIMAL(p, s)` | `Decimal(p, s)` |
|
|
191
241
|
| `BOOLEAN` / `BIT` | `Uint8` |
|
|
192
242
|
| `TIMESTAMP` | `Timestamp` |
|
|
193
|
-
| `VARCHAR` / `NVARCHAR` / `CHAR` | `Utf8` |
|
|
194
|
-
| `
|
|
195
|
-
|
|
243
|
+
| `VARCHAR` / `NVARCHAR` / `CHAR` / `TEXT` | `Utf8` |
|
|
244
|
+
| `BLOB` / `BINARY` / `VARBINARY` | `String` |
|
|
245
|
+
|
|
246
|
+
### YDB types → standard SQL
|
|
247
|
+
|
|
248
|
+
| YDB type | Standard SQL | Postgres | ClickHouse |
|
|
249
|
+
|---|---|---|---|
|
|
250
|
+
| `Utf8` | `TEXT` | `TEXT` | `String` |
|
|
251
|
+
| `String` | `BLOB` | `BYTEA` | `String` |
|
|
252
|
+
| `Int32` | `INT` | `INT` | `Int32` |
|
|
253
|
+
| `Int64` | `BIGINT` | `BIGINT` | `Int64` |
|
|
254
|
+
| `Optional<T>` | `T` (nullable) | `T` | `Nullable(T)` |
|
|
255
|
+
| `List<T>` | `LIST<T>` | `LIST<T>` | `Array(T)` |
|
|
256
|
+
| `Dict<K,V>` | `MAP<K,V>` | `MAP<K,V>` | `Map(K,V)` |
|
|
257
|
+
| `Tuple<T1,T2>` | `STRUCT<...>` | `STRUCT<...>` | `Tuple(T1,T2)` |
|
|
196
258
|
|
|
197
259
|
---
|
|
198
260
|
|
|
199
261
|
## Limitations
|
|
200
262
|
|
|
263
|
+
### Dialect-specific functions
|
|
264
|
+
|
|
265
|
+
Functions that sqlglot does not parse into typed AST nodes are passed through unchanged and must be replaced manually. Common examples from ClickHouse: `now()`, `today()`, `parseDateTimeBestEffort()`, `toDate()`, `toFloat64()`, `toString()`, `countDistinct()`, `groupArray()`.
|
|
266
|
+
|
|
201
267
|
### Correlated subqueries in DML
|
|
202
268
|
|
|
203
269
|
Correlated subqueries inside `UPDATE` or `INSERT` statements cannot be automatically decorrelated — YDB does not support them natively, and rewriting requires knowledge of the table's primary key. Rewrite manually using a `$variable`:
|
|
@@ -217,6 +283,10 @@ Correlated subqueries inside `SELECT` are handled automatically via JOIN rewriti
|
|
|
217
283
|
|
|
218
284
|
`dateDiff('month', a, b)` has no exact equivalent in YDB because months have variable length. Use `DateTime::ShiftMonths` for date arithmetic instead.
|
|
219
285
|
|
|
286
|
+
### YDB-specific types in other dialects
|
|
287
|
+
|
|
288
|
+
`Uint8`/`Uint16`/`Uint32`/`Uint64` and YDB-specific container types (`Struct<...>`, `Variant<...>`, `Enum<...>`) do not have direct equivalents in standard SQL and are passed through as-is when targeting other dialects.
|
|
289
|
+
|
|
220
290
|
---
|
|
221
291
|
|
|
222
292
|
## Development
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# ydb-sqlglot-plugin
|
|
2
2
|
|
|
3
|
-
YDB dialect plugin for [sqlglot](https://github.com/tobymao/sqlglot) —
|
|
3
|
+
YDB dialect plugin for [sqlglot](https://github.com/tobymao/sqlglot) — bidirectional transpilation between YDB/YQL and any SQL dialect.
|
|
4
4
|
|
|
5
5
|
## Installation
|
|
6
6
|
|
|
@@ -15,19 +15,20 @@ After installing the package, the `ydb` dialect is available in sqlglot automati
|
|
|
15
15
|
```python
|
|
16
16
|
import sqlglot
|
|
17
17
|
|
|
18
|
-
#
|
|
18
|
+
# Any dialect → YDB
|
|
19
19
|
result = sqlglot.transpile("SELECT * FROM users WHERE id = 1", read="mysql", write="ydb")[0]
|
|
20
20
|
# → SELECT * FROM `users` WHERE id = 1
|
|
21
21
|
|
|
22
|
-
#
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
yql = parsed.sql(dialect="ydb")
|
|
22
|
+
# YDB → any dialect
|
|
23
|
+
result = sqlglot.transpile("$t = (SELECT id FROM users); SELECT * FROM $t AS t", read="ydb", write="postgres")[0]
|
|
24
|
+
# → WITH t AS (SELECT id FROM users) SELECT * FROM t AS t
|
|
26
25
|
```
|
|
27
26
|
|
|
28
27
|
## What the plugin does
|
|
29
28
|
|
|
30
|
-
###
|
|
29
|
+
### Any SQL → YDB
|
|
30
|
+
|
|
31
|
+
#### Table names
|
|
31
32
|
|
|
32
33
|
Database-qualified names are rewritten to the YDB path format and wrapped in backticks:
|
|
33
34
|
|
|
@@ -39,7 +40,7 @@ SELECT * FROM analytics.events
|
|
|
39
40
|
SELECT * FROM `analytics/events`
|
|
40
41
|
```
|
|
41
42
|
|
|
42
|
-
|
|
43
|
+
#### CTEs → YDB variables
|
|
43
44
|
|
|
44
45
|
```sql
|
|
45
46
|
-- input
|
|
@@ -52,7 +53,7 @@ $active = (SELECT * FROM `users` WHERE status = 'active');
|
|
|
52
53
|
SELECT * FROM $active AS active
|
|
53
54
|
```
|
|
54
55
|
|
|
55
|
-
|
|
56
|
+
#### Subquery decorrelation
|
|
56
57
|
|
|
57
58
|
Correlated subqueries (which YQL does not support) are rewritten as JOINs:
|
|
58
59
|
|
|
@@ -76,6 +77,52 @@ The same rewriting applies to `EXISTS`, `IN (subquery)`, and `ANY/ALL` subquerie
|
|
|
76
77
|
|
|
77
78
|
---
|
|
78
79
|
|
|
80
|
+
### YDB → any SQL
|
|
81
|
+
|
|
82
|
+
The plugin parses YDB/YQL back into sqlglot's AST, enabling round-trips, YDB-to-YDB transformations, and transpilation to other dialects.
|
|
83
|
+
|
|
84
|
+
#### Supported YQL constructs
|
|
85
|
+
|
|
86
|
+
| Construct | Example |
|
|
87
|
+
|---|---|
|
|
88
|
+
| `$variable` references | `SELECT * FROM $t AS t` |
|
|
89
|
+
| `Module::Function()` | `DateTime::GetYear(ts)` |
|
|
90
|
+
| `DECLARE $p AS Type` | `DECLARE $p AS Int32` |
|
|
91
|
+
| `FLATTEN [LIST\|DICT] BY col` | `FROM t FLATTEN LIST BY col` |
|
|
92
|
+
| `Optional<T>` / `T?` | `CAST(x AS Optional<Utf8>)` |
|
|
93
|
+
| Container types | `CAST(x AS List<Int32>)`, `Dict<Utf8, Int64>`, `Set<Utf8>`, `Tuple<Int32, Utf8>` |
|
|
94
|
+
| `ASSUME ORDER BY` | `SELECT * FROM t ASSUME ORDER BY id` |
|
|
95
|
+
| Named expressions | `$t = (SELECT 1 AS x)` |
|
|
96
|
+
| `PRAGMA` | `PRAGMA AnsiImplicitCrossJoin` |
|
|
97
|
+
|
|
98
|
+
Table names without backticks are accepted on input; the generator always produces backtick-quoted output.
|
|
99
|
+
|
|
100
|
+
#### CTE reassembly
|
|
101
|
+
|
|
102
|
+
YDB-style named expressions are automatically reassembled into standard `WITH` CTEs when targeting other dialects:
|
|
103
|
+
|
|
104
|
+
```python
|
|
105
|
+
ydb_sql = "$t = (SELECT 1 AS x); SELECT * FROM $t AS t"
|
|
106
|
+
parse_one(ydb_sql, dialect="ydb").sql(dialect="postgres")
|
|
107
|
+
# → WITH t AS (SELECT 1 AS x) SELECT * FROM t AS t
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
---
|
|
111
|
+
|
|
112
|
+
### Column lineage
|
|
113
|
+
|
|
114
|
+
Because YDB SQL is fully parsed into sqlglot's AST, column-level lineage works out of the box:
|
|
115
|
+
|
|
116
|
+
```python
|
|
117
|
+
from sqlglot.lineage import lineage
|
|
118
|
+
|
|
119
|
+
node = lineage("total", "$orders = (SELECT user_id, amount FROM orders); SELECT SUM(amount) AS total FROM $orders AS o", dialect="ydb")
|
|
120
|
+
for dep in node.walk():
|
|
121
|
+
print(dep.name, "→", dep.source)
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
---
|
|
125
|
+
|
|
79
126
|
## Function reference
|
|
80
127
|
|
|
81
128
|
Functions below are recognized by sqlglot as standard SQL expressions and translated to their YQL equivalents. Dialect-specific functions that sqlglot does not parse into typed AST nodes are **passed through unchanged** — see [Limitations](#limitations).
|
|
@@ -153,25 +200,43 @@ Functions below are recognized by sqlglot as standard SQL expressions and transl
|
|
|
153
200
|
|
|
154
201
|
## Type mapping
|
|
155
202
|
|
|
156
|
-
|
|
203
|
+
### Standard SQL → YDB
|
|
204
|
+
|
|
205
|
+
| SQL type | YDB type |
|
|
157
206
|
|---|---|
|
|
158
|
-
| `TINYINT` | `
|
|
159
|
-
| `SMALLINT` | `
|
|
160
|
-
| `INT` / `INTEGER` | `
|
|
161
|
-
| `BIGINT` | `
|
|
207
|
+
| `TINYINT` | `Int8` |
|
|
208
|
+
| `SMALLINT` | `Int16` |
|
|
209
|
+
| `INT` / `INTEGER` | `Int32` |
|
|
210
|
+
| `BIGINT` | `Int64` |
|
|
162
211
|
| `FLOAT` | `Float` |
|
|
163
212
|
| `DOUBLE` / `DOUBLE PRECISION` | `Double` |
|
|
164
213
|
| `DECIMAL(p, s)` | `Decimal(p, s)` |
|
|
165
214
|
| `BOOLEAN` / `BIT` | `Uint8` |
|
|
166
215
|
| `TIMESTAMP` | `Timestamp` |
|
|
167
|
-
| `VARCHAR` / `NVARCHAR` / `CHAR` | `Utf8` |
|
|
168
|
-
| `
|
|
169
|
-
|
|
216
|
+
| `VARCHAR` / `NVARCHAR` / `CHAR` / `TEXT` | `Utf8` |
|
|
217
|
+
| `BLOB` / `BINARY` / `VARBINARY` | `String` |
|
|
218
|
+
|
|
219
|
+
### YDB types → standard SQL
|
|
220
|
+
|
|
221
|
+
| YDB type | Standard SQL | Postgres | ClickHouse |
|
|
222
|
+
|---|---|---|---|
|
|
223
|
+
| `Utf8` | `TEXT` | `TEXT` | `String` |
|
|
224
|
+
| `String` | `BLOB` | `BYTEA` | `String` |
|
|
225
|
+
| `Int32` | `INT` | `INT` | `Int32` |
|
|
226
|
+
| `Int64` | `BIGINT` | `BIGINT` | `Int64` |
|
|
227
|
+
| `Optional<T>` | `T` (nullable) | `T` | `Nullable(T)` |
|
|
228
|
+
| `List<T>` | `LIST<T>` | `LIST<T>` | `Array(T)` |
|
|
229
|
+
| `Dict<K,V>` | `MAP<K,V>` | `MAP<K,V>` | `Map(K,V)` |
|
|
230
|
+
| `Tuple<T1,T2>` | `STRUCT<...>` | `STRUCT<...>` | `Tuple(T1,T2)` |
|
|
170
231
|
|
|
171
232
|
---
|
|
172
233
|
|
|
173
234
|
## Limitations
|
|
174
235
|
|
|
236
|
+
### Dialect-specific functions
|
|
237
|
+
|
|
238
|
+
Functions that sqlglot does not parse into typed AST nodes are passed through unchanged and must be replaced manually. Common examples from ClickHouse: `now()`, `today()`, `parseDateTimeBestEffort()`, `toDate()`, `toFloat64()`, `toString()`, `countDistinct()`, `groupArray()`.
|
|
239
|
+
|
|
175
240
|
### Correlated subqueries in DML
|
|
176
241
|
|
|
177
242
|
Correlated subqueries inside `UPDATE` or `INSERT` statements cannot be automatically decorrelated — YDB does not support them natively, and rewriting requires knowledge of the table's primary key. Rewrite manually using a `$variable`:
|
|
@@ -191,6 +256,10 @@ Correlated subqueries inside `SELECT` are handled automatically via JOIN rewriti
|
|
|
191
256
|
|
|
192
257
|
`dateDiff('month', a, b)` has no exact equivalent in YDB because months have variable length. Use `DateTime::ShiftMonths` for date arithmetic instead.
|
|
193
258
|
|
|
259
|
+
### YDB-specific types in other dialects
|
|
260
|
+
|
|
261
|
+
`Uint8`/`Uint16`/`Uint32`/`Uint64` and YDB-specific container types (`Struct<...>`, `Variant<...>`, `Enum<...>`) do not have direct equivalents in standard SQL and are passed through as-is when targeting other dialects.
|
|
262
|
+
|
|
194
263
|
---
|
|
195
264
|
|
|
196
265
|
## Development
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "ydb-sqlglot-plugin"
|
|
7
|
-
version = "0.1.1"
|
|
7
|
+
version = "0.2.1" # AUTOVERSION
|
|
8
8
|
description = "YDB dialect plugin for sqlglot"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = {text = "Apache-2.0"}
|
|
@@ -31,6 +31,7 @@ dev = [
|
|
|
31
31
|
"pytest>=7.0",
|
|
32
32
|
"pytest-cov>=4.0",
|
|
33
33
|
"ydb>=3.28.0,<4",
|
|
34
|
+
"ruff>=0.9.0",
|
|
34
35
|
]
|
|
35
36
|
|
|
36
37
|
[project.urls]
|
|
@@ -48,3 +49,11 @@ include = ["ydb_sqlglot*"]
|
|
|
48
49
|
testpaths = ["tests"]
|
|
49
50
|
python_files = ["test_*.py"]
|
|
50
51
|
python_functions = ["test_*"]
|
|
52
|
+
|
|
53
|
+
[tool.ruff]
|
|
54
|
+
target-version = "py39"
|
|
55
|
+
line-length = 120
|
|
56
|
+
|
|
57
|
+
[tool.ruff.lint]
|
|
58
|
+
select = ["E", "F", "I", "W"]
|
|
59
|
+
ignore = ["E501"]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
VERSION = "0.2.1"
|
|
@@ -1,16 +1,14 @@
|
|
|
1
1
|
import re
|
|
2
2
|
import typing as t
|
|
3
3
|
|
|
4
|
-
from sqlglot import
|
|
4
|
+
from sqlglot import Generator, TokenType, exp, generator, parser, tokens, transforms
|
|
5
|
+
from sqlglot.dialects.dialect import Dialect, NormalizationStrategy, concat_to_dpipe_sql, unit_to_var
|
|
5
6
|
from sqlglot.errors import UnsupportedError
|
|
6
7
|
from sqlglot.expressions import Expression
|
|
7
|
-
from sqlglot.
|
|
8
|
-
from sqlglot.
|
|
9
|
-
from sqlglot.helper import name_sequence, seq_get, flatten
|
|
8
|
+
from sqlglot.helper import flatten, name_sequence, seq_get
|
|
9
|
+
from sqlglot.optimizer.scope import ScopeType, find_in_scope, traverse_scope
|
|
10
10
|
from sqlglot.optimizer.simplify import simplify
|
|
11
|
-
from sqlglot.transforms import move_ctes_to_top_level
|
|
12
|
-
from sqlglot.optimizer.scope import find_in_scope, ScopeType, traverse_scope
|
|
13
|
-
from sqlglot.transforms import eliminate_join_marks
|
|
11
|
+
from sqlglot.transforms import eliminate_join_marks, move_ctes_to_top_level
|
|
14
12
|
|
|
15
13
|
JOIN_ATTRS = ("on", "side", "kind", "using", "method")
|
|
16
14
|
|
|
@@ -29,21 +27,6 @@ def table_names_to_lower_case(expression: exp.Expression) -> exp.Expression:
|
|
|
29
27
|
return expression
|
|
30
28
|
|
|
31
29
|
|
|
32
|
-
def make_db_name_lower(expression: exp.Expression) -> exp.Expression:
|
|
33
|
-
"""
|
|
34
|
-
Converts all database names to uppercase
|
|
35
|
-
Args:
|
|
36
|
-
expression: The SQL expression to modify
|
|
37
|
-
Returns:
|
|
38
|
-
Modified expression with uppercase database names
|
|
39
|
-
"""
|
|
40
|
-
for table in expression.find_all(exp.Table):
|
|
41
|
-
if table.db:
|
|
42
|
-
table.set("db", table.db.lower())
|
|
43
|
-
|
|
44
|
-
return expression
|
|
45
|
-
|
|
46
|
-
|
|
47
30
|
def make_db_name_lower(expression: exp.Expression) -> exp.Expression:
|
|
48
31
|
"""
|
|
49
32
|
Converts all database names to lowercase
|
|
@@ -420,24 +403,108 @@ def _apply_subquery_alias_columns(expression: exp.Expression) -> None:
|
|
|
420
403
|
alias.set("columns", [])
|
|
421
404
|
|
|
422
405
|
|
|
423
|
-
|
|
424
|
-
"""
|
|
406
|
+
class FlattenBy(exp.Expression):
|
|
407
|
+
"""YDB-specific FLATTEN [LIST|DICT] BY clause on a table reference."""
|
|
408
|
+
arg_types = {"this": True, "expressions": True, "kind": False}
|
|
409
|
+
|
|
410
|
+
|
|
411
|
+
class AssumeOrderBy(exp.Expression):
|
|
412
|
+
"""YDB-specific ASSUME ORDER BY hint (data is pre-sorted, skip sort)."""
|
|
413
|
+
arg_types = {"this": True}
|
|
414
|
+
|
|
415
|
+
|
|
416
|
+
class YdbTuple(exp.Expression):
|
|
417
|
+
"""YDB Tuple<T1, T2, ...> type — positional unnamed fields."""
|
|
418
|
+
arg_types = {"expressions": True, "nullable": False}
|
|
425
419
|
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
420
|
+
|
|
421
|
+
# Container types that use Generic<T, ...> syntax in YDB
|
|
422
|
+
_YDB_GENERIC_TYPES = {
|
|
423
|
+
"List": exp.DataType.Type.LIST,
|
|
424
|
+
"Dict": exp.DataType.Type.MAP,
|
|
425
|
+
"Set": exp.DataType.Type.SET,
|
|
426
|
+
}
|
|
427
|
+
|
|
428
|
+
|
|
429
|
+
def _reassemble_ctes(
|
|
430
|
+
statements: t.List[t.Optional[exp.Expression]],
|
|
431
|
+
) -> t.List[t.Optional[exp.Expression]]:
|
|
432
|
+
"""Convert sequences of YDB named-expression statements into standard WITH CTEs.
|
|
433
|
+
|
|
434
|
+
YDB generator emits: $t = (SELECT ...); SELECT * FROM $t AS t
|
|
435
|
+
This function rebuilds: WITH t AS (SELECT ...) SELECT * FROM t
|
|
436
|
+
|
|
437
|
+
so that transpiling YDB output to other dialects produces valid SQL.
|
|
431
438
|
"""
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
439
|
+
result: t.List[t.Optional[exp.Expression]] = []
|
|
440
|
+
# Keep both the original Alias nodes and the converted CTE nodes
|
|
441
|
+
pending_aliases: t.List[exp.Alias] = []
|
|
442
|
+
pending_ctes: t.List[exp.CTE] = []
|
|
443
|
+
pending_names: t.Set[str] = set()
|
|
444
|
+
|
|
445
|
+
def _flush_as_aliases() -> None:
|
|
446
|
+
result.extend(pending_aliases)
|
|
447
|
+
pending_aliases.clear()
|
|
448
|
+
pending_ctes.clear()
|
|
449
|
+
pending_names.clear()
|
|
450
|
+
|
|
451
|
+
for stmt in statements:
|
|
452
|
+
if (
|
|
453
|
+
isinstance(stmt, exp.Alias)
|
|
454
|
+
and isinstance(stmt.args.get("alias"), exp.Identifier)
|
|
455
|
+
and stmt.alias.startswith("$")
|
|
456
|
+
):
|
|
457
|
+
name = stmt.alias[1:]
|
|
458
|
+
inner = stmt.this
|
|
459
|
+
# Unwrap Subquery — CTE.this must be Select, not Subquery
|
|
460
|
+
if isinstance(inner, exp.Subquery):
|
|
461
|
+
inner = inner.this
|
|
462
|
+
# Replace any $prev_cte refs inside this CTE body
|
|
463
|
+
inner = _replace_param_table_refs(inner, pending_names)
|
|
464
|
+
pending_aliases.append(stmt)
|
|
465
|
+
pending_ctes.append(
|
|
466
|
+
exp.CTE(
|
|
467
|
+
this=inner,
|
|
468
|
+
alias=exp.TableAlias(this=exp.to_identifier(name)),
|
|
469
|
+
)
|
|
470
|
+
)
|
|
471
|
+
pending_names.add(name)
|
|
472
|
+
elif pending_ctes and isinstance(stmt, exp.Select):
|
|
473
|
+
stmt = _replace_param_table_refs(stmt, pending_names)
|
|
474
|
+
stmt.set("with_", exp.With(expressions=list(pending_ctes)))
|
|
475
|
+
result.append(stmt)
|
|
476
|
+
pending_aliases.clear()
|
|
477
|
+
pending_ctes.clear()
|
|
478
|
+
pending_names.clear()
|
|
479
|
+
else:
|
|
480
|
+
# No following SELECT — keep original Alias form
|
|
481
|
+
_flush_as_aliases()
|
|
482
|
+
result.append(stmt)
|
|
483
|
+
|
|
484
|
+
# Trailing named exprs without a SELECT — keep as-is
|
|
485
|
+
_flush_as_aliases()
|
|
486
|
+
return result
|
|
487
|
+
|
|
488
|
+
|
|
489
|
+
def _replace_param_table_refs(
|
|
490
|
+
tree: exp.Expression, names: t.Set[str]
|
|
491
|
+
) -> exp.Expression:
|
|
492
|
+
"""Replace Table(Parameter(Var("t"))) with Table(Identifier("t")) for CTE names."""
|
|
493
|
+
|
|
494
|
+
def _transform(node: exp.Expression) -> exp.Expression:
|
|
495
|
+
if (
|
|
496
|
+
isinstance(node, exp.Table)
|
|
497
|
+
and isinstance(node.this, exp.Parameter)
|
|
498
|
+
and isinstance(node.this.this, exp.Var)
|
|
499
|
+
and node.this.this.name in names
|
|
500
|
+
):
|
|
501
|
+
return exp.Table(
|
|
502
|
+
this=exp.to_identifier(node.this.this.name),
|
|
503
|
+
alias=node.args.get("alias"),
|
|
504
|
+
)
|
|
505
|
+
return node
|
|
506
|
+
|
|
507
|
+
return tree.transform(_transform)
|
|
441
508
|
|
|
442
509
|
|
|
443
510
|
class YDB(Dialect):
|
|
@@ -470,9 +537,18 @@ class YDB(Dialect):
|
|
|
470
537
|
Defines how the SQL text is broken into tokens.
|
|
471
538
|
"""
|
|
472
539
|
|
|
540
|
+
KEYWORDS = {
|
|
541
|
+
**tokens.Tokenizer.KEYWORDS,
|
|
542
|
+
"DECLARE": TokenType.DECLARE,
|
|
543
|
+
"UTF8": TokenType.TEXT, # YDB Utf8 = unicode text = SQL TEXT
|
|
544
|
+
"STRING": TokenType.BLOB, # YDB String = bytes = SQL BLOB
|
|
545
|
+
}
|
|
546
|
+
|
|
473
547
|
SINGLE_TOKENS = {
|
|
474
548
|
**tokens.Tokenizer.SINGLE_TOKENS,
|
|
549
|
+
"$": TokenType.PARAMETER,
|
|
475
550
|
}
|
|
551
|
+
VAR_SINGLE_TOKENS = {"$"}
|
|
476
552
|
|
|
477
553
|
SUPPORTS_VALUES_DEFAULT = False
|
|
478
554
|
QUOTES = ["'", '"']
|
|
@@ -480,6 +556,154 @@ class YDB(Dialect):
|
|
|
480
556
|
IDENTIFIERS = ["`"]
|
|
481
557
|
|
|
482
558
|
class Parser(parser.Parser):
|
|
559
|
+
COLUMN_OPERATORS = {
|
|
560
|
+
**parser.Parser.COLUMN_OPERATORS,
|
|
561
|
+
# In YDB :: is a module namespace separator (e.g. DateTime::GetYear),
|
|
562
|
+
# not a Postgres-style cast. Reparse the right side as a function call.
|
|
563
|
+
TokenType.DCOLON: lambda self, this, field: (
|
|
564
|
+
self.expression(
|
|
565
|
+
exp.Anonymous(
|
|
566
|
+
this=f"{this.name}::{field.name}",
|
|
567
|
+
expressions=field.expressions,
|
|
568
|
+
)
|
|
569
|
+
)
|
|
570
|
+
if isinstance(field, exp.Func)
|
|
571
|
+
else self.expression(exp.ScopeResolution(this=this, expression=field))
|
|
572
|
+
),
|
|
573
|
+
}
|
|
574
|
+
|
|
575
|
+
STATEMENT_PARSERS = {
|
|
576
|
+
**parser.Parser.STATEMENT_PARSERS,
|
|
577
|
+
TokenType.DECLARE: lambda self: self._parse_ydb_declare(),
|
|
578
|
+
TokenType.PARAMETER: lambda self: self._parse_ydb_named_expr(),
|
|
579
|
+
}
|
|
580
|
+
|
|
581
|
+
def parse(self, raw_tokens, sql=None):
|
|
582
|
+
statements = super().parse(raw_tokens, sql)
|
|
583
|
+
return _reassemble_ctes(statements)
|
|
584
|
+
|
|
585
|
+
def _parse_dcolon(self) -> t.Optional[exp.Expression]:
|
|
586
|
+
return self._parse_function(anonymous=True) or self._parse_var(any_token=True)
|
|
587
|
+
|
|
588
|
+
def _parse_ydb_named_expr(self) -> t.Optional[exp.Expression]:
|
|
589
|
+
# _match_set already consumed '$', so _index points to the var name.
|
|
590
|
+
# Retreat one extra step to include '$' when falling back to expression parsing.
|
|
591
|
+
index = self._index - 1
|
|
592
|
+
name_var = self._parse_var(any_token=True)
|
|
593
|
+
if not self._match(TokenType.EQ):
|
|
594
|
+
# Not an assignment — retreat (including '$') and parse as expression.
|
|
595
|
+
self._retreat(index)
|
|
596
|
+
return self._parse_expression()
|
|
597
|
+
value = self._parse_select() or self._parse_expression()
|
|
598
|
+
return self.expression(
|
|
599
|
+
exp.Alias(
|
|
600
|
+
this=value,
|
|
601
|
+
alias=exp.Identifier(this=f"${name_var.name}"),
|
|
602
|
+
)
|
|
603
|
+
)
|
|
604
|
+
|
|
605
|
+
def _parse_ydb_declare(self) -> exp.Declare:
|
|
606
|
+
items = self._parse_csv(self._parse_ydb_declareitem)
|
|
607
|
+
return self.expression(exp.Declare(expressions=items))
|
|
608
|
+
|
|
609
|
+
def _parse_ydb_declareitem(self) -> t.Optional[exp.DeclareItem]:
|
|
610
|
+
if not self._match(TokenType.PARAMETER):
|
|
611
|
+
return None
|
|
612
|
+
name = self._parse_var(any_token=True)
|
|
613
|
+
if not name:
|
|
614
|
+
return None
|
|
615
|
+
self._match(TokenType.ALIAS)
|
|
616
|
+
kind = self._parse_types()
|
|
617
|
+
return self.expression(exp.DeclareItem(this=name, kind=kind))
|
|
618
|
+
|
|
619
|
+
def _parse_types(self, *args, **kwargs) -> t.Optional[exp.Expression]:
|
|
620
|
+
# YDB generic types use Name<...> syntax; token type varies by keyword status
|
|
621
|
+
if self._curr and self._next and self._next.token_type == TokenType.LT:
|
|
622
|
+
name = self._curr.text
|
|
623
|
+
|
|
624
|
+
if name == "Optional":
|
|
625
|
+
self._advance() # consume 'Optional'
|
|
626
|
+
self._advance() # consume '<'
|
|
627
|
+
inner = self._parse_types(*args, **kwargs)
|
|
628
|
+
self._match(TokenType.GT)
|
|
629
|
+
if inner:
|
|
630
|
+
inner.set("nullable", True)
|
|
631
|
+
return inner
|
|
632
|
+
|
|
633
|
+
if name in _YDB_GENERIC_TYPES:
|
|
634
|
+
self._advance() # consume type name
|
|
635
|
+
self._advance() # consume '<'
|
|
636
|
+
type_args = self._parse_csv(
|
|
637
|
+
lambda: self._parse_types(*args, **kwargs)
|
|
638
|
+
)
|
|
639
|
+
self._match(TokenType.GT)
|
|
640
|
+
return exp.DataType(
|
|
641
|
+
this=_YDB_GENERIC_TYPES[name],
|
|
642
|
+
expressions=[a for a in type_args if a],
|
|
643
|
+
nested=True,
|
|
644
|
+
)
|
|
645
|
+
|
|
646
|
+
if name == "Tuple":
|
|
647
|
+
self._advance() # consume 'Tuple'
|
|
648
|
+
self._advance() # consume '<'
|
|
649
|
+
type_args = self._parse_csv(
|
|
650
|
+
lambda: self._parse_types(*args, **kwargs)
|
|
651
|
+
)
|
|
652
|
+
self._match(TokenType.GT)
|
|
653
|
+
# Represent as STRUCT so other dialects can serialize it.
|
|
654
|
+
# kind="tuple" is a YDB-specific marker for the generator to emit Tuple<...>.
|
|
655
|
+
return exp.DataType(
|
|
656
|
+
this=exp.DataType.Type.STRUCT,
|
|
657
|
+
expressions=[
|
|
658
|
+
exp.ColumnDef(this=exp.to_identifier(f"_{i}"), kind=a)
|
|
659
|
+
for i, a in enumerate(type_args) if a
|
|
660
|
+
],
|
|
661
|
+
nested=True,
|
|
662
|
+
kind=exp.Var(this="tuple"),
|
|
663
|
+
)
|
|
664
|
+
|
|
665
|
+
dtype = super()._parse_types(*args, **kwargs)
|
|
666
|
+
if dtype and self._match(TokenType.PLACEHOLDER): # T?
|
|
667
|
+
dtype.set("nullable", True)
|
|
668
|
+
return dtype
|
|
669
|
+
|
|
670
|
+
def _parse_table_alias(self, alias_tokens=None):
|
|
671
|
+
# Prevent YDB-specific keywords from being consumed as table aliases
|
|
672
|
+
if self._curr and self._curr.text.upper() in ("FLATTEN", "ASSUME"):
|
|
673
|
+
# Also check that what follows is a YDB construct, not a regular alias
|
|
674
|
+
if self._next and (
|
|
675
|
+
self._next.text.upper() in ("BY", "LIST", "DICT")
|
|
676
|
+
or self._next.token_type == TokenType.ORDER_BY
|
|
677
|
+
):
|
|
678
|
+
return None
|
|
679
|
+
return super()._parse_table_alias(alias_tokens=alias_tokens)
|
|
680
|
+
|
|
681
|
+
def _parse_query_modifiers(self, this):
|
|
682
|
+
if (
|
|
683
|
+
self._curr
|
|
684
|
+
and self._curr.text.upper() == "ASSUME"
|
|
685
|
+
and self._next
|
|
686
|
+
and self._next.token_type == TokenType.ORDER_BY
|
|
687
|
+
):
|
|
688
|
+
self._advance() # consume ASSUME
|
|
689
|
+
_, order = self.QUERY_MODIFIER_PARSERS[TokenType.ORDER_BY](self)
|
|
690
|
+
if order and this:
|
|
691
|
+
this.set("order", self.expression(AssumeOrderBy(this=order)))
|
|
692
|
+
return super()._parse_query_modifiers(this)
|
|
693
|
+
|
|
694
|
+
def _parse_table(self, *args, **kwargs) -> t.Optional[exp.Expression]:
|
|
695
|
+
table = super()._parse_table(*args, **kwargs)
|
|
696
|
+
if table and self._curr and self._curr.text.upper() == "FLATTEN":
|
|
697
|
+
self._advance()
|
|
698
|
+
kind: t.Optional[str] = None
|
|
699
|
+
if self._curr and self._curr.text.upper() in ("LIST", "DICT"):
|
|
700
|
+
kind = self._curr.text.upper()
|
|
701
|
+
self._advance()
|
|
702
|
+
self._match_text_seq("BY")
|
|
703
|
+
cols = self._parse_csv(self._parse_column)
|
|
704
|
+
return self.expression(FlattenBy(this=table, expressions=cols, kind=kind))
|
|
705
|
+
return table
|
|
706
|
+
|
|
483
707
|
def _parse_struct_types(self, type_required=True) -> t.Optional[exp.Expression]:
|
|
484
708
|
if not self._curr:
|
|
485
709
|
return None
|
|
@@ -559,6 +783,8 @@ class YDB(Dialect):
|
|
|
559
783
|
Responsible for translating SQL AST back to SQL text with YDB-specific syntax.
|
|
560
784
|
"""
|
|
561
785
|
|
|
786
|
+
PARAMETER_TOKEN = "$"
|
|
787
|
+
|
|
562
788
|
SUPPORTS_VALUES_DEFAULT = False
|
|
563
789
|
NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE
|
|
564
790
|
JOIN_HINTS = False
|
|
@@ -574,7 +800,7 @@ class YDB(Dialect):
|
|
|
574
800
|
JSON_KEY_VALUE_PAIR_SEP = ","
|
|
575
801
|
VARCHAR_REQUIRES_SIZE = False
|
|
576
802
|
CAN_IMPLEMENT_ARRAY_ANY = True
|
|
577
|
-
STRUCT_DELIMITER = ("
|
|
803
|
+
STRUCT_DELIMITER = ("<", ">")
|
|
578
804
|
NULL_ORDERING_SUPPORTED: t.Optional[bool] = False
|
|
579
805
|
NULL_ORDERING = None
|
|
580
806
|
MATCHED_BY_SOURCE = False
|
|
@@ -637,6 +863,10 @@ class YDB(Dialect):
|
|
|
637
863
|
Returns:
|
|
638
864
|
Generated SQL string for the table reference
|
|
639
865
|
"""
|
|
866
|
+
if isinstance(expression.this, exp.Parameter):
|
|
867
|
+
var = self.sql(expression, "this")
|
|
868
|
+
alias = f" AS {expression.alias}" if expression.alias else ""
|
|
869
|
+
return f"{var}{alias}"
|
|
640
870
|
prefix = f"{expression.db}/" if expression.db else ""
|
|
641
871
|
sql = f"`{prefix}{expression.name}`"
|
|
642
872
|
|
|
@@ -663,6 +893,38 @@ class YDB(Dialect):
|
|
|
663
893
|
|
|
664
894
|
return is_sql
|
|
665
895
|
|
|
896
|
+
def scoperesolution_sql(self, expression: exp.ScopeResolution) -> str:
    """
    Generate SQL for a scope resolution as ``<this>::<expression>``.

    Args:
        expression: The ScopeResolution expression

    Returns:
        The rendered ``this`` and ``expression`` args joined by ``::``
    """
    scope = self.sql(expression, "this")
    member = self.sql(expression, "expression")
    return "::".join((scope, member))
|
|
900
|
+
|
|
901
|
+
def declareitem_sql(self, expression: exp.DeclareItem) -> str:
    """
    Generate SQL for one DECLARE item as ``$<name> AS <kind>``.

    Args:
        expression: The DeclareItem expression

    Returns:
        The rendered ``this`` arg prefixed with ``$``, followed by ``AS``
        and the rendered ``kind`` arg
    """
    # NOTE(review): the "$" is prepended unconditionally; if `this` ever
    # renders with its own leading "$" the output would contain "$$" —
    # confirm against the parser side.
    declared = self.sql(expression, "this")
    type_sql = self.sql(expression, "kind")
    return f"${declared} AS {type_sql}"
|
|
905
|
+
|
|
906
|
+
def flattenby_sql(self, expression: FlattenBy) -> str:
    """
    Generate SQL for ``<table> FLATTEN [<kind>] BY <columns>``.

    Args:
        expression: The FlattenBy expression

    Returns:
        The rendered table followed by the FLATTEN clause; the ``kind``
        arg is emitted between ``FLATTEN`` and ``BY`` only when it is set
    """
    source = self.sql(expression, "this")
    mode = expression.args.get("kind")
    keyword = f"FLATTEN {mode}" if mode else "FLATTEN"
    columns = self.expressions(expression, flat=True)
    return f"{source} {keyword} BY {columns}"
|
|
912
|
+
|
|
913
|
+
def assumeorderby_sql(self, expression: AssumeOrderBy) -> str:
    """
    Generate SQL for an ``ASSUME ORDER BY`` clause.

    Args:
        expression: The AssumeOrderBy expression wrapping the ordering

    Returns:
        A segment consisting of ``ASSUME`` followed by the rendered ordering
    """
    order_sql = self.sql(expression, "this")
    # Leading whitespace of the rendered ordering is dropped so that exactly
    # one space separates ASSUME from it.
    return self.seg("ASSUME " + order_sql.lstrip())
|
|
916
|
+
|
|
917
|
+
def ydbtuple_sql(self, expression: YdbTuple) -> str:
    """
    Generate SQL for a tuple type as ``Tuple<...>``.

    Args:
        expression: The YdbTuple expression

    Returns:
        ``Tuple<...>`` with the rendered member expressions, wrapped in
        ``Optional<...>`` when the ``nullable`` arg is truthy
    """
    members = [self.sql(item) for item in expression.expressions]
    tuple_sql = "Tuple<" + ", ".join(members) + ">"
    if expression.args.get("nullable"):
        return f"Optional<{tuple_sql}>"
    return tuple_sql
|
|
921
|
+
|
|
922
|
+
def alias_sql(self, expression: exp.Alias) -> str:
    """
    Generate SQL for an alias, with special handling for ``$``-prefixed names.

    An alias whose name starts with ``$`` is rendered as
    ``<alias> = <expression>``; every other alias is delegated to the
    base generator.

    Args:
        expression: The Alias expression

    Returns:
        Generated SQL string for the alias
    """
    target = expression.args.get("alias")
    if not (target and target.name.startswith("$")):
        return super().alias_sql(expression)
    return f"{target.name} = {self.sql(expression, 'this')}"
|
|
927
|
+
|
|
666
928
|
def anonymous_sql(self, expression: exp.Anonymous) -> str:
|
|
667
929
|
"""
|
|
668
930
|
Generate SQL for Anonymous functions, with special handling for YQL lambda variables.
|
|
@@ -799,6 +1061,35 @@ class YDB(Dialect):
|
|
|
799
1061
|
Returns:
|
|
800
1062
|
Generated SQL string for the data type
|
|
801
1063
|
"""
|
|
1064
|
+
nullable = expression.args.get("nullable")
|
|
1065
|
+
|
|
1066
|
+
# YDB generic container types rendered with <> syntax and correct casing
|
|
1067
|
+
if expression.args.get("nested"):
|
|
1068
|
+
type_value = expression.this
|
|
1069
|
+
# Tuple<...>: STRUCT with kind="tuple" marker
|
|
1070
|
+
if (
|
|
1071
|
+
type_value == exp.DataType.Type.STRUCT
|
|
1072
|
+
and isinstance(expression.args.get("kind"), exp.Var)
|
|
1073
|
+
and expression.args["kind"].name == "tuple"
|
|
1074
|
+
):
|
|
1075
|
+
inner = ", ".join(
|
|
1076
|
+
self.sql(col.args["kind"])
|
|
1077
|
+
for col in expression.expressions
|
|
1078
|
+
if isinstance(col, exp.ColumnDef)
|
|
1079
|
+
)
|
|
1080
|
+
sql = f"Tuple<{inner}>"
|
|
1081
|
+
return f"Optional<{sql}>" if nullable else sql
|
|
1082
|
+
|
|
1083
|
+
inner = ", ".join(self.sql(e) for e in expression.expressions)
|
|
1084
|
+
name = {
|
|
1085
|
+
exp.DataType.Type.LIST: "List",
|
|
1086
|
+
exp.DataType.Type.MAP: "Dict",
|
|
1087
|
+
exp.DataType.Type.SET: "Set",
|
|
1088
|
+
}.get(type_value)
|
|
1089
|
+
if name:
|
|
1090
|
+
sql = f"{name}<{inner}>"
|
|
1091
|
+
return f"Optional<{sql}>" if nullable else sql
|
|
1092
|
+
|
|
802
1093
|
if (
|
|
803
1094
|
expression.is_type(exp.DataType.Type.NVARCHAR)
|
|
804
1095
|
or expression.is_type(exp.DataType.Type.VARCHAR)
|
|
@@ -845,7 +1136,10 @@ class YDB(Dialect):
|
|
|
845
1136
|
exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
|
|
846
1137
|
)
|
|
847
1138
|
|
|
848
|
-
|
|
1139
|
+
sql = super().datatype_sql(expression)
|
|
1140
|
+
if nullable:
|
|
1141
|
+
sql = f"Optional<{sql}>"
|
|
1142
|
+
return sql
|
|
849
1143
|
|
|
850
1144
|
def primarykeycolumnconstraint_sql(self, expression: exp.PrimaryKeyColumnConstraint) -> str:
|
|
851
1145
|
"""
|
|
@@ -1070,12 +1364,6 @@ class YDB(Dialect):
|
|
|
1070
1364
|
else:
|
|
1071
1365
|
sql = self._generate_create_table(expression)
|
|
1072
1366
|
|
|
1073
|
-
# Prepend PRAGMA AnsiImplicitCrossJoin only when the query contains
|
|
1074
|
-
# implicit cross joins (FROM t1, t2 syntax). YDB disables them by
|
|
1075
|
-
# default; the pragma restores standard SQL semantics.
|
|
1076
|
-
if _has_implicit_cross_join(expression):
|
|
1077
|
-
sql = "PRAGMA AnsiImplicitCrossJoin;\n" + sql
|
|
1078
|
-
|
|
1079
1367
|
return sql
|
|
1080
1368
|
|
|
1081
1369
|
def unnest_subqueries(self, expression):
|
|
@@ -1866,15 +2154,15 @@ class YDB(Dialect):
|
|
|
1866
2154
|
# we move the WHERE expression from ON, using literals
|
|
1867
2155
|
def join_sql(self, expression: exp.Join) -> str:
|
|
1868
2156
|
on_condition = expression.args.get("on")
|
|
1869
|
-
|
|
1870
|
-
|
|
1871
|
-
#
|
|
1872
|
-
# YDB requires
|
|
1873
|
-
|
|
1874
|
-
|
|
1875
|
-
|
|
1876
|
-
expression.set("kind",
|
|
1877
|
-
expression.set("
|
|
2157
|
+
using = expression.args.get("using")
|
|
2158
|
+
|
|
2159
|
+
# Any join with no ON/USING clause becomes an explicit CROSS JOIN.
|
|
2160
|
+
# YDB requires an ON clause for outer joins, and emitting CROSS JOIN
|
|
2161
|
+
# explicitly (instead of the comma-separated form) keeps the output
|
|
2162
|
+
# valid without any extra pragma.
|
|
2163
|
+
if not on_condition and not using:
|
|
2164
|
+
expression.set("kind", "CROSS")
|
|
2165
|
+
expression.set("side", None)
|
|
1878
2166
|
return super().join_sql(expression)
|
|
1879
2167
|
|
|
1880
2168
|
if on_condition:
|
|
@@ -1942,18 +2230,11 @@ class YDB(Dialect):
|
|
|
1942
2230
|
on_condition = exp.and_(on_condition, cond)
|
|
1943
2231
|
expression.set("on", on_condition)
|
|
1944
2232
|
else:
|
|
1945
|
-
# No valid equality conditions
|
|
1946
|
-
#
|
|
1947
|
-
|
|
1948
|
-
|
|
1949
|
-
|
|
1950
|
-
):
|
|
1951
|
-
# Convert to CROSS JOIN by removing kind and ON
|
|
1952
|
-
expression.set("kind", None)
|
|
1953
|
-
expression.set("on", None)
|
|
1954
|
-
expression.set("side", "CROSS")
|
|
1955
|
-
else:
|
|
1956
|
-
expression.set("on", None)
|
|
2233
|
+
# No valid equality conditions remain on the JOIN — fall back
|
|
2234
|
+
# to an explicit CROSS JOIN regardless of the original kind.
|
|
2235
|
+
expression.set("kind", "CROSS")
|
|
2236
|
+
expression.set("on", None)
|
|
2237
|
+
expression.set("side", None)
|
|
1957
2238
|
|
|
1958
2239
|
if conditions_to_move:
|
|
1959
2240
|
select_stmt = expression.find_ancestor(exp.Select)
|
|
@@ -2358,10 +2639,10 @@ class YDB(Dialect):
|
|
|
2358
2639
|
TYPE_MAPPING = {
|
|
2359
2640
|
**generator.Generator.TYPE_MAPPING,
|
|
2360
2641
|
**STRING_TYPE_MAPPING,
|
|
2361
|
-
exp.DataType.Type.TINYINT: "
|
|
2362
|
-
exp.DataType.Type.SMALLINT: "
|
|
2363
|
-
exp.DataType.Type.INT: "
|
|
2364
|
-
exp.DataType.Type.BIGINT: "
|
|
2642
|
+
exp.DataType.Type.TINYINT: "Int8",
|
|
2643
|
+
exp.DataType.Type.SMALLINT: "Int16",
|
|
2644
|
+
exp.DataType.Type.INT: "Int32",
|
|
2645
|
+
exp.DataType.Type.BIGINT: "Int64",
|
|
2365
2646
|
exp.DataType.Type.DECIMAL: "Decimal",
|
|
2366
2647
|
exp.DataType.Type.FLOAT: "Float",
|
|
2367
2648
|
exp.DataType.Type.DOUBLE: "Double",
|
|
@@ -2373,6 +2654,9 @@ class YDB(Dialect):
|
|
|
2373
2654
|
|
|
2374
2655
|
TRANSFORMS = {
|
|
2375
2656
|
**generator.Generator.TRANSFORMS,
|
|
2657
|
+
FlattenBy: lambda self, e: self.flattenby_sql(e),
|
|
2658
|
+
AssumeOrderBy: lambda self, e: self.assumeorderby_sql(e),
|
|
2659
|
+
YdbTuple: lambda self, e: self.ydbtuple_sql(e),
|
|
2376
2660
|
exp.Create: create_sql,
|
|
2377
2661
|
exp.DefaultColumnConstraint: lambda self, e: "",
|
|
2378
2662
|
exp.DateTrunc: _date_trunc_sql,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ydb-sqlglot-plugin
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.2.1
|
|
4
4
|
Summary: YDB dialect plugin for sqlglot
|
|
5
5
|
Author: YDB Team
|
|
6
6
|
License: Apache-2.0
|
|
@@ -22,11 +22,12 @@ Provides-Extra: dev
|
|
|
22
22
|
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
23
23
|
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
24
24
|
Requires-Dist: ydb<4,>=3.28.0; extra == "dev"
|
|
25
|
+
Requires-Dist: ruff>=0.9.0; extra == "dev"
|
|
25
26
|
Dynamic: license-file
|
|
26
27
|
|
|
27
28
|
# ydb-sqlglot-plugin
|
|
28
29
|
|
|
29
|
-
YDB dialect plugin for [sqlglot](https://github.com/tobymao/sqlglot) —
|
|
30
|
+
YDB dialect plugin for [sqlglot](https://github.com/tobymao/sqlglot) — bidirectional transpilation between YDB/YQL and any SQL dialect.
|
|
30
31
|
|
|
31
32
|
## Installation
|
|
32
33
|
|
|
@@ -41,19 +42,20 @@ After installing the package, the `ydb` dialect is available in sqlglot automati
|
|
|
41
42
|
```python
|
|
42
43
|
import sqlglot
|
|
43
44
|
|
|
44
|
-
#
|
|
45
|
+
# Any dialect → YDB
|
|
45
46
|
result = sqlglot.transpile("SELECT * FROM users WHERE id = 1", read="mysql", write="ydb")[0]
|
|
46
47
|
# → SELECT * FROM `users` WHERE id = 1
|
|
47
48
|
|
|
48
|
-
#
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
yql = parsed.sql(dialect="ydb")
|
|
49
|
+
# YDB → any dialect
|
|
50
|
+
result = sqlglot.transpile("$t = (SELECT id FROM users); SELECT * FROM $t AS t", read="ydb", write="postgres")[0]
|
|
51
|
+
# → WITH t AS (SELECT id FROM users) SELECT * FROM t AS t
|
|
52
52
|
```
|
|
53
53
|
|
|
54
54
|
## What the plugin does
|
|
55
55
|
|
|
56
|
-
###
|
|
56
|
+
### Any SQL → YDB
|
|
57
|
+
|
|
58
|
+
#### Table names
|
|
57
59
|
|
|
58
60
|
Database-qualified names are rewritten to the YDB path format and wrapped in backticks:
|
|
59
61
|
|
|
@@ -65,7 +67,7 @@ SELECT * FROM analytics.events
|
|
|
65
67
|
SELECT * FROM `analytics/events`
|
|
66
68
|
```
|
|
67
69
|
|
|
68
|
-
|
|
70
|
+
#### CTEs → YDB variables
|
|
69
71
|
|
|
70
72
|
```sql
|
|
71
73
|
-- input
|
|
@@ -78,7 +80,7 @@ $active = (SELECT * FROM `users` WHERE status = 'active');
|
|
|
78
80
|
SELECT * FROM $active AS active
|
|
79
81
|
```
|
|
80
82
|
|
|
81
|
-
|
|
83
|
+
#### Subquery decorrelation
|
|
82
84
|
|
|
83
85
|
Correlated subqueries (which YQL does not support) are rewritten as JOINs:
|
|
84
86
|
|
|
@@ -102,6 +104,52 @@ The same rewriting applies to `EXISTS`, `IN (subquery)`, and `ANY/ALL` subquerie
|
|
|
102
104
|
|
|
103
105
|
---
|
|
104
106
|
|
|
107
|
+
### YDB → any SQL
|
|
108
|
+
|
|
109
|
+
The plugin parses YDB/YQL back into sqlglot's AST, enabling round-trips, YDB-to-YDB transformations, and transpilation to other dialects.
|
|
110
|
+
|
|
111
|
+
#### Supported YQL constructs
|
|
112
|
+
|
|
113
|
+
| Construct | Example |
|
|
114
|
+
|---|---|
|
|
115
|
+
| `$variable` references | `SELECT * FROM $t AS t` |
|
|
116
|
+
| `Module::Function()` | `DateTime::GetYear(ts)` |
|
|
117
|
+
| `DECLARE $p AS Type` | `DECLARE $p AS Int32` |
|
|
118
|
+
| `FLATTEN [LIST\|DICT] BY col` | `FROM t FLATTEN LIST BY col` |
|
|
119
|
+
| `Optional<T>` / `T?` | `CAST(x AS Optional<Utf8>)` |
|
|
120
|
+
| Container types | `CAST(x AS List<Int32>)`, `Dict<Utf8, Int64>`, `Set<Utf8>`, `Tuple<Int32, Utf8>` |
|
|
121
|
+
| `ASSUME ORDER BY` | `SELECT * FROM t ASSUME ORDER BY id` |
|
|
122
|
+
| Named expressions | `$t = (SELECT 1 AS x)` |
|
|
123
|
+
| `PRAGMA` | `PRAGMA AnsiImplicitCrossJoin` |
|
|
124
|
+
|
|
125
|
+
Table names without backticks are accepted on input; the generator always produces backtick-quoted output.
|
|
126
|
+
|
|
127
|
+
#### CTE reassembly
|
|
128
|
+
|
|
129
|
+
YDB-style named expressions are automatically reassembled into standard `WITH` CTEs when targeting other dialects:
|
|
130
|
+
|
|
131
|
+
```python
|
|
132
|
+
ydb_sql = "$t = (SELECT 1 AS x); SELECT * FROM $t AS t"
|
|
133
|
+
parse_one(ydb_sql, dialect="ydb").sql(dialect="postgres")
|
|
134
|
+
# → WITH t AS (SELECT 1 AS x) SELECT * FROM t AS t
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
---
|
|
138
|
+
|
|
139
|
+
### Column lineage
|
|
140
|
+
|
|
141
|
+
Because YDB SQL is fully parsed into sqlglot's AST, column-level lineage works out of the box:
|
|
142
|
+
|
|
143
|
+
```python
|
|
144
|
+
from sqlglot.lineage import lineage
|
|
145
|
+
|
|
146
|
+
node = lineage("total", "$orders = (SELECT user_id, amount FROM orders); SELECT SUM(amount) AS total FROM $orders AS o", dialect="ydb")
|
|
147
|
+
for dep in node.walk():
|
|
148
|
+
print(dep.name, "→", dep.source)
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
---
|
|
152
|
+
|
|
105
153
|
## Function reference
|
|
106
154
|
|
|
107
155
|
Functions below are recognized by sqlglot as standard SQL expressions and translated to their YQL equivalents. Dialect-specific functions that sqlglot does not parse into typed AST nodes are **passed through unchanged** — see [Limitations](#limitations).
|
|
@@ -179,25 +227,43 @@ Functions below are recognized by sqlglot as standard SQL expressions and transl
|
|
|
179
227
|
|
|
180
228
|
## Type mapping
|
|
181
229
|
|
|
182
|
-
|
|
230
|
+
### Standard SQL → YDB
|
|
231
|
+
|
|
232
|
+
| SQL type | YDB type |
|
|
183
233
|
|---|---|
|
|
184
|
-
| `TINYINT` | `
|
|
185
|
-
| `SMALLINT` | `
|
|
186
|
-
| `INT` / `INTEGER` | `
|
|
187
|
-
| `BIGINT` | `
|
|
234
|
+
| `TINYINT` | `Int8` |
|
|
235
|
+
| `SMALLINT` | `Int16` |
|
|
236
|
+
| `INT` / `INTEGER` | `Int32` |
|
|
237
|
+
| `BIGINT` | `Int64` |
|
|
188
238
|
| `FLOAT` | `Float` |
|
|
189
239
|
| `DOUBLE` / `DOUBLE PRECISION` | `Double` |
|
|
190
240
|
| `DECIMAL(p, s)` | `Decimal(p, s)` |
|
|
191
241
|
| `BOOLEAN` / `BIT` | `Uint8` |
|
|
192
242
|
| `TIMESTAMP` | `Timestamp` |
|
|
193
|
-
| `VARCHAR` / `NVARCHAR` / `CHAR` | `Utf8` |
|
|
194
|
-
| `
|
|
195
|
-
|
|
243
|
+
| `VARCHAR` / `NVARCHAR` / `CHAR` / `TEXT` | `Utf8` |
|
|
244
|
+
| `BLOB` / `BINARY` / `VARBINARY` | `String` |
|
|
245
|
+
|
|
246
|
+
### YDB types → standard SQL
|
|
247
|
+
|
|
248
|
+
| YDB type | Standard SQL | Postgres | ClickHouse |
|
|
249
|
+
|---|---|---|---|
|
|
250
|
+
| `Utf8` | `TEXT` | `TEXT` | `String` |
|
|
251
|
+
| `String` | `BLOB` | `BYTEA` | `String` |
|
|
252
|
+
| `Int32` | `INT` | `INT` | `Int32` |
|
|
253
|
+
| `Int64` | `BIGINT` | `BIGINT` | `Int64` |
|
|
254
|
+
| `Optional<T>` | `T` (nullable) | `T` | `Nullable(T)` |
|
|
255
|
+
| `List<T>` | `LIST<T>` | `LIST<T>` | `Array(T)` |
|
|
256
|
+
| `Dict<K,V>` | `MAP<K,V>` | `MAP<K,V>` | `Map(K,V)` |
|
|
257
|
+
| `Tuple<T1,T2>` | `STRUCT<...>` | `STRUCT<...>` | `Tuple(T1,T2)` |
|
|
196
258
|
|
|
197
259
|
---
|
|
198
260
|
|
|
199
261
|
## Limitations
|
|
200
262
|
|
|
263
|
+
### Dialect-specific functions
|
|
264
|
+
|
|
265
|
+
Functions that sqlglot does not parse into typed AST nodes are passed through unchanged and must be replaced manually. Common examples from ClickHouse: `now()`, `today()`, `parseDateTimeBestEffort()`, `toDate()`, `toFloat64()`, `toString()`, `countDistinct()`, `groupArray()`.
|
|
266
|
+
|
|
201
267
|
### Correlated subqueries in DML
|
|
202
268
|
|
|
203
269
|
Correlated subqueries inside `UPDATE` or `INSERT` statements cannot be automatically decorrelated — YDB does not support them natively, and rewriting requires knowledge of the table's primary key. Rewrite manually using a `$variable`:
|
|
@@ -217,6 +283,10 @@ Correlated subqueries inside `SELECT` are handled automatically via JOIN rewriti
|
|
|
217
283
|
|
|
218
284
|
`dateDiff('month', a, b)` has no exact equivalent in YDB because months have variable length. Use `DateTime::ShiftMonths` for date arithmetic instead.
|
|
219
285
|
|
|
286
|
+
### YDB container types in other dialects
|
|
287
|
+
|
|
288
|
+
`Uint8`/`Uint16`/`Uint32`/`Uint64` and YDB-specific container types (`Struct<...>`, `Variant<...>`, `Enum<...>`) do not have direct equivalents in standard SQL and are passed through as-is when targeting other dialects.
|
|
289
|
+
|
|
220
290
|
---
|
|
221
291
|
|
|
222
292
|
## Development
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
VERSION = "0.1.1"
|
|
File without changes
|
|
File without changes
|
{ydb_sqlglot_plugin-0.1.1 → ydb_sqlglot_plugin-0.2.1}/ydb_sqlglot_plugin.egg-info/SOURCES.txt
RENAMED
|
File without changes
|
|
File without changes
|
{ydb_sqlglot_plugin-0.1.1 → ydb_sqlglot_plugin-0.2.1}/ydb_sqlglot_plugin.egg-info/entry_points.txt
RENAMED
|
File without changes
|
{ydb_sqlglot_plugin-0.1.1 → ydb_sqlglot_plugin-0.2.1}/ydb_sqlglot_plugin.egg-info/top_level.txt
RENAMED
|
File without changes
|