ydb-sqlglot-plugin 0.2.1__tar.gz → 0.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ydb_sqlglot_plugin-0.2.1 → ydb_sqlglot_plugin-0.2.3}/PKG-INFO +50 -3
- {ydb_sqlglot_plugin-0.2.1 → ydb_sqlglot_plugin-0.2.3}/README.md +49 -2
- {ydb_sqlglot_plugin-0.2.1 → ydb_sqlglot_plugin-0.2.3}/pyproject.toml +1 -1
- ydb_sqlglot_plugin-0.2.3/ydb_sqlglot/version.py +1 -0
- {ydb_sqlglot_plugin-0.2.1 → ydb_sqlglot_plugin-0.2.3}/ydb_sqlglot/ydb.py +484 -21
- {ydb_sqlglot_plugin-0.2.1 → ydb_sqlglot_plugin-0.2.3}/ydb_sqlglot_plugin.egg-info/PKG-INFO +50 -3
- ydb_sqlglot_plugin-0.2.1/ydb_sqlglot/version.py +0 -1
- {ydb_sqlglot_plugin-0.2.1 → ydb_sqlglot_plugin-0.2.3}/LICENSE +0 -0
- {ydb_sqlglot_plugin-0.2.1 → ydb_sqlglot_plugin-0.2.3}/setup.cfg +0 -0
- {ydb_sqlglot_plugin-0.2.1 → ydb_sqlglot_plugin-0.2.3}/ydb_sqlglot/__init__.py +0 -0
- {ydb_sqlglot_plugin-0.2.1 → ydb_sqlglot_plugin-0.2.3}/ydb_sqlglot_plugin.egg-info/SOURCES.txt +0 -0
- {ydb_sqlglot_plugin-0.2.1 → ydb_sqlglot_plugin-0.2.3}/ydb_sqlglot_plugin.egg-info/dependency_links.txt +0 -0
- {ydb_sqlglot_plugin-0.2.1 → ydb_sqlglot_plugin-0.2.3}/ydb_sqlglot_plugin.egg-info/entry_points.txt +0 -0
- {ydb_sqlglot_plugin-0.2.1 → ydb_sqlglot_plugin-0.2.3}/ydb_sqlglot_plugin.egg-info/requires.txt +0 -0
- {ydb_sqlglot_plugin-0.2.1 → ydb_sqlglot_plugin-0.2.3}/ydb_sqlglot_plugin.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ydb-sqlglot-plugin
|
|
3
|
-
Version: 0.2.1
|
|
3
|
+
Version: 0.2.3
|
|
4
4
|
Summary: YDB dialect plugin for sqlglot
|
|
5
5
|
Author: YDB Team
|
|
6
6
|
License: Apache-2.0
|
|
@@ -102,6 +102,31 @@ LEFT JOIN (
|
|
|
102
102
|
|
|
103
103
|
The same rewriting applies to `EXISTS`, `IN (subquery)`, and `ANY/ALL` subqueries.
|
|
104
104
|
|
|
105
|
+
#### GROUP BY aliases
|
|
106
|
+
|
|
107
|
+
YDB accepts aliases directly inside `GROUP BY` items. The generator uses this
|
|
108
|
+
form for grouped columns so later clauses and decorrelated subqueries can refer
|
|
109
|
+
to a stable grouping name:
|
|
110
|
+
|
|
111
|
+
```sql
|
|
112
|
+
-- input
|
|
113
|
+
SELECT user_id, COUNT(*) FROM events GROUP BY user_id
|
|
114
|
+
|
|
115
|
+
-- output
|
|
116
|
+
SELECT user_id, COUNT(*) FROM `events` GROUP BY user_id AS user_id
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
If a grouped column is selected under a generated alias, the `GROUP BY` item uses
|
|
120
|
+
that alias as well:
|
|
121
|
+
|
|
122
|
+
```sql
|
|
123
|
+
SELECT user_id AS _u_1, COUNT(*) FROM `events` GROUP BY user_id AS _u_1
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
Positional `GROUP BY` references are expanded before generation. When a
|
|
127
|
+
positional reference points to a constant expression, the grouping item is
|
|
128
|
+
removed because YDB rejects grouping by constants.
|
|
129
|
+
|
|
105
130
|
---
|
|
106
131
|
|
|
107
132
|
### YDB → any SQL
|
|
@@ -119,7 +144,10 @@ The plugin parses YDB/YQL back into sqlglot's AST, enabling round-trips, YDB-to-
|
|
|
119
144
|
| `Optional<T>` / `T?` | `CAST(x AS Optional<Utf8>)` |
|
|
120
145
|
| Container types | `CAST(x AS List<Int32>)`, `Dict<Utf8, Int64>`, `Set<Utf8>`, `Tuple<Int32, Utf8>` |
|
|
121
146
|
| `ASSUME ORDER BY` | `SELECT * FROM t ASSUME ORDER BY id` |
|
|
147
|
+
| `GROUP BY expr AS alias` | `SELECT v, COUNT(*) FROM t GROUP BY v AS v` |
|
|
122
148
|
| Named expressions | `$t = (SELECT 1 AS x)` |
|
|
149
|
+
| Lambda expressions | `($x, $y?) -> ($x + COALESCE($y, 0))`, `($y) -> { $p = "x"; RETURN $p \|\| $y }` |
|
|
150
|
+
| `IN COMPACT` | `WHERE key IN COMPACT $values` |
|
|
123
151
|
| `PRAGMA` | `PRAGMA AnsiImplicitCrossJoin` |
|
|
124
152
|
|
|
125
153
|
Table names without backticks are accepted on input; the generator always produces backtick-quoted output.
|
|
@@ -204,11 +232,30 @@ Functions below are recognized by sqlglot as standard SQL expressions and transl
|
|
|
204
232
|
|---|---|
|
|
205
233
|
| `ARRAY(v1, v2, ...)` | `AsList(v1, v2, ...)` |
|
|
206
234
|
| `ARRAY_LENGTH(x)` / `ARRAY_SIZE(x)` | `ListLength(x)` |
|
|
207
|
-
| `ARRAY_FILTER(arr, x -> cond)` | `ListFilter(arr, ($x) ->
|
|
208
|
-
| `ARRAY_ANY(arr, x -> cond)` | `ListHasItems(ListFilter(arr, ($x) ->
|
|
235
|
+
| `ARRAY_FILTER(arr, x -> cond)` | `ListFilter(arr, ($x) -> (cond))` |
|
|
236
|
+
| `ARRAY_ANY(arr, x -> cond)` | `ListHasItems(ListFilter(arr, ($x) -> (cond)))` |
|
|
209
237
|
| `ARRAY_AGG(x)` | `AGGREGATE_LIST(x)` |
|
|
210
238
|
| `UNNEST(x)` | `FLATTEN BY x` |
|
|
211
239
|
|
|
240
|
+
Lambda expressions are represented with sqlglot's standard `exp.Lambda` AST node.
|
|
241
|
+
When a source dialect parses lambdas, the YDB generator emits YQL lambda syntax:
|
|
242
|
+
|
|
243
|
+
```sql
|
|
244
|
+
-- DuckDB input
|
|
245
|
+
SELECT list_filter(arr, x -> x > 0) FROM t
|
|
246
|
+
|
|
247
|
+
-- YDB output
|
|
248
|
+
SELECT ListFilter(arr, ($x) -> ($x > 0)) FROM `t`
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
YDB input also supports documented YQL lambda forms, including optional
|
|
252
|
+
arguments and block bodies with local named expressions:
|
|
253
|
+
|
|
254
|
+
```sql
|
|
255
|
+
($x, $y?) -> ($x + COALESCE($y, 0));
|
|
256
|
+
($y) -> { $prefix = "x"; RETURN $prefix || $y; };
|
|
257
|
+
```
|
|
258
|
+
|
|
212
259
|
### Conditional / math
|
|
213
260
|
|
|
214
261
|
| Input | YQL output |
|
|
@@ -75,6 +75,31 @@ LEFT JOIN (
|
|
|
75
75
|
|
|
76
76
|
The same rewriting applies to `EXISTS`, `IN (subquery)`, and `ANY/ALL` subqueries.
|
|
77
77
|
|
|
78
|
+
#### GROUP BY aliases
|
|
79
|
+
|
|
80
|
+
YDB accepts aliases directly inside `GROUP BY` items. The generator uses this
|
|
81
|
+
form for grouped columns so later clauses and decorrelated subqueries can refer
|
|
82
|
+
to a stable grouping name:
|
|
83
|
+
|
|
84
|
+
```sql
|
|
85
|
+
-- input
|
|
86
|
+
SELECT user_id, COUNT(*) FROM events GROUP BY user_id
|
|
87
|
+
|
|
88
|
+
-- output
|
|
89
|
+
SELECT user_id, COUNT(*) FROM `events` GROUP BY user_id AS user_id
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
If a grouped column is selected under a generated alias, the `GROUP BY` item uses
|
|
93
|
+
that alias as well:
|
|
94
|
+
|
|
95
|
+
```sql
|
|
96
|
+
SELECT user_id AS _u_1, COUNT(*) FROM `events` GROUP BY user_id AS _u_1
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
Positional `GROUP BY` references are expanded before generation. When a
|
|
100
|
+
positional reference points to a constant expression, the grouping item is
|
|
101
|
+
removed because YDB rejects grouping by constants.
|
|
102
|
+
|
|
78
103
|
---
|
|
79
104
|
|
|
80
105
|
### YDB → any SQL
|
|
@@ -92,7 +117,10 @@ The plugin parses YDB/YQL back into sqlglot's AST, enabling round-trips, YDB-to-
|
|
|
92
117
|
| `Optional<T>` / `T?` | `CAST(x AS Optional<Utf8>)` |
|
|
93
118
|
| Container types | `CAST(x AS List<Int32>)`, `Dict<Utf8, Int64>`, `Set<Utf8>`, `Tuple<Int32, Utf8>` |
|
|
94
119
|
| `ASSUME ORDER BY` | `SELECT * FROM t ASSUME ORDER BY id` |
|
|
120
|
+
| `GROUP BY expr AS alias` | `SELECT v, COUNT(*) FROM t GROUP BY v AS v` |
|
|
95
121
|
| Named expressions | `$t = (SELECT 1 AS x)` |
|
|
122
|
+
| Lambda expressions | `($x, $y?) -> ($x + COALESCE($y, 0))`, `($y) -> { $p = "x"; RETURN $p \|\| $y }` |
|
|
123
|
+
| `IN COMPACT` | `WHERE key IN COMPACT $values` |
|
|
96
124
|
| `PRAGMA` | `PRAGMA AnsiImplicitCrossJoin` |
|
|
97
125
|
|
|
98
126
|
Table names without backticks are accepted on input; the generator always produces backtick-quoted output.
|
|
@@ -177,11 +205,30 @@ Functions below are recognized by sqlglot as standard SQL expressions and transl
|
|
|
177
205
|
|---|---|
|
|
178
206
|
| `ARRAY(v1, v2, ...)` | `AsList(v1, v2, ...)` |
|
|
179
207
|
| `ARRAY_LENGTH(x)` / `ARRAY_SIZE(x)` | `ListLength(x)` |
|
|
180
|
-
| `ARRAY_FILTER(arr, x -> cond)` | `ListFilter(arr, ($x) ->
|
|
181
|
-
| `ARRAY_ANY(arr, x -> cond)` | `ListHasItems(ListFilter(arr, ($x) ->
|
|
208
|
+
| `ARRAY_FILTER(arr, x -> cond)` | `ListFilter(arr, ($x) -> (cond))` |
|
|
209
|
+
| `ARRAY_ANY(arr, x -> cond)` | `ListHasItems(ListFilter(arr, ($x) -> (cond)))` |
|
|
182
210
|
| `ARRAY_AGG(x)` | `AGGREGATE_LIST(x)` |
|
|
183
211
|
| `UNNEST(x)` | `FLATTEN BY x` |
|
|
184
212
|
|
|
213
|
+
Lambda expressions are represented with sqlglot's standard `exp.Lambda` AST node.
|
|
214
|
+
When a source dialect parses lambdas, the YDB generator emits YQL lambda syntax:
|
|
215
|
+
|
|
216
|
+
```sql
|
|
217
|
+
-- DuckDB input
|
|
218
|
+
SELECT list_filter(arr, x -> x > 0) FROM t
|
|
219
|
+
|
|
220
|
+
-- YDB output
|
|
221
|
+
SELECT ListFilter(arr, ($x) -> ($x > 0)) FROM `t`
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
YDB input also supports documented YQL lambda forms, including optional
|
|
225
|
+
arguments and block bodies with local named expressions:
|
|
226
|
+
|
|
227
|
+
```sql
|
|
228
|
+
($x, $y?) -> ($x + COALESCE($y, 0));
|
|
229
|
+
($y) -> { $prefix = "x"; RETURN $prefix || $y; };
|
|
230
|
+
```
|
|
231
|
+
|
|
185
232
|
### Conditional / math
|
|
186
233
|
|
|
187
234
|
| Input | YQL output |
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
VERSION = "0.2.3"
|
|
@@ -1,5 +1,7 @@
|
|
|
1
|
+
import inspect as _inspect
|
|
1
2
|
import re
|
|
2
3
|
import typing as t
|
|
4
|
+
from collections import defaultdict
|
|
3
5
|
|
|
4
6
|
from sqlglot import Generator, TokenType, exp, generator, parser, tokens, transforms
|
|
5
7
|
from sqlglot.dialects.dialect import Dialect, NormalizationStrategy, concat_to_dpipe_sql, unit_to_var
|
|
@@ -175,6 +177,8 @@ def _wrap_udf_group_by(expression: exp.Expression) -> None:
|
|
|
175
177
|
# Resolve each GROUP BY expression to its "effective" form
|
|
176
178
|
# (expand alias references to the aliased expression).
|
|
177
179
|
def _effective(e: exp.Expression) -> exp.Expression:
|
|
180
|
+
if isinstance(e, exp.Alias):
|
|
181
|
+
return e.this
|
|
178
182
|
if isinstance(e, exp.Column):
|
|
179
183
|
name = e.name
|
|
180
184
|
if name in alias_to_expr:
|
|
@@ -418,6 +422,21 @@ class YdbTuple(exp.Expression):
|
|
|
418
422
|
arg_types = {"expressions": True, "nullable": False}
|
|
419
423
|
|
|
420
424
|
|
|
425
|
+
class YdbAtString(exp.Expression):
|
|
426
|
+
"""YDB @@...@@ string literal."""
|
|
427
|
+
arg_types = {"this": True}
|
|
428
|
+
|
|
429
|
+
|
|
430
|
+
class YdbPostfixCall(exp.Expression):
|
|
431
|
+
"""YDB call of an expression result, e.g. $grep(x) or DateTime::Format(fmt)(ts)."""
|
|
432
|
+
arg_types = {"this": True, "expressions": False}
|
|
433
|
+
|
|
434
|
+
|
|
435
|
+
class YdbLambdaBlock(exp.Expression):
|
|
436
|
+
"""YDB lambda body with local named expressions followed by RETURN."""
|
|
437
|
+
arg_types = {"this": True, "expressions": False}
|
|
438
|
+
|
|
439
|
+
|
|
421
440
|
# Container types that use Generic<T, ...> syntax in YDB
|
|
422
441
|
_YDB_GENERIC_TYPES = {
|
|
423
442
|
"List": exp.DataType.Type.LIST,
|
|
@@ -426,6 +445,13 @@ _YDB_GENERIC_TYPES = {
|
|
|
426
445
|
}
|
|
427
446
|
|
|
428
447
|
|
|
448
|
+
# sqlglot >= 30.0.0 changed Parser.expression() to take a pre-built instance instead of
|
|
449
|
+
# (cls, **kwargs). Detect once so the YDB parser override below can support both APIs.
|
|
450
|
+
_EXPRESSION_TAKES_INSTANCE = (
|
|
451
|
+
"instance" in _inspect.signature(parser.Parser.expression).parameters
|
|
452
|
+
)
|
|
453
|
+
|
|
454
|
+
|
|
429
455
|
def _reassemble_ctes(
|
|
430
456
|
statements: t.List[t.Optional[exp.Expression]],
|
|
431
457
|
) -> t.List[t.Optional[exp.Expression]]:
|
|
@@ -449,6 +475,9 @@ def _reassemble_ctes(
|
|
|
449
475
|
pending_names.clear()
|
|
450
476
|
|
|
451
477
|
for stmt in statements:
|
|
478
|
+
if stmt is None:
|
|
479
|
+
continue
|
|
480
|
+
|
|
452
481
|
if (
|
|
453
482
|
isinstance(stmt, exp.Alias)
|
|
454
483
|
and isinstance(stmt.args.get("alias"), exp.Identifier)
|
|
@@ -459,6 +488,10 @@ def _reassemble_ctes(
|
|
|
459
488
|
# Unwrap Subquery — CTE.this must be Select, not Subquery
|
|
460
489
|
if isinstance(inner, exp.Subquery):
|
|
461
490
|
inner = inner.this
|
|
491
|
+
if not isinstance(inner, exp.Query):
|
|
492
|
+
_flush_as_aliases()
|
|
493
|
+
result.append(stmt)
|
|
494
|
+
continue
|
|
462
495
|
# Replace any $prev_cte refs inside this CTE body
|
|
463
496
|
inner = _replace_param_table_refs(inner, pending_names)
|
|
464
497
|
pending_aliases.append(stmt)
|
|
@@ -572,16 +605,106 @@ class YDB(Dialect):
|
|
|
572
605
|
),
|
|
573
606
|
}
|
|
574
607
|
|
|
608
|
+
LAMBDAS = {
|
|
609
|
+
**parser.Parser.LAMBDAS,
|
|
610
|
+
TokenType.ARROW: lambda self, expressions: self._parse_ydb_lambda(expressions),
|
|
611
|
+
}
|
|
612
|
+
|
|
575
613
|
STATEMENT_PARSERS = {
|
|
576
614
|
**parser.Parser.STATEMENT_PARSERS,
|
|
577
615
|
TokenType.DECLARE: lambda self: self._parse_ydb_declare(),
|
|
578
616
|
TokenType.PARAMETER: lambda self: self._parse_ydb_named_expr(),
|
|
579
617
|
}
|
|
580
618
|
|
|
619
|
+
PRIMARY_PARSERS = {
|
|
620
|
+
**parser.Parser.PRIMARY_PARSERS,
|
|
621
|
+
TokenType.STRING: lambda self, token: self._parse_ydb_string(token),
|
|
622
|
+
}
|
|
623
|
+
|
|
581
624
|
def parse(self, raw_tokens, sql=None):
|
|
582
|
-
|
|
625
|
+
self.reset()
|
|
626
|
+
self.sql = sql or ""
|
|
627
|
+
|
|
628
|
+
chunks: t.List[t.List[tokens.Token]] = [[]]
|
|
629
|
+
brace_depth = 0
|
|
630
|
+
total = len(raw_tokens)
|
|
631
|
+
|
|
632
|
+
for i, token in enumerate(raw_tokens):
|
|
633
|
+
if token.token_type == TokenType.L_BRACE:
|
|
634
|
+
brace_depth += 1
|
|
635
|
+
elif token.token_type == TokenType.R_BRACE and brace_depth:
|
|
636
|
+
brace_depth -= 1
|
|
637
|
+
|
|
638
|
+
if token.token_type == TokenType.SEMICOLON and not brace_depth:
|
|
639
|
+
if token.comments:
|
|
640
|
+
chunks.append([token])
|
|
641
|
+
if i < total - 1:
|
|
642
|
+
chunks.append([])
|
|
643
|
+
else:
|
|
644
|
+
chunks[-1].append(token)
|
|
645
|
+
|
|
646
|
+
self._chunks = chunks
|
|
647
|
+
statements = self._parse_ydb_chunks(self.__class__._parse_statement, chunks)
|
|
583
648
|
return _reassemble_ctes(statements)
|
|
584
649
|
|
|
650
|
+
def _parse_ydb_chunks(self, parse_method, chunks):
|
|
651
|
+
expressions = []
|
|
652
|
+
|
|
653
|
+
# sqlglot >= 29 has this helper; 28.x only exposes the lower-level
|
|
654
|
+
# parser state, so keep a local fallback for the supported range.
|
|
655
|
+
if hasattr(self, "_parse_batch_statements"):
|
|
656
|
+
return self._parse_batch_statements(
|
|
657
|
+
parse_method=parse_method,
|
|
658
|
+
sep_first_statement=False,
|
|
659
|
+
)
|
|
660
|
+
|
|
661
|
+
for chunk in chunks:
|
|
662
|
+
self._index = -1
|
|
663
|
+
self._tokens = chunk
|
|
664
|
+
self._tokens_size = len(chunk)
|
|
665
|
+
self._advance()
|
|
666
|
+
|
|
667
|
+
expressions.append(parse_method(self))
|
|
668
|
+
|
|
669
|
+
if self._index < len(self._tokens):
|
|
670
|
+
self.raise_error("Invalid expression / Unexpected token")
|
|
671
|
+
|
|
672
|
+
self.check_errors()
|
|
673
|
+
|
|
674
|
+
return expressions
|
|
675
|
+
|
|
676
|
+
def expression(self, exp_class_or_instance, token=None, comments=None, **kwargs):
|
|
677
|
+
"""Bridge sqlglot's two `Parser.expression()` calling conventions.
|
|
678
|
+
|
|
679
|
+
sqlglot < 30 expects ``expression(cls, **kwargs)`` and instantiates internally.
|
|
680
|
+
sqlglot >= 30 expects a pre-built ``expression(instance)`` and rejects kwargs.
|
|
681
|
+
Several call sites in this dialect (and a few in upstream code paths we exercise)
|
|
682
|
+
mix both styles, so normalise here before delegating.
|
|
683
|
+
"""
|
|
684
|
+
if _EXPRESSION_TAKES_INSTANCE:
|
|
685
|
+
if not isinstance(exp_class_or_instance, exp.Expression):
|
|
686
|
+
exp_class_or_instance = exp_class_or_instance(**kwargs)
|
|
687
|
+
return super().expression(
|
|
688
|
+
exp_class_or_instance, token=token, comments=comments
|
|
689
|
+
)
|
|
690
|
+
|
|
691
|
+
if isinstance(exp_class_or_instance, exp.Expression):
|
|
692
|
+
# Old super() would attempt instance(**kwargs) -> "object is not callable".
|
|
693
|
+
instance = exp_class_or_instance
|
|
694
|
+
if token is not None:
|
|
695
|
+
update_positions = getattr(instance, "update_positions", None)
|
|
696
|
+
if update_positions is not None:
|
|
697
|
+
update_positions(token)
|
|
698
|
+
if comments:
|
|
699
|
+
instance.add_comments(comments)
|
|
700
|
+
else:
|
|
701
|
+
self._add_comments(instance)
|
|
702
|
+
return self.validate_expression(instance)
|
|
703
|
+
|
|
704
|
+
return super().expression(
|
|
705
|
+
exp_class_or_instance, token=token, comments=comments, **kwargs
|
|
706
|
+
)
|
|
707
|
+
|
|
585
708
|
def _parse_dcolon(self) -> t.Optional[exp.Expression]:
|
|
586
709
|
return self._parse_function(anonymous=True) or self._parse_var(any_token=True)
|
|
587
710
|
|
|
@@ -602,9 +725,40 @@ class YDB(Dialect):
|
|
|
602
725
|
)
|
|
603
726
|
)
|
|
604
727
|
|
|
728
|
+
def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
|
|
729
|
+
if self._match(TokenType.PARAMETER):
|
|
730
|
+
name = self._parse_var(any_token=True)
|
|
731
|
+
if not name:
|
|
732
|
+
return None
|
|
733
|
+
|
|
734
|
+
parameter = exp.Parameter(this=name)
|
|
735
|
+
if self._match(TokenType.PLACEHOLDER):
|
|
736
|
+
parameter.meta["optional"] = True
|
|
737
|
+
return self.expression(parameter)
|
|
738
|
+
|
|
739
|
+
arg = super()._parse_lambda_arg()
|
|
740
|
+
if arg and self._match(TokenType.PLACEHOLDER):
|
|
741
|
+
arg.meta["optional"] = True
|
|
742
|
+
return arg
|
|
743
|
+
|
|
605
744
|
def _parse_ydb_declare(self) -> exp.Declare:
|
|
606
745
|
items = self._parse_csv(self._parse_ydb_declareitem)
|
|
607
|
-
|
|
746
|
+
declare = self.expression(exp.Declare(expressions=items))
|
|
747
|
+
|
|
748
|
+
item_comments = {
|
|
749
|
+
comment
|
|
750
|
+
for item in items
|
|
751
|
+
if item is not None
|
|
752
|
+
for comment in (item.comments or [])
|
|
753
|
+
}
|
|
754
|
+
if item_comments and declare.comments:
|
|
755
|
+
declare.comments = [
|
|
756
|
+
comment
|
|
757
|
+
for comment in declare.comments
|
|
758
|
+
if comment.strip().startswith("!") or comment not in item_comments
|
|
759
|
+
]
|
|
760
|
+
|
|
761
|
+
return declare
|
|
608
762
|
|
|
609
763
|
def _parse_ydb_declareitem(self) -> t.Optional[exp.DeclareItem]:
|
|
610
764
|
if not self._match(TokenType.PARAMETER):
|
|
@@ -614,7 +768,61 @@ class YDB(Dialect):
|
|
|
614
768
|
return None
|
|
615
769
|
self._match(TokenType.ALIAS)
|
|
616
770
|
kind = self._parse_types()
|
|
617
|
-
|
|
771
|
+
comments = self._prev.comments if self._prev else None
|
|
772
|
+
return self.expression(
|
|
773
|
+
exp.DeclareItem(this=name, kind=kind),
|
|
774
|
+
comments=comments,
|
|
775
|
+
)
|
|
776
|
+
|
|
777
|
+
def _parse_ydb_string(self, token: tokens.Token) -> exp.Literal:
|
|
778
|
+
literal = self.expression(exp.Literal(this=token.text, is_string=True), token)
|
|
779
|
+
if (
|
|
780
|
+
self._curr
|
|
781
|
+
and self._curr.token_type == TokenType.VAR
|
|
782
|
+
and self._curr.text.lower() == "u"
|
|
783
|
+
and token.end + 1 == self._curr.start
|
|
784
|
+
):
|
|
785
|
+
literal.meta["ydb_string_suffix"] = self._curr.text
|
|
786
|
+
self._advance()
|
|
787
|
+
return literal
|
|
788
|
+
|
|
789
|
+
def _parse_unary(self) -> t.Optional[exp.Expression]:
|
|
790
|
+
return self._parse_ydb_postfix_calls(super()._parse_unary())
|
|
791
|
+
|
|
792
|
+
def _parse_type(
|
|
793
|
+
self,
|
|
794
|
+
parse_interval: bool = True,
|
|
795
|
+
fallback_to_identifier: bool = False,
|
|
796
|
+
) -> t.Optional[exp.Expression]:
|
|
797
|
+
if (
|
|
798
|
+
self._curr
|
|
799
|
+
and self._next
|
|
800
|
+
and self._curr.token_type == TokenType.STRING
|
|
801
|
+
and self._next.token_type == TokenType.VAR
|
|
802
|
+
and self._next.text.lower() == "u"
|
|
803
|
+
and self._curr.end + 1 == self._next.start
|
|
804
|
+
):
|
|
805
|
+
token = self._curr
|
|
806
|
+
suffix = self._next.text
|
|
807
|
+
self._advance(2)
|
|
808
|
+
literal = self.expression(exp.Literal(this=token.text, is_string=True), token)
|
|
809
|
+
literal.meta["ydb_string_suffix"] = suffix
|
|
810
|
+
return literal
|
|
811
|
+
|
|
812
|
+
return super()._parse_type(
|
|
813
|
+
parse_interval=parse_interval,
|
|
814
|
+
fallback_to_identifier=fallback_to_identifier,
|
|
815
|
+
)
|
|
816
|
+
|
|
817
|
+
def _parse_ydb_postfix_calls(
|
|
818
|
+
self,
|
|
819
|
+
expression: t.Optional[exp.Expression],
|
|
820
|
+
) -> t.Optional[exp.Expression]:
|
|
821
|
+
while expression is not None and self._match(TokenType.L_PAREN):
|
|
822
|
+
args = self._parse_function_args(alias=True)
|
|
823
|
+
self._match_r_paren(expression)
|
|
824
|
+
expression = self.expression(YdbPostfixCall(this=expression, expressions=args))
|
|
825
|
+
return expression
|
|
618
826
|
|
|
619
827
|
def _parse_types(self, *args, **kwargs) -> t.Optional[exp.Expression]:
|
|
620
828
|
# YDB generic types use Name<...> syntax; token type varies by keyword status
|
|
@@ -643,6 +851,17 @@ class YDB(Dialect):
|
|
|
643
851
|
nested=True,
|
|
644
852
|
)
|
|
645
853
|
|
|
854
|
+
if name == "Struct":
|
|
855
|
+
self._advance() # consume 'Struct'
|
|
856
|
+
self._advance() # consume '<'
|
|
857
|
+
fields = self._parse_csv(self._parse_ydb_struct_field)
|
|
858
|
+
self._match(TokenType.GT)
|
|
859
|
+
return exp.DataType(
|
|
860
|
+
this=exp.DataType.Type.STRUCT,
|
|
861
|
+
expressions=[field for field in fields if field],
|
|
862
|
+
nested=True,
|
|
863
|
+
)
|
|
864
|
+
|
|
646
865
|
if name == "Tuple":
|
|
647
866
|
self._advance() # consume 'Tuple'
|
|
648
867
|
self._advance() # consume '<'
|
|
@@ -667,13 +886,28 @@ class YDB(Dialect):
|
|
|
667
886
|
dtype.set("nullable", True)
|
|
668
887
|
return dtype
|
|
669
888
|
|
|
889
|
+
def _parse_ydb_struct_field(self) -> t.Optional[exp.ColumnDef]:
|
|
890
|
+
if self._curr and self._curr.token_type == TokenType.GT:
|
|
891
|
+
return None
|
|
892
|
+
|
|
893
|
+
field = self._parse_id_var(any_token=True)
|
|
894
|
+
if not field:
|
|
895
|
+
return None
|
|
896
|
+
|
|
897
|
+
if not self._match(TokenType.COLON):
|
|
898
|
+
self.raise_error("Expected colon after struct field name")
|
|
899
|
+
|
|
900
|
+
kind = self._parse_types()
|
|
901
|
+
return self.expression(exp.ColumnDef(this=field, kind=kind))
|
|
902
|
+
|
|
670
903
|
def _parse_table_alias(self, alias_tokens=None):
|
|
671
904
|
# Prevent YDB-specific keywords from being consumed as table aliases
|
|
672
|
-
if self._curr and self._curr.text.upper() in ("FLATTEN", "ASSUME"):
|
|
905
|
+
if self._curr and self._curr.text.upper() in ("FLATTEN", "ASSUME", "VIEW"):
|
|
673
906
|
# Also check that what follows is a YDB construct, not a regular alias
|
|
674
907
|
if self._next and (
|
|
675
908
|
self._next.text.upper() in ("BY", "LIST", "DICT")
|
|
676
909
|
or self._next.token_type == TokenType.ORDER_BY
|
|
910
|
+
or self._curr.text.upper() == "VIEW"
|
|
677
911
|
):
|
|
678
912
|
return None
|
|
679
913
|
return super()._parse_table_alias(alias_tokens=alias_tokens)
|
|
@@ -691,8 +925,70 @@ class YDB(Dialect):
|
|
|
691
925
|
this.set("order", self.expression(AssumeOrderBy(this=order)))
|
|
692
926
|
return super()._parse_query_modifiers(this)
|
|
693
927
|
|
|
928
|
+
def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
|
|
929
|
+
# This mirrors sqlglot's Parser._parse_group control flow so YDB keeps
|
|
930
|
+
# support for GROUP BY modifiers (WITH ROLLUP, CUBE, GROUPING SETS,
|
|
931
|
+
# TOTALS). The YDB-specific change is parsing each group item through
|
|
932
|
+
# _parse_alias(..., explicit=True), enabling `GROUP BY expr AS alias`.
|
|
933
|
+
if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
|
|
934
|
+
return None
|
|
935
|
+
comments = self._prev_comments
|
|
936
|
+
|
|
937
|
+
elements: dict[str, t.Any] = defaultdict(list)
|
|
938
|
+
|
|
939
|
+
if self._match(TokenType.ALL):
|
|
940
|
+
elements["all"] = True
|
|
941
|
+
elif self._match(TokenType.DISTINCT):
|
|
942
|
+
elements["all"] = False
|
|
943
|
+
|
|
944
|
+
if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False):
|
|
945
|
+
return self.expression(exp.Group(**elements), comments=comments)
|
|
946
|
+
|
|
947
|
+
def _parse_group_expression():
|
|
948
|
+
if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False):
|
|
949
|
+
return None
|
|
950
|
+
return self._parse_alias(self._parse_disjunction(), explicit=True)
|
|
951
|
+
|
|
952
|
+
while True:
|
|
953
|
+
index = self._index
|
|
954
|
+
|
|
955
|
+
elements["expressions"].extend(self._parse_csv(_parse_group_expression))
|
|
956
|
+
|
|
957
|
+
before_with_index = self._index
|
|
958
|
+
with_prefix = self._match(TokenType.WITH)
|
|
959
|
+
|
|
960
|
+
if cube_or_rollup := self._parse_cube_or_rollup(with_prefix=with_prefix):
|
|
961
|
+
key = "rollup" if isinstance(cube_or_rollup, exp.Rollup) else "cube"
|
|
962
|
+
elements[key].append(cube_or_rollup)
|
|
963
|
+
elif grouping_sets := self._parse_grouping_sets():
|
|
964
|
+
elements["grouping_sets"].append(grouping_sets)
|
|
965
|
+
elif self._match_text_seq("TOTALS"):
|
|
966
|
+
elements["totals"] = True
|
|
967
|
+
|
|
968
|
+
if before_with_index <= self._index <= before_with_index + 1:
|
|
969
|
+
self._retreat(before_with_index)
|
|
970
|
+
break
|
|
971
|
+
|
|
972
|
+
if index == self._index:
|
|
973
|
+
break
|
|
974
|
+
|
|
975
|
+
return self.expression(exp.Group(**elements), comments=comments)
|
|
976
|
+
|
|
694
977
|
def _parse_table(self, *args, **kwargs) -> t.Optional[exp.Expression]:
|
|
978
|
+
if self._match(TokenType.L_BRACKET):
|
|
979
|
+
parts = []
|
|
980
|
+
while self._curr and not self._match(TokenType.R_BRACKET, advance=False):
|
|
981
|
+
parts.append(self._curr.text)
|
|
982
|
+
self._advance()
|
|
983
|
+
self._match(TokenType.R_BRACKET)
|
|
984
|
+
table = self.expression(exp.Table(this=exp.to_identifier("".join(parts))))
|
|
985
|
+
table.set("alias", self._parse_table_alias())
|
|
986
|
+
return table
|
|
987
|
+
|
|
695
988
|
table = super()._parse_table(*args, **kwargs)
|
|
989
|
+
if table and self._match(TokenType.VIEW):
|
|
990
|
+
table.set("ydb_index_view", self._parse_id_var(any_token=True))
|
|
991
|
+
table.set("alias", self._parse_table_alias())
|
|
696
992
|
if table and self._curr and self._curr.text.upper() == "FLATTEN":
|
|
697
993
|
self._advance()
|
|
698
994
|
kind: t.Optional[str] = None
|
|
@@ -722,8 +1018,22 @@ class YDB(Dialect):
|
|
|
722
1018
|
return exp.EQ(this=key, expression=value)
|
|
723
1019
|
|
|
724
1020
|
def _parse_primary(self) -> t.Optional[exp.Expression]:
|
|
1021
|
+
if (
|
|
1022
|
+
self._curr
|
|
1023
|
+
and self._curr.token_type == TokenType.PARAMETER
|
|
1024
|
+
and self._next
|
|
1025
|
+
and self._next.token_type == TokenType.PARAMETER
|
|
1026
|
+
):
|
|
1027
|
+
return self._parse_at_raw_string()
|
|
1028
|
+
|
|
725
1029
|
if self._match(TokenType.L_PAREN):
|
|
726
1030
|
comments = self._prev_comments
|
|
1031
|
+
if self._next_matching_rparen_is_arrow():
|
|
1032
|
+
expressions = self._parse_csv(self._parse_lambda_arg)
|
|
1033
|
+
self._match_r_paren()
|
|
1034
|
+
self._match(TokenType.ARROW)
|
|
1035
|
+
return self._parse_ydb_lambda(expressions)
|
|
1036
|
+
|
|
727
1037
|
query = self._parse_select()
|
|
728
1038
|
|
|
729
1039
|
if query:
|
|
@@ -757,25 +1067,88 @@ class YDB(Dialect):
|
|
|
757
1067
|
return this
|
|
758
1068
|
return super()._parse_primary()
|
|
759
1069
|
|
|
1070
|
+
def _next_matching_rparen_is_arrow(self) -> bool:
|
|
1071
|
+
depth = 1
|
|
1072
|
+
# _tokens_size is not available in all supported sqlglot versions.
|
|
1073
|
+
for i in range(self._index, len(self._tokens)):
|
|
1074
|
+
token = self._tokens[i]
|
|
1075
|
+
if token.token_type == TokenType.L_PAREN:
|
|
1076
|
+
depth += 1
|
|
1077
|
+
elif token.token_type == TokenType.R_PAREN:
|
|
1078
|
+
depth -= 1
|
|
1079
|
+
if depth == 0:
|
|
1080
|
+
return (
|
|
1081
|
+
i + 1 < len(self._tokens)
|
|
1082
|
+
and self._tokens[i + 1].token_type == TokenType.ARROW
|
|
1083
|
+
)
|
|
1084
|
+
return False
|
|
1085
|
+
|
|
1086
|
+
def _parse_at_raw_string(self) -> YdbAtString:
|
|
1087
|
+
self._advance()
|
|
1088
|
+
self._advance()
|
|
1089
|
+
|
|
1090
|
+
parts = []
|
|
1091
|
+
while self._curr:
|
|
1092
|
+
if (
|
|
1093
|
+
self._curr.token_type == TokenType.PARAMETER
|
|
1094
|
+
and self._next
|
|
1095
|
+
and self._next.token_type == TokenType.PARAMETER
|
|
1096
|
+
):
|
|
1097
|
+
self._advance()
|
|
1098
|
+
self._advance()
|
|
1099
|
+
break
|
|
1100
|
+
|
|
1101
|
+
parts.append(self._curr.text)
|
|
1102
|
+
self._advance()
|
|
1103
|
+
|
|
1104
|
+
return self.expression(YdbAtString(this="".join(parts)))
|
|
1105
|
+
|
|
760
1106
|
def _parse_lambda_body(self, params):
|
|
761
1107
|
if (
|
|
762
|
-
self._curr
|
|
1108
|
+
self._curr is None
|
|
1109
|
+
or self._curr.token_type != TokenType.R_PAREN
|
|
1110
|
+
or self._next is None
|
|
763
1111
|
or self._next.token_type != TokenType.ARROW
|
|
764
1112
|
):
|
|
765
1113
|
return None
|
|
766
1114
|
self._advance()
|
|
767
1115
|
self._advance()
|
|
768
|
-
self.
|
|
1116
|
+
return self._parse_ydb_lambda(params)
|
|
1117
|
+
|
|
1118
|
+
def _parse_ydb_lambda(self, params):
|
|
1119
|
+
has_brace = self._match(TokenType.L_BRACE)
|
|
1120
|
+
assignments = []
|
|
1121
|
+
|
|
1122
|
+
if has_brace:
|
|
1123
|
+
while self._curr and self._curr.text != "RETURN":
|
|
1124
|
+
assignment = self._parse_ydb_named_expr()
|
|
1125
|
+
if not assignment:
|
|
1126
|
+
self.raise_error("Expected lambda body expression after '->'")
|
|
1127
|
+
assignments.append(assignment)
|
|
1128
|
+
self._match(TokenType.SEMICOLON)
|
|
1129
|
+
|
|
1130
|
+
if not self._match_text_seq("RETURN"):
|
|
1131
|
+
self.raise_error("Expected lambda body RETURN after '->'")
|
|
1132
|
+
|
|
1133
|
+
body = self._parse_disjunction()
|
|
769
1134
|
|
|
770
|
-
if not (self._curr.text == "RETURN"):
|
|
771
|
-
self.raise_error("Expected lambda body expression after '->'")
|
|
772
|
-
self._advance()
|
|
773
|
-
body = self._parse_conjunction()
|
|
774
1135
|
if not body:
|
|
775
1136
|
self.raise_error("Expected lambda body expression after '->'")
|
|
776
1137
|
|
|
777
|
-
self._match(TokenType.
|
|
778
|
-
|
|
1138
|
+
self._match(TokenType.SEMICOLON)
|
|
1139
|
+
if has_brace:
|
|
1140
|
+
self._match(TokenType.R_BRACE, expression=body)
|
|
1141
|
+
if assignments:
|
|
1142
|
+
body = self.expression(YdbLambdaBlock(this=body, expressions=assignments))
|
|
1143
|
+
|
|
1144
|
+
return self.expression(exp.Lambda(this=body, expressions=params))
|
|
1145
|
+
|
|
1146
|
+
def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
|
|
1147
|
+
if self._match_text_seq("COMPACT"):
|
|
1148
|
+
expression = self.expression(exp.In(this=this, field=self._parse_column()))
|
|
1149
|
+
expression.meta["compact"] = True
|
|
1150
|
+
return expression
|
|
1151
|
+
return super()._parse_in(this, alias=alias)
|
|
779
1152
|
|
|
780
1153
|
class Generator(generator.Generator):
|
|
781
1154
|
"""
|
|
@@ -870,6 +1243,10 @@ class YDB(Dialect):
|
|
|
870
1243
|
prefix = f"{expression.db}/" if expression.db else ""
|
|
871
1244
|
sql = f"`{prefix}{expression.name}`"
|
|
872
1245
|
|
|
1246
|
+
ydb_index_view = self.sql(expression, "ydb_index_view")
|
|
1247
|
+
if ydb_index_view:
|
|
1248
|
+
sql += f" VIEW {ydb_index_view}"
|
|
1249
|
+
|
|
873
1250
|
if expression.alias:
|
|
874
1251
|
sql += f" AS {expression.alias}"
|
|
875
1252
|
|
|
@@ -898,6 +1275,11 @@ class YDB(Dialect):
|
|
|
898
1275
|
expr = self.sql(expression, "expression")
|
|
899
1276
|
return f"{this}::{expr}"
|
|
900
1277
|
|
|
1278
|
+
def literal_sql(self, expression: exp.Literal) -> str:
|
|
1279
|
+
sql = super().literal_sql(expression)
|
|
1280
|
+
suffix = expression.meta.get("ydb_string_suffix")
|
|
1281
|
+
return f"{sql}{suffix}" if suffix else sql
|
|
1282
|
+
|
|
901
1283
|
def declareitem_sql(self, expression: exp.DeclareItem) -> str:
|
|
902
1284
|
name = self.sql(expression, "this")
|
|
903
1285
|
kind = self.sql(expression, "kind")
|
|
@@ -919,6 +1301,48 @@ class YDB(Dialect):
|
|
|
919
1301
|
sql = f"Tuple<{inner}>"
|
|
920
1302
|
return f"Optional<{sql}>" if expression.args.get("nullable") else sql
|
|
921
1303
|
|
|
1304
|
+
def ydbatstring_sql(self, expression: YdbAtString) -> str:
|
|
1305
|
+
return f"@@{expression.this}@@"
|
|
1306
|
+
|
|
1307
|
+
def ydbpostfixcall_sql(self, expression: YdbPostfixCall) -> str:
|
|
1308
|
+
this = self.sql(expression, "this")
|
|
1309
|
+
args = self.expressions(expression, flat=True)
|
|
1310
|
+
return f"{this}({args})"
|
|
1311
|
+
|
|
1312
|
+
def ydblambdablock_sql(self, expression: YdbLambdaBlock) -> str:
|
|
1313
|
+
assignments = [self.sql(assignment) for assignment in expression.expressions]
|
|
1314
|
+
statements = [*assignments, f"RETURN {self.sql(expression, 'this')}"]
|
|
1315
|
+
return "{ " + "; ".join(statements) + " }"
|
|
1316
|
+
|
|
1317
|
+
def in_sql(self, expression: exp.In) -> str:
|
|
1318
|
+
if expression.meta.get("compact"):
|
|
1319
|
+
return f"{self.sql(expression, 'this')} IN COMPACT {self.sql(expression, 'field')}"
|
|
1320
|
+
return super().in_sql(expression)
|
|
1321
|
+
|
|
1322
|
+
def maybe_comment(
|
|
1323
|
+
self,
|
|
1324
|
+
sql: str,
|
|
1325
|
+
expression: t.Optional[exp.Expression] = None,
|
|
1326
|
+
comments: t.Optional[t.List[str]] = None,
|
|
1327
|
+
separated: bool = False,
|
|
1328
|
+
) -> str:
|
|
1329
|
+
comments = (
|
|
1330
|
+
((expression and expression.comments) if comments is None else comments)
|
|
1331
|
+
if self.comments
|
|
1332
|
+
else None
|
|
1333
|
+
)
|
|
1334
|
+
if not comments:
|
|
1335
|
+
return sql
|
|
1336
|
+
|
|
1337
|
+
line_directives = [comment.strip() for comment in comments if comment.strip().startswith("!")]
|
|
1338
|
+
if not line_directives:
|
|
1339
|
+
return super().maybe_comment(sql, expression, comments=comments, separated=separated)
|
|
1340
|
+
|
|
1341
|
+
remaining_comments = [comment for comment in comments if not comment.strip().startswith("!")]
|
|
1342
|
+
sql = super().maybe_comment(sql, expression, comments=remaining_comments, separated=separated)
|
|
1343
|
+
prefix = "\n".join(f"--{directive}" for directive in line_directives)
|
|
1344
|
+
return f"{prefix}\n{sql}" if sql else prefix
|
|
1345
|
+
|
|
922
1346
|
def alias_sql(self, expression: exp.Alias) -> str:
|
|
923
1347
|
alias = expression.args.get("alias")
|
|
924
1348
|
if alias and alias.name.startswith("$"):
|
|
@@ -1063,6 +1487,12 @@ class YDB(Dialect):
|
|
|
1063
1487
|
"""
|
|
1064
1488
|
nullable = expression.args.get("nullable")
|
|
1065
1489
|
|
|
1490
|
+
def _struct_field_sql(field: exp.Expression) -> str:
|
|
1491
|
+
if isinstance(field, exp.Identifier) and field.args.get("quoted"):
|
|
1492
|
+
name = field.name.replace("'", "\\'")
|
|
1493
|
+
return f"'{name}'"
|
|
1494
|
+
return self.sql(field)
|
|
1495
|
+
|
|
1066
1496
|
# YDB generic container types rendered with <> syntax and correct casing
|
|
1067
1497
|
if expression.args.get("nested"):
|
|
1068
1498
|
type_value = expression.this
|
|
@@ -1080,6 +1510,15 @@ class YDB(Dialect):
|
|
|
1080
1510
|
sql = f"Tuple<{inner}>"
|
|
1081
1511
|
return f"Optional<{sql}>" if nullable else sql
|
|
1082
1512
|
|
|
1513
|
+
if type_value == exp.DataType.Type.STRUCT:
|
|
1514
|
+
inner = ", ".join(
|
|
1515
|
+
f"{_struct_field_sql(col.this)}: {self.sql(col, 'kind')}"
|
|
1516
|
+
for col in expression.expressions
|
|
1517
|
+
if isinstance(col, exp.ColumnDef)
|
|
1518
|
+
)
|
|
1519
|
+
sql = f"Struct<{inner}>"
|
|
1520
|
+
return f"Optional<{sql}>" if nullable else sql
|
|
1521
|
+
|
|
1083
1522
|
inner = ", ".join(self.sql(e) for e in expression.expressions)
|
|
1084
1523
|
name = {
|
|
1085
1524
|
exp.DataType.Type.LIST: "List",
|
|
@@ -1405,7 +1844,7 @@ class YDB(Dialect):
|
|
|
1405
1844
|
f"decorrelated in YDB — rewrite manually using a $variable subquery"
|
|
1406
1845
|
)
|
|
1407
1846
|
continue
|
|
1408
|
-
if scope.external_columns:
|
|
1847
|
+
if scope.external_columns and scope.scope_type != ScopeType.CTE:
|
|
1409
1848
|
self.decorrelate(select, parent, scope.external_columns, next_alias_name)
|
|
1410
1849
|
if scope.scope_type == ScopeType.SUBQUERY:
|
|
1411
1850
|
self.unnest(select, parent, next_alias_name)
|
|
@@ -1459,6 +1898,9 @@ class YDB(Dialect):
|
|
|
1459
1898
|
"""
|
|
1460
1899
|
Unnests a subquery by transforming it into a join
|
|
1461
1900
|
"""
|
|
1901
|
+
if isinstance(select.parent, exp.CTE):
|
|
1902
|
+
return
|
|
1903
|
+
|
|
1462
1904
|
if len(select.selects) > 1:
|
|
1463
1905
|
return
|
|
1464
1906
|
self.ensure_select_aliases(select)
|
|
@@ -2031,14 +2473,30 @@ class YDB(Dialect):
|
|
|
2031
2473
|
Returns:
|
|
2032
2474
|
YDB-specific SQL for lambda functions
|
|
2033
2475
|
"""
|
|
2034
|
-
|
|
2035
|
-
|
|
2036
|
-
|
|
2037
|
-
|
|
2476
|
+
def _arg_name(arg: exp.Expression) -> str:
|
|
2477
|
+
if isinstance(arg, exp.Parameter):
|
|
2478
|
+
return arg.name
|
|
2479
|
+
return arg.name if hasattr(arg, "name") else self.sql(arg).lstrip("$")
|
|
2038
2480
|
|
|
2039
|
-
|
|
2040
|
-
|
|
2041
|
-
|
|
2481
|
+
def _arg_sql(arg: exp.Expression) -> str:
|
|
2482
|
+
name = _arg_name(arg)
|
|
2483
|
+
sql = f"${name}" if name and not name.startswith("$") else self.sql(arg)
|
|
2484
|
+
return f"{sql}?" if arg.meta.get("optional") else sql
|
|
2485
|
+
|
|
2486
|
+
def _prefix_lambda_refs(node: exp.Expression) -> exp.Expression:
|
|
2487
|
+
if (
|
|
2488
|
+
isinstance(node, exp.Identifier)
|
|
2489
|
+
and not node.name.startswith("$")
|
|
2490
|
+
):
|
|
2491
|
+
return exp.Identifier(this=f"${node.name}", quoted=False)
|
|
2492
|
+
return node
|
|
2493
|
+
|
|
2494
|
+
args = ", ".join(_arg_sql(arg) for arg in expression.expressions)
|
|
2495
|
+
body = expression.this.copy().transform(_prefix_lambda_refs)
|
|
2496
|
+
if isinstance(body, YdbLambdaBlock):
|
|
2497
|
+
return f"({args}) {arrow_sep} {self.sql(body)}"
|
|
2498
|
+
body_sql = self.sql(body)
|
|
2499
|
+
return f"({args}) {arrow_sep} {body_sql if isinstance(body, exp.Paren) else f'({body_sql})'}"
|
|
2042
2500
|
|
|
2043
2501
|
def _is_simple_expression(self, expr: exp.Expression) -> bool:
|
|
2044
2502
|
"""
|
|
@@ -2536,7 +2994,7 @@ class YDB(Dialect):
|
|
|
2536
2994
|
transformed.append(alias_map[gb_sql].copy())
|
|
2537
2995
|
elif isinstance(gb_expr, (exp.Column, exp.Identifier)):
|
|
2538
2996
|
# Add column AS alias so YDB resolves unambiguously.
|
|
2539
|
-
# Strip any table qualifier from the column (e.g. y.a
|
|
2997
|
+
# Strip any table qualifier from the column (e.g. y.a -> a).
|
|
2540
2998
|
# Use the SELECT-level alias if the column is aliased there
|
|
2541
2999
|
# (e.g. `a_id AS _u_1` in SELECT means GROUP BY `a_id AS _u_1`).
|
|
2542
3000
|
column_name = gb_expr.alias_or_name
|
|
@@ -2557,6 +3015,8 @@ class YDB(Dialect):
|
|
|
2557
3015
|
# Build the GROUP BY clause
|
|
2558
3016
|
if group_by_items:
|
|
2559
3017
|
result = f" GROUP BY {group_by_items}"
|
|
3018
|
+
elif not (rollup or cube or grouping_sets):
|
|
3019
|
+
return ""
|
|
2560
3020
|
else:
|
|
2561
3021
|
result = " GROUP BY"
|
|
2562
3022
|
|
|
@@ -2657,6 +3117,9 @@ class YDB(Dialect):
|
|
|
2657
3117
|
FlattenBy: lambda self, e: self.flattenby_sql(e),
|
|
2658
3118
|
AssumeOrderBy: lambda self, e: self.assumeorderby_sql(e),
|
|
2659
3119
|
YdbTuple: lambda self, e: self.ydbtuple_sql(e),
|
|
3120
|
+
YdbAtString: lambda self, e: self.ydbatstring_sql(e),
|
|
3121
|
+
YdbPostfixCall: lambda self, e: self.ydbpostfixcall_sql(e),
|
|
3122
|
+
YdbLambdaBlock: lambda self, e: self.ydblambdablock_sql(e),
|
|
2660
3123
|
exp.Create: create_sql,
|
|
2661
3124
|
exp.DefaultColumnConstraint: lambda self, e: "",
|
|
2662
3125
|
exp.DateTrunc: _date_trunc_sql,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ydb-sqlglot-plugin
|
|
3
|
-
Version: 0.2.1
|
|
3
|
+
Version: 0.2.3
|
|
4
4
|
Summary: YDB dialect plugin for sqlglot
|
|
5
5
|
Author: YDB Team
|
|
6
6
|
License: Apache-2.0
|
|
@@ -102,6 +102,31 @@ LEFT JOIN (
|
|
|
102
102
|
|
|
103
103
|
The same rewriting applies to `EXISTS`, `IN (subquery)`, and `ANY/ALL` subqueries.
|
|
104
104
|
|
|
105
|
+
#### GROUP BY aliases
|
|
106
|
+
|
|
107
|
+
YDB accepts aliases directly inside `GROUP BY` items. The generator uses this
|
|
108
|
+
form for grouped columns so later clauses and decorrelated subqueries can refer
|
|
109
|
+
to a stable grouping name:
|
|
110
|
+
|
|
111
|
+
```sql
|
|
112
|
+
-- input
|
|
113
|
+
SELECT user_id, COUNT(*) FROM events GROUP BY user_id
|
|
114
|
+
|
|
115
|
+
-- output
|
|
116
|
+
SELECT user_id, COUNT(*) FROM `events` GROUP BY user_id AS user_id
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
If a grouped column is selected under a generated alias, the `GROUP BY` item uses
|
|
120
|
+
that alias as well:
|
|
121
|
+
|
|
122
|
+
```sql
|
|
123
|
+
SELECT user_id AS _u_1, COUNT(*) FROM `events` GROUP BY user_id AS _u_1
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
Positional `GROUP BY` references are expanded before generation. When a
|
|
127
|
+
positional reference points to a constant expression, the grouping item is
|
|
128
|
+
removed because YDB rejects grouping by constants.
|
|
129
|
+
|
|
105
130
|
---
|
|
106
131
|
|
|
107
132
|
### YDB → any SQL
|
|
@@ -119,7 +144,10 @@ The plugin parses YDB/YQL back into sqlglot's AST, enabling round-trips, YDB-to-
|
|
|
119
144
|
| `Optional<T>` / `T?` | `CAST(x AS Optional<Utf8>)` |
|
|
120
145
|
| Container types | `CAST(x AS List<Int32>)`, `Dict<Utf8, Int64>`, `Set<Utf8>`, `Tuple<Int32, Utf8>` |
|
|
121
146
|
| `ASSUME ORDER BY` | `SELECT * FROM t ASSUME ORDER BY id` |
|
|
147
|
+
| `GROUP BY expr AS alias` | `SELECT v, COUNT(*) FROM t GROUP BY v AS v` |
|
|
122
148
|
| Named expressions | `$t = (SELECT 1 AS x)` |
|
|
149
|
+
| Lambda expressions | `($x, $y?) -> ($x + COALESCE($y, 0))`, `($y) -> { $p = "x"; RETURN $p \|\| $y }` |
|
|
150
|
+
| `IN COMPACT` | `WHERE key IN COMPACT $values` |
|
|
123
151
|
| `PRAGMA` | `PRAGMA AnsiImplicitCrossJoin` |
|
|
124
152
|
|
|
125
153
|
Table names without backticks are accepted on input; the generator always produces backtick-quoted output.
|
|
@@ -204,11 +232,30 @@ Functions below are recognized by sqlglot as standard SQL expressions and transl
|
|
|
204
232
|
|---|---|
|
|
205
233
|
| `ARRAY(v1, v2, ...)` | `AsList(v1, v2, ...)` |
|
|
206
234
|
| `ARRAY_LENGTH(x)` / `ARRAY_SIZE(x)` | `ListLength(x)` |
|
|
207
|
-
| `ARRAY_FILTER(arr, x -> cond)` | `ListFilter(arr, ($x) ->
|
|
208
|
-
| `ARRAY_ANY(arr, x -> cond)` | `ListHasItems(ListFilter(arr, ($x) ->
|
|
235
|
+
| `ARRAY_FILTER(arr, x -> cond)` | `ListFilter(arr, ($x) -> (cond))` |
|
|
236
|
+
| `ARRAY_ANY(arr, x -> cond)` | `ListHasItems(ListFilter(arr, ($x) -> (cond)))` |
|
|
209
237
|
| `ARRAY_AGG(x)` | `AGGREGATE_LIST(x)` |
|
|
210
238
|
| `UNNEST(x)` | `FLATTEN BY x` |
|
|
211
239
|
|
|
240
|
+
Lambda expressions are represented with sqlglot's standard `exp.Lambda` AST node.
|
|
241
|
+
When a source dialect parses lambdas, the YDB generator emits YQL lambda syntax:
|
|
242
|
+
|
|
243
|
+
```sql
|
|
244
|
+
-- DuckDB input
|
|
245
|
+
SELECT list_filter(arr, x -> x > 0) FROM t
|
|
246
|
+
|
|
247
|
+
-- YDB output
|
|
248
|
+
SELECT ListFilter(arr, ($x) -> ($x > 0)) FROM `t`
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
YDB input also supports documented YQL lambda forms, including optional
|
|
252
|
+
arguments and block bodies with local named expressions:
|
|
253
|
+
|
|
254
|
+
```sql
|
|
255
|
+
($x, $y?) -> ($x + COALESCE($y, 0));
|
|
256
|
+
($y) -> { $prefix = "x"; RETURN $prefix || $y; };
|
|
257
|
+
```
|
|
258
|
+
|
|
212
259
|
### Conditional / math
|
|
213
260
|
|
|
214
261
|
| Input | YQL output |
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
VERSION = "0.2.1"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ydb_sqlglot_plugin-0.2.1 → ydb_sqlglot_plugin-0.2.3}/ydb_sqlglot_plugin.egg-info/SOURCES.txt
RENAMED
|
File without changes
|
|
File without changes
|
{ydb_sqlglot_plugin-0.2.1 → ydb_sqlglot_plugin-0.2.3}/ydb_sqlglot_plugin.egg-info/entry_points.txt
RENAMED
|
File without changes
|
{ydb_sqlglot_plugin-0.2.1 → ydb_sqlglot_plugin-0.2.3}/ydb_sqlglot_plugin.egg-info/requires.txt
RENAMED
|
File without changes
|
{ydb_sqlglot_plugin-0.2.1 → ydb_sqlglot_plugin-0.2.3}/ydb_sqlglot_plugin.egg-info/top_level.txt
RENAMED
|
File without changes
|