ydb-sqlglot-plugin 0.2.2__tar.gz → 0.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ydb_sqlglot_plugin-0.2.2 → ydb_sqlglot_plugin-0.2.3}/PKG-INFO +50 -3
- {ydb_sqlglot_plugin-0.2.2 → ydb_sqlglot_plugin-0.2.3}/README.md +49 -2
- {ydb_sqlglot_plugin-0.2.2 → ydb_sqlglot_plugin-0.2.3}/pyproject.toml +1 -1
- ydb_sqlglot_plugin-0.2.3/ydb_sqlglot/version.py +1 -0
- {ydb_sqlglot_plugin-0.2.2 → ydb_sqlglot_plugin-0.2.3}/ydb_sqlglot/ydb.py +441 -20
- {ydb_sqlglot_plugin-0.2.2 → ydb_sqlglot_plugin-0.2.3}/ydb_sqlglot_plugin.egg-info/PKG-INFO +50 -3
- ydb_sqlglot_plugin-0.2.2/ydb_sqlglot/version.py +0 -1
- {ydb_sqlglot_plugin-0.2.2 → ydb_sqlglot_plugin-0.2.3}/LICENSE +0 -0
- {ydb_sqlglot_plugin-0.2.2 → ydb_sqlglot_plugin-0.2.3}/setup.cfg +0 -0
- {ydb_sqlglot_plugin-0.2.2 → ydb_sqlglot_plugin-0.2.3}/ydb_sqlglot/__init__.py +0 -0
- {ydb_sqlglot_plugin-0.2.2 → ydb_sqlglot_plugin-0.2.3}/ydb_sqlglot_plugin.egg-info/SOURCES.txt +0 -0
- {ydb_sqlglot_plugin-0.2.2 → ydb_sqlglot_plugin-0.2.3}/ydb_sqlglot_plugin.egg-info/dependency_links.txt +0 -0
- {ydb_sqlglot_plugin-0.2.2 → ydb_sqlglot_plugin-0.2.3}/ydb_sqlglot_plugin.egg-info/entry_points.txt +0 -0
- {ydb_sqlglot_plugin-0.2.2 → ydb_sqlglot_plugin-0.2.3}/ydb_sqlglot_plugin.egg-info/requires.txt +0 -0
- {ydb_sqlglot_plugin-0.2.2 → ydb_sqlglot_plugin-0.2.3}/ydb_sqlglot_plugin.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ydb-sqlglot-plugin
|
|
3
|
-
Version: 0.2.2
|
|
3
|
+
Version: 0.2.3
|
|
4
4
|
Summary: YDB dialect plugin for sqlglot
|
|
5
5
|
Author: YDB Team
|
|
6
6
|
License: Apache-2.0
|
|
@@ -102,6 +102,31 @@ LEFT JOIN (
|
|
|
102
102
|
|
|
103
103
|
The same rewriting applies to `EXISTS`, `IN (subquery)`, and `ANY/ALL` subqueries.
|
|
104
104
|
|
|
105
|
+
#### GROUP BY aliases
|
|
106
|
+
|
|
107
|
+
YDB accepts aliases directly inside `GROUP BY` items. The generator uses this
|
|
108
|
+
form for grouped columns so later clauses and decorrelated subqueries can refer
|
|
109
|
+
to a stable grouping name:
|
|
110
|
+
|
|
111
|
+
```sql
|
|
112
|
+
-- input
|
|
113
|
+
SELECT user_id, COUNT(*) FROM events GROUP BY user_id
|
|
114
|
+
|
|
115
|
+
-- output
|
|
116
|
+
SELECT user_id, COUNT(*) FROM `events` GROUP BY user_id AS user_id
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
If a grouped column is selected under a generated alias, the `GROUP BY` item uses
|
|
120
|
+
that alias as well:
|
|
121
|
+
|
|
122
|
+
```sql
|
|
123
|
+
SELECT user_id AS _u_1, COUNT(*) FROM `events` GROUP BY user_id AS _u_1
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
Positional `GROUP BY` references are expanded before generation. When a
|
|
127
|
+
positional reference points to a constant expression, the grouping item is
|
|
128
|
+
removed because YDB rejects grouping by constants.
|
|
129
|
+
|
|
105
130
|
---
|
|
106
131
|
|
|
107
132
|
### YDB → any SQL
|
|
@@ -119,7 +144,10 @@ The plugin parses YDB/YQL back into sqlglot's AST, enabling round-trips, YDB-to-
|
|
|
119
144
|
| `Optional<T>` / `T?` | `CAST(x AS Optional<Utf8>)` |
|
|
120
145
|
| Container types | `CAST(x AS List<Int32>)`, `Dict<Utf8, Int64>`, `Set<Utf8>`, `Tuple<Int32, Utf8>` |
|
|
121
146
|
| `ASSUME ORDER BY` | `SELECT * FROM t ASSUME ORDER BY id` |
|
|
147
|
+
| `GROUP BY expr AS alias` | `SELECT v, COUNT(*) FROM t GROUP BY v AS v` |
|
|
122
148
|
| Named expressions | `$t = (SELECT 1 AS x)` |
|
|
149
|
+
| Lambda expressions | `($x, $y?) -> ($x + COALESCE($y, 0))`, `($y) -> { $p = "x"; RETURN $p \|\| $y }` |
|
|
150
|
+
| `IN COMPACT` | `WHERE key IN COMPACT $values` |
|
|
123
151
|
| `PRAGMA` | `PRAGMA AnsiImplicitCrossJoin` |
|
|
124
152
|
|
|
125
153
|
Table names without backticks are accepted on input; the generator always produces backtick-quoted output.
|
|
@@ -204,11 +232,30 @@ Functions below are recognized by sqlglot as standard SQL expressions and transl
|
|
|
204
232
|
|---|---|
|
|
205
233
|
| `ARRAY(v1, v2, ...)` | `AsList(v1, v2, ...)` |
|
|
206
234
|
| `ARRAY_LENGTH(x)` / `ARRAY_SIZE(x)` | `ListLength(x)` |
|
|
207
|
-
| `ARRAY_FILTER(arr, x -> cond)` | `ListFilter(arr, ($x) ->
|
|
208
|
-
| `ARRAY_ANY(arr, x -> cond)` | `ListHasItems(ListFilter(arr, ($x) ->
|
|
235
|
+
| `ARRAY_FILTER(arr, x -> cond)` | `ListFilter(arr, ($x) -> (cond))` |
|
|
236
|
+
| `ARRAY_ANY(arr, x -> cond)` | `ListHasItems(ListFilter(arr, ($x) -> (cond)))` |
|
|
209
237
|
| `ARRAY_AGG(x)` | `AGGREGATE_LIST(x)` |
|
|
210
238
|
| `UNNEST(x)` | `FLATTEN BY x` |
|
|
211
239
|
|
|
240
|
+
Lambda expressions are represented with sqlglot's standard `exp.Lambda` AST node.
|
|
241
|
+
When a source dialect parses lambdas, the YDB generator emits YQL lambda syntax:
|
|
242
|
+
|
|
243
|
+
```sql
|
|
244
|
+
-- DuckDB input
|
|
245
|
+
SELECT list_filter(arr, x -> x > 0) FROM t
|
|
246
|
+
|
|
247
|
+
-- YDB output
|
|
248
|
+
SELECT ListFilter(arr, ($x) -> ($x > 0)) FROM `t`
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
YDB input also supports documented YQL lambda forms, including optional
|
|
252
|
+
arguments and block bodies with local named expressions:
|
|
253
|
+
|
|
254
|
+
```sql
|
|
255
|
+
($x, $y?) -> ($x + COALESCE($y, 0));
|
|
256
|
+
($y) -> { $prefix = "x"; RETURN $prefix || $y; };
|
|
257
|
+
```
|
|
258
|
+
|
|
212
259
|
### Conditional / math
|
|
213
260
|
|
|
214
261
|
| Input | YQL output |
|
|
@@ -75,6 +75,31 @@ LEFT JOIN (
|
|
|
75
75
|
|
|
76
76
|
The same rewriting applies to `EXISTS`, `IN (subquery)`, and `ANY/ALL` subqueries.
|
|
77
77
|
|
|
78
|
+
#### GROUP BY aliases
|
|
79
|
+
|
|
80
|
+
YDB accepts aliases directly inside `GROUP BY` items. The generator uses this
|
|
81
|
+
form for grouped columns so later clauses and decorrelated subqueries can refer
|
|
82
|
+
to a stable grouping name:
|
|
83
|
+
|
|
84
|
+
```sql
|
|
85
|
+
-- input
|
|
86
|
+
SELECT user_id, COUNT(*) FROM events GROUP BY user_id
|
|
87
|
+
|
|
88
|
+
-- output
|
|
89
|
+
SELECT user_id, COUNT(*) FROM `events` GROUP BY user_id AS user_id
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
If a grouped column is selected under a generated alias, the `GROUP BY` item uses
|
|
93
|
+
that alias as well:
|
|
94
|
+
|
|
95
|
+
```sql
|
|
96
|
+
SELECT user_id AS _u_1, COUNT(*) FROM `events` GROUP BY user_id AS _u_1
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
Positional `GROUP BY` references are expanded before generation. When a
|
|
100
|
+
positional reference points to a constant expression, the grouping item is
|
|
101
|
+
removed because YDB rejects grouping by constants.
|
|
102
|
+
|
|
78
103
|
---
|
|
79
104
|
|
|
80
105
|
### YDB → any SQL
|
|
@@ -92,7 +117,10 @@ The plugin parses YDB/YQL back into sqlglot's AST, enabling round-trips, YDB-to-
|
|
|
92
117
|
| `Optional<T>` / `T?` | `CAST(x AS Optional<Utf8>)` |
|
|
93
118
|
| Container types | `CAST(x AS List<Int32>)`, `Dict<Utf8, Int64>`, `Set<Utf8>`, `Tuple<Int32, Utf8>` |
|
|
94
119
|
| `ASSUME ORDER BY` | `SELECT * FROM t ASSUME ORDER BY id` |
|
|
120
|
+
| `GROUP BY expr AS alias` | `SELECT v, COUNT(*) FROM t GROUP BY v AS v` |
|
|
95
121
|
| Named expressions | `$t = (SELECT 1 AS x)` |
|
|
122
|
+
| Lambda expressions | `($x, $y?) -> ($x + COALESCE($y, 0))`, `($y) -> { $p = "x"; RETURN $p \|\| $y }` |
|
|
123
|
+
| `IN COMPACT` | `WHERE key IN COMPACT $values` |
|
|
96
124
|
| `PRAGMA` | `PRAGMA AnsiImplicitCrossJoin` |
|
|
97
125
|
|
|
98
126
|
Table names without backticks are accepted on input; the generator always produces backtick-quoted output.
|
|
@@ -177,11 +205,30 @@ Functions below are recognized by sqlglot as standard SQL expressions and transl
|
|
|
177
205
|
|---|---|
|
|
178
206
|
| `ARRAY(v1, v2, ...)` | `AsList(v1, v2, ...)` |
|
|
179
207
|
| `ARRAY_LENGTH(x)` / `ARRAY_SIZE(x)` | `ListLength(x)` |
|
|
180
|
-
| `ARRAY_FILTER(arr, x -> cond)` | `ListFilter(arr, ($x) ->
|
|
181
|
-
| `ARRAY_ANY(arr, x -> cond)` | `ListHasItems(ListFilter(arr, ($x) ->
|
|
208
|
+
| `ARRAY_FILTER(arr, x -> cond)` | `ListFilter(arr, ($x) -> (cond))` |
|
|
209
|
+
| `ARRAY_ANY(arr, x -> cond)` | `ListHasItems(ListFilter(arr, ($x) -> (cond)))` |
|
|
182
210
|
| `ARRAY_AGG(x)` | `AGGREGATE_LIST(x)` |
|
|
183
211
|
| `UNNEST(x)` | `FLATTEN BY x` |
|
|
184
212
|
|
|
213
|
+
Lambda expressions are represented with sqlglot's standard `exp.Lambda` AST node.
|
|
214
|
+
When a source dialect parses lambdas, the YDB generator emits YQL lambda syntax:
|
|
215
|
+
|
|
216
|
+
```sql
|
|
217
|
+
-- DuckDB input
|
|
218
|
+
SELECT list_filter(arr, x -> x > 0) FROM t
|
|
219
|
+
|
|
220
|
+
-- YDB output
|
|
221
|
+
SELECT ListFilter(arr, ($x) -> ($x > 0)) FROM `t`
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
YDB input also supports documented YQL lambda forms, including optional
|
|
225
|
+
arguments and block bodies with local named expressions:
|
|
226
|
+
|
|
227
|
+
```sql
|
|
228
|
+
($x, $y?) -> ($x + COALESCE($y, 0));
|
|
229
|
+
($y) -> { $prefix = "x"; RETURN $prefix || $y; };
|
|
230
|
+
```
|
|
231
|
+
|
|
185
232
|
### Conditional / math
|
|
186
233
|
|
|
187
234
|
| Input | YQL output |
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
VERSION = "0.2.3"
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import inspect as _inspect
|
|
2
2
|
import re
|
|
3
3
|
import typing as t
|
|
4
|
+
from collections import defaultdict
|
|
4
5
|
|
|
5
6
|
from sqlglot import Generator, TokenType, exp, generator, parser, tokens, transforms
|
|
6
7
|
from sqlglot.dialects.dialect import Dialect, NormalizationStrategy, concat_to_dpipe_sql, unit_to_var
|
|
@@ -176,6 +177,8 @@ def _wrap_udf_group_by(expression: exp.Expression) -> None:
|
|
|
176
177
|
# Resolve each GROUP BY expression to its "effective" form
|
|
177
178
|
# (expand alias references to the aliased expression).
|
|
178
179
|
def _effective(e: exp.Expression) -> exp.Expression:
|
|
180
|
+
if isinstance(e, exp.Alias):
|
|
181
|
+
return e.this
|
|
179
182
|
if isinstance(e, exp.Column):
|
|
180
183
|
name = e.name
|
|
181
184
|
if name in alias_to_expr:
|
|
@@ -419,6 +422,21 @@ class YdbTuple(exp.Expression):
|
|
|
419
422
|
arg_types = {"expressions": True, "nullable": False}
|
|
420
423
|
|
|
421
424
|
|
|
425
|
+
class YdbAtString(exp.Expression):
|
|
426
|
+
"""YDB @@...@@ string literal."""
|
|
427
|
+
arg_types = {"this": True}
|
|
428
|
+
|
|
429
|
+
|
|
430
|
+
class YdbPostfixCall(exp.Expression):
|
|
431
|
+
"""YDB call of an expression result, e.g. $grep(x) or DateTime::Format(fmt)(ts)."""
|
|
432
|
+
arg_types = {"this": True, "expressions": False}
|
|
433
|
+
|
|
434
|
+
|
|
435
|
+
class YdbLambdaBlock(exp.Expression):
|
|
436
|
+
"""YDB lambda body with local named expressions followed by RETURN."""
|
|
437
|
+
arg_types = {"this": True, "expressions": False}
|
|
438
|
+
|
|
439
|
+
|
|
422
440
|
# Container types that use Generic<T, ...> syntax in YDB
|
|
423
441
|
_YDB_GENERIC_TYPES = {
|
|
424
442
|
"List": exp.DataType.Type.LIST,
|
|
@@ -457,6 +475,9 @@ def _reassemble_ctes(
|
|
|
457
475
|
pending_names.clear()
|
|
458
476
|
|
|
459
477
|
for stmt in statements:
|
|
478
|
+
if stmt is None:
|
|
479
|
+
continue
|
|
480
|
+
|
|
460
481
|
if (
|
|
461
482
|
isinstance(stmt, exp.Alias)
|
|
462
483
|
and isinstance(stmt.args.get("alias"), exp.Identifier)
|
|
@@ -467,6 +488,10 @@ def _reassemble_ctes(
|
|
|
467
488
|
# Unwrap Subquery — CTE.this must be Select, not Subquery
|
|
468
489
|
if isinstance(inner, exp.Subquery):
|
|
469
490
|
inner = inner.this
|
|
491
|
+
if not isinstance(inner, exp.Query):
|
|
492
|
+
_flush_as_aliases()
|
|
493
|
+
result.append(stmt)
|
|
494
|
+
continue
|
|
470
495
|
# Replace any $prev_cte refs inside this CTE body
|
|
471
496
|
inner = _replace_param_table_refs(inner, pending_names)
|
|
472
497
|
pending_aliases.append(stmt)
|
|
@@ -580,16 +605,74 @@ class YDB(Dialect):
|
|
|
580
605
|
),
|
|
581
606
|
}
|
|
582
607
|
|
|
608
|
+
LAMBDAS = {
|
|
609
|
+
**parser.Parser.LAMBDAS,
|
|
610
|
+
TokenType.ARROW: lambda self, expressions: self._parse_ydb_lambda(expressions),
|
|
611
|
+
}
|
|
612
|
+
|
|
583
613
|
STATEMENT_PARSERS = {
|
|
584
614
|
**parser.Parser.STATEMENT_PARSERS,
|
|
585
615
|
TokenType.DECLARE: lambda self: self._parse_ydb_declare(),
|
|
586
616
|
TokenType.PARAMETER: lambda self: self._parse_ydb_named_expr(),
|
|
587
617
|
}
|
|
588
618
|
|
|
619
|
+
PRIMARY_PARSERS = {
|
|
620
|
+
**parser.Parser.PRIMARY_PARSERS,
|
|
621
|
+
TokenType.STRING: lambda self, token: self._parse_ydb_string(token),
|
|
622
|
+
}
|
|
623
|
+
|
|
589
624
|
def parse(self, raw_tokens, sql=None):
|
|
590
|
-
|
|
625
|
+
self.reset()
|
|
626
|
+
self.sql = sql or ""
|
|
627
|
+
|
|
628
|
+
chunks: t.List[t.List[tokens.Token]] = [[]]
|
|
629
|
+
brace_depth = 0
|
|
630
|
+
total = len(raw_tokens)
|
|
631
|
+
|
|
632
|
+
for i, token in enumerate(raw_tokens):
|
|
633
|
+
if token.token_type == TokenType.L_BRACE:
|
|
634
|
+
brace_depth += 1
|
|
635
|
+
elif token.token_type == TokenType.R_BRACE and brace_depth:
|
|
636
|
+
brace_depth -= 1
|
|
637
|
+
|
|
638
|
+
if token.token_type == TokenType.SEMICOLON and not brace_depth:
|
|
639
|
+
if token.comments:
|
|
640
|
+
chunks.append([token])
|
|
641
|
+
if i < total - 1:
|
|
642
|
+
chunks.append([])
|
|
643
|
+
else:
|
|
644
|
+
chunks[-1].append(token)
|
|
645
|
+
|
|
646
|
+
self._chunks = chunks
|
|
647
|
+
statements = self._parse_ydb_chunks(self.__class__._parse_statement, chunks)
|
|
591
648
|
return _reassemble_ctes(statements)
|
|
592
649
|
|
|
650
|
+
def _parse_ydb_chunks(self, parse_method, chunks):
|
|
651
|
+
expressions = []
|
|
652
|
+
|
|
653
|
+
# sqlglot >= 29 has this helper; 28.x only exposes the lower-level
|
|
654
|
+
# parser state, so keep a local fallback for the supported range.
|
|
655
|
+
if hasattr(self, "_parse_batch_statements"):
|
|
656
|
+
return self._parse_batch_statements(
|
|
657
|
+
parse_method=parse_method,
|
|
658
|
+
sep_first_statement=False,
|
|
659
|
+
)
|
|
660
|
+
|
|
661
|
+
for chunk in chunks:
|
|
662
|
+
self._index = -1
|
|
663
|
+
self._tokens = chunk
|
|
664
|
+
self._tokens_size = len(chunk)
|
|
665
|
+
self._advance()
|
|
666
|
+
|
|
667
|
+
expressions.append(parse_method(self))
|
|
668
|
+
|
|
669
|
+
if self._index < len(self._tokens):
|
|
670
|
+
self.raise_error("Invalid expression / Unexpected token")
|
|
671
|
+
|
|
672
|
+
self.check_errors()
|
|
673
|
+
|
|
674
|
+
return expressions
|
|
675
|
+
|
|
593
676
|
def expression(self, exp_class_or_instance, token=None, comments=None, **kwargs):
|
|
594
677
|
"""Bridge sqlglot's two `Parser.expression()` calling conventions.
|
|
595
678
|
|
|
@@ -642,9 +725,40 @@ class YDB(Dialect):
|
|
|
642
725
|
)
|
|
643
726
|
)
|
|
644
727
|
|
|
728
|
+
def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
|
|
729
|
+
if self._match(TokenType.PARAMETER):
|
|
730
|
+
name = self._parse_var(any_token=True)
|
|
731
|
+
if not name:
|
|
732
|
+
return None
|
|
733
|
+
|
|
734
|
+
parameter = exp.Parameter(this=name)
|
|
735
|
+
if self._match(TokenType.PLACEHOLDER):
|
|
736
|
+
parameter.meta["optional"] = True
|
|
737
|
+
return self.expression(parameter)
|
|
738
|
+
|
|
739
|
+
arg = super()._parse_lambda_arg()
|
|
740
|
+
if arg and self._match(TokenType.PLACEHOLDER):
|
|
741
|
+
arg.meta["optional"] = True
|
|
742
|
+
return arg
|
|
743
|
+
|
|
645
744
|
def _parse_ydb_declare(self) -> exp.Declare:
|
|
646
745
|
items = self._parse_csv(self._parse_ydb_declareitem)
|
|
647
|
-
|
|
746
|
+
declare = self.expression(exp.Declare(expressions=items))
|
|
747
|
+
|
|
748
|
+
item_comments = {
|
|
749
|
+
comment
|
|
750
|
+
for item in items
|
|
751
|
+
if item is not None
|
|
752
|
+
for comment in (item.comments or [])
|
|
753
|
+
}
|
|
754
|
+
if item_comments and declare.comments:
|
|
755
|
+
declare.comments = [
|
|
756
|
+
comment
|
|
757
|
+
for comment in declare.comments
|
|
758
|
+
if comment.strip().startswith("!") or comment not in item_comments
|
|
759
|
+
]
|
|
760
|
+
|
|
761
|
+
return declare
|
|
648
762
|
|
|
649
763
|
def _parse_ydb_declareitem(self) -> t.Optional[exp.DeclareItem]:
|
|
650
764
|
if not self._match(TokenType.PARAMETER):
|
|
@@ -654,7 +768,61 @@ class YDB(Dialect):
|
|
|
654
768
|
return None
|
|
655
769
|
self._match(TokenType.ALIAS)
|
|
656
770
|
kind = self._parse_types()
|
|
657
|
-
|
|
771
|
+
comments = self._prev.comments if self._prev else None
|
|
772
|
+
return self.expression(
|
|
773
|
+
exp.DeclareItem(this=name, kind=kind),
|
|
774
|
+
comments=comments,
|
|
775
|
+
)
|
|
776
|
+
|
|
777
|
+
def _parse_ydb_string(self, token: tokens.Token) -> exp.Literal:
|
|
778
|
+
literal = self.expression(exp.Literal(this=token.text, is_string=True), token)
|
|
779
|
+
if (
|
|
780
|
+
self._curr
|
|
781
|
+
and self._curr.token_type == TokenType.VAR
|
|
782
|
+
and self._curr.text.lower() == "u"
|
|
783
|
+
and token.end + 1 == self._curr.start
|
|
784
|
+
):
|
|
785
|
+
literal.meta["ydb_string_suffix"] = self._curr.text
|
|
786
|
+
self._advance()
|
|
787
|
+
return literal
|
|
788
|
+
|
|
789
|
+
def _parse_unary(self) -> t.Optional[exp.Expression]:
|
|
790
|
+
return self._parse_ydb_postfix_calls(super()._parse_unary())
|
|
791
|
+
|
|
792
|
+
def _parse_type(
|
|
793
|
+
self,
|
|
794
|
+
parse_interval: bool = True,
|
|
795
|
+
fallback_to_identifier: bool = False,
|
|
796
|
+
) -> t.Optional[exp.Expression]:
|
|
797
|
+
if (
|
|
798
|
+
self._curr
|
|
799
|
+
and self._next
|
|
800
|
+
and self._curr.token_type == TokenType.STRING
|
|
801
|
+
and self._next.token_type == TokenType.VAR
|
|
802
|
+
and self._next.text.lower() == "u"
|
|
803
|
+
and self._curr.end + 1 == self._next.start
|
|
804
|
+
):
|
|
805
|
+
token = self._curr
|
|
806
|
+
suffix = self._next.text
|
|
807
|
+
self._advance(2)
|
|
808
|
+
literal = self.expression(exp.Literal(this=token.text, is_string=True), token)
|
|
809
|
+
literal.meta["ydb_string_suffix"] = suffix
|
|
810
|
+
return literal
|
|
811
|
+
|
|
812
|
+
return super()._parse_type(
|
|
813
|
+
parse_interval=parse_interval,
|
|
814
|
+
fallback_to_identifier=fallback_to_identifier,
|
|
815
|
+
)
|
|
816
|
+
|
|
817
|
+
def _parse_ydb_postfix_calls(
|
|
818
|
+
self,
|
|
819
|
+
expression: t.Optional[exp.Expression],
|
|
820
|
+
) -> t.Optional[exp.Expression]:
|
|
821
|
+
while expression is not None and self._match(TokenType.L_PAREN):
|
|
822
|
+
args = self._parse_function_args(alias=True)
|
|
823
|
+
self._match_r_paren(expression)
|
|
824
|
+
expression = self.expression(YdbPostfixCall(this=expression, expressions=args))
|
|
825
|
+
return expression
|
|
658
826
|
|
|
659
827
|
def _parse_types(self, *args, **kwargs) -> t.Optional[exp.Expression]:
|
|
660
828
|
# YDB generic types use Name<...> syntax; token type varies by keyword status
|
|
@@ -683,6 +851,17 @@ class YDB(Dialect):
|
|
|
683
851
|
nested=True,
|
|
684
852
|
)
|
|
685
853
|
|
|
854
|
+
if name == "Struct":
|
|
855
|
+
self._advance() # consume 'Struct'
|
|
856
|
+
self._advance() # consume '<'
|
|
857
|
+
fields = self._parse_csv(self._parse_ydb_struct_field)
|
|
858
|
+
self._match(TokenType.GT)
|
|
859
|
+
return exp.DataType(
|
|
860
|
+
this=exp.DataType.Type.STRUCT,
|
|
861
|
+
expressions=[field for field in fields if field],
|
|
862
|
+
nested=True,
|
|
863
|
+
)
|
|
864
|
+
|
|
686
865
|
if name == "Tuple":
|
|
687
866
|
self._advance() # consume 'Tuple'
|
|
688
867
|
self._advance() # consume '<'
|
|
@@ -707,13 +886,28 @@ class YDB(Dialect):
|
|
|
707
886
|
dtype.set("nullable", True)
|
|
708
887
|
return dtype
|
|
709
888
|
|
|
889
|
+
def _parse_ydb_struct_field(self) -> t.Optional[exp.ColumnDef]:
|
|
890
|
+
if self._curr and self._curr.token_type == TokenType.GT:
|
|
891
|
+
return None
|
|
892
|
+
|
|
893
|
+
field = self._parse_id_var(any_token=True)
|
|
894
|
+
if not field:
|
|
895
|
+
return None
|
|
896
|
+
|
|
897
|
+
if not self._match(TokenType.COLON):
|
|
898
|
+
self.raise_error("Expected colon after struct field name")
|
|
899
|
+
|
|
900
|
+
kind = self._parse_types()
|
|
901
|
+
return self.expression(exp.ColumnDef(this=field, kind=kind))
|
|
902
|
+
|
|
710
903
|
def _parse_table_alias(self, alias_tokens=None):
|
|
711
904
|
# Prevent YDB-specific keywords from being consumed as table aliases
|
|
712
|
-
if self._curr and self._curr.text.upper() in ("FLATTEN", "ASSUME"):
|
|
905
|
+
if self._curr and self._curr.text.upper() in ("FLATTEN", "ASSUME", "VIEW"):
|
|
713
906
|
# Also check that what follows is a YDB construct, not a regular alias
|
|
714
907
|
if self._next and (
|
|
715
908
|
self._next.text.upper() in ("BY", "LIST", "DICT")
|
|
716
909
|
or self._next.token_type == TokenType.ORDER_BY
|
|
910
|
+
or self._curr.text.upper() == "VIEW"
|
|
717
911
|
):
|
|
718
912
|
return None
|
|
719
913
|
return super()._parse_table_alias(alias_tokens=alias_tokens)
|
|
@@ -731,8 +925,70 @@ class YDB(Dialect):
|
|
|
731
925
|
this.set("order", self.expression(AssumeOrderBy(this=order)))
|
|
732
926
|
return super()._parse_query_modifiers(this)
|
|
733
927
|
|
|
928
|
+
def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
|
|
929
|
+
# This mirrors sqlglot's Parser._parse_group control flow so YDB keeps
|
|
930
|
+
# support for GROUP BY modifiers (WITH ROLLUP, CUBE, GROUPING SETS,
|
|
931
|
+
# TOTALS). The YDB-specific change is parsing each group item through
|
|
932
|
+
# _parse_alias(..., explicit=True), enabling `GROUP BY expr AS alias`.
|
|
933
|
+
if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
|
|
934
|
+
return None
|
|
935
|
+
comments = self._prev_comments
|
|
936
|
+
|
|
937
|
+
elements: dict[str, t.Any] = defaultdict(list)
|
|
938
|
+
|
|
939
|
+
if self._match(TokenType.ALL):
|
|
940
|
+
elements["all"] = True
|
|
941
|
+
elif self._match(TokenType.DISTINCT):
|
|
942
|
+
elements["all"] = False
|
|
943
|
+
|
|
944
|
+
if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False):
|
|
945
|
+
return self.expression(exp.Group(**elements), comments=comments)
|
|
946
|
+
|
|
947
|
+
def _parse_group_expression():
|
|
948
|
+
if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False):
|
|
949
|
+
return None
|
|
950
|
+
return self._parse_alias(self._parse_disjunction(), explicit=True)
|
|
951
|
+
|
|
952
|
+
while True:
|
|
953
|
+
index = self._index
|
|
954
|
+
|
|
955
|
+
elements["expressions"].extend(self._parse_csv(_parse_group_expression))
|
|
956
|
+
|
|
957
|
+
before_with_index = self._index
|
|
958
|
+
with_prefix = self._match(TokenType.WITH)
|
|
959
|
+
|
|
960
|
+
if cube_or_rollup := self._parse_cube_or_rollup(with_prefix=with_prefix):
|
|
961
|
+
key = "rollup" if isinstance(cube_or_rollup, exp.Rollup) else "cube"
|
|
962
|
+
elements[key].append(cube_or_rollup)
|
|
963
|
+
elif grouping_sets := self._parse_grouping_sets():
|
|
964
|
+
elements["grouping_sets"].append(grouping_sets)
|
|
965
|
+
elif self._match_text_seq("TOTALS"):
|
|
966
|
+
elements["totals"] = True
|
|
967
|
+
|
|
968
|
+
if before_with_index <= self._index <= before_with_index + 1:
|
|
969
|
+
self._retreat(before_with_index)
|
|
970
|
+
break
|
|
971
|
+
|
|
972
|
+
if index == self._index:
|
|
973
|
+
break
|
|
974
|
+
|
|
975
|
+
return self.expression(exp.Group(**elements), comments=comments)
|
|
976
|
+
|
|
734
977
|
def _parse_table(self, *args, **kwargs) -> t.Optional[exp.Expression]:
|
|
978
|
+
if self._match(TokenType.L_BRACKET):
|
|
979
|
+
parts = []
|
|
980
|
+
while self._curr and not self._match(TokenType.R_BRACKET, advance=False):
|
|
981
|
+
parts.append(self._curr.text)
|
|
982
|
+
self._advance()
|
|
983
|
+
self._match(TokenType.R_BRACKET)
|
|
984
|
+
table = self.expression(exp.Table(this=exp.to_identifier("".join(parts))))
|
|
985
|
+
table.set("alias", self._parse_table_alias())
|
|
986
|
+
return table
|
|
987
|
+
|
|
735
988
|
table = super()._parse_table(*args, **kwargs)
|
|
989
|
+
if table and self._match(TokenType.VIEW):
|
|
990
|
+
table.set("ydb_index_view", self._parse_id_var(any_token=True))
|
|
991
|
+
table.set("alias", self._parse_table_alias())
|
|
736
992
|
if table and self._curr and self._curr.text.upper() == "FLATTEN":
|
|
737
993
|
self._advance()
|
|
738
994
|
kind: t.Optional[str] = None
|
|
@@ -762,8 +1018,22 @@ class YDB(Dialect):
|
|
|
762
1018
|
return exp.EQ(this=key, expression=value)
|
|
763
1019
|
|
|
764
1020
|
def _parse_primary(self) -> t.Optional[exp.Expression]:
|
|
1021
|
+
if (
|
|
1022
|
+
self._curr
|
|
1023
|
+
and self._curr.token_type == TokenType.PARAMETER
|
|
1024
|
+
and self._next
|
|
1025
|
+
and self._next.token_type == TokenType.PARAMETER
|
|
1026
|
+
):
|
|
1027
|
+
return self._parse_at_raw_string()
|
|
1028
|
+
|
|
765
1029
|
if self._match(TokenType.L_PAREN):
|
|
766
1030
|
comments = self._prev_comments
|
|
1031
|
+
if self._next_matching_rparen_is_arrow():
|
|
1032
|
+
expressions = self._parse_csv(self._parse_lambda_arg)
|
|
1033
|
+
self._match_r_paren()
|
|
1034
|
+
self._match(TokenType.ARROW)
|
|
1035
|
+
return self._parse_ydb_lambda(expressions)
|
|
1036
|
+
|
|
767
1037
|
query = self._parse_select()
|
|
768
1038
|
|
|
769
1039
|
if query:
|
|
@@ -797,6 +1067,42 @@ class YDB(Dialect):
|
|
|
797
1067
|
return this
|
|
798
1068
|
return super()._parse_primary()
|
|
799
1069
|
|
|
1070
|
+
def _next_matching_rparen_is_arrow(self) -> bool:
|
|
1071
|
+
depth = 1
|
|
1072
|
+
# _tokens_size is not available in all supported sqlglot versions.
|
|
1073
|
+
for i in range(self._index, len(self._tokens)):
|
|
1074
|
+
token = self._tokens[i]
|
|
1075
|
+
if token.token_type == TokenType.L_PAREN:
|
|
1076
|
+
depth += 1
|
|
1077
|
+
elif token.token_type == TokenType.R_PAREN:
|
|
1078
|
+
depth -= 1
|
|
1079
|
+
if depth == 0:
|
|
1080
|
+
return (
|
|
1081
|
+
i + 1 < len(self._tokens)
|
|
1082
|
+
and self._tokens[i + 1].token_type == TokenType.ARROW
|
|
1083
|
+
)
|
|
1084
|
+
return False
|
|
1085
|
+
|
|
1086
|
+
def _parse_at_raw_string(self) -> YdbAtString:
|
|
1087
|
+
self._advance()
|
|
1088
|
+
self._advance()
|
|
1089
|
+
|
|
1090
|
+
parts = []
|
|
1091
|
+
while self._curr:
|
|
1092
|
+
if (
|
|
1093
|
+
self._curr.token_type == TokenType.PARAMETER
|
|
1094
|
+
and self._next
|
|
1095
|
+
and self._next.token_type == TokenType.PARAMETER
|
|
1096
|
+
):
|
|
1097
|
+
self._advance()
|
|
1098
|
+
self._advance()
|
|
1099
|
+
break
|
|
1100
|
+
|
|
1101
|
+
parts.append(self._curr.text)
|
|
1102
|
+
self._advance()
|
|
1103
|
+
|
|
1104
|
+
return self.expression(YdbAtString(this="".join(parts)))
|
|
1105
|
+
|
|
800
1106
|
def _parse_lambda_body(self, params):
|
|
801
1107
|
if (
|
|
802
1108
|
self._curr is None
|
|
@@ -807,17 +1113,42 @@ class YDB(Dialect):
|
|
|
807
1113
|
return None
|
|
808
1114
|
self._advance()
|
|
809
1115
|
self._advance()
|
|
810
|
-
self.
|
|
1116
|
+
return self._parse_ydb_lambda(params)
|
|
1117
|
+
|
|
1118
|
+
def _parse_ydb_lambda(self, params):
|
|
1119
|
+
has_brace = self._match(TokenType.L_BRACE)
|
|
1120
|
+
assignments = []
|
|
1121
|
+
|
|
1122
|
+
if has_brace:
|
|
1123
|
+
while self._curr and self._curr.text != "RETURN":
|
|
1124
|
+
assignment = self._parse_ydb_named_expr()
|
|
1125
|
+
if not assignment:
|
|
1126
|
+
self.raise_error("Expected lambda body expression after '->'")
|
|
1127
|
+
assignments.append(assignment)
|
|
1128
|
+
self._match(TokenType.SEMICOLON)
|
|
1129
|
+
|
|
1130
|
+
if not self._match_text_seq("RETURN"):
|
|
1131
|
+
self.raise_error("Expected lambda body RETURN after '->'")
|
|
1132
|
+
|
|
1133
|
+
body = self._parse_disjunction()
|
|
811
1134
|
|
|
812
|
-
if not (self._curr.text == "RETURN"):
|
|
813
|
-
self.raise_error("Expected lambda body expression after '->'")
|
|
814
|
-
self._advance()
|
|
815
|
-
body = self._parse_conjunction()
|
|
816
1135
|
if not body:
|
|
817
1136
|
self.raise_error("Expected lambda body expression after '->'")
|
|
818
1137
|
|
|
819
|
-
self._match(TokenType.
|
|
820
|
-
|
|
1138
|
+
self._match(TokenType.SEMICOLON)
|
|
1139
|
+
if has_brace:
|
|
1140
|
+
self._match(TokenType.R_BRACE, expression=body)
|
|
1141
|
+
if assignments:
|
|
1142
|
+
body = self.expression(YdbLambdaBlock(this=body, expressions=assignments))
|
|
1143
|
+
|
|
1144
|
+
return self.expression(exp.Lambda(this=body, expressions=params))
|
|
1145
|
+
|
|
1146
|
+
def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
|
|
1147
|
+
if self._match_text_seq("COMPACT"):
|
|
1148
|
+
expression = self.expression(exp.In(this=this, field=self._parse_column()))
|
|
1149
|
+
expression.meta["compact"] = True
|
|
1150
|
+
return expression
|
|
1151
|
+
return super()._parse_in(this, alias=alias)
|
|
821
1152
|
|
|
822
1153
|
class Generator(generator.Generator):
|
|
823
1154
|
"""
|
|
@@ -912,6 +1243,10 @@ class YDB(Dialect):
|
|
|
912
1243
|
prefix = f"{expression.db}/" if expression.db else ""
|
|
913
1244
|
sql = f"`{prefix}{expression.name}`"
|
|
914
1245
|
|
|
1246
|
+
ydb_index_view = self.sql(expression, "ydb_index_view")
|
|
1247
|
+
if ydb_index_view:
|
|
1248
|
+
sql += f" VIEW {ydb_index_view}"
|
|
1249
|
+
|
|
915
1250
|
if expression.alias:
|
|
916
1251
|
sql += f" AS {expression.alias}"
|
|
917
1252
|
|
|
@@ -940,6 +1275,11 @@ class YDB(Dialect):
|
|
|
940
1275
|
expr = self.sql(expression, "expression")
|
|
941
1276
|
return f"{this}::{expr}"
|
|
942
1277
|
|
|
1278
|
+
def literal_sql(self, expression: exp.Literal) -> str:
|
|
1279
|
+
sql = super().literal_sql(expression)
|
|
1280
|
+
suffix = expression.meta.get("ydb_string_suffix")
|
|
1281
|
+
return f"{sql}{suffix}" if suffix else sql
|
|
1282
|
+
|
|
943
1283
|
def declareitem_sql(self, expression: exp.DeclareItem) -> str:
|
|
944
1284
|
name = self.sql(expression, "this")
|
|
945
1285
|
kind = self.sql(expression, "kind")
|
|
@@ -961,6 +1301,48 @@ class YDB(Dialect):
|
|
|
961
1301
|
sql = f"Tuple<{inner}>"
|
|
962
1302
|
return f"Optional<{sql}>" if expression.args.get("nullable") else sql
|
|
963
1303
|
|
|
1304
|
+
def ydbatstring_sql(self, expression: YdbAtString) -> str:
|
|
1305
|
+
return f"@@{expression.this}@@"
|
|
1306
|
+
|
|
1307
|
+
def ydbpostfixcall_sql(self, expression: YdbPostfixCall) -> str:
|
|
1308
|
+
this = self.sql(expression, "this")
|
|
1309
|
+
args = self.expressions(expression, flat=True)
|
|
1310
|
+
return f"{this}({args})"
|
|
1311
|
+
|
|
1312
|
+
def ydblambdablock_sql(self, expression: YdbLambdaBlock) -> str:
|
|
1313
|
+
assignments = [self.sql(assignment) for assignment in expression.expressions]
|
|
1314
|
+
statements = [*assignments, f"RETURN {self.sql(expression, 'this')}"]
|
|
1315
|
+
return "{ " + "; ".join(statements) + " }"
|
|
1316
|
+
|
|
1317
|
+
def in_sql(self, expression: exp.In) -> str:
|
|
1318
|
+
if expression.meta.get("compact"):
|
|
1319
|
+
return f"{self.sql(expression, 'this')} IN COMPACT {self.sql(expression, 'field')}"
|
|
1320
|
+
return super().in_sql(expression)
|
|
1321
|
+
|
|
1322
|
+
def maybe_comment(
|
|
1323
|
+
self,
|
|
1324
|
+
sql: str,
|
|
1325
|
+
expression: t.Optional[exp.Expression] = None,
|
|
1326
|
+
comments: t.Optional[t.List[str]] = None,
|
|
1327
|
+
separated: bool = False,
|
|
1328
|
+
) -> str:
|
|
1329
|
+
comments = (
|
|
1330
|
+
((expression and expression.comments) if comments is None else comments)
|
|
1331
|
+
if self.comments
|
|
1332
|
+
else None
|
|
1333
|
+
)
|
|
1334
|
+
if not comments:
|
|
1335
|
+
return sql
|
|
1336
|
+
|
|
1337
|
+
line_directives = [comment.strip() for comment in comments if comment.strip().startswith("!")]
|
|
1338
|
+
if not line_directives:
|
|
1339
|
+
return super().maybe_comment(sql, expression, comments=comments, separated=separated)
|
|
1340
|
+
|
|
1341
|
+
remaining_comments = [comment for comment in comments if not comment.strip().startswith("!")]
|
|
1342
|
+
sql = super().maybe_comment(sql, expression, comments=remaining_comments, separated=separated)
|
|
1343
|
+
prefix = "\n".join(f"--{directive}" for directive in line_directives)
|
|
1344
|
+
return f"{prefix}\n{sql}" if sql else prefix
|
|
1345
|
+
|
|
964
1346
|
def alias_sql(self, expression: exp.Alias) -> str:
|
|
965
1347
|
alias = expression.args.get("alias")
|
|
966
1348
|
if alias and alias.name.startswith("$"):
|
|
@@ -1105,6 +1487,12 @@ class YDB(Dialect):
|
|
|
1105
1487
|
"""
|
|
1106
1488
|
nullable = expression.args.get("nullable")
|
|
1107
1489
|
|
|
1490
|
+
def _struct_field_sql(field: exp.Expression) -> str:
    """Render a struct member name for use inside ``Struct<...>``.

    Quoted identifiers are emitted as single-quoted names; anything else is
    rendered through the generator as usual.

    Args:
        field: The member-name node of a struct column definition.

    Returns:
        The SQL text for the member name.
    """
    if isinstance(field, exp.Identifier) and field.args.get("quoted"):
        # Escape backslashes before quotes: otherwise a backslash already in
        # the name would combine with the following character (or with the
        # closing quote) and corrupt the quoted name.
        name = field.name.replace("\\", "\\\\").replace("'", "\\'")
        return f"'{name}'"
    return self.sql(field)
|
|
1495
|
+
|
|
1108
1496
|
# YDB generic container types rendered with <> syntax and correct casing
|
|
1109
1497
|
if expression.args.get("nested"):
|
|
1110
1498
|
type_value = expression.this
|
|
@@ -1122,6 +1510,15 @@ class YDB(Dialect):
|
|
|
1122
1510
|
sql = f"Tuple<{inner}>"
|
|
1123
1511
|
return f"Optional<{sql}>" if nullable else sql
|
|
1124
1512
|
|
|
1513
|
+
if type_value == exp.DataType.Type.STRUCT:
|
|
1514
|
+
inner = ", ".join(
|
|
1515
|
+
f"{_struct_field_sql(col.this)}: {self.sql(col, 'kind')}"
|
|
1516
|
+
for col in expression.expressions
|
|
1517
|
+
if isinstance(col, exp.ColumnDef)
|
|
1518
|
+
)
|
|
1519
|
+
sql = f"Struct<{inner}>"
|
|
1520
|
+
return f"Optional<{sql}>" if nullable else sql
|
|
1521
|
+
|
|
1125
1522
|
inner = ", ".join(self.sql(e) for e in expression.expressions)
|
|
1126
1523
|
name = {
|
|
1127
1524
|
exp.DataType.Type.LIST: "List",
|
|
@@ -1447,7 +1844,7 @@ class YDB(Dialect):
|
|
|
1447
1844
|
f"decorrelated in YDB — rewrite manually using a $variable subquery"
|
|
1448
1845
|
)
|
|
1449
1846
|
continue
|
|
1450
|
-
if scope.external_columns:
|
|
1847
|
+
if scope.external_columns and scope.scope_type != ScopeType.CTE:
|
|
1451
1848
|
self.decorrelate(select, parent, scope.external_columns, next_alias_name)
|
|
1452
1849
|
if scope.scope_type == ScopeType.SUBQUERY:
|
|
1453
1850
|
self.unnest(select, parent, next_alias_name)
|
|
@@ -1501,6 +1898,9 @@ class YDB(Dialect):
|
|
|
1501
1898
|
"""
|
|
1502
1899
|
Unnests a subquery by transforming it into a join
|
|
1503
1900
|
"""
|
|
1901
|
+
if isinstance(select.parent, exp.CTE):
|
|
1902
|
+
return
|
|
1903
|
+
|
|
1504
1904
|
if len(select.selects) > 1:
|
|
1505
1905
|
return
|
|
1506
1906
|
self.ensure_select_aliases(select)
|
|
@@ -2073,14 +2473,30 @@ class YDB(Dialect):
|
|
|
2073
2473
|
Returns:
|
|
2074
2474
|
YDB-specific SQL for lambda functions
|
|
2075
2475
|
"""
|
|
2076
|
-
|
|
2077
|
-
|
|
2078
|
-
|
|
2079
|
-
|
|
2476
|
+
def _arg_name(arg: exp.Expression) -> str:
    """Return the bare name of a lambda argument node.

    Uses the node's ``name`` when it is an ``exp.Parameter`` or otherwise
    exposes a ``name`` attribute; falls back to the rendered SQL with any
    leading ``$`` characters removed.
    """
    if isinstance(arg, exp.Parameter) or hasattr(arg, "name"):
        return arg.name
    return self.sql(arg).lstrip("$")
|
|
2080
2480
|
|
|
2081
|
-
|
|
2082
|
-
|
|
2083
|
-
|
|
2481
|
+
def _arg_sql(arg: exp.Expression) -> str:
    """Render one lambda argument, ``$``-prefixed and optionally ``?``-suffixed.

    A non-empty name that does not already start with ``$`` is emitted as
    ``$<name>``; otherwise the argument is rendered through the generator.
    A trailing ``?`` is added when ``arg.meta["optional"]`` is truthy.
    """
    bare = _arg_name(arg)
    if bare and not bare.startswith("$"):
        rendered = f"${bare}"
    else:
        rendered = self.sql(arg)

    if arg.meta.get("optional"):
        return f"{rendered}?"
    return rendered
|
|
2485
|
+
|
|
2486
|
+
def _prefix_lambda_refs(node: exp.Expression) -> exp.Expression:
    """Transform callback that adds a ``$`` prefix to identifiers.

    Any ``exp.Identifier`` whose name does not already start with ``$`` is
    replaced by a new unquoted identifier named ``$<name>``; every other
    node is returned unchanged.

    NOTE(review): this applies to every Identifier it is given, not only to
    the lambda's own argument names - confirm that is intended for bodies
    that reference other identifiers (e.g. member names).
    """
    needs_prefix = isinstance(node, exp.Identifier) and not node.name.startswith("$")
    if not needs_prefix:
        return node
    return exp.Identifier(this=f"${node.name}", quoted=False)
|
|
2493
|
+
|
|
2494
|
+
args = ", ".join(_arg_sql(arg) for arg in expression.expressions)
|
|
2495
|
+
body = expression.this.copy().transform(_prefix_lambda_refs)
|
|
2496
|
+
if isinstance(body, YdbLambdaBlock):
|
|
2497
|
+
return f"({args}) {arrow_sep} {self.sql(body)}"
|
|
2498
|
+
body_sql = self.sql(body)
|
|
2499
|
+
return f"({args}) {arrow_sep} {body_sql if isinstance(body, exp.Paren) else f'({body_sql})'}"
|
|
2084
2500
|
|
|
2085
2501
|
def _is_simple_expression(self, expr: exp.Expression) -> bool:
|
|
2086
2502
|
"""
|
|
@@ -2578,7 +2994,7 @@ class YDB(Dialect):
|
|
|
2578
2994
|
transformed.append(alias_map[gb_sql].copy())
|
|
2579
2995
|
elif isinstance(gb_expr, (exp.Column, exp.Identifier)):
|
|
2580
2996
|
# Add column AS alias so YDB resolves unambiguously.
|
|
2581
|
-
# Strip any table qualifier from the column (e.g. y.a
|
|
2997
|
+
# Strip any table qualifier from the column (e.g. y.a -> a).
|
|
2582
2998
|
# Use the SELECT-level alias if the column is aliased there
|
|
2583
2999
|
# (e.g. `a_id AS _u_1` in SELECT means GROUP BY `a_id AS _u_1`).
|
|
2584
3000
|
column_name = gb_expr.alias_or_name
|
|
@@ -2599,6 +3015,8 @@ class YDB(Dialect):
|
|
|
2599
3015
|
# Build the GROUP BY clause
|
|
2600
3016
|
if group_by_items:
|
|
2601
3017
|
result = f" GROUP BY {group_by_items}"
|
|
3018
|
+
elif not (rollup or cube or grouping_sets):
|
|
3019
|
+
return ""
|
|
2602
3020
|
else:
|
|
2603
3021
|
result = " GROUP BY"
|
|
2604
3022
|
|
|
@@ -2699,6 +3117,9 @@ class YDB(Dialect):
|
|
|
2699
3117
|
FlattenBy: lambda self, e: self.flattenby_sql(e),
|
|
2700
3118
|
AssumeOrderBy: lambda self, e: self.assumeorderby_sql(e),
|
|
2701
3119
|
YdbTuple: lambda self, e: self.ydbtuple_sql(e),
|
|
3120
|
+
YdbAtString: lambda self, e: self.ydbatstring_sql(e),
|
|
3121
|
+
YdbPostfixCall: lambda self, e: self.ydbpostfixcall_sql(e),
|
|
3122
|
+
YdbLambdaBlock: lambda self, e: self.ydblambdablock_sql(e),
|
|
2702
3123
|
exp.Create: create_sql,
|
|
2703
3124
|
exp.DefaultColumnConstraint: lambda self, e: "",
|
|
2704
3125
|
exp.DateTrunc: _date_trunc_sql,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ydb-sqlglot-plugin
|
|
3
|
-
Version: 0.2.
|
|
3
|
+
Version: 0.2.3
|
|
4
4
|
Summary: YDB dialect plugin for sqlglot
|
|
5
5
|
Author: YDB Team
|
|
6
6
|
License: Apache-2.0
|
|
@@ -102,6 +102,31 @@ LEFT JOIN (
|
|
|
102
102
|
|
|
103
103
|
The same rewriting applies to `EXISTS`, `IN (subquery)`, and `ANY/ALL` subqueries.
|
|
104
104
|
|
|
105
|
+
#### GROUP BY aliases
|
|
106
|
+
|
|
107
|
+
YDB accepts aliases directly inside `GROUP BY` items. The generator uses this
|
|
108
|
+
form for grouped columns so later clauses and decorrelated subqueries can refer
|
|
109
|
+
to a stable grouping name:
|
|
110
|
+
|
|
111
|
+
```sql
|
|
112
|
+
-- input
|
|
113
|
+
SELECT user_id, COUNT(*) FROM events GROUP BY user_id
|
|
114
|
+
|
|
115
|
+
-- output
|
|
116
|
+
SELECT user_id, COUNT(*) FROM `events` GROUP BY user_id AS user_id
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
If a grouped column is selected under a generated alias, the `GROUP BY` item uses
|
|
120
|
+
that alias as well:
|
|
121
|
+
|
|
122
|
+
```sql
|
|
123
|
+
SELECT user_id AS _u_1, COUNT(*) FROM `events` GROUP BY user_id AS _u_1
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
Positional `GROUP BY` references are expanded before generation. When a
|
|
127
|
+
positional reference points to a constant expression, the grouping item is
|
|
128
|
+
removed because YDB rejects grouping by constants.
|
|
129
|
+
|
|
105
130
|
---
|
|
106
131
|
|
|
107
132
|
### YDB → any SQL
|
|
@@ -119,7 +144,10 @@ The plugin parses YDB/YQL back into sqlglot's AST, enabling round-trips, YDB-to-
|
|
|
119
144
|
| `Optional<T>` / `T?` | `CAST(x AS Optional<Utf8>)` |
|
|
120
145
|
| Container types | `CAST(x AS List<Int32>)`, `Dict<Utf8, Int64>`, `Set<Utf8>`, `Tuple<Int32, Utf8>` |
|
|
121
146
|
| `ASSUME ORDER BY` | `SELECT * FROM t ASSUME ORDER BY id` |
|
|
147
|
+
| `GROUP BY expr AS alias` | `SELECT v, COUNT(*) FROM t GROUP BY v AS v` |
|
|
122
148
|
| Named expressions | `$t = (SELECT 1 AS x)` |
|
|
149
|
+
| Lambda expressions | `($x, $y?) -> ($x + COALESCE($y, 0))`, `($y) -> { $p = "x"; RETURN $p \|\| $y }` |
|
|
150
|
+
| `IN COMPACT` | `WHERE key IN COMPACT $values` |
|
|
123
151
|
| `PRAGMA` | `PRAGMA AnsiImplicitCrossJoin` |
|
|
124
152
|
|
|
125
153
|
Table names without backticks are accepted on input; the generator always produces backtick-quoted output.
|
|
@@ -204,11 +232,30 @@ Functions below are recognized by sqlglot as standard SQL expressions and transl
|
|
|
204
232
|
|---|---|
|
|
205
233
|
| `ARRAY(v1, v2, ...)` | `AsList(v1, v2, ...)` |
|
|
206
234
|
| `ARRAY_LENGTH(x)` / `ARRAY_SIZE(x)` | `ListLength(x)` |
|
|
207
|
-
| `ARRAY_FILTER(arr, x -> cond)` | `ListFilter(arr, ($x) ->
|
|
208
|
-
| `ARRAY_ANY(arr, x -> cond)` | `ListHasItems(ListFilter(arr, ($x) ->
|
|
235
|
+
| `ARRAY_FILTER(arr, x -> cond)` | `ListFilter(arr, ($x) -> (cond))` |
|
|
236
|
+
| `ARRAY_ANY(arr, x -> cond)` | `ListHasItems(ListFilter(arr, ($x) -> (cond)))` |
|
|
209
237
|
| `ARRAY_AGG(x)` | `AGGREGATE_LIST(x)` |
|
|
210
238
|
| `UNNEST(x)` | `FLATTEN BY x` |
|
|
211
239
|
|
|
240
|
+
Lambda expressions are represented with sqlglot's standard `exp.Lambda` AST node.
|
|
241
|
+
When a source dialect parses lambdas, the YDB generator emits YQL lambda syntax:
|
|
242
|
+
|
|
243
|
+
```sql
|
|
244
|
+
-- DuckDB input
|
|
245
|
+
SELECT list_filter(arr, x -> x > 0) FROM t
|
|
246
|
+
|
|
247
|
+
-- YDB output
|
|
248
|
+
SELECT ListFilter(arr, ($x) -> ($x > 0)) FROM `t`
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
YDB input also supports documented YQL lambda forms, including optional
|
|
252
|
+
arguments and block bodies with local named expressions:
|
|
253
|
+
|
|
254
|
+
```sql
|
|
255
|
+
($x, $y?) -> ($x + COALESCE($y, 0));
|
|
256
|
+
($y) -> { $prefix = "x"; RETURN $prefix || $y; };
|
|
257
|
+
```
|
|
258
|
+
|
|
212
259
|
### Conditional / math
|
|
213
260
|
|
|
214
261
|
| Input | YQL output |
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
VERSION = "0.2.2"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ydb_sqlglot_plugin-0.2.2 → ydb_sqlglot_plugin-0.2.3}/ydb_sqlglot_plugin.egg-info/SOURCES.txt
RENAMED
|
File without changes
|
|
File without changes
|
{ydb_sqlglot_plugin-0.2.2 → ydb_sqlglot_plugin-0.2.3}/ydb_sqlglot_plugin.egg-info/entry_points.txt
RENAMED
|
File without changes
|
{ydb_sqlglot_plugin-0.2.2 → ydb_sqlglot_plugin-0.2.3}/ydb_sqlglot_plugin.egg-info/requires.txt
RENAMED
|
File without changes
|
{ydb_sqlglot_plugin-0.2.2 → ydb_sqlglot_plugin-0.2.3}/ydb_sqlglot_plugin.egg-info/top_level.txt
RENAMED
|
File without changes
|