sqlglot 27.29.0__py3-none-any.whl → 28.4.1__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registries.
- sqlglot/__main__.py +6 -4
- sqlglot/_version.py +2 -2
- sqlglot/dialects/bigquery.py +116 -295
- sqlglot/dialects/clickhouse.py +67 -2
- sqlglot/dialects/databricks.py +38 -1
- sqlglot/dialects/dialect.py +327 -286
- sqlglot/dialects/dremio.py +4 -1
- sqlglot/dialects/duckdb.py +718 -22
- sqlglot/dialects/exasol.py +243 -10
- sqlglot/dialects/hive.py +8 -8
- sqlglot/dialects/mysql.py +11 -2
- sqlglot/dialects/oracle.py +29 -0
- sqlglot/dialects/postgres.py +46 -24
- sqlglot/dialects/presto.py +47 -16
- sqlglot/dialects/redshift.py +16 -0
- sqlglot/dialects/risingwave.py +3 -0
- sqlglot/dialects/singlestore.py +12 -3
- sqlglot/dialects/snowflake.py +199 -271
- sqlglot/dialects/spark.py +2 -2
- sqlglot/dialects/spark2.py +11 -48
- sqlglot/dialects/sqlite.py +9 -0
- sqlglot/dialects/teradata.py +5 -8
- sqlglot/dialects/trino.py +6 -0
- sqlglot/dialects/tsql.py +61 -25
- sqlglot/diff.py +4 -2
- sqlglot/errors.py +69 -0
- sqlglot/expressions.py +484 -84
- sqlglot/generator.py +143 -41
- sqlglot/helper.py +2 -2
- sqlglot/optimizer/annotate_types.py +247 -140
- sqlglot/optimizer/canonicalize.py +6 -1
- sqlglot/optimizer/eliminate_joins.py +1 -1
- sqlglot/optimizer/eliminate_subqueries.py +2 -2
- sqlglot/optimizer/merge_subqueries.py +5 -5
- sqlglot/optimizer/normalize.py +20 -13
- sqlglot/optimizer/normalize_identifiers.py +17 -3
- sqlglot/optimizer/optimizer.py +4 -0
- sqlglot/optimizer/pushdown_predicates.py +1 -1
- sqlglot/optimizer/qualify.py +14 -6
- sqlglot/optimizer/qualify_columns.py +113 -352
- sqlglot/optimizer/qualify_tables.py +112 -70
- sqlglot/optimizer/resolver.py +374 -0
- sqlglot/optimizer/scope.py +27 -16
- sqlglot/optimizer/simplify.py +1074 -964
- sqlglot/optimizer/unnest_subqueries.py +12 -2
- sqlglot/parser.py +276 -160
- sqlglot/planner.py +2 -2
- sqlglot/schema.py +15 -4
- sqlglot/tokens.py +42 -7
- sqlglot/transforms.py +77 -22
- sqlglot/typing/__init__.py +316 -0
- sqlglot/typing/bigquery.py +376 -0
- sqlglot/typing/hive.py +12 -0
- sqlglot/typing/presto.py +24 -0
- sqlglot/typing/snowflake.py +505 -0
- sqlglot/typing/spark2.py +58 -0
- sqlglot/typing/tsql.py +9 -0
- {sqlglot-27.29.0.dist-info → sqlglot-28.4.1.dist-info}/METADATA +2 -2
- sqlglot-28.4.1.dist-info/RECORD +92 -0
- sqlglot-27.29.0.dist-info/RECORD +0 -84
- {sqlglot-27.29.0.dist-info → sqlglot-28.4.1.dist-info}/WHEEL +0 -0
- {sqlglot-27.29.0.dist-info → sqlglot-28.4.1.dist-info}/licenses/LICENSE +0 -0
- {sqlglot-27.29.0.dist-info → sqlglot-28.4.1.dist-info}/top_level.txt +0 -0
sqlglot/planner.py
CHANGED
@@ -94,7 +94,7 @@ class Step:
         """
         ctes = ctes or {}
         expression = expression.unnest()
-        with_ = expression.args.get("with")
+        with_ = expression.args.get("with_")

         # CTEs break the mold of scope and introduce themselves to all in the context.
         if with_:
@@ -104,7 +104,7 @@ class Step:
                 step.name = cte.alias
                 ctes[step.name] = step  # type: ignore

-        from_ = expression.args.get("from")
+        from_ = expression.args.get("from_")

         if isinstance(expression, exp.Select) and from_:
             step = Scan.from_expression(from_.this, ctes)
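Note: the planner change is one instance of a release-wide rename of the `with` and `from` expression arg keys to `with_` and `from_`. A minimal sketch of how downstream code would read these args after upgrading, assuming the rename shown in this diff applies to parsed expressions generally:

import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one("WITH t AS (SELECT 1 AS x) SELECT x FROM t")

# Pre-28.x code used ast.args.get("with") / ast.args.get("from")
with_ = ast.args.get("with_")
from_ = ast.args.get("from_")

assert isinstance(with_, exp.With) and isinstance(from_, exp.From)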
sqlglot/schema.py
CHANGED
@@ -18,7 +18,13 @@ if t.TYPE_CHECKING:
 class Schema(abc.ABC):
     """Abstract base class for database schemas"""

-
+    @property
+    def dialect(self) -> t.Optional[Dialect]:
+        """
+        Returns None by default. Subclasses that require dialect-specific
+        behavior should override this property.
+        """
+        return None

     @abc.abstractmethod
     def add_table(
@@ -222,15 +228,20 @@ class MappingSchema(AbstractMappingSchema, Schema):
         dialect: DialectType = None,
         normalize: bool = True,
     ) -> None:
-        self.dialect = dialect
         self.visible = {} if visible is None else visible
         self.normalize = normalize
+        self._dialect = Dialect.get_or_raise(dialect)
         self._type_mapping_cache: t.Dict[str, exp.DataType] = {}
         self._depth = 0
         schema = {} if schema is None else schema

         super().__init__(self._normalize(schema) if self.normalize else schema)

+    @property
+    def dialect(self) -> Dialect:
+        """Returns the dialect for this mapping schema."""
+        return self._dialect
+
     @classmethod
     def from_mapping_schema(cls, mapping_schema: MappingSchema) -> MappingSchema:
         return MappingSchema(
@@ -455,8 +466,8 @@ class MappingSchema(AbstractMappingSchema, Schema):
             The resulting expression type.
         """
         if schema_type not in self._type_mapping_cache:
-            dialect = dialect or self.dialect
-            udt = Dialect.get_or_raise(dialect).SUPPORTS_USER_DEFINED_TYPES
+            dialect = Dialect.get_or_raise(dialect) if dialect else self.dialect
+            udt = dialect.SUPPORTS_USER_DEFINED_TYPES

             try:
                 expression = exp.DataType.build(schema_type, dialect=dialect, udt=udt)
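Note: `Schema.dialect` is now a property, and `MappingSchema` resolves its dialect eagerly via `Dialect.get_or_raise`, so `.dialect` yields a `Dialect` instance rather than whatever raw value was passed in. A short sketch of the new behavior, assuming the constructor is otherwise unchanged:

from sqlglot.dialects.dialect import Dialect
from sqlglot.schema import MappingSchema

schema = MappingSchema(schema={"t": {"x": "INT"}}, dialect="duckdb")

# The property returns a resolved Dialect instance, not the string "duckdb"
assert isinstance(schema.dialect, Dialect)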
sqlglot/tokens.py
CHANGED
@@ -41,6 +41,7 @@ class TokenType(AutoName):
     DCOLON = auto()
     DCOLONDOLLAR = auto()
     DCOLONPERCENT = auto()
+    DCOLONQMARK = auto()
     DQMARK = auto()
     SEMICOLON = auto()
     STAR = auto()
@@ -82,7 +83,10 @@ class TokenType(AutoName):
     PARAMETER = auto()
     SESSION = auto()
     SESSION_PARAMETER = auto()
+    SESSION_USER = auto()
     DAMP = auto()
+    AMP_LT = auto()
+    AMP_GT = auto()
     XOR = auto()
     DSTAR = auto()
     QMARK_AMP = auto()
@@ -131,6 +135,7 @@ class TokenType(AutoName):
     UINT = auto()
     BIGINT = auto()
     UBIGINT = auto()
+    BIGNUM = auto()  # unlimited precision int
     INT128 = auto()
     UINT128 = auto()
     INT256 = auto()
@@ -143,6 +148,7 @@ class TokenType(AutoName):
     DECIMAL64 = auto()
     DECIMAL128 = auto()
     DECIMAL256 = auto()
+    DECFLOAT = auto()
     UDECIMAL = auto()
     BIGDECIMAL = auto()
     CHAR = auto()
@@ -165,6 +171,7 @@ class TokenType(AutoName):
     JSONB = auto()
     TIME = auto()
     TIMETZ = auto()
+    TIME_NS = auto()
     TIMESTAMP = auto()
     TIMESTAMPTZ = auto()
     TIMESTAMPLTZ = auto()
@@ -198,6 +205,8 @@ class TokenType(AutoName):
     POINT = auto()
     RING = auto()
     LINESTRING = auto()
+    LOCALTIME = auto()
+    LOCALTIMESTAMP = auto()
     MULTILINESTRING = auto()
     POLYGON = auto()
     MULTIPOLYGON = auto()
@@ -270,6 +279,8 @@ class TokenType(AutoName):
     CURRENT_TIME = auto()
     CURRENT_TIMESTAMP = auto()
     CURRENT_USER = auto()
+    CURRENT_ROLE = auto()
+    CURRENT_CATALOG = auto()
     DECLARE = auto()
     DEFAULT = auto()
     DELETE = auto()
@@ -289,6 +300,7 @@ class TokenType(AutoName):
     EXISTS = auto()
     FALSE = auto()
     FETCH = auto()
+    FILE = auto()
     FILE_FORMAT = auto()
     FILTER = auto()
     FINAL = auto()
@@ -312,6 +324,7 @@ class TokenType(AutoName):
     ILIKE = auto()
     IN = auto()
     INDEX = auto()
+    INDEXED_BY = auto()
     INNER = auto()
     INSERT = auto()
     INSTALL = auto()
@@ -336,6 +349,7 @@ class TokenType(AutoName):
     LOAD = auto()
     LOCK = auto()
     MAP = auto()
+    MATCH = auto()
     MATCH_CONDITION = auto()
     MATCH_RECOGNIZE = auto()
     MEMBER_OF = auto()
@@ -375,6 +389,7 @@ class TokenType(AutoName):
     PUT = auto()
     QUALIFY = auto()
     QUOTE = auto()
+    QDCOLON = auto()
     RANGE = auto()
     RECURSIVE = auto()
     REFRESH = auto()
@@ -538,6 +553,7 @@ class _Tokenizer(type):
         }

         klass._STRING_ESCAPES = set(klass.STRING_ESCAPES)
+        klass._ESCAPE_FOLLOW_CHARS = set(klass.ESCAPE_FOLLOW_CHARS)
         klass._IDENTIFIER_ESCAPES = set(klass.IDENTIFIER_ESCAPES)
         klass._COMMENTS = {
             **dict(
@@ -589,6 +605,7 @@ class _Tokenizer(type):
                 tokens_preceding_hint={
                     _TOKEN_TYPE_TO_INDEX[v] for v in klass.TOKENS_PRECEDING_HINT
                 },
+                escape_follow_chars=klass._ESCAPE_FOLLOW_CHARS,
             )
             token_types = RsTokenTypeSettings(
                 bit_string=_TOKEN_TYPE_TO_INDEX[TokenType.BIT_STRING],
@@ -658,6 +675,7 @@ class Tokenizer(metaclass=_Tokenizer):
     QUOTES: t.List[t.Tuple[str, str] | str] = ["'"]
     STRING_ESCAPES = ["'"]
     VAR_SINGLE_TOKENS: t.Set[str] = set()
+    ESCAPE_FOLLOW_CHARS: t.List[str] = []

     # The strings in this list can always be used as escapes, regardless of the surrounding
     # identifier delimiters. By default, the closing delimiter is assumed to also act as an
@@ -688,6 +706,7 @@ class Tokenizer(metaclass=_Tokenizer):
     _STRING_ESCAPES: t.Set[str] = set()
     _KEYWORD_TRIE: t.Dict = {}
     _RS_TOKENIZER: t.Optional[t.Any] = None
+    _ESCAPE_FOLLOW_CHARS: t.Set[str] = set()

     KEYWORDS: t.Dict[str, TokenType] = {
         **{f"{{%{postfix}": TokenType.BLOCK_START for postfix in ("", "+", "-")},
@@ -697,6 +716,7 @@ class Tokenizer(metaclass=_Tokenizer):
         HINT_START: TokenType.HINT,
         "==": TokenType.EQ,
         "::": TokenType.DCOLON,
+        "?::": TokenType.QDCOLON,
         "||": TokenType.DPIPE,
         "|>": TokenType.PIPE_GT,
         ">=": TokenType.GTE,
@@ -747,6 +767,7 @@ class Tokenizer(metaclass=_Tokenizer):
         "CURRENT_TIME": TokenType.CURRENT_TIME,
         "CURRENT_TIMESTAMP": TokenType.CURRENT_TIMESTAMP,
         "CURRENT_USER": TokenType.CURRENT_USER,
+        "CURRENT_CATALOG": TokenType.CURRENT_CATALOG,
         "DATABASE": TokenType.DATABASE,
         "DEFAULT": TokenType.DEFAULT,
         "DELETE": TokenType.DELETE,
@@ -766,6 +787,7 @@ class Tokenizer(metaclass=_Tokenizer):
         "FALSE": TokenType.FALSE,
         "FETCH": TokenType.FETCH,
         "FILTER": TokenType.FILTER,
+        "FILE": TokenType.FILE,
         "FIRST": TokenType.FIRST,
         "FULL": TokenType.FULL,
         "FUNCTION": TokenType.FUNCTION,
@@ -798,6 +820,8 @@ class Tokenizer(metaclass=_Tokenizer):
         "LIKE": TokenType.LIKE,
         "LIMIT": TokenType.LIMIT,
         "LOAD": TokenType.LOAD,
+        "LOCALTIME": TokenType.LOCALTIME,
+        "LOCALTIMESTAMP": TokenType.LOCALTIMESTAMP,
         "LOCK": TokenType.LOCK,
         "MERGE": TokenType.MERGE,
         "NAMESPACE": TokenType.NAMESPACE,
@@ -844,6 +868,7 @@ class Tokenizer(metaclass=_Tokenizer):
         "SELECT": TokenType.SELECT,
         "SEMI": TokenType.SEMI,
         "SESSION": TokenType.SESSION,
+        "SESSION_USER": TokenType.SESSION_USER,
         "SET": TokenType.SET,
         "SETTINGS": TokenType.SETTINGS,
         "SHOW": TokenType.SHOW,
@@ -908,8 +933,10 @@ class Tokenizer(metaclass=_Tokenizer):
         "DECIMAL64": TokenType.DECIMAL64,
         "DECIMAL128": TokenType.DECIMAL128,
         "DECIMAL256": TokenType.DECIMAL256,
+        "DECFLOAT": TokenType.DECFLOAT,
         "BIGDECIMAL": TokenType.BIGDECIMAL,
         "BIGNUMERIC": TokenType.BIGDECIMAL,
+        "BIGNUM": TokenType.BIGNUM,
         "LIST": TokenType.LIST,
         "MAP": TokenType.MAP,
         "NULLABLE": TokenType.NULLABLE,
@@ -951,6 +978,7 @@ class Tokenizer(metaclass=_Tokenizer):
         "VARBINARY": TokenType.VARBINARY,
         "TIME": TokenType.TIME,
         "TIMETZ": TokenType.TIMETZ,
+        "TIME_NS": TokenType.TIME_NS,
         "TIMESTAMP": TokenType.TIMESTAMP,
         "TIMESTAMPTZ": TokenType.TIMESTAMPTZ,
         "TIMESTAMPLTZ": TokenType.TIMESTAMPLTZ,
@@ -1340,6 +1368,8 @@ class Tokenizer(metaclass=_Tokenizer):
             elif self._peek.upper() == "E" and not scientific:
                 scientific += 1
                 self._advance()
+            elif self._peek == "_" and self.dialect.NUMBERS_CAN_BE_UNDERSCORE_SEPARATED:
+                self._advance()
             elif self._peek.isidentifier():
                 number_text = self._text
                 literal = ""
@@ -1354,12 +1384,8 @@ class Tokenizer(metaclass=_Tokenizer):
                     self._add(TokenType.NUMBER, number_text)
                     self._add(TokenType.DCOLON, "::")
                     return self._add(token_type, literal)
-
-
-                if self.dialect.NUMBERS_CAN_BE_UNDERSCORE_SEPARATED and replaced.isdigit():
-                    return self._add(TokenType.NUMBER, number_text + replaced)
-                if self.dialect.IDENTIFIERS_CAN_START_WITH_DIGIT:
-                    return self._add(TokenType.VAR)
+                elif self.dialect.IDENTIFIERS_CAN_START_WITH_DIGIT:
+                    return self._add(TokenType.VAR)

                 self._advance(-len(literal))
                 return self._add(TokenType.NUMBER, number_text)
@@ -1495,14 +1521,23 @@ class Tokenizer(metaclass=_Tokenizer):
                     self._advance(2)
                     text += unescaped_sequence
                     continue
+
+            is_valid_custom_escape = (
+                self.ESCAPE_FOLLOW_CHARS
+                and self._char == "\\"
+                and self._peek not in self.ESCAPE_FOLLOW_CHARS
+            )
+
             if (
                 (self.STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS or not raw_string)
                 and self._char in escapes
-                and (self._peek == delimiter or self._peek in escapes)
+                and (self._peek == delimiter or self._peek in escapes or is_valid_custom_escape)
                 and (self._char not in self._QUOTES or self._char == self._peek)
             ):
                 if self._peek == delimiter:
                     text += self._peek
+                elif is_valid_custom_escape and self._char != self._peek:
+                    text += self._peek
                 else:
                     text += self._char + self._peek
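Note: the number-scanner change means underscores are now consumed while the numeric literal itself is scanned, instead of being patched back in afterwards. A rough check of the behavior, assuming DuckDB keeps `NUMBERS_CAN_BE_UNDERSCORE_SEPARATED` enabled:

import sqlglot
from sqlglot.tokens import TokenType

tokens = sqlglot.tokenize("SELECT 1_000_000", read="duckdb")

# The underscore-separated literal should surface as a single NUMBER token
assert sum(1 for tok in tokens if tok.token_type == TokenType.NUMBER) == 1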
sqlglot/transforms.py
CHANGED
@@ -4,7 +4,7 @@ import typing as t

 from sqlglot import expressions as exp
 from sqlglot.errors import UnsupportedError
-from sqlglot.helper import find_new_name, name_sequence
+from sqlglot.helper import find_new_name, name_sequence, seq_get


 if t.TYPE_CHECKING:
@@ -14,6 +14,7 @@ if t.TYPE_CHECKING:

 def preprocess(
     transforms: t.List[t.Callable[[exp.Expression], exp.Expression]],
+    generator: t.Optional[t.Callable[[Generator, exp.Expression], str]] = None,
 ) -> t.Callable[[Generator, exp.Expression], str]:
     """
     Creates a new transform by chaining a sequence of transformations and converts the resulting
@@ -37,6 +38,9 @@ def preprocess(
         except UnsupportedError as unsupported_error:
             self.unsupported(str(unsupported_error))

+        if generator:
+            return generator(self, expression)
+
         _sql_handler = getattr(self, expression.key + "_sql", None)
         if _sql_handler:
             return _sql_handler(expression)
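Note: `preprocess` now accepts an optional `generator` callable that, once all transforms have run, takes precedence over the default `{key}_sql` handler lookup. A hedged sketch of how a dialect might use it — the transform and generator functions below are hypothetical placeholders, not sqlglot APIs:

from sqlglot import exp
from sqlglot.transforms import preprocess

def _my_transform(expression: exp.Expression) -> exp.Expression:
    # hypothetical no-op transform
    return expression

def _my_generator(self, expression: exp.Expression) -> str:
    # hypothetical: bypass the {key}_sql handler and render via the fallback
    return self.function_fallback_sql(expression)

# Registered in a Generator's TRANSFORMS mapping, e.g.:
# TRANSFORMS = {exp.Anonymous: preprocess([_my_transform], generator=_my_generator)}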
@@ -110,10 +114,10 @@ def unnest_generate_date_array_using_recursive_cte(expression: exp.Expression) -> exp.Expression:
         count += 1

     if recursive_ctes:
-        with_expression = expression.args.get("with") or exp.With()
+        with_expression = expression.args.get("with_") or exp.With()
         with_expression.set("recursive", True)
         with_expression.set("expressions", [*recursive_ctes, *with_expression.expressions])
-        expression.set("with", with_expression)
+        expression.set("with_", with_expression)

     return expression
@@ -310,14 +314,14 @@ def unnest_to_explode(
         return exp.Inline if has_multi_expr else exp.Explode

     if isinstance(expression, exp.Select):
-        from_ = expression.args.get("from")
+        from_ = expression.args.get("from_")

         if from_ and isinstance(from_.this, exp.Unnest):
             unnest = from_.this
             alias = unnest.args.get("alias")
             exprs = unnest.expressions
             has_multi_expr = len(exprs) > 1
-            this, *expressions = _unnest_zip_exprs(unnest, exprs, has_multi_expr)
+            this, *_ = _unnest_zip_exprs(unnest, exprs, has_multi_expr)

             columns = alias.columns if alias else []
             offset = unnest.args.get("offset")
@@ -328,10 +332,7 @@ def unnest_to_explode(

             unnest.replace(
                 exp.Table(
-                    this=_udtf_type(unnest, has_multi_expr)(
-                        this=this,
-                        expressions=expressions,
-                    ),
+                    this=_udtf_type(unnest, has_multi_expr)(this=this),
                     alias=exp.TableAlias(this=alias.this, columns=columns) if alias else None,
                 )
             )
@@ -494,7 +495,7 @@ def explode_projection_to_unnest(
     expression.set("expressions", expressions)

     if not arrays:
-        if expression.args.get("from"):
+        if expression.args.get("from_"):
             expression.join(series, copy=False, join_type="CROSS")
         else:
             expression.from_(series, copy=False)
@@ -638,7 +639,7 @@ def eliminate_full_outer_join(expression: exp.Expression) -> exp.Expression:
     expression.set("limit", None)
     index, full_outer_join = full_outer_joins[0]

-    tables = (expression.args["from"].alias_or_name, full_outer_join.alias_or_name)
+    tables = (expression.args["from_"].alias_or_name, full_outer_join.alias_or_name)
     join_conditions = full_outer_join.args.get("on") or exp.and_(
         *[
             exp.column(col, tables[0]).eq(exp.column(col, tables[1]))
@@ -647,10 +648,12 @@ def eliminate_full_outer_join(expression: exp.Expression) -> exp.Expression:
     )

     full_outer_join.set("side", "left")
-    anti_join_clause = exp.select("1").from_(expression.args["from"]).where(join_conditions)
+    anti_join_clause = (
+        exp.select("1").from_(expression.args["from_"]).where(join_conditions)
+    )
     expression_copy.args["joins"][index].set("side", "right")
     expression_copy = expression_copy.where(exp.Exists(this=anti_join_clause).not_())
-    expression_copy.set("with", None)  # remove CTEs from RIGHT side
+    expression_copy.set("with_", None)  # remove CTEs from RIGHT side
     expression.set("order", None)  # remove order by from LEFT side

     return exp.union(expression, expression_copy, copy=False, distinct=False)
@@ -670,14 +673,14 @@ def move_ctes_to_top_level(expression: E) -> E:

     TODO: handle name clashes whilst moving CTEs (it can get quite tricky & costly).
     """
-    top_level_with = expression.args.get("with")
+    top_level_with = expression.args.get("with_")
     for inner_with in expression.find_all(exp.With):
         if inner_with.parent is expression:
             continue

         if not top_level_with:
             top_level_with = inner_with.pop()
-            expression.set("with", top_level_with)
+            expression.set("with_", top_level_with)
         else:
             if inner_with.recursive:
                 top_level_with.set("recursive", True)
@@ -874,13 +877,12 @@ def eliminate_join_marks(expression: exp.Expression) -> exp.Expression:
         where = query.args.get("where")
         joins = query.args.get("joins", [])

-        # knockout: we do not support left correlation (see point 2)
-        assert not scope.is_correlated_subquery, "Correlated queries are not supported"
-
-        # nothing to do - we check it here after knockout above
         if not where or not any(c.args.get("join_mark") for c in where.find_all(exp.Column)):
             continue

+        # knockout: we do not support left correlation (see point 2)
+        assert not scope.is_correlated_subquery, "Correlated queries are not supported"
+
         # make sure we have AND of ORs to have clear join terms
         where = normalize(where.this)
         assert normalized(where), "Cannot normalize JOIN predicates"
@@ -904,7 +906,7 @@ def eliminate_join_marks(expression: exp.Expression) -> exp.Expression:

         old_joins = {join.alias_or_name: join for join in joins}
         new_joins = {}
-        query_from = query.args["from"]
+        query_from = query.args["from_"]

         for table, predicates in joins_ons.items():
             join_what = old_joins.get(table, query_from).this.copy()
@@ -930,11 +932,11 @@ def eliminate_join_marks(expression: exp.Expression) -> exp.Expression:
             ), "Cannot determine which table to use in the new FROM clause"

             new_from_name = list(only_old_joins)[0]
-            query.set("from", exp.From(this=old_joins[new_from_name].this))
+            query.set("from_", exp.From(this=old_joins[new_from_name].this))

         if new_joins:
             for n, j in old_joins.items():  # preserve any other joins
-                if n not in new_joins and n != query.args["from"].name:
+                if n not in new_joins and n != query.args["from_"].name:
                     if not j.kind:
                         j.set("kind", "CROSS")
                     new_joins[n] = j
@@ -999,3 +1001,56 @@ def eliminate_window_clause(expression: exp.Expression) -> exp.Expression:
         _inline_inherited_window(window)

     return expression
+
+
+def inherit_struct_field_names(expression: exp.Expression) -> exp.Expression:
+    """
+    Inherit field names from the first struct in an array.
+
+    BigQuery supports implicitly inheriting names from the first STRUCT in an array:
+
+    Example:
+        ARRAY[
+            STRUCT('Alice' AS name, 85 AS score),  -- defines names
+            STRUCT('Bob', 92),                     -- inherits names
+            STRUCT('Diana', 95)                    -- inherits names
+        ]
+
+    This transformation makes the field names explicit on all structs by adding
+    PropertyEQ nodes, in order to facilitate transpilation to other dialects.
+
+    Args:
+        expression: The expression tree to transform
+
+    Returns:
+        The modified expression with field names inherited in all structs
+    """
+    if (
+        isinstance(expression, exp.Array)
+        and expression.args.get("struct_name_inheritance")
+        and isinstance(first_item := seq_get(expression.expressions, 0), exp.Struct)
+        and all(isinstance(fld, exp.PropertyEQ) for fld in first_item.expressions)
+    ):
+        field_names = [fld.this for fld in first_item.expressions]
+
+        # Apply field names to subsequent structs that don't have them
+        for struct in expression.expressions[1:]:
+            if not isinstance(struct, exp.Struct) or len(struct.expressions) != len(field_names):
+                continue
+
+            # Convert unnamed expressions to PropertyEQ with inherited names
+            new_expressions = []
+            for i, expr in enumerate(struct.expressions):
+                if not isinstance(expr, exp.PropertyEQ):
+                    # Create PropertyEQ: field_name := value
+                    new_expressions.append(
+                        exp.PropertyEQ(
+                            this=exp.Identifier(this=field_names[i].copy()), expression=expr
+                        )
+                    )
+                else:
+                    new_expressions.append(expr)
+
+            struct.set("expressions", new_expressions)
+
+    return expression
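Note: to see the new transform in action, it can be applied over a BigQuery-style array of structs. A hedged sketch — this assumes the BigQuery parser sets the `struct_name_inheritance` arg on such arrays, which is what the guard at the top of the function checks:

import sqlglot
from sqlglot.transforms import inherit_struct_field_names

ast = sqlglot.parse_one(
    "SELECT [STRUCT('Alice' AS name, 85 AS score), STRUCT('Bob', 92)]",
    read="bigquery",
)

# Walk the tree; each qualifying Array lets later structs inherit the
# field names declared by its first struct
ast = ast.transform(inherit_struct_field_names)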
|