sqlglot 27.29.0__py3-none-any.whl → 28.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. sqlglot/__main__.py +6 -4
  2. sqlglot/_version.py +2 -2
  3. sqlglot/dialects/bigquery.py +116 -295
  4. sqlglot/dialects/clickhouse.py +67 -2
  5. sqlglot/dialects/databricks.py +38 -1
  6. sqlglot/dialects/dialect.py +327 -286
  7. sqlglot/dialects/dremio.py +4 -1
  8. sqlglot/dialects/duckdb.py +718 -22
  9. sqlglot/dialects/exasol.py +243 -10
  10. sqlglot/dialects/hive.py +8 -8
  11. sqlglot/dialects/mysql.py +11 -2
  12. sqlglot/dialects/oracle.py +29 -0
  13. sqlglot/dialects/postgres.py +46 -24
  14. sqlglot/dialects/presto.py +47 -16
  15. sqlglot/dialects/redshift.py +16 -0
  16. sqlglot/dialects/risingwave.py +3 -0
  17. sqlglot/dialects/singlestore.py +12 -3
  18. sqlglot/dialects/snowflake.py +199 -271
  19. sqlglot/dialects/spark.py +2 -2
  20. sqlglot/dialects/spark2.py +11 -48
  21. sqlglot/dialects/sqlite.py +9 -0
  22. sqlglot/dialects/teradata.py +5 -8
  23. sqlglot/dialects/trino.py +6 -0
  24. sqlglot/dialects/tsql.py +61 -25
  25. sqlglot/diff.py +4 -2
  26. sqlglot/errors.py +69 -0
  27. sqlglot/expressions.py +484 -84
  28. sqlglot/generator.py +143 -41
  29. sqlglot/helper.py +2 -2
  30. sqlglot/optimizer/annotate_types.py +247 -140
  31. sqlglot/optimizer/canonicalize.py +6 -1
  32. sqlglot/optimizer/eliminate_joins.py +1 -1
  33. sqlglot/optimizer/eliminate_subqueries.py +2 -2
  34. sqlglot/optimizer/merge_subqueries.py +5 -5
  35. sqlglot/optimizer/normalize.py +20 -13
  36. sqlglot/optimizer/normalize_identifiers.py +17 -3
  37. sqlglot/optimizer/optimizer.py +4 -0
  38. sqlglot/optimizer/pushdown_predicates.py +1 -1
  39. sqlglot/optimizer/qualify.py +14 -6
  40. sqlglot/optimizer/qualify_columns.py +113 -352
  41. sqlglot/optimizer/qualify_tables.py +112 -70
  42. sqlglot/optimizer/resolver.py +374 -0
  43. sqlglot/optimizer/scope.py +27 -16
  44. sqlglot/optimizer/simplify.py +1074 -964
  45. sqlglot/optimizer/unnest_subqueries.py +12 -2
  46. sqlglot/parser.py +276 -160
  47. sqlglot/planner.py +2 -2
  48. sqlglot/schema.py +15 -4
  49. sqlglot/tokens.py +42 -7
  50. sqlglot/transforms.py +77 -22
  51. sqlglot/typing/__init__.py +316 -0
  52. sqlglot/typing/bigquery.py +376 -0
  53. sqlglot/typing/hive.py +12 -0
  54. sqlglot/typing/presto.py +24 -0
  55. sqlglot/typing/snowflake.py +505 -0
  56. sqlglot/typing/spark2.py +58 -0
  57. sqlglot/typing/tsql.py +9 -0
  58. {sqlglot-27.29.0.dist-info → sqlglot-28.4.0.dist-info}/METADATA +2 -2
  59. sqlglot-28.4.0.dist-info/RECORD +92 -0
  60. sqlglot-27.29.0.dist-info/RECORD +0 -84
  61. {sqlglot-27.29.0.dist-info → sqlglot-28.4.0.dist-info}/WHEEL +0 -0
  62. {sqlglot-27.29.0.dist-info → sqlglot-28.4.0.dist-info}/licenses/LICENSE +0 -0
  63. {sqlglot-27.29.0.dist-info → sqlglot-28.4.0.dist-info}/top_level.txt +0 -0
sqlglot/planner.py CHANGED
@@ -94,7 +94,7 @@ class Step:
94
94
  """
95
95
  ctes = ctes or {}
96
96
  expression = expression.unnest()
97
- with_ = expression.args.get("with")
97
+ with_ = expression.args.get("with_")
98
98
 
99
99
  # CTEs break the mold of scope and introduce themselves to all in the context.
100
100
  if with_:
@@ -104,7 +104,7 @@ class Step:
104
104
  step.name = cte.alias
105
105
  ctes[step.name] = step # type: ignore
106
106
 
107
- from_ = expression.args.get("from")
107
+ from_ = expression.args.get("from_")
108
108
 
109
109
  if isinstance(expression, exp.Select) and from_:
110
110
  step = Scan.from_expression(from_.this, ctes)
sqlglot/schema.py CHANGED
@@ -18,7 +18,13 @@ if t.TYPE_CHECKING:
18
18
  class Schema(abc.ABC):
19
19
  """Abstract base class for database schemas"""
20
20
 
21
- dialect: DialectType
21
+ @property
22
+ def dialect(self) -> t.Optional[Dialect]:
23
+ """
24
+ Returns None by default. Subclasses that require dialect-specific
25
+ behavior should override this property.
26
+ """
27
+ return None
22
28
 
23
29
  @abc.abstractmethod
24
30
  def add_table(
@@ -222,15 +228,20 @@ class MappingSchema(AbstractMappingSchema, Schema):
222
228
  dialect: DialectType = None,
223
229
  normalize: bool = True,
224
230
  ) -> None:
225
- self.dialect = dialect
226
231
  self.visible = {} if visible is None else visible
227
232
  self.normalize = normalize
233
+ self._dialect = Dialect.get_or_raise(dialect)
228
234
  self._type_mapping_cache: t.Dict[str, exp.DataType] = {}
229
235
  self._depth = 0
230
236
  schema = {} if schema is None else schema
231
237
 
232
238
  super().__init__(self._normalize(schema) if self.normalize else schema)
233
239
 
240
+ @property
241
+ def dialect(self) -> Dialect:
242
+ """Returns the dialect for this mapping schema."""
243
+ return self._dialect
244
+
234
245
  @classmethod
235
246
  def from_mapping_schema(cls, mapping_schema: MappingSchema) -> MappingSchema:
236
247
  return MappingSchema(
@@ -455,8 +466,8 @@ class MappingSchema(AbstractMappingSchema, Schema):
455
466
  The resulting expression type.
456
467
  """
457
468
  if schema_type not in self._type_mapping_cache:
458
- dialect = dialect or self.dialect
459
- udt = Dialect.get_or_raise(dialect).SUPPORTS_USER_DEFINED_TYPES
469
+ dialect = Dialect.get_or_raise(dialect) if dialect else self.dialect
470
+ udt = dialect.SUPPORTS_USER_DEFINED_TYPES
460
471
 
461
472
  try:
462
473
  expression = exp.DataType.build(schema_type, dialect=dialect, udt=udt)
sqlglot/tokens.py CHANGED
@@ -41,6 +41,7 @@ class TokenType(AutoName):
41
41
  DCOLON = auto()
42
42
  DCOLONDOLLAR = auto()
43
43
  DCOLONPERCENT = auto()
44
+ DCOLONQMARK = auto()
44
45
  DQMARK = auto()
45
46
  SEMICOLON = auto()
46
47
  STAR = auto()
@@ -82,7 +83,10 @@ class TokenType(AutoName):
82
83
  PARAMETER = auto()
83
84
  SESSION = auto()
84
85
  SESSION_PARAMETER = auto()
86
+ SESSION_USER = auto()
85
87
  DAMP = auto()
88
+ AMP_LT = auto()
89
+ AMP_GT = auto()
86
90
  XOR = auto()
87
91
  DSTAR = auto()
88
92
  QMARK_AMP = auto()
@@ -131,6 +135,7 @@ class TokenType(AutoName):
131
135
  UINT = auto()
132
136
  BIGINT = auto()
133
137
  UBIGINT = auto()
138
+ BIGNUM = auto() # unlimited precision int
134
139
  INT128 = auto()
135
140
  UINT128 = auto()
136
141
  INT256 = auto()
@@ -143,6 +148,7 @@ class TokenType(AutoName):
143
148
  DECIMAL64 = auto()
144
149
  DECIMAL128 = auto()
145
150
  DECIMAL256 = auto()
151
+ DECFLOAT = auto()
146
152
  UDECIMAL = auto()
147
153
  BIGDECIMAL = auto()
148
154
  CHAR = auto()
@@ -165,6 +171,7 @@ class TokenType(AutoName):
165
171
  JSONB = auto()
166
172
  TIME = auto()
167
173
  TIMETZ = auto()
174
+ TIME_NS = auto()
168
175
  TIMESTAMP = auto()
169
176
  TIMESTAMPTZ = auto()
170
177
  TIMESTAMPLTZ = auto()
@@ -198,6 +205,8 @@ class TokenType(AutoName):
198
205
  POINT = auto()
199
206
  RING = auto()
200
207
  LINESTRING = auto()
208
+ LOCALTIME = auto()
209
+ LOCALTIMESTAMP = auto()
201
210
  MULTILINESTRING = auto()
202
211
  POLYGON = auto()
203
212
  MULTIPOLYGON = auto()
@@ -270,6 +279,8 @@ class TokenType(AutoName):
270
279
  CURRENT_TIME = auto()
271
280
  CURRENT_TIMESTAMP = auto()
272
281
  CURRENT_USER = auto()
282
+ CURRENT_ROLE = auto()
283
+ CURRENT_CATALOG = auto()
273
284
  DECLARE = auto()
274
285
  DEFAULT = auto()
275
286
  DELETE = auto()
@@ -289,6 +300,7 @@ class TokenType(AutoName):
289
300
  EXISTS = auto()
290
301
  FALSE = auto()
291
302
  FETCH = auto()
303
+ FILE = auto()
292
304
  FILE_FORMAT = auto()
293
305
  FILTER = auto()
294
306
  FINAL = auto()
@@ -312,6 +324,7 @@ class TokenType(AutoName):
312
324
  ILIKE = auto()
313
325
  IN = auto()
314
326
  INDEX = auto()
327
+ INDEXED_BY = auto()
315
328
  INNER = auto()
316
329
  INSERT = auto()
317
330
  INSTALL = auto()
@@ -336,6 +349,7 @@ class TokenType(AutoName):
336
349
  LOAD = auto()
337
350
  LOCK = auto()
338
351
  MAP = auto()
352
+ MATCH = auto()
339
353
  MATCH_CONDITION = auto()
340
354
  MATCH_RECOGNIZE = auto()
341
355
  MEMBER_OF = auto()
@@ -375,6 +389,7 @@ class TokenType(AutoName):
375
389
  PUT = auto()
376
390
  QUALIFY = auto()
377
391
  QUOTE = auto()
392
+ QDCOLON = auto()
378
393
  RANGE = auto()
379
394
  RECURSIVE = auto()
380
395
  REFRESH = auto()
@@ -538,6 +553,7 @@ class _Tokenizer(type):
538
553
  }
539
554
 
540
555
  klass._STRING_ESCAPES = set(klass.STRING_ESCAPES)
556
+ klass._ESCAPE_FOLLOW_CHARS = set(klass.ESCAPE_FOLLOW_CHARS)
541
557
  klass._IDENTIFIER_ESCAPES = set(klass.IDENTIFIER_ESCAPES)
542
558
  klass._COMMENTS = {
543
559
  **dict(
@@ -589,6 +605,7 @@ class _Tokenizer(type):
589
605
  tokens_preceding_hint={
590
606
  _TOKEN_TYPE_TO_INDEX[v] for v in klass.TOKENS_PRECEDING_HINT
591
607
  },
608
+ escape_follow_chars=klass._ESCAPE_FOLLOW_CHARS,
592
609
  )
593
610
  token_types = RsTokenTypeSettings(
594
611
  bit_string=_TOKEN_TYPE_TO_INDEX[TokenType.BIT_STRING],
@@ -658,6 +675,7 @@ class Tokenizer(metaclass=_Tokenizer):
658
675
  QUOTES: t.List[t.Tuple[str, str] | str] = ["'"]
659
676
  STRING_ESCAPES = ["'"]
660
677
  VAR_SINGLE_TOKENS: t.Set[str] = set()
678
+ ESCAPE_FOLLOW_CHARS: t.List[str] = []
661
679
 
662
680
  # The strings in this list can always be used as escapes, regardless of the surrounding
663
681
  # identifier delimiters. By default, the closing delimiter is assumed to also act as an
@@ -688,6 +706,7 @@ class Tokenizer(metaclass=_Tokenizer):
688
706
  _STRING_ESCAPES: t.Set[str] = set()
689
707
  _KEYWORD_TRIE: t.Dict = {}
690
708
  _RS_TOKENIZER: t.Optional[t.Any] = None
709
+ _ESCAPE_FOLLOW_CHARS: t.Set[str] = set()
691
710
 
692
711
  KEYWORDS: t.Dict[str, TokenType] = {
693
712
  **{f"{{%{postfix}": TokenType.BLOCK_START for postfix in ("", "+", "-")},
@@ -697,6 +716,7 @@ class Tokenizer(metaclass=_Tokenizer):
697
716
  HINT_START: TokenType.HINT,
698
717
  "==": TokenType.EQ,
699
718
  "::": TokenType.DCOLON,
719
+ "?::": TokenType.QDCOLON,
700
720
  "||": TokenType.DPIPE,
701
721
  "|>": TokenType.PIPE_GT,
702
722
  ">=": TokenType.GTE,
@@ -747,6 +767,7 @@ class Tokenizer(metaclass=_Tokenizer):
747
767
  "CURRENT_TIME": TokenType.CURRENT_TIME,
748
768
  "CURRENT_TIMESTAMP": TokenType.CURRENT_TIMESTAMP,
749
769
  "CURRENT_USER": TokenType.CURRENT_USER,
770
+ "CURRENT_CATALOG": TokenType.CURRENT_CATALOG,
750
771
  "DATABASE": TokenType.DATABASE,
751
772
  "DEFAULT": TokenType.DEFAULT,
752
773
  "DELETE": TokenType.DELETE,
@@ -766,6 +787,7 @@ class Tokenizer(metaclass=_Tokenizer):
766
787
  "FALSE": TokenType.FALSE,
767
788
  "FETCH": TokenType.FETCH,
768
789
  "FILTER": TokenType.FILTER,
790
+ "FILE": TokenType.FILE,
769
791
  "FIRST": TokenType.FIRST,
770
792
  "FULL": TokenType.FULL,
771
793
  "FUNCTION": TokenType.FUNCTION,
@@ -798,6 +820,8 @@ class Tokenizer(metaclass=_Tokenizer):
798
820
  "LIKE": TokenType.LIKE,
799
821
  "LIMIT": TokenType.LIMIT,
800
822
  "LOAD": TokenType.LOAD,
823
+ "LOCALTIME": TokenType.LOCALTIME,
824
+ "LOCALTIMESTAMP": TokenType.LOCALTIMESTAMP,
801
825
  "LOCK": TokenType.LOCK,
802
826
  "MERGE": TokenType.MERGE,
803
827
  "NAMESPACE": TokenType.NAMESPACE,
@@ -844,6 +868,7 @@ class Tokenizer(metaclass=_Tokenizer):
844
868
  "SELECT": TokenType.SELECT,
845
869
  "SEMI": TokenType.SEMI,
846
870
  "SESSION": TokenType.SESSION,
871
+ "SESSION_USER": TokenType.SESSION_USER,
847
872
  "SET": TokenType.SET,
848
873
  "SETTINGS": TokenType.SETTINGS,
849
874
  "SHOW": TokenType.SHOW,
@@ -908,8 +933,10 @@ class Tokenizer(metaclass=_Tokenizer):
908
933
  "DECIMAL64": TokenType.DECIMAL64,
909
934
  "DECIMAL128": TokenType.DECIMAL128,
910
935
  "DECIMAL256": TokenType.DECIMAL256,
936
+ "DECFLOAT": TokenType.DECFLOAT,
911
937
  "BIGDECIMAL": TokenType.BIGDECIMAL,
912
938
  "BIGNUMERIC": TokenType.BIGDECIMAL,
939
+ "BIGNUM": TokenType.BIGNUM,
913
940
  "LIST": TokenType.LIST,
914
941
  "MAP": TokenType.MAP,
915
942
  "NULLABLE": TokenType.NULLABLE,
@@ -951,6 +978,7 @@ class Tokenizer(metaclass=_Tokenizer):
951
978
  "VARBINARY": TokenType.VARBINARY,
952
979
  "TIME": TokenType.TIME,
953
980
  "TIMETZ": TokenType.TIMETZ,
981
+ "TIME_NS": TokenType.TIME_NS,
954
982
  "TIMESTAMP": TokenType.TIMESTAMP,
955
983
  "TIMESTAMPTZ": TokenType.TIMESTAMPTZ,
956
984
  "TIMESTAMPLTZ": TokenType.TIMESTAMPLTZ,
@@ -1340,6 +1368,8 @@ class Tokenizer(metaclass=_Tokenizer):
1340
1368
  elif self._peek.upper() == "E" and not scientific:
1341
1369
  scientific += 1
1342
1370
  self._advance()
1371
+ elif self._peek == "_" and self.dialect.NUMBERS_CAN_BE_UNDERSCORE_SEPARATED:
1372
+ self._advance()
1343
1373
  elif self._peek.isidentifier():
1344
1374
  number_text = self._text
1345
1375
  literal = ""
@@ -1354,12 +1384,8 @@ class Tokenizer(metaclass=_Tokenizer):
1354
1384
  self._add(TokenType.NUMBER, number_text)
1355
1385
  self._add(TokenType.DCOLON, "::")
1356
1386
  return self._add(token_type, literal)
1357
- else:
1358
- replaced = literal.replace("_", "")
1359
- if self.dialect.NUMBERS_CAN_BE_UNDERSCORE_SEPARATED and replaced.isdigit():
1360
- return self._add(TokenType.NUMBER, number_text + replaced)
1361
- if self.dialect.IDENTIFIERS_CAN_START_WITH_DIGIT:
1362
- return self._add(TokenType.VAR)
1387
+ elif self.dialect.IDENTIFIERS_CAN_START_WITH_DIGIT:
1388
+ return self._add(TokenType.VAR)
1363
1389
 
1364
1390
  self._advance(-len(literal))
1365
1391
  return self._add(TokenType.NUMBER, number_text)
@@ -1495,14 +1521,23 @@ class Tokenizer(metaclass=_Tokenizer):
1495
1521
  self._advance(2)
1496
1522
  text += unescaped_sequence
1497
1523
  continue
1524
+
1525
+ is_valid_custom_escape = (
1526
+ self.ESCAPE_FOLLOW_CHARS
1527
+ and self._char == "\\"
1528
+ and self._peek not in self.ESCAPE_FOLLOW_CHARS
1529
+ )
1530
+
1498
1531
  if (
1499
1532
  (self.STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS or not raw_string)
1500
1533
  and self._char in escapes
1501
- and (self._peek == delimiter or self._peek in escapes)
1534
+ and (self._peek == delimiter or self._peek in escapes or is_valid_custom_escape)
1502
1535
  and (self._char not in self._QUOTES or self._char == self._peek)
1503
1536
  ):
1504
1537
  if self._peek == delimiter:
1505
1538
  text += self._peek
1539
+ elif is_valid_custom_escape and self._char != self._peek:
1540
+ text += self._peek
1506
1541
  else:
1507
1542
  text += self._char + self._peek
1508
1543
 
sqlglot/transforms.py CHANGED
@@ -4,7 +4,7 @@ import typing as t
4
4
 
5
5
  from sqlglot import expressions as exp
6
6
  from sqlglot.errors import UnsupportedError
7
- from sqlglot.helper import find_new_name, name_sequence
7
+ from sqlglot.helper import find_new_name, name_sequence, seq_get
8
8
 
9
9
 
10
10
  if t.TYPE_CHECKING:
@@ -14,6 +14,7 @@ if t.TYPE_CHECKING:
14
14
 
15
15
  def preprocess(
16
16
  transforms: t.List[t.Callable[[exp.Expression], exp.Expression]],
17
+ generator: t.Optional[t.Callable[[Generator, exp.Expression], str]] = None,
17
18
  ) -> t.Callable[[Generator, exp.Expression], str]:
18
19
  """
19
20
  Creates a new transform by chaining a sequence of transformations and converts the resulting
@@ -37,6 +38,9 @@ def preprocess(
37
38
  except UnsupportedError as unsupported_error:
38
39
  self.unsupported(str(unsupported_error))
39
40
 
41
+ if generator:
42
+ return generator(self, expression)
43
+
40
44
  _sql_handler = getattr(self, expression.key + "_sql", None)
41
45
  if _sql_handler:
42
46
  return _sql_handler(expression)
@@ -110,10 +114,10 @@ def unnest_generate_date_array_using_recursive_cte(expression: exp.Expression) -
110
114
  count += 1
111
115
 
112
116
  if recursive_ctes:
113
- with_expression = expression.args.get("with") or exp.With()
117
+ with_expression = expression.args.get("with_") or exp.With()
114
118
  with_expression.set("recursive", True)
115
119
  with_expression.set("expressions", [*recursive_ctes, *with_expression.expressions])
116
- expression.set("with", with_expression)
120
+ expression.set("with_", with_expression)
117
121
 
118
122
  return expression
119
123
 
@@ -310,14 +314,14 @@ def unnest_to_explode(
310
314
  return exp.Inline if has_multi_expr else exp.Explode
311
315
 
312
316
  if isinstance(expression, exp.Select):
313
- from_ = expression.args.get("from")
317
+ from_ = expression.args.get("from_")
314
318
 
315
319
  if from_ and isinstance(from_.this, exp.Unnest):
316
320
  unnest = from_.this
317
321
  alias = unnest.args.get("alias")
318
322
  exprs = unnest.expressions
319
323
  has_multi_expr = len(exprs) > 1
320
- this, *expressions = _unnest_zip_exprs(unnest, exprs, has_multi_expr)
324
+ this, *_ = _unnest_zip_exprs(unnest, exprs, has_multi_expr)
321
325
 
322
326
  columns = alias.columns if alias else []
323
327
  offset = unnest.args.get("offset")
@@ -328,10 +332,7 @@ def unnest_to_explode(
328
332
 
329
333
  unnest.replace(
330
334
  exp.Table(
331
- this=_udtf_type(unnest, has_multi_expr)(
332
- this=this,
333
- expressions=expressions,
334
- ),
335
+ this=_udtf_type(unnest, has_multi_expr)(this=this),
335
336
  alias=exp.TableAlias(this=alias.this, columns=columns) if alias else None,
336
337
  )
337
338
  )
@@ -494,7 +495,7 @@ def explode_projection_to_unnest(
494
495
  expression.set("expressions", expressions)
495
496
 
496
497
  if not arrays:
497
- if expression.args.get("from"):
498
+ if expression.args.get("from_"):
498
499
  expression.join(series, copy=False, join_type="CROSS")
499
500
  else:
500
501
  expression.from_(series, copy=False)
@@ -638,7 +639,7 @@ def eliminate_full_outer_join(expression: exp.Expression) -> exp.Expression:
638
639
  expression.set("limit", None)
639
640
  index, full_outer_join = full_outer_joins[0]
640
641
 
641
- tables = (expression.args["from"].alias_or_name, full_outer_join.alias_or_name)
642
+ tables = (expression.args["from_"].alias_or_name, full_outer_join.alias_or_name)
642
643
  join_conditions = full_outer_join.args.get("on") or exp.and_(
643
644
  *[
644
645
  exp.column(col, tables[0]).eq(exp.column(col, tables[1]))
@@ -647,10 +648,12 @@ def eliminate_full_outer_join(expression: exp.Expression) -> exp.Expression:
647
648
  )
648
649
 
649
650
  full_outer_join.set("side", "left")
650
- anti_join_clause = exp.select("1").from_(expression.args["from"]).where(join_conditions)
651
+ anti_join_clause = (
652
+ exp.select("1").from_(expression.args["from_"]).where(join_conditions)
653
+ )
651
654
  expression_copy.args["joins"][index].set("side", "right")
652
655
  expression_copy = expression_copy.where(exp.Exists(this=anti_join_clause).not_())
653
- expression_copy.set("with", None) # remove CTEs from RIGHT side
656
+ expression_copy.set("with_", None) # remove CTEs from RIGHT side
654
657
  expression.set("order", None) # remove order by from LEFT side
655
658
 
656
659
  return exp.union(expression, expression_copy, copy=False, distinct=False)
@@ -670,14 +673,14 @@ def move_ctes_to_top_level(expression: E) -> E:
670
673
 
671
674
  TODO: handle name clashes whilst moving CTEs (it can get quite tricky & costly).
672
675
  """
673
- top_level_with = expression.args.get("with")
676
+ top_level_with = expression.args.get("with_")
674
677
  for inner_with in expression.find_all(exp.With):
675
678
  if inner_with.parent is expression:
676
679
  continue
677
680
 
678
681
  if not top_level_with:
679
682
  top_level_with = inner_with.pop()
680
- expression.set("with", top_level_with)
683
+ expression.set("with_", top_level_with)
681
684
  else:
682
685
  if inner_with.recursive:
683
686
  top_level_with.set("recursive", True)
@@ -874,13 +877,12 @@ def eliminate_join_marks(expression: exp.Expression) -> exp.Expression:
874
877
  where = query.args.get("where")
875
878
  joins = query.args.get("joins", [])
876
879
 
877
- # knockout: we do not support left correlation (see point 2)
878
- assert not scope.is_correlated_subquery, "Correlated queries are not supported"
879
-
880
- # nothing to do - we check it here after knockout above
881
880
  if not where or not any(c.args.get("join_mark") for c in where.find_all(exp.Column)):
882
881
  continue
883
882
 
883
+ # knockout: we do not support left correlation (see point 2)
884
+ assert not scope.is_correlated_subquery, "Correlated queries are not supported"
885
+
884
886
  # make sure we have AND of ORs to have clear join terms
885
887
  where = normalize(where.this)
886
888
  assert normalized(where), "Cannot normalize JOIN predicates"
@@ -904,7 +906,7 @@ def eliminate_join_marks(expression: exp.Expression) -> exp.Expression:
904
906
 
905
907
  old_joins = {join.alias_or_name: join for join in joins}
906
908
  new_joins = {}
907
- query_from = query.args["from"]
909
+ query_from = query.args["from_"]
908
910
 
909
911
  for table, predicates in joins_ons.items():
910
912
  join_what = old_joins.get(table, query_from).this.copy()
@@ -930,11 +932,11 @@ def eliminate_join_marks(expression: exp.Expression) -> exp.Expression:
930
932
  ), "Cannot determine which table to use in the new FROM clause"
931
933
 
932
934
  new_from_name = list(only_old_joins)[0]
933
- query.set("from", exp.From(this=old_joins[new_from_name].this))
935
+ query.set("from_", exp.From(this=old_joins[new_from_name].this))
934
936
 
935
937
  if new_joins:
936
938
  for n, j in old_joins.items(): # preserve any other joins
937
- if n not in new_joins and n != query.args["from"].name:
939
+ if n not in new_joins and n != query.args["from_"].name:
938
940
  if not j.kind:
939
941
  j.set("kind", "CROSS")
940
942
  new_joins[n] = j
@@ -999,3 +1001,56 @@ def eliminate_window_clause(expression: exp.Expression) -> exp.Expression:
999
1001
  _inline_inherited_window(window)
1000
1002
 
1001
1003
  return expression
1004
+
1005
+
1006
+ def inherit_struct_field_names(expression: exp.Expression) -> exp.Expression:
1007
+ """
1008
+ Inherit field names from the first struct in an array.
1009
+
1010
+ BigQuery supports implicitly inheriting names from the first STRUCT in an array:
1011
+
1012
+ Example:
1013
+ ARRAY[
1014
+ STRUCT('Alice' AS name, 85 AS score), -- defines names
1015
+ STRUCT('Bob', 92), -- inherits names
1016
+ STRUCT('Diana', 95) -- inherits names
1017
+ ]
1018
+
1019
+ This transformation makes the field names explicit on all structs by adding
1020
+ PropertyEQ nodes, in order to facilitate transpilation to other dialects.
1021
+
1022
+ Args:
1023
+ expression: The expression tree to transform
1024
+
1025
+ Returns:
1026
+ The modified expression with field names inherited in all structs
1027
+ """
1028
+ if (
1029
+ isinstance(expression, exp.Array)
1030
+ and expression.args.get("struct_name_inheritance")
1031
+ and isinstance(first_item := seq_get(expression.expressions, 0), exp.Struct)
1032
+ and all(isinstance(fld, exp.PropertyEQ) for fld in first_item.expressions)
1033
+ ):
1034
+ field_names = [fld.this for fld in first_item.expressions]
1035
+
1036
+ # Apply field names to subsequent structs that don't have them
1037
+ for struct in expression.expressions[1:]:
1038
+ if not isinstance(struct, exp.Struct) or len(struct.expressions) != len(field_names):
1039
+ continue
1040
+
1041
+ # Convert unnamed expressions to PropertyEQ with inherited names
1042
+ new_expressions = []
1043
+ for i, expr in enumerate(struct.expressions):
1044
+ if not isinstance(expr, exp.PropertyEQ):
1045
+ # Create PropertyEQ: field_name := value
1046
+ new_expressions.append(
1047
+ exp.PropertyEQ(
1048
+ this=exp.Identifier(this=field_names[i].copy()), expression=expr
1049
+ )
1050
+ )
1051
+ else:
1052
+ new_expressions.append(expr)
1053
+
1054
+ struct.set("expressions", new_expressions)
1055
+
1056
+ return expression