sqlglot 27.13.1__py3-none-any.whl → 27.14.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sqlglot/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '27.13.1'
32
- __version_tuple__ = version_tuple = (27, 13, 1)
31
+ __version__ = version = '27.14.0'
32
+ __version_tuple__ = version_tuple = (27, 14, 0)
33
33
 
34
34
  __commit_id__ = commit_id = None
sqlglot/dialects/bigquery.py CHANGED
@@ -859,10 +859,12 @@ class BigQuery(Dialect):
859
859
  exp.JSONArray, expressions=self._parse_csv(self._parse_bitwise)
860
860
  ),
861
861
  "MAKE_INTERVAL": lambda self: self._parse_make_interval(),
862
- "PREDICT": lambda self: self._parse_predict(),
862
+ "PREDICT": lambda self: self._parse_ml(exp.Predict),
863
+ "TRANSLATE": lambda self: self._parse_translate(),
863
864
  "FEATURES_AT_TIME": lambda self: self._parse_features_at_time(),
864
- "GENERATE_EMBEDDING": lambda self: self._parse_generate_embedding(),
865
+ "GENERATE_EMBEDDING": lambda self: self._parse_ml(exp.GenerateEmbedding),
865
866
  "VECTOR_SEARCH": lambda self: self._parse_vector_search(),
867
+ "FORECAST": lambda self: self._parse_ml(exp.MLForecast),
866
868
  }
867
869
  FUNCTION_PARSERS.pop("TRIM")
868
870
 
@@ -1146,34 +1148,35 @@ class BigQuery(Dialect):
1146
1148
 
1147
1149
  return expr
1148
1150
 
1149
- def _parse_predict(self) -> exp.Predict:
1151
+ def _parse_ml(self, expr_type: t.Type[E]) -> E:
1150
1152
  self._match_text_seq("MODEL")
1151
1153
  this = self._parse_table()
1152
1154
 
1153
1155
  self._match(TokenType.COMMA)
1154
1156
  self._match_text_seq("TABLE")
1155
1157
 
1156
- return self.expression(
1157
- exp.Predict,
1158
- this=this,
1159
- expression=self._parse_table(),
1160
- params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
1158
+ # Certain functions like ML.FORECAST require a STRUCT argument but not a TABLE/SELECT one
1159
+ expression = (
1160
+ self._parse_table() if not self._match(TokenType.STRUCT, advance=False) else None
1161
1161
  )
1162
1162
 
1163
- def _parse_generate_embedding(self) -> exp.GenerateEmbedding:
1164
- self._match_text_seq("MODEL")
1165
- this = self._parse_table()
1166
-
1167
1163
  self._match(TokenType.COMMA)
1168
- self._match_text_seq("TABLE")
1169
1164
 
1170
1165
  return self.expression(
1171
- exp.GenerateEmbedding,
1166
+ expr_type,
1172
1167
  this=this,
1173
- expression=self._parse_table(),
1174
- params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
1168
+ expression=expression,
1169
+ params_struct=self._parse_bitwise(),
1175
1170
  )
1176
1171
 
1172
+ def _parse_translate(self) -> exp.Translate | exp.MLTranslate:
1173
+ # Check if this is ML.TRANSLATE by looking at previous tokens
1174
+ token = seq_get(self._tokens, self._index - 4)
1175
+ if token and token.text.upper() == "ML":
1176
+ return self._parse_ml(exp.MLTranslate)
1177
+
1178
+ return exp.Translate.from_arg_list(self._parse_function_args())
1179
+
1177
1180
  def _parse_features_at_time(self) -> exp.FeaturesAtTime:
1178
1181
  self._match(TokenType.TABLE)
1179
1182
  this = self._parse_table()
sqlglot/dialects/clickhouse.py CHANGED
@@ -189,6 +189,7 @@ def _map_sql(self: ClickHouse.Generator, expression: exp.Map | exp.VarMap) -> st
189
189
 
190
190
 
191
191
  class ClickHouse(Dialect):
192
+ INDEX_OFFSET = 1
192
193
  NORMALIZE_FUNCTIONS: bool | str = False
193
194
  NULL_ORDERING = "nulls_are_last"
194
195
  SUPPORTS_USER_DEFINED_TYPES = False
sqlglot/dialects/dialect.py CHANGED
@@ -674,6 +674,7 @@ class Dialect(metaclass=_Dialect):
674
674
  exp.DataType.Type.BOOLEAN: {
675
675
  exp.Between,
676
676
  exp.Boolean,
677
+ exp.Contains,
677
678
  exp.EndsWith,
678
679
  exp.In,
679
680
  exp.LogicalAnd,
@@ -818,7 +819,6 @@ class Dialect(metaclass=_Dialect):
818
819
  exp.Cast: lambda self, e: self._annotate_with_type(e, e.args["to"]),
819
820
  exp.Case: lambda self, e: self._annotate_by_args(e, "default", "ifs"),
820
821
  exp.Coalesce: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
821
- exp.Contains: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BOOLEAN),
822
822
  exp.Count: lambda self, e: self._annotate_with_type(
823
823
  e, exp.DataType.Type.BIGINT if e.args.get("big_int") else exp.DataType.Type.INT
824
824
  ),
sqlglot/dialects/duckdb.py CHANGED
@@ -365,6 +365,9 @@ class DuckDB(Dialect):
365
365
  "ANY_VALUE": lambda args: exp.IgnoreNulls(this=exp.AnyValue.from_arg_list(args)),
366
366
  "ARRAY_REVERSE_SORT": _build_sort_array_desc,
367
367
  "ARRAY_SORT": exp.SortArray.from_arg_list,
368
+ "BIT_AND": exp.BitwiseAndAgg.from_arg_list,
369
+ "BIT_OR": exp.BitwiseOrAgg.from_arg_list,
370
+ "BIT_XOR": exp.BitwiseXorAgg.from_arg_list,
368
371
  "DATEDIFF": _build_date_diff,
369
372
  "DATE_DIFF": _build_date_diff,
370
373
  "DATE_TRUNC": date_trunc_to_time,
@@ -649,7 +652,10 @@ class DuckDB(Dialect):
649
652
  exp.ArrayUniqueAgg: lambda self, e: self.func(
650
653
  "LIST", exp.Distinct(expressions=[e.this])
651
654
  ),
655
+ exp.BitwiseAndAgg: rename_func("BIT_AND"),
656
+ exp.BitwiseOrAgg: rename_func("BIT_OR"),
652
657
  exp.BitwiseXor: rename_func("XOR"),
658
+ exp.BitwiseXorAgg: rename_func("BIT_XOR"),
653
659
  exp.CommentColumnConstraint: no_comment_column_constraint_sql,
654
660
  exp.CosineDistance: rename_func("LIST_COSINE_DISTANCE"),
655
661
  exp.CurrentDate: lambda *_: "CURRENT_DATE",
@@ -995,13 +1001,18 @@ class DuckDB(Dialect):
995
1001
 
996
1002
  def join_sql(self, expression: exp.Join) -> str:
997
1003
  if (
998
- expression.side == "LEFT"
1004
+ not expression.args.get("using")
999
1005
  and not expression.args.get("on")
1000
- and isinstance(expression.this, exp.Unnest)
1006
+ and not expression.method
1007
+ and (expression.kind in ("", "INNER", "OUTER"))
1001
1008
  ):
1002
- # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause
1009
+ # Some dialects support `LEFT/INNER JOIN UNNEST(...)` without an explicit ON clause
1003
1010
  # DuckDB doesn't, but we can just add a dummy ON clause that is always true
1004
- return super().join_sql(expression.on(exp.true()))
1011
+ if isinstance(expression.this, exp.Unnest):
1012
+ return super().join_sql(expression.on(exp.true()))
1013
+
1014
+ expression.args.pop("side", None)
1015
+ expression.args.pop("kind", None)
1005
1016
 
1006
1017
  return super().join_sql(expression)
1007
1018
 
sqlglot/dialects/fabric.py CHANGED
@@ -125,7 +125,7 @@ class Fabric(TSQL):
125
125
  exp.DataType.Type.TIMESTAMPTZ: "DATETIME2",
126
126
  exp.DataType.Type.TINYINT: "SMALLINT",
127
127
  exp.DataType.Type.UTINYINT: "SMALLINT",
128
- exp.DataType.Type.UUID: "VARBINARY(MAX)",
128
+ exp.DataType.Type.UUID: "UNIQUEIDENTIFIER",
129
129
  exp.DataType.Type.XML: "VARCHAR",
130
130
  }
131
131
 
sqlglot/dialects/hive.py CHANGED
@@ -309,6 +309,7 @@ class Hive(Dialect):
309
309
  STRICT_CAST = False
310
310
  VALUES_FOLLOWED_BY_PAREN = False
311
311
  JOINS_HAVE_EQUAL_PRECEDENCE = True
312
+ ADD_JOIN_ON_TRUE = True
312
313
 
313
314
  FUNCTIONS = {
314
315
  **parser.Parser.FUNCTIONS,
sqlglot/dialects/mysql.py CHANGED
@@ -579,9 +579,11 @@ class MySQL(Dialect):
579
579
  full: t.Optional[bool] = None,
580
580
  global_: t.Optional[bool] = None,
581
581
  ) -> exp.Show:
582
+ json = self._match_text_seq("JSON")
583
+
582
584
  if target:
583
585
  if isinstance(target, str):
584
- self._match_text_seq(target)
586
+ self._match_text_seq(*target.split(" "))
585
587
  target_id = self._parse_id_var()
586
588
  else:
587
589
  target_id = None
@@ -618,6 +620,12 @@ class MySQL(Dialect):
618
620
  mutex = True if self._match_text_seq("MUTEX") else None
619
621
  mutex = False if self._match_text_seq("STATUS") else mutex
620
622
 
623
+ for_table = self._parse_id_var() if self._match_text_seq("FOR", "TABLE") else None
624
+ for_group = self._parse_string() if self._match_text_seq("FOR", "GROUP") else None
625
+ for_user = self._parse_string() if self._match_text_seq("FOR", "USER") else None
626
+ for_role = self._parse_string() if self._match_text_seq("FOR", "ROLE") else None
627
+ into_outfile = self._parse_string() if self._match_text_seq("INTO", "OUTFILE") else None
628
+
621
629
  return self.expression(
622
630
  exp.Show,
623
631
  this=this,
@@ -634,6 +642,12 @@ class MySQL(Dialect):
634
642
  offset=offset,
635
643
  limit=limit,
636
644
  mutex=mutex,
645
+ for_table=for_table,
646
+ for_group=for_group,
647
+ for_user=for_user,
648
+ for_role=for_role,
649
+ into_outfile=into_outfile,
650
+ json=json,
637
651
  **{"global": global_}, # type: ignore
638
652
  )
639
653
 
@@ -1200,6 +1214,10 @@ class MySQL(Dialect):
1200
1214
  target = f" FROM{target}"
1201
1215
  elif expression.name == "GRANTS":
1202
1216
  target = f" FOR{target}"
1217
+ elif expression.name in ("LINKS", "PARTITIONS"):
1218
+ target = f" ON{target}" if target else ""
1219
+ elif expression.name == "PROJECTIONS":
1220
+ target = f" ON TABLE{target}" if target else ""
1203
1221
 
1204
1222
  db = self._prefixed_sql("FROM", expression, "db")
1205
1223
 
@@ -1227,7 +1245,14 @@ class MySQL(Dialect):
1227
1245
  else:
1228
1246
  mutex_or_status = ""
1229
1247
 
1230
- return f"SHOW{full}{global_}{this}{target}{types}{db}{query}{log}{position}{channel}{mutex_or_status}{like}{where}{offset}{limit}"
1248
+ for_table = self._prefixed_sql("FOR TABLE", expression, "for_table")
1249
+ for_group = self._prefixed_sql("FOR GROUP", expression, "for_group")
1250
+ for_user = self._prefixed_sql("FOR USER", expression, "for_user")
1251
+ for_role = self._prefixed_sql("FOR ROLE", expression, "for_role")
1252
+ into_outfile = self._prefixed_sql("INTO OUTFILE", expression, "into_outfile")
1253
+ json = " JSON" if expression.args.get("json") else ""
1254
+
1255
+ return f"SHOW{full}{global_}{this}{json}{target}{for_table}{types}{db}{query}{log}{position}{channel}{mutex_or_status}{like}{where}{offset}{limit}{for_group}{for_user}{for_role}{into_outfile}"
1231
1256
 
1232
1257
  def altercolumn_sql(self, expression: exp.AlterColumn) -> str:
1233
1258
  dtype = self.sql(expression, "dtype")
sqlglot/dialects/postgres.py CHANGED
@@ -325,6 +325,9 @@ class Postgres(Dialect):
325
325
  "@@": TokenType.DAT,
326
326
  "@>": TokenType.AT_GT,
327
327
  "<@": TokenType.LT_AT,
328
+ "?&": TokenType.QMARK_AMP,
329
+ "?|": TokenType.QMARK_PIPE,
330
+ "#-": TokenType.HASH_DASH,
328
331
  "|/": TokenType.PIPE_SLASH,
329
332
  "||/": TokenType.DPIPE_SLASH,
330
333
  "BEGIN": TokenType.BEGIN,
@@ -386,6 +389,9 @@ class Postgres(Dialect):
386
389
 
387
390
  FUNCTIONS = {
388
391
  **parser.Parser.FUNCTIONS,
392
+ "BIT_AND": exp.BitwiseAndAgg.from_arg_list,
393
+ "BIT_OR": exp.BitwiseOrAgg.from_arg_list,
394
+ "BIT_XOR": exp.BitwiseXorAgg.from_arg_list,
389
395
  "DATE_TRUNC": build_timestamp_trunc,
390
396
  "DIV": lambda args: exp.cast(
391
397
  binary_from_function(exp.IntDiv)(args), exp.DataType.Type.DECIMAL
@@ -584,7 +590,10 @@ class Postgres(Dialect):
584
590
  exp.AnyValue: _versioned_anyvalue_sql,
585
591
  exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"),
586
592
  exp.ArrayFilter: filter_array_using_unnest,
593
+ exp.BitwiseAndAgg: rename_func("BIT_AND"),
594
+ exp.BitwiseOrAgg: rename_func("BIT_OR"),
587
595
  exp.BitwiseXor: lambda self, e: self.binary(e, "#"),
596
+ exp.BitwiseXorAgg: rename_func("BIT_XOR"),
588
597
  exp.ColumnDef: transforms.preprocess([_auto_increment_to_serial, _serial_to_generated]),
589
598
  exp.CurrentDate: no_paren_current_date_sql,
590
599
  exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
sqlglot/dialects/redshift.py CHANGED
@@ -162,6 +162,7 @@ class Redshift(Postgres):
162
162
  ALTER_SET_TYPE = "TYPE"
163
163
  SUPPORTS_DECODE_CASE = True
164
164
  SUPPORTS_BETWEEN_FLAGS = False
165
+ LIMIT_FETCH = "LIMIT"
165
166
 
166
167
  # Redshift doesn't have `WITH` as part of their with_properties so we remove it
167
168
  WITH_PROPERTIES_PREFIX = " "
sqlglot/dialects/singlestore.py CHANGED
@@ -16,7 +16,7 @@ from sqlglot.dialects.dialect import (
16
16
  date_add_interval_sql,
17
17
  timestampdiff_sql,
18
18
  )
19
- from sqlglot.dialects.mysql import MySQL, _remove_ts_or_ds_to_date, date_add_sql
19
+ from sqlglot.dialects.mysql import MySQL, _remove_ts_or_ds_to_date, date_add_sql, _show_parser
20
20
  from sqlglot.expressions import DataType
21
21
  from sqlglot.generator import unsupported_args
22
22
  from sqlglot.helper import seq_get
@@ -55,6 +55,17 @@ class SingleStore(MySQL):
55
55
  "FF6": "%f", # only 6 digits are supported in python formats
56
56
  }
57
57
 
58
+ VECTOR_TYPE_ALIASES = {
59
+ "I8": "TINYINT",
60
+ "I16": "SMALLINT",
61
+ "I32": "INT",
62
+ "I64": "BIGINT",
63
+ "F32": "FLOAT",
64
+ "F64": "DOUBLE",
65
+ }
66
+
67
+ INVERSE_VECTOR_TYPE_ALIASES = {v: k for k, v in VECTOR_TYPE_ALIASES.items()}
68
+
58
69
  class Tokenizer(MySQL.Tokenizer):
59
70
  BYTE_STRINGS = [("e'", "'"), ("E'", "'")]
60
71
 
@@ -249,6 +260,63 @@ class SingleStore(MySQL):
249
260
  COLUMN_OPERATORS.pop(TokenType.DHASH_ARROW)
250
261
  COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER)
251
262
 
263
+ SHOW_PARSERS = {
264
+ **MySQL.Parser.SHOW_PARSERS,
265
+ "AGGREGATES": _show_parser("AGGREGATES"),
266
+ "CDC EXTRACTOR POOL": _show_parser("CDC EXTRACTOR POOL"),
267
+ "CREATE AGGREGATE": _show_parser("CREATE AGGREGATE", target=True),
268
+ "CREATE PIPELINE": _show_parser("CREATE PIPELINE", target=True),
269
+ "CREATE PROJECTION": _show_parser("CREATE PROJECTION", target=True),
270
+ "DATABASE STATUS": _show_parser("DATABASE STATUS"),
271
+ "DISTRIBUTED_PLANCACHE STATUS": _show_parser("DISTRIBUTED_PLANCACHE STATUS"),
272
+ "FULLTEXT SERVICE METRICS LOCAL": _show_parser("FULLTEXT SERVICE METRICS LOCAL"),
273
+ "FULLTEXT SERVICE METRICS FOR NODE": _show_parser(
274
+ "FULLTEXT SERVICE METRICS FOR NODE", target=True
275
+ ),
276
+ "FULLTEXT SERVICE STATUS": _show_parser("FULLTEXT SERVICE STATUS"),
277
+ "FUNCTIONS": _show_parser("FUNCTIONS"),
278
+ "GROUPS": _show_parser("GROUPS"),
279
+ "GROUPS FOR ROLE": _show_parser("GROUPS FOR ROLE", target=True),
280
+ "GROUPS FOR USER": _show_parser("GROUPS FOR USER", target=True),
281
+ "INDEXES": _show_parser("INDEX", target="FROM"),
282
+ "KEYS": _show_parser("INDEX", target="FROM"),
283
+ "LINKS": _show_parser("LINKS", target="ON"),
284
+ "LOAD ERRORS": _show_parser("LOAD ERRORS"),
285
+ "LOAD WARNINGS": _show_parser("LOAD WARNINGS"),
286
+ "PARTITIONS": _show_parser("PARTITIONS", target="ON"),
287
+ "PIPELINES": _show_parser("PIPELINES"),
288
+ "PLAN": _show_parser("PLAN", target=True),
289
+ "PLANCACHE": _show_parser("PLANCACHE"),
290
+ "PROCEDURES": _show_parser("PROCEDURES"),
291
+ "PROJECTIONS": _show_parser("PROJECTIONS", target="ON TABLE"),
292
+ "REPLICATION STATUS": _show_parser("REPLICATION STATUS"),
293
+ "REPRODUCTION": _show_parser("REPRODUCTION"),
294
+ "RESOURCE POOLS": _show_parser("RESOURCE POOLS"),
295
+ "ROLES": _show_parser("ROLES"),
296
+ "ROLES FOR USER": _show_parser("ROLES FOR USER", target=True),
297
+ "ROLES FOR GROUP": _show_parser("ROLES FOR GROUP", target=True),
298
+ "STATUS EXTENDED": _show_parser("STATUS EXTENDED"),
299
+ "USERS": _show_parser("USERS"),
300
+ "USERS FOR ROLE": _show_parser("USERS FOR ROLE", target=True),
301
+ "USERS FOR GROUP": _show_parser("USERS FOR GROUP", target=True),
302
+ }
303
+
304
+ ALTER_PARSERS = {
305
+ **MySQL.Parser.ALTER_PARSERS,
306
+ "CHANGE": lambda self: self.expression(
307
+ exp.RenameColumn, this=self._parse_column(), to=self._parse_column()
308
+ ),
309
+ }
310
+
311
+ def _parse_vector_expressions(
312
+ self, expressions: t.List[exp.Expression]
313
+ ) -> t.List[exp.Expression]:
314
+ type_name = expressions[1].name.upper()
315
+ if type_name in self.dialect.VECTOR_TYPE_ALIASES:
316
+ type_name = self.dialect.VECTOR_TYPE_ALIASES[type_name]
317
+
318
+ return [exp.DataType.build(type_name, dialect=self.dialect), expressions[0]]
319
+
252
320
  class Generator(MySQL.Generator):
253
321
  SUPPORTS_UESCAPE = False
254
322
  NULL_ORDERING_SUPPORTED = True
@@ -467,6 +535,29 @@ class SingleStore(MySQL):
467
535
  exp.MatchAgainst: unsupported_args("modifier")(
468
536
  lambda self, e: super().matchagainst_sql(e)
469
537
  ),
538
+ exp.Show: unsupported_args(
539
+ "history",
540
+ "terse",
541
+ "offset",
542
+ "starts_with",
543
+ "limit",
544
+ "from",
545
+ "scope",
546
+ "scope_kind",
547
+ "mutex",
548
+ "query",
549
+ "channel",
550
+ "log",
551
+ "types",
552
+ "privileges",
553
+ )(lambda self, e: super().show_sql(e)),
554
+ exp.Describe: unsupported_args(
555
+ "style",
556
+ "kind",
557
+ "expressions",
558
+ "partition",
559
+ "format",
560
+ )(lambda self, e: super().describe_sql(e)),
470
561
  }
471
562
  TRANSFORMS.pop(exp.JSONExtractScalar)
472
563
  TRANSFORMS.pop(exp.CurrentDate)
@@ -1685,6 +1776,14 @@ class SingleStore(MySQL):
1685
1776
  return f"DECIMAL({precision}, {scale[0]})"
1686
1777
  else:
1687
1778
  return f"DECIMAL({precision})"
1779
+ if expression.is_type(exp.DataType.Type.VECTOR):
1780
+ expressions = expression.expressions
1781
+ if len(expressions) == 2:
1782
+ type_name = self.sql(expressions[0])
1783
+ if type_name in self.dialect.INVERSE_VECTOR_TYPE_ALIASES:
1784
+ type_name = self.dialect.INVERSE_VECTOR_TYPE_ALIASES[type_name]
1785
+
1786
+ return f"VECTOR({self.sql(expressions[1])}, {type_name})"
1688
1787
 
1689
1788
  return super().datatype_sql(expression)
1690
1789
 
@@ -1741,3 +1840,25 @@ class SingleStore(MySQL):
1741
1840
 
1742
1841
  self.unsupported("STANDARD_HASH function is not supported in SingleStore")
1743
1842
  return self.func("SHA", expression.this)
1843
+
1844
+ @unsupported_args("is_database", "exists", "cluster", "identity", "option", "partition")
1845
+ def truncatetable_sql(self, expression: exp.TruncateTable) -> str:
1846
+ statements = []
1847
+ for expression in expression.expressions:
1848
+ statements.append(f"TRUNCATE {self.sql(expression)}")
1849
+
1850
+ return "; ".join(statements)
1851
+
1852
+ @unsupported_args("exists")
1853
+ def renamecolumn_sql(self, expression: exp.RenameColumn) -> str:
1854
+ old_column = self.sql(expression, "this")
1855
+ new_column = self.sql(expression, "to")
1856
+ return f"CHANGE {old_column} {new_column}"
1857
+
1858
+ @unsupported_args("drop", "comment", "allow_null", "visible", "using")
1859
+ def altercolumn_sql(self, expression: exp.AlterColumn) -> str:
1860
+ alter = super().altercolumn_sql(expression)
1861
+
1862
+ collate = self.sql(expression, "collate")
1863
+ collate = f" COLLATE {collate}" if collate else ""
1864
+ return f"{alter}{collate}"
sqlglot/dialects/snowflake.py CHANGED
@@ -32,6 +32,7 @@ from sqlglot.dialects.dialect import (
32
32
  )
33
33
  from sqlglot.generator import unsupported_args
34
34
  from sqlglot.helper import find_new_name, flatten, is_float, is_int, seq_get
35
+ from sqlglot.optimizer.annotate_types import TypeAnnotator
35
36
  from sqlglot.optimizer.scope import build_scope, find_all_in_scope
36
37
  from sqlglot.tokens import TokenType
37
38
 
@@ -482,6 +483,15 @@ def _eliminate_dot_variant_lookup(expression: exp.Expression) -> exp.Expression:
482
483
  return expression
483
484
 
484
485
 
486
+ def _annotate_reverse(self: TypeAnnotator, expression: exp.Reverse) -> exp.Reverse:
487
+ expression = self._annotate_by_args(expression, "this")
488
+ if expression.is_type(exp.DataType.Type.NULL):
489
+ # Snowflake treats REVERSE(NULL) as a VARCHAR
490
+ self._set_type(expression, exp.DataType.Type.VARCHAR)
491
+
492
+ return expression
493
+
494
+
485
495
  class Snowflake(Dialect):
486
496
  # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
487
497
  NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
@@ -500,9 +510,17 @@ class Snowflake(Dialect):
500
510
  **Dialect.ANNOTATORS,
501
511
  **{
502
512
  expr_type: lambda self, e: self._annotate_by_args(e, "this")
503
- for expr_type in (exp.Reverse,)
513
+ for expr_type in (
514
+ exp.Left,
515
+ exp.Right,
516
+ exp.Substring,
517
+ )
504
518
  },
505
519
  exp.ConcatWs: lambda self, e: self._annotate_by_args(e, "expressions"),
520
+ exp.Length: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.INT),
521
+ exp.Replace: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
522
+ exp.Reverse: _annotate_reverse,
523
+ exp.Space: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
506
524
  }
507
525
 
508
526
  TIME_MAPPING = {
@@ -664,7 +682,8 @@ class Snowflake(Dialect):
664
682
  "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ),
665
683
  "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP),
666
684
  "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
667
- "TO_VARCHAR": exp.ToChar.from_arg_list,
685
+ "TO_VARCHAR": build_timetostr_or_tochar,
686
+ "TO_JSON": exp.JSONFormat.from_arg_list,
668
687
  "VECTOR_L2_DISTANCE": exp.EuclideanDistance.from_arg_list,
669
688
  "ZEROIFNULL": _build_if_from_zeroifnull,
670
689
  }
@@ -1161,7 +1180,6 @@ class Snowflake(Dialect):
1161
1180
  "RM": TokenType.COMMAND,
1162
1181
  "SAMPLE": TokenType.TABLE_SAMPLE,
1163
1182
  "SEMANTIC VIEW": TokenType.SEMANTIC_VIEW,
1164
- "SESSION": TokenType.SESSION,
1165
1183
  "SQL_DOUBLE": TokenType.DOUBLE,
1166
1184
  "SQL_VARCHAR": TokenType.VARCHAR,
1167
1185
  "STAGE": TokenType.STAGE,
@@ -1274,6 +1292,7 @@ class Snowflake(Dialect):
1274
1292
  exp.ParseJSON: lambda self, e: self.func(
1275
1293
  "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this
1276
1294
  ),
1295
+ exp.JSONFormat: rename_func("TO_JSON"),
1277
1296
  exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
1278
1297
  exp.PercentileCont: transforms.preprocess(
1279
1298
  [transforms.add_within_group_for_percentiles]
@@ -1298,6 +1317,8 @@ class Snowflake(Dialect):
1298
1317
  ]
1299
1318
  ),
1300
1319
  exp.SHA: rename_func("SHA1"),
1320
+ exp.MD5Digest: rename_func("MD5_BINARY"),
1321
+ exp.LowerHex: rename_func("TO_CHAR"),
1301
1322
  exp.SortArray: rename_func("ARRAY_SORT"),
1302
1323
  exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
1303
1324
  exp.StartsWith: rename_func("STARTSWITH"),
@@ -1345,9 +1366,10 @@ class Snowflake(Dialect):
1345
1366
 
1346
1367
  TYPE_MAPPING = {
1347
1368
  **generator.Generator.TYPE_MAPPING,
1369
+ exp.DataType.Type.BIGDECIMAL: "DOUBLE",
1348
1370
  exp.DataType.Type.NESTED: "OBJECT",
1349
1371
  exp.DataType.Type.STRUCT: "OBJECT",
1350
- exp.DataType.Type.BIGDECIMAL: "DOUBLE",
1372
+ exp.DataType.Type.TEXT: "VARCHAR",
1351
1373
  }
1352
1374
 
1353
1375
  TOKEN_MAPPING = {
sqlglot/dialects/sqlite.py CHANGED
@@ -110,6 +110,7 @@ class SQLite(Dialect):
110
110
  STRING_ALIASES = True
111
111
  ALTER_RENAME_REQUIRES_COLUMN = False
112
112
  JOINS_HAVE_EQUAL_PRECEDENCE = True
113
+ ADD_JOIN_ON_TRUE = True
113
114
 
114
115
  FUNCTIONS = {
115
116
  **parser.Parser.FUNCTIONS,
sqlglot/expressions.py CHANGED
@@ -134,6 +134,11 @@ class Expression(metaclass=_Expression):
134
134
 
135
135
  return hash((self.__class__, self.hashable_args))
136
136
 
137
+ def __reduce__(self) -> t.Tuple[t.Callable, t.Tuple[t.Dict[str, t.Any]]]:
138
+ from sqlglot.serde import dump, load
139
+
140
+ return (load, (dump(self),))
141
+
137
142
  @property
138
143
  def this(self) -> t.Any:
139
144
  """
@@ -1646,6 +1651,12 @@ class Show(Expression):
1646
1651
  "position": False,
1647
1652
  "types": False,
1648
1653
  "privileges": False,
1654
+ "for_table": False,
1655
+ "for_group": False,
1656
+ "for_user": False,
1657
+ "for_role": False,
1658
+ "into_outfile": False,
1659
+ "json": False,
1649
1660
  }
1650
1661
 
1651
1662
 
@@ -2197,7 +2208,7 @@ class Copy(DML):
2197
2208
  arg_types = {
2198
2209
  "this": True,
2199
2210
  "kind": True,
2200
- "files": True,
2211
+ "files": False,
2201
2212
  "credentials": False,
2202
2213
  "format": False,
2203
2214
  "params": False,
@@ -6694,11 +6705,26 @@ class JSONBContains(Binary, Func):
6694
6705
  _sql_names = ["JSONB_CONTAINS"]
6695
6706
 
6696
6707
 
6708
+ # https://www.postgresql.org/docs/9.5/functions-json.html
6709
+ class JSONBContainsAnyTopKeys(Binary, Func):
6710
+ pass
6711
+
6712
+
6713
+ # https://www.postgresql.org/docs/9.5/functions-json.html
6714
+ class JSONBContainsAllTopKeys(Binary, Func):
6715
+ pass
6716
+
6717
+
6697
6718
  class JSONBExists(Func):
6698
6719
  arg_types = {"this": True, "path": True}
6699
6720
  _sql_names = ["JSONB_EXISTS"]
6700
6721
 
6701
6722
 
6723
+ # https://www.postgresql.org/docs/9.5/functions-json.html
6724
+ class JSONBDeleteAtPath(Binary, Func):
6725
+ pass
6726
+
6727
+
6702
6728
  class JSONExtract(Binary, Func):
6703
6729
  arg_types = {
6704
6730
  "this": True,
@@ -6963,6 +6989,11 @@ class Predict(Func):
6963
6989
  arg_types = {"this": True, "expression": True, "params_struct": False}
6964
6990
 
6965
6991
 
6992
+ # https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-translate#mltranslate_function
6993
+ class MLTranslate(Func):
6994
+ arg_types = {"this": True, "expression": True, "params_struct": True}
6995
+
6996
+
6966
6997
  # https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-feature-time
6967
6998
  class FeaturesAtTime(Func):
6968
6999
  arg_types = {"this": True, "time": False, "num_rows": False, "ignore_feature_nulls": False}
@@ -6973,6 +7004,10 @@ class GenerateEmbedding(Func):
6973
7004
  arg_types = {"this": True, "expression": True, "params_struct": False}
6974
7005
 
6975
7006
 
7007
+ class MLForecast(Func):
7008
+ arg_types = {"this": True, "expression": False, "params_struct": False}
7009
+
7010
+
6976
7011
  # https://cloud.google.com/bigquery/docs/reference/standard-sql/search_functions#vector_search
6977
7012
  class VectorSearch(Func):
6978
7013
  arg_types = {
sqlglot/generator.py CHANGED
@@ -160,6 +160,9 @@ class Generator(metaclass=_Generator):
160
160
  exp.Intersect: lambda self, e: self.set_operations(e),
161
161
  exp.IntervalSpan: lambda self, e: f"{self.sql(e, 'this')} TO {self.sql(e, 'expression')}",
162
162
  exp.Int64: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.BIGINT)),
163
+ exp.JSONBContainsAnyTopKeys: lambda self, e: self.binary(e, "?|"),
164
+ exp.JSONBContainsAllTopKeys: lambda self, e: self.binary(e, "?&"),
165
+ exp.JSONBDeleteAtPath: lambda self, e: self.binary(e, "#-"),
163
166
  exp.LanguageProperty: lambda self, e: self.naked_property(e),
164
167
  exp.LocationProperty: lambda self, e: self.naked_property(e),
165
168
  exp.LogProperty: lambda _, e: f"{'NO ' if e.args.get('no') else ''}LOG",
@@ -4214,21 +4217,31 @@ class Generator(metaclass=_Generator):
4214
4217
  def opclass_sql(self, expression: exp.Opclass) -> str:
4215
4218
  return f"{self.sql(expression, 'this')} {self.sql(expression, 'expression')}"
4216
4219
 
4217
- def predict_sql(self, expression: exp.Predict) -> str:
4220
+ def _ml_sql(self, expression: exp.Func, name: str) -> str:
4218
4221
  model = self.sql(expression, "this")
4219
4222
  model = f"MODEL {model}"
4220
- table = self.sql(expression, "expression")
4221
- table = f"TABLE {table}" if not isinstance(expression.expression, exp.Subquery) else table
4222
- parameters = self.sql(expression, "params_struct")
4223
- return self.func("PREDICT", model, table, parameters or None)
4223
+ expr = expression.expression
4224
+ if expr:
4225
+ expr_sql = self.sql(expression, "expression")
4226
+ expr_sql = f"TABLE {expr_sql}" if not isinstance(expr, exp.Subquery) else expr_sql
4227
+ else:
4228
+ expr_sql = None
4229
+
4230
+ parameters = self.sql(expression, "params_struct") or None
4231
+
4232
+ return self.func(name, model, expr_sql, parameters)
4233
+
4234
+ def predict_sql(self, expression: exp.Predict) -> str:
4235
+ return self._ml_sql(expression, "PREDICT")
4224
4236
 
4225
4237
  def generateembedding_sql(self, expression: exp.GenerateEmbedding) -> str:
4226
- model = self.sql(expression, "this")
4227
- model = f"MODEL {model}"
4228
- table = self.sql(expression, "expression")
4229
- table = f"TABLE {table}" if not isinstance(expression.expression, exp.Subquery) else table
4230
- parameters = self.sql(expression, "params_struct")
4231
- return self.func("GENERATE_EMBEDDING", model, table, parameters or None)
4238
+ return self._ml_sql(expression, "GENERATE_EMBEDDING")
4239
+
4240
+ def mltranslate_sql(self, expression: exp.MLTranslate) -> str:
4241
+ return self._ml_sql(expression, "TRANSLATE")
4242
+
4243
+ def mlforecast_sql(self, expression: exp.MLForecast) -> str:
4244
+ return self._ml_sql(expression, "FORECAST")
4232
4245
 
4233
4246
  def featuresattime_sql(self, expression: exp.FeaturesAtTime) -> str:
4234
4247
  this_sql = self.sql(expression, "this")
@@ -4579,8 +4592,8 @@ class Generator(metaclass=_Generator):
4579
4592
 
4580
4593
  credentials = self.sql(expression, "credentials")
4581
4594
  credentials = self.seg(credentials) if credentials else ""
4582
- kind = self.seg("FROM" if expression.args.get("kind") else "TO")
4583
4595
  files = self.expressions(expression, key="files", flat=True)
4596
+ kind = self.seg("FROM" if expression.args.get("kind") else "TO") if files else ""
4584
4597
 
4585
4598
  sep = ", " if self.dialect.COPY_PARAMS_ARE_CSV else " "
4586
4599
  params = self.expressions(
@@ -4596,7 +4609,7 @@ class Generator(metaclass=_Generator):
4596
4609
  if params:
4597
4610
  if self.COPY_PARAMS_ARE_WRAPPED:
4598
4611
  params = f" WITH ({params})"
4599
- elif not self.pretty:
4612
+ elif not self.pretty and (files or credentials):
4600
4613
  params = f" {params}"
4601
4614
 
4602
4615
  return f"COPY{this}{kind} {files}{credentials}{params}"
sqlglot/optimizer/annotate_types.py CHANGED
@@ -193,6 +193,12 @@ class TypeAnnotator(metaclass=_TypeAnnotator):
193
193
  # Caches the ids of annotated sub-Expressions, to ensure we only visit them once
194
194
  self._visited: t.Set[int] = set()
195
195
 
196
+ # Caches NULL-annotated expressions to set them to UNKNOWN after type inference is completed
197
+ self._null_expressions: t.Dict[int, exp.Expression] = {}
198
+
199
+ # Databricks and Spark ≥v3 actually support NULL (i.e., VOID) as a type
200
+ self._supports_null_type = schema.dialect in ("databricks", "spark")
201
+
196
202
  # Maps an exp.SetOperation's id (e.g. UNION) to its projection types. This is computed if the
197
203
  # exp.SetOperation is the expression of a scope source, as selecting from it multiple times
198
204
  # would reprocess the entire subtree to coerce the types of its operands' projections
@@ -201,13 +207,33 @@ class TypeAnnotator(metaclass=_TypeAnnotator):
201
207
  def _set_type(
202
208
  self, expression: exp.Expression, target_type: t.Optional[exp.DataType | exp.DataType.Type]
203
209
  ) -> None:
210
+ prev_type = expression.type
211
+ expression_id = id(expression)
212
+
204
213
  expression.type = target_type or exp.DataType.Type.UNKNOWN # type: ignore
205
- self._visited.add(id(expression))
214
+ self._visited.add(expression_id)
215
+
216
+ if (
217
+ not self._supports_null_type
218
+ and t.cast(exp.DataType, expression.type).this == exp.DataType.Type.NULL
219
+ ):
220
+ self._null_expressions[expression_id] = expression
221
+ elif prev_type and t.cast(exp.DataType, prev_type).this == exp.DataType.Type.NULL:
222
+ self._null_expressions.pop(expression_id, None)
206
223
 
207
224
  def annotate(self, expression: E) -> E:
208
225
  for scope in traverse_scope(expression):
209
226
  self.annotate_scope(scope)
210
- return self._maybe_annotate(expression) # This takes care of non-traversable expressions
227
+
228
+ # This takes care of non-traversable expressions
229
+ expression = self._maybe_annotate(expression)
230
+
231
+ # Replace NULL type with UNKNOWN, since the former is not an actual type;
232
+ # it is mostly used to aid type coercion, e.g. in query set operations.
233
+ for expr in self._null_expressions.values():
234
+ expr.type = exp.DataType.Type.UNKNOWN
235
+
236
+ return expression
211
237
 
212
238
  def annotate_scope(self, scope: Scope) -> None:
213
239
  selects = {}
sqlglot/parser.py CHANGED
@@ -569,6 +569,7 @@ class Parser(metaclass=_Parser):
569
569
  TokenType.USE,
570
570
  TokenType.VOLATILE,
571
571
  TokenType.WINDOW,
572
+ *ALTERABLES,
572
573
  *CREATABLES,
573
574
  *SUBQUERY_PREDICATES,
574
575
  *TYPE_TOKENS,
@@ -941,6 +942,9 @@ class Parser(metaclass=_Parser):
941
942
  TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
942
943
  TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
943
944
  TokenType.FOR: lambda self, this: self._parse_comprehension(this),
945
+ TokenType.QMARK_AMP: binary_range_parser(exp.JSONBContainsAllTopKeys),
946
+ TokenType.QMARK_PIPE: binary_range_parser(exp.JSONBContainsAnyTopKeys),
947
+ TokenType.HASH_DASH: binary_range_parser(exp.JSONBDeleteAtPath),
944
948
  }
945
949
 
946
950
  PIPE_SYNTAX_TRANSFORM_PARSERS = {
@@ -1554,6 +1558,10 @@ class Parser(metaclass=_Parser):
1554
1558
  # is true for Snowflake but not for BigQuery which can also process strings
1555
1559
  JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False
1556
1560
 
1561
+ # Dialects like Databricks support JOINS without join criteria
1562
+ # Adding an ON TRUE, makes transpilation semantically correct for other dialects
1563
+ ADD_JOIN_ON_TRUE = False
1564
+
1557
1565
  __slots__ = (
1558
1566
  "error_level",
1559
1567
  "error_message_context",
@@ -3204,9 +3212,10 @@ class Parser(metaclass=_Parser):
3204
3212
  elif self._match(TokenType.FROM):
3205
3213
  from_ = self._parse_from(skip_from_token=True, consume_pipe=True)
3206
3214
  # Support parentheses for duckdb FROM-first syntax
3207
- select = self._parse_select()
3215
+ select = self._parse_select(from_=from_)
3208
3216
  if select:
3209
- select.set("from", from_)
3217
+ if not select.args.get("from"):
3218
+ select.set("from", from_)
3210
3219
  this = select
3211
3220
  else:
3212
3221
  this = exp.select("*").from_(t.cast(exp.From, from_))
@@ -3234,6 +3243,7 @@ class Parser(metaclass=_Parser):
3234
3243
  parse_subquery_alias: bool = True,
3235
3244
  parse_set_operation: bool = True,
3236
3245
  consume_pipe: bool = True,
3246
+ from_: t.Optional[exp.From] = None,
3237
3247
  ) -> t.Optional[exp.Expression]:
3238
3248
  query = self._parse_select_query(
3239
3249
  nested=nested,
@@ -3242,13 +3252,12 @@ class Parser(metaclass=_Parser):
3242
3252
  parse_set_operation=parse_set_operation,
3243
3253
  )
3244
3254
 
3245
- if (
3246
- consume_pipe
3247
- and self._match(TokenType.PIPE_GT, advance=False)
3248
- and isinstance(query, exp.Query)
3249
- ):
3250
- query = self._parse_pipe_syntax_query(query)
3251
- query = query.subquery(copy=False) if query and table else query
3255
+ if consume_pipe and self._match(TokenType.PIPE_GT, advance=False):
3256
+ if not query and from_:
3257
+ query = exp.select("*").from_(from_)
3258
+ if isinstance(query, exp.Query):
3259
+ query = self._parse_pipe_syntax_query(query)
3260
+ query = query.subquery(copy=False) if query and table else query
3252
3261
 
3253
3262
  return query
3254
3263
 
@@ -3875,6 +3884,16 @@ class Parser(metaclass=_Parser):
3875
3884
 
3876
3885
  comments = [c for token in (method, side, kind) if token for c in token.comments]
3877
3886
  comments = (join_comments or []) + comments
3887
+
3888
+ if (
3889
+ self.ADD_JOIN_ON_TRUE
3890
+ and not kwargs.get("on")
3891
+ and not kwargs.get("using")
3892
+ and not kwargs.get("method")
3893
+ and kwargs.get("kind") in (None, "INNER", "OUTER")
3894
+ ):
3895
+ kwargs["on"] = exp.true()
3896
+
3878
3897
  return self.expression(exp.Join, comments=comments, **kwargs)
3879
3898
 
3880
3899
  def _parse_opclass(self) -> t.Optional[exp.Expression]:
@@ -4406,6 +4425,8 @@ class Parser(metaclass=_Parser):
4406
4425
  def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]:
4407
4426
  func = self._parse_function()
4408
4427
  if not func:
4428
+ if self._prev and self._prev.token_type == TokenType.COMMA:
4429
+ return None
4409
4430
  self.raise_error("Expecting an aggregation function in PIVOT")
4410
4431
 
4411
4432
  return self._parse_alias(func)
@@ -5395,7 +5416,7 @@ class Parser(metaclass=_Parser):
5395
5416
 
5396
5417
  # https://docs.snowflake.com/en/sql-reference/data-types-vector
5397
5418
  if type_token == TokenType.VECTOR and len(expressions) == 2:
5398
- expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect)
5419
+ expressions = self._parse_vector_expressions(expressions)
5399
5420
 
5400
5421
  if not self._match(TokenType.R_PAREN):
5401
5422
  self._retreat(index)
@@ -5531,6 +5552,11 @@ class Parser(metaclass=_Parser):
5531
5552
 
5532
5553
  return this
5533
5554
 
5555
+ def _parse_vector_expressions(
5556
+ self, expressions: t.List[exp.Expression]
5557
+ ) -> t.List[exp.Expression]:
5558
+ return [exp.DataType.build(expressions[0].name, dialect=self.dialect), *expressions[1:]]
5559
+
5534
5560
  def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
5535
5561
  index = self._index
5536
5562
 
@@ -5804,6 +5830,9 @@ class Parser(metaclass=_Parser):
5804
5830
 
5805
5831
  return func
5806
5832
 
5833
+ def _parse_function_args(self, alias: bool = False) -> t.List[exp.Expression]:
5834
+ return self._parse_csv(lambda: self._parse_lambda(alias=alias))
5835
+
5807
5836
  def _parse_function_call(
5808
5837
  self,
5809
5838
  functions: t.Optional[t.Dict[str, t.Callable]] = None,
@@ -5868,7 +5897,7 @@ class Parser(metaclass=_Parser):
5868
5897
  known_function = function and not anonymous
5869
5898
 
5870
5899
  alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS
5871
- args = self._parse_csv(lambda: self._parse_lambda(alias=alias))
5900
+ args = self._parse_function_args(alias)
5872
5901
 
5873
5902
  post_func_comments = self._curr and self._curr.comments
5874
5903
  if known_function and post_func_comments:
@@ -8369,6 +8398,13 @@ class Parser(metaclass=_Parser):
8369
8398
  kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")
8370
8399
 
8371
8400
  files = self._parse_csv(self._parse_file_location)
8401
+ if self._match(TokenType.EQ, advance=False):
8402
+ # Backtrack one token since we've consumed the lhs of a parameter assignment here.
8403
+ # This can happen for Snowflake dialect. Instead, we'd like to parse the parameter
8404
+ # list via `_parse_wrapped(..)` below.
8405
+ self._advance(-1)
8406
+ files = []
8407
+
8372
8408
  credentials = self._parse_credentials()
8373
8409
 
8374
8410
  self._match_text_seq("WITH")
sqlglot/serde.py CHANGED
@@ -4,65 +4,115 @@ import typing as t
4
4
 
5
5
  from sqlglot import expressions as exp
6
6
 
7
- if t.TYPE_CHECKING:
8
- JSON = t.Union[dict, list, str, float, int, bool, None]
9
- Node = t.Union[t.List["Node"], exp.DataType.Type, exp.Expression, JSON]
10
7
 
11
-
12
- def dump(node: Node) -> JSON:
8
+ def dump(expr: exp.Expression) -> t.Dict[str, t.Any]:
13
9
  """
14
- Recursively dump an AST into a JSON-serializable dict.
10
+ Dump an Expression into a JSON serializable dict.
15
11
  """
16
- if isinstance(node, list):
17
- return [dump(i) for i in node]
18
- if isinstance(node, exp.DataType.Type):
19
- return {
20
- "class": "DataType.Type",
21
- "value": node.value,
22
- }
23
- if isinstance(node, exp.Expression):
12
+
13
+ root: t.Dict[str, t.Any] = {}
14
+ stack = [(expr, root)]
15
+
16
+ while stack:
17
+ node, payload = stack.pop()
18
+
24
19
  klass = node.__class__.__qualname__
20
+
25
21
  if node.__class__.__module__ != exp.__name__:
26
22
  klass = f"{node.__module__}.{klass}"
27
- obj: t.Dict = {
28
- "class": klass,
29
- "args": {k: dump(v) for k, v in node.args.items() if v is not None and v != []},
30
- }
23
+
24
+ payload["class"] = klass
25
+
31
26
  if node.type:
32
- obj["type"] = dump(node.type)
27
+ payload["type"] = dump(node.type)
33
28
  if node.comments:
34
- obj["comments"] = node.comments
29
+ payload["comments"] = node.comments
35
30
  if node._meta is not None:
36
- obj["meta"] = node._meta
31
+ payload["meta"] = node._meta
32
+ if node.args:
33
+ args: t.Dict[str, t.Any] = {}
34
+ payload["args"] = args
35
+
36
+ for k, vs in node.args.items():
37
+ if vs is None or vs == []:
38
+ continue
39
+ if hasattr(vs, "parent"):
40
+ args[k] = {}
41
+ stack.append((vs, args[k]))
42
+ elif type(vs) is list:
43
+ expressions: t.List[t.Any] = []
44
+ args[k] = expressions
37
45
 
38
- return obj
39
- return node
46
+ for v in vs:
47
+ if hasattr(v, "parent"):
48
+ expressions.append({})
49
+ stack.append((v, expressions[-1]))
50
+ else:
51
+ expressions.append(v)
52
+ elif isinstance(vs, exp.DataType.Type):
53
+ args[k] = {"class": "DataType.Type", "value": vs.value}
54
+ else:
55
+ args[k] = vs
40
56
 
57
+ return root
41
58
 
42
- def load(obj: JSON) -> Node:
59
+
60
+ @t.overload
61
+ def load(expression: None) -> None: ...
62
+
63
+
64
+ @t.overload
65
+ def load(expression: t.Dict[str, t.Any]) -> exp.Expression: ...
66
+
67
+
68
+ def load(expression):
43
69
  """
44
- Recursively load a dict (as returned by `dump`) into an AST.
70
+ Load a dict generated by dump into an Expression.
45
71
  """
46
- if isinstance(obj, list):
47
- return [load(i) for i in obj]
48
- if isinstance(obj, dict):
49
- class_name = obj["class"]
72
+ if expression is None:
73
+ return None
74
+
75
+ root = _load(expression)
76
+ stack = [(root, expression.get("args"))]
77
+
78
+ while stack:
79
+ obj, args = stack.pop()
80
+
81
+ if args:
82
+ for k, vs in args.items():
83
+ if isinstance(vs, list):
84
+ array = []
85
+ for v in vs:
86
+ if isinstance(v, dict):
87
+ child = _load(v)
88
+ array.append(child)
89
+ stack.append((child, v.get("args")))
90
+ else:
91
+ array.append(v)
92
+ obj.set(k, array)
93
+ elif isinstance(vs, dict):
94
+ child = _load(vs)
95
+ stack.append((child, vs.get("args")))
96
+ obj.set(k, child)
97
+ else:
98
+ obj.set(k, vs)
99
+ return root
50
100
 
51
- if class_name == "DataType.Type":
52
- return exp.DataType.Type(obj["value"])
53
101
 
54
- if "." in class_name:
55
- module_path, class_name = class_name.rsplit(".", maxsplit=1)
56
- module = __import__(module_path, fromlist=[class_name])
57
- else:
58
- module = exp
102
+ def _load(payload: t.Dict[str, t.Any]) -> exp.Expression | exp.DataType.Type:
103
+ class_name = payload["class"]
59
104
 
60
- klass = getattr(module, class_name)
105
+ if class_name == "DataType.Type":
106
+ return exp.DataType.Type(payload["value"])
61
107
 
62
- expression = klass(**{k: load(v) for k, v in obj["args"].items()})
63
- expression.type = t.cast(exp.DataType, load(obj.get("type")))
64
- expression.comments = obj.get("comments")
65
- expression._meta = obj.get("meta")
108
+ if "." in class_name:
109
+ module_path, class_name = class_name.rsplit(".", maxsplit=1)
110
+ module = __import__(module_path, fromlist=[class_name])
111
+ else:
112
+ module = exp
66
113
 
67
- return expression
68
- return obj
114
+ expression = getattr(module, class_name)()
115
+ expression.type = load(payload.get("type"))
116
+ expression.comments = payload.get("comments")
117
+ expression._meta = payload.get("meta")
118
+ return expression
sqlglot/tokens.py CHANGED
@@ -85,6 +85,9 @@ class TokenType(AutoName):
85
85
  DAMP = auto()
86
86
  XOR = auto()
87
87
  DSTAR = auto()
88
+ QMARK_AMP = auto()
89
+ QMARK_PIPE = auto()
90
+ HASH_DASH = auto()
88
91
 
89
92
  URI_START = auto()
90
93
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sqlglot
3
- Version: 27.13.1
3
+ Version: 27.14.0
4
4
  Summary: An easily customizable SQL parser and transpiler
5
5
  Author-email: Toby Mao <toby.mao@gmail.com>
6
6
  License-Expression: MIT
@@ -1,51 +1,51 @@
1
1
  sqlglot/__init__.py,sha256=za08rtdPh2v7dOpGdNomttlIVGgTrKja7rPd6sQwaTg,5391
2
2
  sqlglot/__main__.py,sha256=022c173KqxsiABWTEpUIq_tJUxuNiW7a7ABsxBXqvu8,2069
3
3
  sqlglot/_typing.py,sha256=-1HPyr3w5COlSJWqlgt8jhFk2dyMvBuvVBqIX1wyVCM,642
4
- sqlglot/_version.py,sha256=ou5bykwli5rw5_qblbMze4DFs75loRI8NxexhmePC64,708
4
+ sqlglot/_version.py,sha256=dO3zbTXGDFSvRTydZPKWrc0n8_OL2jG2s0_3yJeMyuY,708
5
5
  sqlglot/diff.py,sha256=PtOllQMQa1Sw1-V2Y8eypmDqGujXYPaTOp_WLsWkAWk,17314
6
6
  sqlglot/errors.py,sha256=QNKMr-pzLUDR-tuMmn_GK6iMHUIVdb_YSJ_BhGEvuso,2126
7
- sqlglot/expressions.py,sha256=oestq-dQt-OC2XLgG0B-I3cDgg7DptuXmcUW7_uP6aQ,253755
8
- sqlglot/generator.py,sha256=cO2HdHWKlL8Y7zs81hSsR-mFcy7U5x0Wd3A27jkn3lk,224692
7
+ sqlglot/expressions.py,sha256=LteUkI-eNj_xG4AvHTGuzqcZb2elWXygT5dR1FcxudI,254734
8
+ sqlglot/generator.py,sha256=5A8fDNIE6u_SuTiSLR4tLDfmoA5LRG9nHIEMqVutaMg,225102
9
9
  sqlglot/helper.py,sha256=9nZjFVRBtMKFC3EdzpDQ6jkazFO19po6BF8xHiNGZIo,15111
10
10
  sqlglot/jsonpath.py,sha256=SQgaxzaEYBN7At9dkTK4N1Spk6xHxvHL6QtCIP6iM30,7905
11
11
  sqlglot/lineage.py,sha256=Qj5ykuDNcATppb9vOjoIKBqRVLbu3OMPiZk9f3iyv40,15312
12
- sqlglot/parser.py,sha256=6QMiFaVbl7LJX-YMaZvJT_g28de2k3G5V-uDAx3zWTg,333268
12
+ sqlglot/parser.py,sha256=YECjuYjATlMIqgHyEyRDjG7Xm0Of2fHfsm2UgLCYNP0,334925
13
13
  sqlglot/planner.py,sha256=ql7Li-bWJRcyXzNaZy_n6bQ6B2ZfunEIB8Ztv2xaxq4,14634
14
14
  sqlglot/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
15
  sqlglot/schema.py,sha256=13H2qKQs27EKdTpDLOvcNnSTDAUbYNKjWtJs4aQCSOA,20509
16
- sqlglot/serde.py,sha256=DQVJ95WrIvhYfe02Ytb4NQug2aMwDCEwpMBW1LKDqzE,2031
16
+ sqlglot/serde.py,sha256=MFPE_7FgJx_kAZZFoaobj38jCjT6jfGghJUjzZBEUMs,3426
17
17
  sqlglot/time.py,sha256=Q62gv6kL40OiRBF6BMESxKJcMVn7ZLNw7sv8H34z5FI,18400
18
- sqlglot/tokens.py,sha256=GuRPOipxixWdEJDxIat9ZLPAn3JhQ2cP94qvOelXogU,49076
18
+ sqlglot/tokens.py,sha256=_Esqa2AB1PI7o7EUkupGUQYCvriE71O8J7BQiYySHC0,49146
19
19
  sqlglot/transforms.py,sha256=utNDsCBsA7hPUK3-aby3DDgiY_XVMAKQqeoLm1EyihI,41218
20
20
  sqlglot/trie.py,sha256=v27uXMrHfqrXlJ6GmeTSMovsB_3o0ctnlKhdNt7W6fI,2245
21
21
  sqlglot/dialects/__init__.py,sha256=e3K2NHrZO7oXfBzEpRsvgWAgJ_UCEyg7SlUCRqvnPj4,3799
22
22
  sqlglot/dialects/athena.py,sha256=ofArmayYLev4qZQ15GM8mevG04qqR5WGFb2ZcuYm6x4,10966
23
- sqlglot/dialects/bigquery.py,sha256=IO5u1sdT0wzl76_z3FVm8FgtVnnMBrFPab-HhxuxROs,72078
24
- sqlglot/dialects/clickhouse.py,sha256=YLY8s7oCfdCoD0X2iOIltivXXjtT_nJdb3931P0TDeU,58553
23
+ sqlglot/dialects/bigquery.py,sha256=Oz9_pYUhVHBjHschYvar4V9hgz8KywokEV0y4hNXi7w,72371
24
+ sqlglot/dialects/clickhouse.py,sha256=OzcDAS7pWs889gu0aLYn_HIjTLiPRjGFG61p6-84stE,58574
25
25
  sqlglot/dialects/databricks.py,sha256=H4QTq7gg6tJylKc_YWsGp6049KydoI_wlQUHM7iCJtI,4753
26
- sqlglot/dialects/dialect.py,sha256=MFP2WHesP-wf2HXub8s0_XyeUnjXCk8QNcqn5AIhARw,73361
26
+ sqlglot/dialects/dialect.py,sha256=BU4-x1d8tRCSAfdpeP5b86me0wXfgpcEPdyk27wLgg4,73293
27
27
  sqlglot/dialects/doris.py,sha256=CFnF955Oav3IjZWA80ickOI8tPpCjxk7BN5R4Z6pA1U,25263
28
28
  sqlglot/dialects/dremio.py,sha256=nOMxu_4xVKSOmMGNSwdxXSPc243cNbbpb-xXzYdgdeg,8460
29
29
  sqlglot/dialects/drill.py,sha256=FOh7_KjPx_77pv0DiHKZog0CcmzqeF9_PEmGnJ1ESSM,5825
30
30
  sqlglot/dialects/druid.py,sha256=kh3snZtneehNOWqs3XcPjsrhNaRbkCQ8E4hHbWJ1fHM,690
31
- sqlglot/dialects/duckdb.py,sha256=ANzn5L5KS5pe9hn81Ta_4h5ngdPxMS1k_LdMuYGiff8,52470
31
+ sqlglot/dialects/duckdb.py,sha256=ZVXloJqjz_LiMFJ80v14P01hAwerje5NT2FeQdzAaqc,53023
32
32
  sqlglot/dialects/dune.py,sha256=gALut-fFfN2qMsr8LvZ1NQK3F3W9z2f4PwMvTMXVVVg,375
33
33
  sqlglot/dialects/exasol.py,sha256=ay3g_VyT5WvHTgNyJuCQu0nBt4bpllLZ9IdMBizEgYM,15761
34
- sqlglot/dialects/fabric.py,sha256=4Sng2ZhQSaf6eK3ituR9DqDZERaVwYS_UfdpusjsISg,10220
35
- sqlglot/dialects/hive.py,sha256=zFr6WmNZXw5C0m3nFc9ynrbzk8Wtb5gyzmJ0tPlMge8,32014
34
+ sqlglot/dialects/fabric.py,sha256=BdkvzM8s-m5DIdBwdjEYskp32ub7aHCAex_xlhQn92I,10222
35
+ sqlglot/dialects/hive.py,sha256=UGIkXjMCk5a9ndUXQtvfG560oi3emdpqOYLQCmGabBk,32046
36
36
  sqlglot/dialects/materialize.py,sha256=LD2q1kTRrCwkIu1BfoBvnjTGbupDtoQ8JQMDCIYAXHg,3533
37
- sqlglot/dialects/mysql.py,sha256=pAIWuwvirUrFbJE06UiST9iyiA4-IkA1sGiMgkeo1DY,47884
37
+ sqlglot/dialects/mysql.py,sha256=YuLyZBrEw4qSacd_1LGLJX4n-P99alnhQmANBlSFT4o,49408
38
38
  sqlglot/dialects/oracle.py,sha256=zWPCpzGiTlgCJ5E6FjfX3Rszjcw4SnHg6xeVboMYIyo,15972
39
- sqlglot/dialects/postgres.py,sha256=NsaNBKUrqzb3bjVi16IBlYi8FPJnHRTl46lQX1GDNzw,33849
39
+ sqlglot/dialects/postgres.py,sha256=Zr5b0Yl5yXDKANcvUtnRsWUXLkbeEBZjwzFDAGtuub0,34296
40
40
  sqlglot/dialects/presto.py,sha256=XVeYr2NP86x5enlRqI7MYR6le85_ucYg_BBRocGN3jM,33413
41
41
  sqlglot/dialects/prql.py,sha256=fwN-SPEGx-drwf1K0U2MByN-PkW3C_rOgQ3xeJeychg,7908
42
- sqlglot/dialects/redshift.py,sha256=_sQTom4CGozFDZXW9y6bHQcZ-KiQ7QJjjQqM5rVagSc,15889
42
+ sqlglot/dialects/redshift.py,sha256=FIwtP3yEg-way9pa32kxCJc6IaFkHVIvgYKZA-Ilmi0,15919
43
43
  sqlglot/dialects/risingwave.py,sha256=BqWwW1iT_OIVMwfRamaww79snnBwIgCfr22Go-ggO68,3289
44
- sqlglot/dialects/singlestore.py,sha256=_42yJP-YYbHswIBCKO5qxP5Z_AVksLh3bMX394iivwQ,55764
45
- sqlglot/dialects/snowflake.py,sha256=xIftl1twnug9C-MW-jUncKskKVOwds3jpBvCDFmp5kM,72084
44
+ sqlglot/dialects/singlestore.py,sha256=GJrMoUK366JsQZyS_L8oGVkMkiVty1tijy7zqWAJ6Ck,61290
45
+ sqlglot/dialects/snowflake.py,sha256=mmBa0iHgBDSo9M508cdluWzEjvMbTotLRERS3-ybsVI,73091
46
46
  sqlglot/dialects/spark.py,sha256=PzyhkelDzbCMgJ3RVHD6yyzLIFp9NdZfwVas5IymowM,10147
47
47
  sqlglot/dialects/spark2.py,sha256=qz36FT9k4iuiqboRpyG4VpKGkPR0P2fifmqgZ9gNUEU,14851
48
- sqlglot/dialects/sqlite.py,sha256=UIQ66shIt2bQoLd7tYG4NVzh4HwCfERgAaLyukz8HjE,13231
48
+ sqlglot/dialects/sqlite.py,sha256=zzXEbnaLjJeg6hPLHricjpfSkuf8tpXECnjcHtoqIbw,13263
49
49
  sqlglot/dialects/starrocks.py,sha256=2gav0PSNgRdAGXzawdznZliBpglJoQ0wBxPI7ZIMsRw,11314
50
50
  sqlglot/dialects/tableau.py,sha256=oIawDzUITxGCWaEMB8OaNMPWhbC3U-2y09pYPm4eazc,2190
51
51
  sqlglot/dialects/teradata.py,sha256=7LxCcRwP0Idd_OnCzA57NCdheVjHcKC2aFAKG5N49IU,18202
@@ -57,7 +57,7 @@ sqlglot/executor/env.py,sha256=tQhU5PpTBMcxgZIFddFqxWMNPtHN0vOOz72voncY3KY,8276
57
57
  sqlglot/executor/python.py,sha256=09GYRzrPn3lZGfDJY9pbONOvmYxsRyeSWjUiqkSRHGo,16661
58
58
  sqlglot/executor/table.py,sha256=xkuJlgLVNYUXsSUaX0zTcnFekldXLLU8LqDyjR5K9wY,4419
59
59
  sqlglot/optimizer/__init__.py,sha256=FdAvVz6rQLLkiiH21-SD4RxB5zS3WDeU-s03PZkJ-F4,343
60
- sqlglot/optimizer/annotate_types.py,sha256=MfxXNzYgxaqhv1yBmzuigWjc1oIw1ikZ_lXjcdT3RDc,25128
60
+ sqlglot/optimizer/annotate_types.py,sha256=RuBjs-mnWjH-wa02UKPMPq-3ymjQ_X7_t7Vz9xNhNOA,26202
61
61
  sqlglot/optimizer/canonicalize.py,sha256=RJpUbWDudjknRMtO_Kf8MGZ5Hv1twpPWac2u5kpV4Vw,7719
62
62
  sqlglot/optimizer/eliminate_ctes.py,sha256=fUBM0RUnPrm2sYptEWBux98B7fcx7W-BM1zVqfgDz9c,1448
63
63
  sqlglot/optimizer/eliminate_joins.py,sha256=2iYtG93aJGxvURqm1BVPosrnnnQ_IXI14RcD4pM8eHc,5942
@@ -76,8 +76,8 @@ sqlglot/optimizer/qualify_tables.py,sha256=dA4ZazL7ShQh2JgBwpHuG-4c5lBw1TNzCnuN7
76
76
  sqlglot/optimizer/scope.py,sha256=UOTrbwqcTc5iRQf0WStgYWXpE24w6riZy-tJYA18yTw,31229
77
77
  sqlglot/optimizer/simplify.py,sha256=-_yus42OYwqjQ9a2TSGhtG2G0pSkInUry1z7hEMz2pY,51062
78
78
  sqlglot/optimizer/unnest_subqueries.py,sha256=kzWUVDlxs8z9nmRx-8U-pHXPtVZhEIwkKqmKhr2QLvc,10908
79
- sqlglot-27.13.1.dist-info/licenses/LICENSE,sha256=p1Yk0B4oa0l8Rh-_dYyy75d8spjPd_vTloXfz4FWxys,1065
80
- sqlglot-27.13.1.dist-info/METADATA,sha256=rnD1jn-fbRMKBZZGZWvzMgUuE3KzqbHgqXMzDSnwIcw,20682
81
- sqlglot-27.13.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
82
- sqlglot-27.13.1.dist-info/top_level.txt,sha256=5kRskCGA_gVADF9rSfSzPdLHXqvfMusDYeHePfNY2nQ,8
83
- sqlglot-27.13.1.dist-info/RECORD,,
79
+ sqlglot-27.14.0.dist-info/licenses/LICENSE,sha256=p1Yk0B4oa0l8Rh-_dYyy75d8spjPd_vTloXfz4FWxys,1065
80
+ sqlglot-27.14.0.dist-info/METADATA,sha256=BYDS9AxQRhQ_DsItPkMNCkZR3NfunsRSWM7kkNoPCDY,20682
81
+ sqlglot-27.14.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
82
+ sqlglot-27.14.0.dist-info/top_level.txt,sha256=5kRskCGA_gVADF9rSfSzPdLHXqvfMusDYeHePfNY2nQ,8
83
+ sqlglot-27.14.0.dist-info/RECORD,,