sqlglot 27.29.0__py3-none-any.whl → 28.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. sqlglot/__main__.py +6 -4
  2. sqlglot/_version.py +2 -2
  3. sqlglot/dialects/bigquery.py +116 -295
  4. sqlglot/dialects/clickhouse.py +67 -2
  5. sqlglot/dialects/databricks.py +38 -1
  6. sqlglot/dialects/dialect.py +327 -286
  7. sqlglot/dialects/dremio.py +4 -1
  8. sqlglot/dialects/duckdb.py +718 -22
  9. sqlglot/dialects/exasol.py +243 -10
  10. sqlglot/dialects/hive.py +8 -8
  11. sqlglot/dialects/mysql.py +11 -2
  12. sqlglot/dialects/oracle.py +29 -0
  13. sqlglot/dialects/postgres.py +46 -24
  14. sqlglot/dialects/presto.py +47 -16
  15. sqlglot/dialects/redshift.py +16 -0
  16. sqlglot/dialects/risingwave.py +3 -0
  17. sqlglot/dialects/singlestore.py +12 -3
  18. sqlglot/dialects/snowflake.py +199 -271
  19. sqlglot/dialects/spark.py +2 -2
  20. sqlglot/dialects/spark2.py +11 -48
  21. sqlglot/dialects/sqlite.py +9 -0
  22. sqlglot/dialects/teradata.py +5 -8
  23. sqlglot/dialects/trino.py +6 -0
  24. sqlglot/dialects/tsql.py +61 -25
  25. sqlglot/diff.py +4 -2
  26. sqlglot/errors.py +69 -0
  27. sqlglot/expressions.py +484 -84
  28. sqlglot/generator.py +143 -41
  29. sqlglot/helper.py +2 -2
  30. sqlglot/optimizer/annotate_types.py +247 -140
  31. sqlglot/optimizer/canonicalize.py +6 -1
  32. sqlglot/optimizer/eliminate_joins.py +1 -1
  33. sqlglot/optimizer/eliminate_subqueries.py +2 -2
  34. sqlglot/optimizer/merge_subqueries.py +5 -5
  35. sqlglot/optimizer/normalize.py +20 -13
  36. sqlglot/optimizer/normalize_identifiers.py +17 -3
  37. sqlglot/optimizer/optimizer.py +4 -0
  38. sqlglot/optimizer/pushdown_predicates.py +1 -1
  39. sqlglot/optimizer/qualify.py +14 -6
  40. sqlglot/optimizer/qualify_columns.py +113 -352
  41. sqlglot/optimizer/qualify_tables.py +112 -70
  42. sqlglot/optimizer/resolver.py +374 -0
  43. sqlglot/optimizer/scope.py +27 -16
  44. sqlglot/optimizer/simplify.py +1074 -964
  45. sqlglot/optimizer/unnest_subqueries.py +12 -2
  46. sqlglot/parser.py +276 -160
  47. sqlglot/planner.py +2 -2
  48. sqlglot/schema.py +15 -4
  49. sqlglot/tokens.py +42 -7
  50. sqlglot/transforms.py +77 -22
  51. sqlglot/typing/__init__.py +316 -0
  52. sqlglot/typing/bigquery.py +376 -0
  53. sqlglot/typing/hive.py +12 -0
  54. sqlglot/typing/presto.py +24 -0
  55. sqlglot/typing/snowflake.py +505 -0
  56. sqlglot/typing/spark2.py +58 -0
  57. sqlglot/typing/tsql.py +9 -0
  58. {sqlglot-27.29.0.dist-info → sqlglot-28.4.0.dist-info}/METADATA +2 -2
  59. sqlglot-28.4.0.dist-info/RECORD +92 -0
  60. sqlglot-27.29.0.dist-info/RECORD +0 -84
  61. {sqlglot-27.29.0.dist-info → sqlglot-28.4.0.dist-info}/WHEEL +0 -0
  62. {sqlglot-27.29.0.dist-info → sqlglot-28.4.0.dist-info}/licenses/LICENSE +0 -0
  63. {sqlglot-27.29.0.dist-info → sqlglot-28.4.0.dist-info}/top_level.txt +0 -0
@@ -31,6 +31,7 @@ from sqlglot.dialects.dialect import (
31
31
  sequence_sql,
32
32
  build_regexp_extract,
33
33
  explode_to_unnest_sql,
34
+ sha2_digest_sql,
34
35
  )
35
36
  from sqlglot.dialects.hive import Hive
36
37
  from sqlglot.dialects.mysql import MySQL
@@ -39,11 +40,18 @@ from sqlglot.optimizer.scope import find_all_in_scope
39
40
  from sqlglot.tokens import TokenType
40
41
  from sqlglot.transforms import unqualify_columns
41
42
  from sqlglot.generator import unsupported_args
43
+ from sqlglot.typing.presto import EXPRESSION_METADATA
42
44
 
43
45
  DATE_ADD_OR_SUB = t.Union[exp.DateAdd, exp.TimestampAdd, exp.DateSub]
44
46
 
45
47
 
46
48
  def _initcap_sql(self: Presto.Generator, expression: exp.Initcap) -> str:
49
+ delimiters = expression.expression
50
+ if delimiters and not (
51
+ delimiters.is_string and delimiters.this == self.dialect.INITCAP_DEFAULT_DELIMITER_CHARS
52
+ ):
53
+ self.unsupported("INITCAP does not support custom delimiters")
54
+
47
55
  regex = r"(\w)(\w*)"
48
56
  return f"REGEXP_REPLACE({self.sql(expression, 'this')}, '{regex}', x -> UPPER(x[1]) || LOWER(x[2]))"
49
57
 
@@ -267,20 +275,7 @@ class Presto(Dialect):
267
275
  # https://github.com/prestodb/presto/issues/2863
268
276
  NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE
269
277
 
270
- # The result of certain math functions in Presto/Trino is of type
271
- # equal to the input type e.g: FLOOR(5.5/2) -> DECIMAL, FLOOR(5/2) -> BIGINT
272
- ANNOTATORS = {
273
- **Dialect.ANNOTATORS,
274
- exp.Floor: lambda self, e: self._annotate_by_args(e, "this"),
275
- exp.Ceil: lambda self, e: self._annotate_by_args(e, "this"),
276
- exp.Mod: lambda self, e: self._annotate_by_args(e, "this", "expression"),
277
- exp.Round: lambda self, e: self._annotate_by_args(e, "this"),
278
- exp.Sign: lambda self, e: self._annotate_by_args(e, "this"),
279
- exp.Abs: lambda self, e: self._annotate_by_args(e, "this"),
280
- exp.Rand: lambda self, e: self._annotate_by_args(e, "this")
281
- if e.this
282
- else self._set_type(e, exp.DataType.Type.DOUBLE),
283
- }
278
+ EXPRESSION_METADATA = EXPRESSION_METADATA.copy()
284
279
 
285
280
  SUPPORTED_SETTINGS = {
286
281
  *Dialect.SUPPORTED_SETTINGS,
@@ -429,10 +424,19 @@ class Presto(Dialect):
429
424
  TRANSFORMS = {
430
425
  **generator.Generator.TRANSFORMS,
431
426
  exp.AnyValue: rename_func("ARBITRARY"),
432
- exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"),
427
+ exp.ApproxQuantile: lambda self, e: self.func(
428
+ "APPROX_PERCENTILE",
429
+ e.this,
430
+ e.args.get("weight"),
431
+ e.args.get("quantile"),
432
+ e.args.get("accuracy"),
433
+ ),
433
434
  exp.ArgMax: rename_func("MAX_BY"),
434
435
  exp.ArgMin: rename_func("MIN_BY"),
435
- exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]",
436
+ exp.Array: transforms.preprocess(
437
+ [transforms.inherit_struct_field_names],
438
+ generator=lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]",
439
+ ),
436
440
  exp.ArrayAny: rename_func("ANY_MATCH"),
437
441
  exp.ArrayConcat: rename_func("CONCAT"),
438
442
  exp.ArrayContains: rename_func("CONTAINS"),
@@ -542,7 +546,9 @@ class Presto(Dialect):
542
546
  exp.Xor: bool_xor_sql,
543
547
  exp.MD5Digest: rename_func("MD5"),
544
548
  exp.SHA: rename_func("SHA1"),
549
+ exp.SHA1Digest: rename_func("SHA1"),
545
550
  exp.SHA2: sha256_sql,
551
+ exp.SHA2Digest: sha2_digest_sql,
546
552
  }
547
553
 
548
554
  RESERVED_KEYWORDS = {
@@ -606,6 +612,31 @@ class Presto(Dialect):
606
612
  "with",
607
613
  }
608
614
 
615
+ def extract_sql(self, expression: exp.Extract) -> str:
616
+ date_part = expression.name
617
+
618
+ if not date_part.startswith("EPOCH"):
619
+ return super().extract_sql(expression)
620
+
621
+ if date_part == "EPOCH_MILLISECOND":
622
+ scale = 10**3
623
+ elif date_part == "EPOCH_MICROSECOND":
624
+ scale = 10**6
625
+ elif date_part == "EPOCH_NANOSECOND":
626
+ scale = 10**9
627
+ else:
628
+ scale = None
629
+
630
+ value = expression.expression
631
+
632
+ ts = exp.cast(value, to=exp.DataType.build("TIMESTAMP"))
633
+ to_unix: exp.Expression = exp.TimeToUnix(this=ts)
634
+
635
+ if scale:
636
+ to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))
637
+
638
+ return self.sql(to_unix)
639
+
609
640
  def jsonformat_sql(self, expression: exp.JSONFormat) -> str:
610
641
  this = expression.this
611
642
  is_json = expression.args.get("is_json")
@@ -47,6 +47,8 @@ class Redshift(Postgres):
47
47
  COPY_PARAMS_ARE_CSV = False
48
48
  HEX_LOWERCASE = True
49
49
  HAS_DISTINCT_ARRAY_CONSTRUCTORS = True
50
+ COALESCE_COMPARISON_NON_STANDARD = True
51
+ REGEXP_EXTRACT_POSITION_OVERFLOW_RETURNS_NULL = False
50
52
 
51
53
  # ref: https://docs.aws.amazon.com/redshift/latest/dg/r_FORMAT_strings.html
52
54
  TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
@@ -68,6 +70,13 @@ class Redshift(Postgres):
68
70
  "DATE_DIFF": _build_date_delta(exp.TsOrDsDiff),
69
71
  "GETDATE": exp.CurrentTimestamp.from_arg_list,
70
72
  "LISTAGG": exp.GroupConcat.from_arg_list,
73
+ "REGEXP_SUBSTR": lambda args: exp.RegexpExtract(
74
+ this=seq_get(args, 0),
75
+ expression=seq_get(args, 1),
76
+ position=seq_get(args, 2),
77
+ occurrence=seq_get(args, 3),
78
+ parameters=seq_get(args, 4),
79
+ ),
71
80
  "SPLIT_TO_ARRAY": lambda args: exp.StringToArray(
72
81
  this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string(",")
73
82
  ),
@@ -200,6 +209,7 @@ class Redshift(Postgres):
200
209
  exp.JSONExtractScalar: json_extract_segments("JSON_EXTRACT_PATH_TEXT"),
201
210
  exp.GroupConcat: rename_func("LISTAGG"),
202
211
  exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))),
212
+ exp.RegexpExtract: rename_func("REGEXP_SUBSTR"),
203
213
  exp.Select: transforms.preprocess(
204
214
  [
205
215
  transforms.eliminate_window_clause,
@@ -218,6 +228,9 @@ class Redshift(Postgres):
218
228
  exp.TsOrDsAdd: date_delta_sql("DATEADD"),
219
229
  exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
220
230
  exp.UnixToTime: lambda self, e: self._unix_to_time_sql(e),
231
+ exp.SHA2Digest: lambda self, e: self.func(
232
+ "SHA2", e.this, e.args.get("length") or exp.Literal.number(256)
233
+ ),
221
234
  }
222
235
 
223
236
  # Postgres maps exp.Pivot to no_pivot_sql, but Redshift support pivots
@@ -231,6 +244,9 @@ class Redshift(Postgres):
231
244
  TRANSFORMS.pop(exp.LastDay)
232
245
  TRANSFORMS.pop(exp.SHA2)
233
246
 
247
+ # Postgres does not permit a double precision argument in ROUND; Redshift does
248
+ TRANSFORMS.pop(exp.Round)
249
+
234
250
  RESERVED_KEYWORDS = {
235
251
  "aes128",
236
252
  "aes256",
@@ -8,6 +8,9 @@ from sqlglot import exp
8
8
 
9
9
 
10
10
  class RisingWave(Postgres):
11
+ REQUIRES_PARENTHESIZED_STRUCT_ACCESS = True
12
+ SUPPORTS_STRUCT_STAR_EXPANSION = True
13
+
11
14
  class Tokenizer(Postgres.Tokenizer):
12
15
  KEYWORDS = {
13
16
  **Postgres.Tokenizer.KEYWORDS,
@@ -81,6 +81,7 @@ class SingleStore(MySQL):
81
81
  "!:>": TokenType.NCOLON_GT,
82
82
  "::$": TokenType.DCOLONDOLLAR,
83
83
  "::%": TokenType.DCOLONPERCENT,
84
+ "::?": TokenType.DCOLONQMARK,
84
85
  }
85
86
 
86
87
  class Parser(MySQL.Parser):
@@ -253,6 +254,12 @@ class SingleStore(MySQL):
253
254
  TokenType.DCOLONPERCENT: lambda self, this, path: build_json_extract_path(
254
255
  exp.JSONExtractScalar, json_type="DOUBLE"
255
256
  )([this, exp.Literal.string(path.name)]),
257
+ TokenType.DCOLONQMARK: lambda self, this, path: self.expression(
258
+ exp.JSONExists,
259
+ this=this,
260
+ path=path.name,
261
+ from_dcolonqmark=True,
262
+ ),
256
263
  }
257
264
  COLUMN_OPERATORS.pop(TokenType.ARROW)
258
265
  COLUMN_OPERATORS.pop(TokenType.DARROW)
@@ -452,8 +459,10 @@ class SingleStore(MySQL):
452
459
  exp.JSONBExists: lambda self, e: self.func(
453
460
  "BSON_MATCH_ANY_EXISTS", e.this, e.args.get("path")
454
461
  ),
455
- exp.JSONExists: unsupported_args("passing", "on_condition")(
456
- lambda self, e: self.func("JSON_MATCH_ANY_EXISTS", e.this, e.args.get("path"))
462
+ exp.JSONExists: lambda self, e: (
463
+ f"{self.sql(e.this)}::?{self.sql(e.args.get('path'))}"
464
+ if e.args.get("from_dcolonqmark")
465
+ else self.func("JSON_MATCH_ANY_EXISTS", e.this, e.args.get("path"))
457
466
  ),
458
467
  exp.JSONObject: unsupported_args(
459
468
  "null_handling", "unique_keys", "return_type", "encoding"
@@ -542,7 +551,7 @@ class SingleStore(MySQL):
542
551
  "offset",
543
552
  "starts_with",
544
553
  "limit",
545
- "from",
554
+ "from_",
546
555
  "scope",
547
556
  "scope_kind",
548
557
  "mutex",