sqlglot-27.29.0-py3-none-any.whl → sqlglot-28.4.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63):
  1. sqlglot/__main__.py +6 -4
  2. sqlglot/_version.py +2 -2
  3. sqlglot/dialects/bigquery.py +116 -295
  4. sqlglot/dialects/clickhouse.py +67 -2
  5. sqlglot/dialects/databricks.py +38 -1
  6. sqlglot/dialects/dialect.py +327 -286
  7. sqlglot/dialects/dremio.py +4 -1
  8. sqlglot/dialects/duckdb.py +718 -22
  9. sqlglot/dialects/exasol.py +243 -10
  10. sqlglot/dialects/hive.py +8 -8
  11. sqlglot/dialects/mysql.py +11 -2
  12. sqlglot/dialects/oracle.py +29 -0
  13. sqlglot/dialects/postgres.py +46 -24
  14. sqlglot/dialects/presto.py +47 -16
  15. sqlglot/dialects/redshift.py +16 -0
  16. sqlglot/dialects/risingwave.py +3 -0
  17. sqlglot/dialects/singlestore.py +12 -3
  18. sqlglot/dialects/snowflake.py +199 -271
  19. sqlglot/dialects/spark.py +2 -2
  20. sqlglot/dialects/spark2.py +11 -48
  21. sqlglot/dialects/sqlite.py +9 -0
  22. sqlglot/dialects/teradata.py +5 -8
  23. sqlglot/dialects/trino.py +6 -0
  24. sqlglot/dialects/tsql.py +61 -25
  25. sqlglot/diff.py +4 -2
  26. sqlglot/errors.py +69 -0
  27. sqlglot/expressions.py +484 -84
  28. sqlglot/generator.py +143 -41
  29. sqlglot/helper.py +2 -2
  30. sqlglot/optimizer/annotate_types.py +247 -140
  31. sqlglot/optimizer/canonicalize.py +6 -1
  32. sqlglot/optimizer/eliminate_joins.py +1 -1
  33. sqlglot/optimizer/eliminate_subqueries.py +2 -2
  34. sqlglot/optimizer/merge_subqueries.py +5 -5
  35. sqlglot/optimizer/normalize.py +20 -13
  36. sqlglot/optimizer/normalize_identifiers.py +17 -3
  37. sqlglot/optimizer/optimizer.py +4 -0
  38. sqlglot/optimizer/pushdown_predicates.py +1 -1
  39. sqlglot/optimizer/qualify.py +14 -6
  40. sqlglot/optimizer/qualify_columns.py +113 -352
  41. sqlglot/optimizer/qualify_tables.py +112 -70
  42. sqlglot/optimizer/resolver.py +374 -0
  43. sqlglot/optimizer/scope.py +27 -16
  44. sqlglot/optimizer/simplify.py +1074 -964
  45. sqlglot/optimizer/unnest_subqueries.py +12 -2
  46. sqlglot/parser.py +276 -160
  47. sqlglot/planner.py +2 -2
  48. sqlglot/schema.py +15 -4
  49. sqlglot/tokens.py +42 -7
  50. sqlglot/transforms.py +77 -22
  51. sqlglot/typing/__init__.py +316 -0
  52. sqlglot/typing/bigquery.py +376 -0
  53. sqlglot/typing/hive.py +12 -0
  54. sqlglot/typing/presto.py +24 -0
  55. sqlglot/typing/snowflake.py +505 -0
  56. sqlglot/typing/spark2.py +58 -0
  57. sqlglot/typing/tsql.py +9 -0
  58. {sqlglot-27.29.0.dist-info → sqlglot-28.4.0.dist-info}/METADATA +2 -2
  59. sqlglot-28.4.0.dist-info/RECORD +92 -0
  60. sqlglot-27.29.0.dist-info/RECORD +0 -84
  61. {sqlglot-27.29.0.dist-info → sqlglot-28.4.0.dist-info}/WHEEL +0 -0
  62. {sqlglot-27.29.0.dist-info → sqlglot-28.4.0.dist-info}/licenses/LICENSE +0 -0
  63. {sqlglot-27.29.0.dist-info → sqlglot-28.4.0.dist-info}/top_level.txt +0 -0
@@ -9,6 +9,7 @@ from sqlglot.dialects.dialect import (
9
9
  arg_max_or_min_no_count,
10
10
  build_date_delta,
11
11
  build_formatted_time,
12
+ build_like,
12
13
  inline_array_sql,
13
14
  json_extract_segments,
14
15
  json_path_key_only_name,
@@ -23,6 +24,7 @@ from sqlglot.dialects.dialect import (
23
24
  timestamptrunc_sql,
24
25
  unit_to_var,
25
26
  trim_sql,
27
+ sha2_digest_sql,
26
28
  )
27
29
  from sqlglot.generator import Generator
28
30
  from sqlglot.helper import is_int, seq_get
@@ -188,6 +190,43 @@ def _map_sql(self: ClickHouse.Generator, expression: exp.Map | exp.VarMap) -> st
188
190
  return f"{{{csv_args}}}"
189
191
 
190
192
 
193
+ def _build_timestamp_trunc(unit: str) -> t.Callable[[t.List], exp.TimestampTrunc]:
194
+ return lambda args: exp.TimestampTrunc(
195
+ this=seq_get(args, 0), unit=exp.var(unit), zone=seq_get(args, 1)
196
+ )
197
+
198
+
199
+ def _build_split_by_char(args: t.List) -> exp.Split | exp.Anonymous:
200
+ sep = seq_get(args, 0)
201
+ if isinstance(sep, exp.Literal):
202
+ sep_value = sep.to_py()
203
+ if isinstance(sep_value, str) and len(sep_value.encode("utf-8")) == 1:
204
+ return _build_split(exp.Split)(args)
205
+
206
+ return exp.Anonymous(this="splitByChar", expressions=args)
207
+
208
+
209
+ def _build_split(exp_class: t.Type[E]) -> t.Callable[[t.List], E]:
210
+ return lambda args: exp_class(
211
+ this=seq_get(args, 1), expression=seq_get(args, 0), limit=seq_get(args, 2)
212
+ )
213
+
214
+
215
+ # Skip the 'week' unit since ClickHouse's toStartOfWeek
216
+ # uses an extra mode argument to specify the first day of the week
217
+ TIMESTAMP_TRUNC_UNITS = {
218
+ "MICROSECOND",
219
+ "MILLISECOND",
220
+ "SECOND",
221
+ "MINUTE",
222
+ "HOUR",
223
+ "DAY",
224
+ "MONTH",
225
+ "QUARTER",
226
+ "YEAR",
227
+ }
228
+
229
+
191
230
  class ClickHouse(Dialect):
192
231
  INDEX_OFFSET = 1
193
232
  NORMALIZE_FUNCTIONS: bool | str = False
@@ -308,10 +347,16 @@ class ClickHouse(Dialect):
308
347
 
309
348
  FUNCTIONS = {
310
349
  **parser.Parser.FUNCTIONS,
350
+ **{
351
+ f"TOSTARTOF{unit}": _build_timestamp_trunc(unit=unit)
352
+ for unit in TIMESTAMP_TRUNC_UNITS
353
+ },
311
354
  "ANY": exp.AnyValue.from_arg_list,
312
355
  "ARRAYSUM": exp.ArraySum.from_arg_list,
313
356
  "ARRAYREVERSE": exp.ArrayReverse.from_arg_list,
314
357
  "ARRAYSLICE": exp.ArraySlice.from_arg_list,
358
+ "CURRENTDATABASE": exp.CurrentDatabase.from_arg_list,
359
+ "CURRENTSCHEMAS": exp.CurrentSchemas.from_arg_list,
315
360
  "COUNTIF": _build_count_if,
316
361
  "COSINEDISTANCE": exp.CosineDistance.from_arg_list,
317
362
  "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None),
@@ -322,13 +367,17 @@ class ClickHouse(Dialect):
322
367
  "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None),
323
368
  "DATESUB": build_date_delta(exp.DateSub, default_unit=None),
324
369
  "FORMATDATETIME": _build_datetime_format(exp.TimeToStr),
370
+ "HAS": exp.ArrayContains.from_arg_list,
371
+ "ILIKE": build_like(exp.ILike),
325
372
  "JSONEXTRACTSTRING": build_json_extract_path(
326
373
  exp.JSONExtractScalar, zero_based_indexing=False
327
374
  ),
328
375
  "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
376
+ "LIKE": build_like(exp.Like),
329
377
  "L2Distance": exp.EuclideanDistance.from_arg_list,
330
378
  "MAP": parser.build_var_map,
331
379
  "MATCH": exp.RegexpLike.from_arg_list,
380
+ "NOTLIKE": build_like(exp.Like, not_like=True),
332
381
  "PARSEDATETIME": _build_datetime_format(exp.ParseDatetime),
333
382
  "RANDCANONICAL": exp.Rand.from_arg_list,
334
383
  "STR_TO_DATE": _build_str_to_date,
@@ -336,11 +385,15 @@ class ClickHouse(Dialect):
336
385
  "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None),
337
386
  "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None),
338
387
  "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None),
388
+ "TOMONDAY": _build_timestamp_trunc("WEEK"),
339
389
  "UNIQ": exp.ApproxDistinct.from_arg_list,
340
390
  "XOR": lambda args: exp.Xor(expressions=args),
341
391
  "MD5": exp.MD5Digest.from_arg_list,
342
392
  "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
343
393
  "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
394
+ "SPLITBYCHAR": _build_split_by_char,
395
+ "SPLITBYREGEXP": _build_split(exp.RegexpSplit),
396
+ "SPLITBYSTRING": _build_split(exp.Split),
344
397
  "SUBSTRINGINDEX": exp.SubstringIndex.from_arg_list,
345
398
  "TOTYPENAME": exp.Typeof.from_arg_list,
346
399
  "EDITDISTANCE": exp.Levenshtein.from_arg_list,
@@ -418,6 +471,7 @@ class ClickHouse(Dialect):
418
471
  "quantiles",
419
472
  "quantileExact",
420
473
  "quantilesExact",
474
+ "quantilesExactExclusive",
421
475
  "quantileExactLow",
422
476
  "quantilesExactLow",
423
477
  "quantileExactHigh",
@@ -771,7 +825,7 @@ class ClickHouse(Dialect):
771
825
  if join:
772
826
  method = join.args.get("method")
773
827
  join.set("method", None)
774
- join.set("global", method)
828
+ join.set("global_", method)
775
829
 
776
830
  # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table`
777
831
  # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join
@@ -1087,6 +1141,7 @@ class ClickHouse(Dialect):
1087
1141
  exp.AnyValue: rename_func("any"),
1088
1142
  exp.ApproxDistinct: rename_func("uniq"),
1089
1143
  exp.ArrayConcat: rename_func("arrayConcat"),
1144
+ exp.ArrayContains: rename_func("has"),
1090
1145
  exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
1091
1146
  exp.ArrayRemove: remove_from_array_using_filter,
1092
1147
  exp.ArrayReverse: rename_func("arrayReverse"),
@@ -1096,6 +1151,8 @@ class ClickHouse(Dialect):
1096
1151
  exp.ArgMin: arg_max_or_min_no_count("argMin"),
1097
1152
  exp.Array: inline_array_sql,
1098
1153
  exp.CastToStrType: rename_func("CAST"),
1154
+ exp.CurrentDatabase: rename_func("CURRENT_DATABASE"),
1155
+ exp.CurrentSchemas: rename_func("CURRENT_SCHEMAS"),
1099
1156
  exp.CountIf: rename_func("countIf"),
1100
1157
  exp.CosineDistance: rename_func("cosineDistance"),
1101
1158
  exp.CompressColumnConstraint: lambda self,
@@ -1148,9 +1205,17 @@ class ClickHouse(Dialect):
1148
1205
  exp.MD5Digest: rename_func("MD5"),
1149
1206
  exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))),
1150
1207
  exp.SHA: rename_func("SHA1"),
1208
+ exp.SHA1Digest: rename_func("SHA1"),
1151
1209
  exp.SHA2: sha256_sql,
1210
+ exp.SHA2Digest: sha2_digest_sql,
1211
+ exp.Split: lambda self, e: self.func(
1212
+ "splitByString", e.args.get("expression"), e.this, e.args.get("limit")
1213
+ ),
1214
+ exp.RegexpSplit: lambda self, e: self.func(
1215
+ "splitByRegexp", e.args.get("expression"), e.this, e.args.get("limit")
1216
+ ),
1152
1217
  exp.UnixToTime: _unix_to_time_sql,
1153
- exp.TimestampTrunc: timestamptrunc_sql(zone=True),
1218
+ exp.TimestampTrunc: timestamptrunc_sql(func="dateTrunc", zone=True),
1154
1219
  exp.Trim: lambda self, e: trim_sql(self, e, default_trim_type="BOTH"),
1155
1220
  exp.Variance: rename_func("varSamp"),
1156
1221
  exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
@@ -3,7 +3,7 @@ from __future__ import annotations
3
3
  from copy import deepcopy
4
4
  from collections import defaultdict
5
5
 
6
- from sqlglot import exp, transforms, jsonpath
6
+ from sqlglot import exp, transforms, jsonpath, parser
7
7
  from sqlglot.dialects.dialect import (
8
8
  date_delta_sql,
9
9
  build_date_delta,
@@ -12,6 +12,7 @@ from sqlglot.dialects.dialect import (
12
12
  groupconcat_sql,
13
13
  )
14
14
  from sqlglot.dialects.spark import Spark
15
+ from sqlglot.helper import seq_get
15
16
  from sqlglot.tokens import TokenType
16
17
  from sqlglot.optimizer.annotate_types import TypeAnnotator
17
18
 
@@ -54,11 +55,21 @@ class Databricks(Spark):
54
55
 
55
56
  FUNCTIONS = {
56
57
  **Spark.Parser.FUNCTIONS,
58
+ "GETDATE": exp.CurrentTimestamp.from_arg_list,
57
59
  "DATEADD": build_date_delta(exp.DateAdd),
58
60
  "DATE_ADD": build_date_delta(exp.DateAdd),
59
61
  "DATEDIFF": build_date_delta(exp.DateDiff),
60
62
  "DATE_DIFF": build_date_delta(exp.DateDiff),
63
+ "NOW": exp.CurrentTimestamp.from_arg_list,
61
64
  "TO_DATE": build_formatted_time(exp.TsOrDsToDate, "databricks"),
65
+ "UNIFORM": lambda args: exp.Uniform(
66
+ this=seq_get(args, 0), expression=seq_get(args, 1), seed=seq_get(args, 2)
67
+ ),
68
+ }
69
+
70
+ NO_PAREN_FUNCTION_PARSERS = {
71
+ **Spark.Parser.NO_PAREN_FUNCTION_PARSERS,
72
+ "CURDATE": lambda self: self._parse_curdate(),
62
73
  }
63
74
 
64
75
  FACTOR = {
@@ -66,6 +77,21 @@ class Databricks(Spark):
66
77
  TokenType.COLON: exp.JSONExtract,
67
78
  }
68
79
 
80
+ COLUMN_OPERATORS = {
81
+ **parser.Parser.COLUMN_OPERATORS,
82
+ TokenType.QDCOLON: lambda self, this, to: self.expression(
83
+ exp.TryCast,
84
+ this=this,
85
+ to=to,
86
+ ),
87
+ }
88
+
89
+ def _parse_curdate(self) -> exp.CurrentDate:
90
+ # CURDATE, an alias for CURRENT_DATE, has optional parentheses
91
+ if self._match(TokenType.L_PAREN):
92
+ self._match_r_paren()
93
+ return self.expression(exp.CurrentDate)
94
+
69
95
  class Generator(Spark.Generator):
70
96
  TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
71
97
  COPY_PARAMS_ARE_WRAPPED = False
@@ -104,6 +130,7 @@ class Databricks(Spark):
104
130
  if e.args.get("is_numeric")
105
131
  else self.function_fallback_sql(e)
106
132
  ),
133
+ exp.CurrentCatalog: lambda *_: "CURRENT_CATALOG()",
107
134
  }
108
135
 
109
136
  TRANSFORMS.pop(exp.RegexpLike)
@@ -136,3 +163,13 @@ class Databricks(Spark):
136
163
  def jsonpath_sql(self, expression: exp.JSONPath) -> str:
137
164
  expression.set("escape", None)
138
165
  return super().jsonpath_sql(expression)
166
+
167
+ def uniform_sql(self, expression: exp.Uniform) -> str:
168
+ gen = expression.args.get("gen")
169
+ seed = expression.args.get("seed")
170
+
171
+ # Snowflake's UNIFORM(min, max, gen) passes gen as RANDOM(), RANDOM(seed), or a constant value; when gen is set, extract the seed from it
172
+ if gen:
173
+ seed = gen.this
174
+
175
+ return self.func("UNIFORM", expression.this, expression.expression, seed)