sqlglot 27.29.0__py3-none-any.whl → 28.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlglot/__main__.py +6 -4
- sqlglot/_version.py +2 -2
- sqlglot/dialects/bigquery.py +116 -295
- sqlglot/dialects/clickhouse.py +67 -2
- sqlglot/dialects/databricks.py +38 -1
- sqlglot/dialects/dialect.py +327 -286
- sqlglot/dialects/dremio.py +4 -1
- sqlglot/dialects/duckdb.py +718 -22
- sqlglot/dialects/exasol.py +243 -10
- sqlglot/dialects/hive.py +8 -8
- sqlglot/dialects/mysql.py +11 -2
- sqlglot/dialects/oracle.py +29 -0
- sqlglot/dialects/postgres.py +46 -24
- sqlglot/dialects/presto.py +47 -16
- sqlglot/dialects/redshift.py +16 -0
- sqlglot/dialects/risingwave.py +3 -0
- sqlglot/dialects/singlestore.py +12 -3
- sqlglot/dialects/snowflake.py +199 -271
- sqlglot/dialects/spark.py +2 -2
- sqlglot/dialects/spark2.py +11 -48
- sqlglot/dialects/sqlite.py +9 -0
- sqlglot/dialects/teradata.py +5 -8
- sqlglot/dialects/trino.py +6 -0
- sqlglot/dialects/tsql.py +61 -25
- sqlglot/diff.py +4 -2
- sqlglot/errors.py +69 -0
- sqlglot/expressions.py +484 -84
- sqlglot/generator.py +143 -41
- sqlglot/helper.py +2 -2
- sqlglot/optimizer/annotate_types.py +247 -140
- sqlglot/optimizer/canonicalize.py +6 -1
- sqlglot/optimizer/eliminate_joins.py +1 -1
- sqlglot/optimizer/eliminate_subqueries.py +2 -2
- sqlglot/optimizer/merge_subqueries.py +5 -5
- sqlglot/optimizer/normalize.py +20 -13
- sqlglot/optimizer/normalize_identifiers.py +17 -3
- sqlglot/optimizer/optimizer.py +4 -0
- sqlglot/optimizer/pushdown_predicates.py +1 -1
- sqlglot/optimizer/qualify.py +14 -6
- sqlglot/optimizer/qualify_columns.py +113 -352
- sqlglot/optimizer/qualify_tables.py +112 -70
- sqlglot/optimizer/resolver.py +374 -0
- sqlglot/optimizer/scope.py +27 -16
- sqlglot/optimizer/simplify.py +1074 -964
- sqlglot/optimizer/unnest_subqueries.py +12 -2
- sqlglot/parser.py +276 -160
- sqlglot/planner.py +2 -2
- sqlglot/schema.py +15 -4
- sqlglot/tokens.py +42 -7
- sqlglot/transforms.py +77 -22
- sqlglot/typing/__init__.py +316 -0
- sqlglot/typing/bigquery.py +376 -0
- sqlglot/typing/hive.py +12 -0
- sqlglot/typing/presto.py +24 -0
- sqlglot/typing/snowflake.py +505 -0
- sqlglot/typing/spark2.py +58 -0
- sqlglot/typing/tsql.py +9 -0
- {sqlglot-27.29.0.dist-info → sqlglot-28.4.0.dist-info}/METADATA +2 -2
- sqlglot-28.4.0.dist-info/RECORD +92 -0
- sqlglot-27.29.0.dist-info/RECORD +0 -84
- {sqlglot-27.29.0.dist-info → sqlglot-28.4.0.dist-info}/WHEEL +0 -0
- {sqlglot-27.29.0.dist-info → sqlglot-28.4.0.dist-info}/licenses/LICENSE +0 -0
- {sqlglot-27.29.0.dist-info → sqlglot-28.4.0.dist-info}/top_level.txt +0 -0
sqlglot/dialects/presto.py
CHANGED
|
@@ -31,6 +31,7 @@ from sqlglot.dialects.dialect import (
|
|
|
31
31
|
sequence_sql,
|
|
32
32
|
build_regexp_extract,
|
|
33
33
|
explode_to_unnest_sql,
|
|
34
|
+
sha2_digest_sql,
|
|
34
35
|
)
|
|
35
36
|
from sqlglot.dialects.hive import Hive
|
|
36
37
|
from sqlglot.dialects.mysql import MySQL
|
|
@@ -39,11 +40,18 @@ from sqlglot.optimizer.scope import find_all_in_scope
|
|
|
39
40
|
from sqlglot.tokens import TokenType
|
|
40
41
|
from sqlglot.transforms import unqualify_columns
|
|
41
42
|
from sqlglot.generator import unsupported_args
|
|
43
|
+
from sqlglot.typing.presto import EXPRESSION_METADATA
|
|
42
44
|
|
|
43
45
|
DATE_ADD_OR_SUB = t.Union[exp.DateAdd, exp.TimestampAdd, exp.DateSub]
|
|
44
46
|
|
|
45
47
|
|
|
46
48
|
def _initcap_sql(self: Presto.Generator, expression: exp.Initcap) -> str:
|
|
49
|
+
delimiters = expression.expression
|
|
50
|
+
if delimiters and not (
|
|
51
|
+
delimiters.is_string and delimiters.this == self.dialect.INITCAP_DEFAULT_DELIMITER_CHARS
|
|
52
|
+
):
|
|
53
|
+
self.unsupported("INITCAP does not support custom delimiters")
|
|
54
|
+
|
|
47
55
|
regex = r"(\w)(\w*)"
|
|
48
56
|
return f"REGEXP_REPLACE({self.sql(expression, 'this')}, '{regex}', x -> UPPER(x[1]) || LOWER(x[2]))"
|
|
49
57
|
|
|
@@ -267,20 +275,7 @@ class Presto(Dialect):
|
|
|
267
275
|
# https://github.com/prestodb/presto/issues/2863
|
|
268
276
|
NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE
|
|
269
277
|
|
|
270
|
-
|
|
271
|
-
# equal to the input type e.g: FLOOR(5.5/2) -> DECIMAL, FLOOR(5/2) -> BIGINT
|
|
272
|
-
ANNOTATORS = {
|
|
273
|
-
**Dialect.ANNOTATORS,
|
|
274
|
-
exp.Floor: lambda self, e: self._annotate_by_args(e, "this"),
|
|
275
|
-
exp.Ceil: lambda self, e: self._annotate_by_args(e, "this"),
|
|
276
|
-
exp.Mod: lambda self, e: self._annotate_by_args(e, "this", "expression"),
|
|
277
|
-
exp.Round: lambda self, e: self._annotate_by_args(e, "this"),
|
|
278
|
-
exp.Sign: lambda self, e: self._annotate_by_args(e, "this"),
|
|
279
|
-
exp.Abs: lambda self, e: self._annotate_by_args(e, "this"),
|
|
280
|
-
exp.Rand: lambda self, e: self._annotate_by_args(e, "this")
|
|
281
|
-
if e.this
|
|
282
|
-
else self._set_type(e, exp.DataType.Type.DOUBLE),
|
|
283
|
-
}
|
|
278
|
+
EXPRESSION_METADATA = EXPRESSION_METADATA.copy()
|
|
284
279
|
|
|
285
280
|
SUPPORTED_SETTINGS = {
|
|
286
281
|
*Dialect.SUPPORTED_SETTINGS,
|
|
@@ -429,10 +424,19 @@ class Presto(Dialect):
|
|
|
429
424
|
TRANSFORMS = {
|
|
430
425
|
**generator.Generator.TRANSFORMS,
|
|
431
426
|
exp.AnyValue: rename_func("ARBITRARY"),
|
|
432
|
-
exp.ApproxQuantile:
|
|
427
|
+
exp.ApproxQuantile: lambda self, e: self.func(
|
|
428
|
+
"APPROX_PERCENTILE",
|
|
429
|
+
e.this,
|
|
430
|
+
e.args.get("weight"),
|
|
431
|
+
e.args.get("quantile"),
|
|
432
|
+
e.args.get("accuracy"),
|
|
433
|
+
),
|
|
433
434
|
exp.ArgMax: rename_func("MAX_BY"),
|
|
434
435
|
exp.ArgMin: rename_func("MIN_BY"),
|
|
435
|
-
exp.Array:
|
|
436
|
+
exp.Array: transforms.preprocess(
|
|
437
|
+
[transforms.inherit_struct_field_names],
|
|
438
|
+
generator=lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]",
|
|
439
|
+
),
|
|
436
440
|
exp.ArrayAny: rename_func("ANY_MATCH"),
|
|
437
441
|
exp.ArrayConcat: rename_func("CONCAT"),
|
|
438
442
|
exp.ArrayContains: rename_func("CONTAINS"),
|
|
@@ -542,7 +546,9 @@ class Presto(Dialect):
|
|
|
542
546
|
exp.Xor: bool_xor_sql,
|
|
543
547
|
exp.MD5Digest: rename_func("MD5"),
|
|
544
548
|
exp.SHA: rename_func("SHA1"),
|
|
549
|
+
exp.SHA1Digest: rename_func("SHA1"),
|
|
545
550
|
exp.SHA2: sha256_sql,
|
|
551
|
+
exp.SHA2Digest: sha2_digest_sql,
|
|
546
552
|
}
|
|
547
553
|
|
|
548
554
|
RESERVED_KEYWORDS = {
|
|
@@ -606,6 +612,31 @@ class Presto(Dialect):
|
|
|
606
612
|
"with",
|
|
607
613
|
}
|
|
608
614
|
|
|
615
|
+
def extract_sql(self, expression: exp.Extract) -> str:
|
|
616
|
+
date_part = expression.name
|
|
617
|
+
|
|
618
|
+
if not date_part.startswith("EPOCH"):
|
|
619
|
+
return super().extract_sql(expression)
|
|
620
|
+
|
|
621
|
+
if date_part == "EPOCH_MILLISECOND":
|
|
622
|
+
scale = 10**3
|
|
623
|
+
elif date_part == "EPOCH_MICROSECOND":
|
|
624
|
+
scale = 10**6
|
|
625
|
+
elif date_part == "EPOCH_NANOSECOND":
|
|
626
|
+
scale = 10**9
|
|
627
|
+
else:
|
|
628
|
+
scale = None
|
|
629
|
+
|
|
630
|
+
value = expression.expression
|
|
631
|
+
|
|
632
|
+
ts = exp.cast(value, to=exp.DataType.build("TIMESTAMP"))
|
|
633
|
+
to_unix: exp.Expression = exp.TimeToUnix(this=ts)
|
|
634
|
+
|
|
635
|
+
if scale:
|
|
636
|
+
to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))
|
|
637
|
+
|
|
638
|
+
return self.sql(to_unix)
|
|
639
|
+
|
|
609
640
|
def jsonformat_sql(self, expression: exp.JSONFormat) -> str:
|
|
610
641
|
this = expression.this
|
|
611
642
|
is_json = expression.args.get("is_json")
|
sqlglot/dialects/redshift.py
CHANGED
|
@@ -47,6 +47,8 @@ class Redshift(Postgres):
|
|
|
47
47
|
COPY_PARAMS_ARE_CSV = False
|
|
48
48
|
HEX_LOWERCASE = True
|
|
49
49
|
HAS_DISTINCT_ARRAY_CONSTRUCTORS = True
|
|
50
|
+
COALESCE_COMPARISON_NON_STANDARD = True
|
|
51
|
+
REGEXP_EXTRACT_POSITION_OVERFLOW_RETURNS_NULL = False
|
|
50
52
|
|
|
51
53
|
# ref: https://docs.aws.amazon.com/redshift/latest/dg/r_FORMAT_strings.html
|
|
52
54
|
TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
|
|
@@ -68,6 +70,13 @@ class Redshift(Postgres):
|
|
|
68
70
|
"DATE_DIFF": _build_date_delta(exp.TsOrDsDiff),
|
|
69
71
|
"GETDATE": exp.CurrentTimestamp.from_arg_list,
|
|
70
72
|
"LISTAGG": exp.GroupConcat.from_arg_list,
|
|
73
|
+
"REGEXP_SUBSTR": lambda args: exp.RegexpExtract(
|
|
74
|
+
this=seq_get(args, 0),
|
|
75
|
+
expression=seq_get(args, 1),
|
|
76
|
+
position=seq_get(args, 2),
|
|
77
|
+
occurrence=seq_get(args, 3),
|
|
78
|
+
parameters=seq_get(args, 4),
|
|
79
|
+
),
|
|
71
80
|
"SPLIT_TO_ARRAY": lambda args: exp.StringToArray(
|
|
72
81
|
this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string(",")
|
|
73
82
|
),
|
|
@@ -200,6 +209,7 @@ class Redshift(Postgres):
|
|
|
200
209
|
exp.JSONExtractScalar: json_extract_segments("JSON_EXTRACT_PATH_TEXT"),
|
|
201
210
|
exp.GroupConcat: rename_func("LISTAGG"),
|
|
202
211
|
exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))),
|
|
212
|
+
exp.RegexpExtract: rename_func("REGEXP_SUBSTR"),
|
|
203
213
|
exp.Select: transforms.preprocess(
|
|
204
214
|
[
|
|
205
215
|
transforms.eliminate_window_clause,
|
|
@@ -218,6 +228,9 @@ class Redshift(Postgres):
|
|
|
218
228
|
exp.TsOrDsAdd: date_delta_sql("DATEADD"),
|
|
219
229
|
exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
|
|
220
230
|
exp.UnixToTime: lambda self, e: self._unix_to_time_sql(e),
|
|
231
|
+
exp.SHA2Digest: lambda self, e: self.func(
|
|
232
|
+
"SHA2", e.this, e.args.get("length") or exp.Literal.number(256)
|
|
233
|
+
),
|
|
221
234
|
}
|
|
222
235
|
|
|
223
236
|
# Postgres maps exp.Pivot to no_pivot_sql, but Redshift support pivots
|
|
@@ -231,6 +244,9 @@ class Redshift(Postgres):
|
|
|
231
244
|
TRANSFORMS.pop(exp.LastDay)
|
|
232
245
|
TRANSFORMS.pop(exp.SHA2)
|
|
233
246
|
|
|
247
|
+
# Postgres does not permit a double precision argument in ROUND; Redshift does
|
|
248
|
+
TRANSFORMS.pop(exp.Round)
|
|
249
|
+
|
|
234
250
|
RESERVED_KEYWORDS = {
|
|
235
251
|
"aes128",
|
|
236
252
|
"aes256",
|
sqlglot/dialects/risingwave.py
CHANGED
sqlglot/dialects/singlestore.py
CHANGED
|
@@ -81,6 +81,7 @@ class SingleStore(MySQL):
|
|
|
81
81
|
"!:>": TokenType.NCOLON_GT,
|
|
82
82
|
"::$": TokenType.DCOLONDOLLAR,
|
|
83
83
|
"::%": TokenType.DCOLONPERCENT,
|
|
84
|
+
"::?": TokenType.DCOLONQMARK,
|
|
84
85
|
}
|
|
85
86
|
|
|
86
87
|
class Parser(MySQL.Parser):
|
|
@@ -253,6 +254,12 @@ class SingleStore(MySQL):
|
|
|
253
254
|
TokenType.DCOLONPERCENT: lambda self, this, path: build_json_extract_path(
|
|
254
255
|
exp.JSONExtractScalar, json_type="DOUBLE"
|
|
255
256
|
)([this, exp.Literal.string(path.name)]),
|
|
257
|
+
TokenType.DCOLONQMARK: lambda self, this, path: self.expression(
|
|
258
|
+
exp.JSONExists,
|
|
259
|
+
this=this,
|
|
260
|
+
path=path.name,
|
|
261
|
+
from_dcolonqmark=True,
|
|
262
|
+
),
|
|
256
263
|
}
|
|
257
264
|
COLUMN_OPERATORS.pop(TokenType.ARROW)
|
|
258
265
|
COLUMN_OPERATORS.pop(TokenType.DARROW)
|
|
@@ -452,8 +459,10 @@ class SingleStore(MySQL):
|
|
|
452
459
|
exp.JSONBExists: lambda self, e: self.func(
|
|
453
460
|
"BSON_MATCH_ANY_EXISTS", e.this, e.args.get("path")
|
|
454
461
|
),
|
|
455
|
-
exp.JSONExists:
|
|
456
|
-
|
|
462
|
+
exp.JSONExists: lambda self, e: (
|
|
463
|
+
f"{self.sql(e.this)}::?{self.sql(e.args.get('path'))}"
|
|
464
|
+
if e.args.get("from_dcolonqmark")
|
|
465
|
+
else self.func("JSON_MATCH_ANY_EXISTS", e.this, e.args.get("path"))
|
|
457
466
|
),
|
|
458
467
|
exp.JSONObject: unsupported_args(
|
|
459
468
|
"null_handling", "unique_keys", "return_type", "encoding"
|
|
@@ -542,7 +551,7 @@ class SingleStore(MySQL):
|
|
|
542
551
|
"offset",
|
|
543
552
|
"starts_with",
|
|
544
553
|
"limit",
|
|
545
|
-
"
|
|
554
|
+
"from_",
|
|
546
555
|
"scope",
|
|
547
556
|
"scope_kind",
|
|
548
557
|
"mutex",
|