sqlglot-27.27.0-py3-none-any.whl → sqlglot-28.4.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlglot/__init__.py +1 -0
- sqlglot/__main__.py +6 -4
- sqlglot/_version.py +2 -2
- sqlglot/dialects/bigquery.py +118 -279
- sqlglot/dialects/clickhouse.py +73 -5
- sqlglot/dialects/databricks.py +38 -1
- sqlglot/dialects/dialect.py +354 -275
- sqlglot/dialects/dremio.py +4 -1
- sqlglot/dialects/duckdb.py +754 -25
- sqlglot/dialects/exasol.py +243 -10
- sqlglot/dialects/hive.py +8 -8
- sqlglot/dialects/mysql.py +14 -4
- sqlglot/dialects/oracle.py +29 -0
- sqlglot/dialects/postgres.py +60 -26
- sqlglot/dialects/presto.py +47 -16
- sqlglot/dialects/redshift.py +16 -0
- sqlglot/dialects/risingwave.py +3 -0
- sqlglot/dialects/singlestore.py +12 -3
- sqlglot/dialects/snowflake.py +239 -218
- sqlglot/dialects/spark.py +15 -4
- sqlglot/dialects/spark2.py +11 -48
- sqlglot/dialects/sqlite.py +10 -0
- sqlglot/dialects/starrocks.py +3 -0
- sqlglot/dialects/teradata.py +5 -8
- sqlglot/dialects/trino.py +6 -0
- sqlglot/dialects/tsql.py +61 -22
- sqlglot/diff.py +4 -2
- sqlglot/errors.py +69 -0
- sqlglot/executor/__init__.py +5 -10
- sqlglot/executor/python.py +1 -29
- sqlglot/expressions.py +637 -100
- sqlglot/generator.py +160 -43
- sqlglot/helper.py +2 -44
- sqlglot/lineage.py +10 -4
- sqlglot/optimizer/annotate_types.py +247 -140
- sqlglot/optimizer/canonicalize.py +6 -1
- sqlglot/optimizer/eliminate_joins.py +1 -1
- sqlglot/optimizer/eliminate_subqueries.py +2 -2
- sqlglot/optimizer/merge_subqueries.py +5 -5
- sqlglot/optimizer/normalize.py +20 -13
- sqlglot/optimizer/normalize_identifiers.py +17 -3
- sqlglot/optimizer/optimizer.py +4 -0
- sqlglot/optimizer/pushdown_predicates.py +1 -1
- sqlglot/optimizer/qualify.py +18 -10
- sqlglot/optimizer/qualify_columns.py +122 -275
- sqlglot/optimizer/qualify_tables.py +128 -76
- sqlglot/optimizer/resolver.py +374 -0
- sqlglot/optimizer/scope.py +27 -16
- sqlglot/optimizer/simplify.py +1075 -959
- sqlglot/optimizer/unnest_subqueries.py +12 -2
- sqlglot/parser.py +296 -170
- sqlglot/planner.py +2 -2
- sqlglot/schema.py +15 -4
- sqlglot/tokens.py +42 -7
- sqlglot/transforms.py +77 -22
- sqlglot/typing/__init__.py +316 -0
- sqlglot/typing/bigquery.py +376 -0
- sqlglot/typing/hive.py +12 -0
- sqlglot/typing/presto.py +24 -0
- sqlglot/typing/snowflake.py +505 -0
- sqlglot/typing/spark2.py +58 -0
- sqlglot/typing/tsql.py +9 -0
- {sqlglot-27.27.0.dist-info → sqlglot-28.4.0.dist-info}/METADATA +2 -2
- sqlglot-28.4.0.dist-info/RECORD +92 -0
- sqlglot-27.27.0.dist-info/RECORD +0 -84
- {sqlglot-27.27.0.dist-info → sqlglot-28.4.0.dist-info}/WHEEL +0 -0
- {sqlglot-27.27.0.dist-info → sqlglot-28.4.0.dist-info}/licenses/LICENSE +0 -0
- {sqlglot-27.27.0.dist-info → sqlglot-28.4.0.dist-info}/top_level.txt +0 -0
sqlglot/dialects/clickhouse.py
CHANGED
|
@@ -9,6 +9,7 @@ from sqlglot.dialects.dialect import (
|
|
|
9
9
|
arg_max_or_min_no_count,
|
|
10
10
|
build_date_delta,
|
|
11
11
|
build_formatted_time,
|
|
12
|
+
build_like,
|
|
12
13
|
inline_array_sql,
|
|
13
14
|
json_extract_segments,
|
|
14
15
|
json_path_key_only_name,
|
|
@@ -23,6 +24,7 @@ from sqlglot.dialects.dialect import (
|
|
|
23
24
|
timestamptrunc_sql,
|
|
24
25
|
unit_to_var,
|
|
25
26
|
trim_sql,
|
|
27
|
+
sha2_digest_sql,
|
|
26
28
|
)
|
|
27
29
|
from sqlglot.generator import Generator
|
|
28
30
|
from sqlglot.helper import is_int, seq_get
|
|
@@ -188,6 +190,43 @@ def _map_sql(self: ClickHouse.Generator, expression: exp.Map | exp.VarMap) -> st
|
|
|
188
190
|
return f"{{{csv_args}}}"
|
|
189
191
|
|
|
190
192
|
|
|
193
|
+
def _build_timestamp_trunc(unit: str) -> t.Callable[[t.List], exp.TimestampTrunc]:
|
|
194
|
+
return lambda args: exp.TimestampTrunc(
|
|
195
|
+
this=seq_get(args, 0), unit=exp.var(unit), zone=seq_get(args, 1)
|
|
196
|
+
)
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def _build_split_by_char(args: t.List) -> exp.Split | exp.Anonymous:
|
|
200
|
+
sep = seq_get(args, 0)
|
|
201
|
+
if isinstance(sep, exp.Literal):
|
|
202
|
+
sep_value = sep.to_py()
|
|
203
|
+
if isinstance(sep_value, str) and len(sep_value.encode("utf-8")) == 1:
|
|
204
|
+
return _build_split(exp.Split)(args)
|
|
205
|
+
|
|
206
|
+
return exp.Anonymous(this="splitByChar", expressions=args)
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def _build_split(exp_class: t.Type[E]) -> t.Callable[[t.List], E]:
|
|
210
|
+
return lambda args: exp_class(
|
|
211
|
+
this=seq_get(args, 1), expression=seq_get(args, 0), limit=seq_get(args, 2)
|
|
212
|
+
)
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
# Skip the 'week' unit since ClickHouse's toStartOfWeek
|
|
216
|
+
# uses an extra mode argument to specify the first day of the week
|
|
217
|
+
TIMESTAMP_TRUNC_UNITS = {
|
|
218
|
+
"MICROSECOND",
|
|
219
|
+
"MILLISECOND",
|
|
220
|
+
"SECOND",
|
|
221
|
+
"MINUTE",
|
|
222
|
+
"HOUR",
|
|
223
|
+
"DAY",
|
|
224
|
+
"MONTH",
|
|
225
|
+
"QUARTER",
|
|
226
|
+
"YEAR",
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
|
|
191
230
|
class ClickHouse(Dialect):
|
|
192
231
|
INDEX_OFFSET = 1
|
|
193
232
|
NORMALIZE_FUNCTIONS: bool | str = False
|
|
@@ -308,10 +347,16 @@ class ClickHouse(Dialect):
|
|
|
308
347
|
|
|
309
348
|
FUNCTIONS = {
|
|
310
349
|
**parser.Parser.FUNCTIONS,
|
|
350
|
+
**{
|
|
351
|
+
f"TOSTARTOF{unit}": _build_timestamp_trunc(unit=unit)
|
|
352
|
+
for unit in TIMESTAMP_TRUNC_UNITS
|
|
353
|
+
},
|
|
311
354
|
"ANY": exp.AnyValue.from_arg_list,
|
|
312
355
|
"ARRAYSUM": exp.ArraySum.from_arg_list,
|
|
313
356
|
"ARRAYREVERSE": exp.ArrayReverse.from_arg_list,
|
|
314
357
|
"ARRAYSLICE": exp.ArraySlice.from_arg_list,
|
|
358
|
+
"CURRENTDATABASE": exp.CurrentDatabase.from_arg_list,
|
|
359
|
+
"CURRENTSCHEMAS": exp.CurrentSchemas.from_arg_list,
|
|
315
360
|
"COUNTIF": _build_count_if,
|
|
316
361
|
"COSINEDISTANCE": exp.CosineDistance.from_arg_list,
|
|
317
362
|
"DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None),
|
|
@@ -322,13 +367,17 @@ class ClickHouse(Dialect):
|
|
|
322
367
|
"DATE_SUB": build_date_delta(exp.DateSub, default_unit=None),
|
|
323
368
|
"DATESUB": build_date_delta(exp.DateSub, default_unit=None),
|
|
324
369
|
"FORMATDATETIME": _build_datetime_format(exp.TimeToStr),
|
|
370
|
+
"HAS": exp.ArrayContains.from_arg_list,
|
|
371
|
+
"ILIKE": build_like(exp.ILike),
|
|
325
372
|
"JSONEXTRACTSTRING": build_json_extract_path(
|
|
326
373
|
exp.JSONExtractScalar, zero_based_indexing=False
|
|
327
374
|
),
|
|
328
375
|
"LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
|
|
376
|
+
"LIKE": build_like(exp.Like),
|
|
329
377
|
"L2Distance": exp.EuclideanDistance.from_arg_list,
|
|
330
378
|
"MAP": parser.build_var_map,
|
|
331
379
|
"MATCH": exp.RegexpLike.from_arg_list,
|
|
380
|
+
"NOTLIKE": build_like(exp.Like, not_like=True),
|
|
332
381
|
"PARSEDATETIME": _build_datetime_format(exp.ParseDatetime),
|
|
333
382
|
"RANDCANONICAL": exp.Rand.from_arg_list,
|
|
334
383
|
"STR_TO_DATE": _build_str_to_date,
|
|
@@ -336,11 +385,15 @@ class ClickHouse(Dialect):
|
|
|
336
385
|
"TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None),
|
|
337
386
|
"TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None),
|
|
338
387
|
"TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None),
|
|
388
|
+
"TOMONDAY": _build_timestamp_trunc("WEEK"),
|
|
339
389
|
"UNIQ": exp.ApproxDistinct.from_arg_list,
|
|
340
390
|
"XOR": lambda args: exp.Xor(expressions=args),
|
|
341
391
|
"MD5": exp.MD5Digest.from_arg_list,
|
|
342
392
|
"SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
|
|
343
393
|
"SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
|
|
394
|
+
"SPLITBYCHAR": _build_split_by_char,
|
|
395
|
+
"SPLITBYREGEXP": _build_split(exp.RegexpSplit),
|
|
396
|
+
"SPLITBYSTRING": _build_split(exp.Split),
|
|
344
397
|
"SUBSTRINGINDEX": exp.SubstringIndex.from_arg_list,
|
|
345
398
|
"TOTYPENAME": exp.Typeof.from_arg_list,
|
|
346
399
|
"EDITDISTANCE": exp.Levenshtein.from_arg_list,
|
|
@@ -418,6 +471,7 @@ class ClickHouse(Dialect):
|
|
|
418
471
|
"quantiles",
|
|
419
472
|
"quantileExact",
|
|
420
473
|
"quantilesExact",
|
|
474
|
+
"quantilesExactExclusive",
|
|
421
475
|
"quantileExactLow",
|
|
422
476
|
"quantilesExactLow",
|
|
423
477
|
"quantileExactHigh",
|
|
@@ -771,7 +825,7 @@ class ClickHouse(Dialect):
|
|
|
771
825
|
if join:
|
|
772
826
|
method = join.args.get("method")
|
|
773
827
|
join.set("method", None)
|
|
774
|
-
join.set("global", method)
|
|
828
|
+
join.set("global_", method)
|
|
775
829
|
|
|
776
830
|
# tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table`
|
|
777
831
|
# https://clickhouse.com/docs/en/sql-reference/statements/select/array-join
|
|
@@ -1087,6 +1141,7 @@ class ClickHouse(Dialect):
|
|
|
1087
1141
|
exp.AnyValue: rename_func("any"),
|
|
1088
1142
|
exp.ApproxDistinct: rename_func("uniq"),
|
|
1089
1143
|
exp.ArrayConcat: rename_func("arrayConcat"),
|
|
1144
|
+
exp.ArrayContains: rename_func("has"),
|
|
1090
1145
|
exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
|
|
1091
1146
|
exp.ArrayRemove: remove_from_array_using_filter,
|
|
1092
1147
|
exp.ArrayReverse: rename_func("arrayReverse"),
|
|
@@ -1096,6 +1151,8 @@ class ClickHouse(Dialect):
|
|
|
1096
1151
|
exp.ArgMin: arg_max_or_min_no_count("argMin"),
|
|
1097
1152
|
exp.Array: inline_array_sql,
|
|
1098
1153
|
exp.CastToStrType: rename_func("CAST"),
|
|
1154
|
+
exp.CurrentDatabase: rename_func("CURRENT_DATABASE"),
|
|
1155
|
+
exp.CurrentSchemas: rename_func("CURRENT_SCHEMAS"),
|
|
1099
1156
|
exp.CountIf: rename_func("countIf"),
|
|
1100
1157
|
exp.CosineDistance: rename_func("cosineDistance"),
|
|
1101
1158
|
exp.CompressColumnConstraint: lambda self,
|
|
@@ -1148,9 +1205,17 @@ class ClickHouse(Dialect):
|
|
|
1148
1205
|
exp.MD5Digest: rename_func("MD5"),
|
|
1149
1206
|
exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))),
|
|
1150
1207
|
exp.SHA: rename_func("SHA1"),
|
|
1208
|
+
exp.SHA1Digest: rename_func("SHA1"),
|
|
1151
1209
|
exp.SHA2: sha256_sql,
|
|
1210
|
+
exp.SHA2Digest: sha2_digest_sql,
|
|
1211
|
+
exp.Split: lambda self, e: self.func(
|
|
1212
|
+
"splitByString", e.args.get("expression"), e.this, e.args.get("limit")
|
|
1213
|
+
),
|
|
1214
|
+
exp.RegexpSplit: lambda self, e: self.func(
|
|
1215
|
+
"splitByRegexp", e.args.get("expression"), e.this, e.args.get("limit")
|
|
1216
|
+
),
|
|
1152
1217
|
exp.UnixToTime: _unix_to_time_sql,
|
|
1153
|
-
exp.TimestampTrunc: timestamptrunc_sql(zone=True),
|
|
1218
|
+
exp.TimestampTrunc: timestamptrunc_sql(func="dateTrunc", zone=True),
|
|
1154
1219
|
exp.Trim: lambda self, e: trim_sql(self, e, default_trim_type="BOTH"),
|
|
1155
1220
|
exp.Variance: rename_func("varSamp"),
|
|
1156
1221
|
exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
|
|
@@ -1418,9 +1483,12 @@ class ClickHouse(Dialect):
|
|
|
1418
1483
|
return in_sql
|
|
1419
1484
|
|
|
1420
1485
|
def not_sql(self, expression: exp.Not) -> str:
|
|
1421
|
-
if isinstance(expression.this, exp.In) and expression.this.args.get("is_global"):
|
|
1422
|
-
|
|
1423
|
-
|
|
1486
|
+
if isinstance(expression.this, exp.In):
|
|
1487
|
+
if expression.this.args.get("is_global"):
|
|
1488
|
+
# let `GLOBAL IN` child interpose `NOT`
|
|
1489
|
+
return self.sql(expression, "this")
|
|
1490
|
+
|
|
1491
|
+
expression.set("this", exp.paren(expression.this, copy=False))
|
|
1424
1492
|
|
|
1425
1493
|
return super().not_sql(expression)
|
|
1426
1494
|
|
sqlglot/dialects/databricks.py
CHANGED
|
@@ -3,7 +3,7 @@ from __future__ import annotations
|
|
|
3
3
|
from copy import deepcopy
|
|
4
4
|
from collections import defaultdict
|
|
5
5
|
|
|
6
|
-
from sqlglot import exp, transforms, jsonpath
|
|
6
|
+
from sqlglot import exp, transforms, jsonpath, parser
|
|
7
7
|
from sqlglot.dialects.dialect import (
|
|
8
8
|
date_delta_sql,
|
|
9
9
|
build_date_delta,
|
|
@@ -12,6 +12,7 @@ from sqlglot.dialects.dialect import (
|
|
|
12
12
|
groupconcat_sql,
|
|
13
13
|
)
|
|
14
14
|
from sqlglot.dialects.spark import Spark
|
|
15
|
+
from sqlglot.helper import seq_get
|
|
15
16
|
from sqlglot.tokens import TokenType
|
|
16
17
|
from sqlglot.optimizer.annotate_types import TypeAnnotator
|
|
17
18
|
|
|
@@ -54,11 +55,21 @@ class Databricks(Spark):
|
|
|
54
55
|
|
|
55
56
|
FUNCTIONS = {
|
|
56
57
|
**Spark.Parser.FUNCTIONS,
|
|
58
|
+
"GETDATE": exp.CurrentTimestamp.from_arg_list,
|
|
57
59
|
"DATEADD": build_date_delta(exp.DateAdd),
|
|
58
60
|
"DATE_ADD": build_date_delta(exp.DateAdd),
|
|
59
61
|
"DATEDIFF": build_date_delta(exp.DateDiff),
|
|
60
62
|
"DATE_DIFF": build_date_delta(exp.DateDiff),
|
|
63
|
+
"NOW": exp.CurrentTimestamp.from_arg_list,
|
|
61
64
|
"TO_DATE": build_formatted_time(exp.TsOrDsToDate, "databricks"),
|
|
65
|
+
"UNIFORM": lambda args: exp.Uniform(
|
|
66
|
+
this=seq_get(args, 0), expression=seq_get(args, 1), seed=seq_get(args, 2)
|
|
67
|
+
),
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
NO_PAREN_FUNCTION_PARSERS = {
|
|
71
|
+
**Spark.Parser.NO_PAREN_FUNCTION_PARSERS,
|
|
72
|
+
"CURDATE": lambda self: self._parse_curdate(),
|
|
62
73
|
}
|
|
63
74
|
|
|
64
75
|
FACTOR = {
|
|
@@ -66,6 +77,21 @@ class Databricks(Spark):
|
|
|
66
77
|
TokenType.COLON: exp.JSONExtract,
|
|
67
78
|
}
|
|
68
79
|
|
|
80
|
+
COLUMN_OPERATORS = {
|
|
81
|
+
**parser.Parser.COLUMN_OPERATORS,
|
|
82
|
+
TokenType.QDCOLON: lambda self, this, to: self.expression(
|
|
83
|
+
exp.TryCast,
|
|
84
|
+
this=this,
|
|
85
|
+
to=to,
|
|
86
|
+
),
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
def _parse_curdate(self) -> exp.CurrentDate:
|
|
90
|
+
# CURDATE, an alias for CURRENT_DATE, has optional parentheses
|
|
91
|
+
if self._match(TokenType.L_PAREN):
|
|
92
|
+
self._match_r_paren()
|
|
93
|
+
return self.expression(exp.CurrentDate)
|
|
94
|
+
|
|
69
95
|
class Generator(Spark.Generator):
|
|
70
96
|
TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
|
|
71
97
|
COPY_PARAMS_ARE_WRAPPED = False
|
|
@@ -104,6 +130,7 @@ class Databricks(Spark):
|
|
|
104
130
|
if e.args.get("is_numeric")
|
|
105
131
|
else self.function_fallback_sql(e)
|
|
106
132
|
),
|
|
133
|
+
exp.CurrentCatalog: lambda *_: "CURRENT_CATALOG()",
|
|
107
134
|
}
|
|
108
135
|
|
|
109
136
|
TRANSFORMS.pop(exp.RegexpLike)
|
|
@@ -136,3 +163,13 @@ class Databricks(Spark):
|
|
|
136
163
|
def jsonpath_sql(self, expression: exp.JSONPath) -> str:
|
|
137
164
|
expression.set("escape", None)
|
|
138
165
|
return super().jsonpath_sql(expression)
|
|
166
|
+
|
|
167
|
+
def uniform_sql(self, expression: exp.Uniform) -> str:
|
|
168
|
+
gen = expression.args.get("gen")
|
|
169
|
+
seed = expression.args.get("seed")
|
|
170
|
+
|
|
171
|
+
# From Snowflake UNIFORM(min, max, gen) as RANDOM(), RANDOM(seed), or constant value -> Extract seed
|
|
172
|
+
if gen:
|
|
173
|
+
seed = gen.this
|
|
174
|
+
|
|
175
|
+
return self.func("UNIFORM", expression.this, expression.expression, seed)
|