sqlglot 27.29.0__py3-none-any.whl → 28.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlglot/__main__.py +6 -4
- sqlglot/_version.py +2 -2
- sqlglot/dialects/bigquery.py +116 -295
- sqlglot/dialects/clickhouse.py +67 -2
- sqlglot/dialects/databricks.py +38 -1
- sqlglot/dialects/dialect.py +327 -286
- sqlglot/dialects/dremio.py +4 -1
- sqlglot/dialects/duckdb.py +718 -22
- sqlglot/dialects/exasol.py +243 -10
- sqlglot/dialects/hive.py +8 -8
- sqlglot/dialects/mysql.py +11 -2
- sqlglot/dialects/oracle.py +29 -0
- sqlglot/dialects/postgres.py +46 -24
- sqlglot/dialects/presto.py +47 -16
- sqlglot/dialects/redshift.py +16 -0
- sqlglot/dialects/risingwave.py +3 -0
- sqlglot/dialects/singlestore.py +12 -3
- sqlglot/dialects/snowflake.py +199 -271
- sqlglot/dialects/spark.py +2 -2
- sqlglot/dialects/spark2.py +11 -48
- sqlglot/dialects/sqlite.py +9 -0
- sqlglot/dialects/teradata.py +5 -8
- sqlglot/dialects/trino.py +6 -0
- sqlglot/dialects/tsql.py +61 -25
- sqlglot/diff.py +4 -2
- sqlglot/errors.py +69 -0
- sqlglot/expressions.py +484 -84
- sqlglot/generator.py +143 -41
- sqlglot/helper.py +2 -2
- sqlglot/optimizer/annotate_types.py +247 -140
- sqlglot/optimizer/canonicalize.py +6 -1
- sqlglot/optimizer/eliminate_joins.py +1 -1
- sqlglot/optimizer/eliminate_subqueries.py +2 -2
- sqlglot/optimizer/merge_subqueries.py +5 -5
- sqlglot/optimizer/normalize.py +20 -13
- sqlglot/optimizer/normalize_identifiers.py +17 -3
- sqlglot/optimizer/optimizer.py +4 -0
- sqlglot/optimizer/pushdown_predicates.py +1 -1
- sqlglot/optimizer/qualify.py +14 -6
- sqlglot/optimizer/qualify_columns.py +113 -352
- sqlglot/optimizer/qualify_tables.py +112 -70
- sqlglot/optimizer/resolver.py +374 -0
- sqlglot/optimizer/scope.py +27 -16
- sqlglot/optimizer/simplify.py +1074 -964
- sqlglot/optimizer/unnest_subqueries.py +12 -2
- sqlglot/parser.py +276 -160
- sqlglot/planner.py +2 -2
- sqlglot/schema.py +15 -4
- sqlglot/tokens.py +42 -7
- sqlglot/transforms.py +77 -22
- sqlglot/typing/__init__.py +316 -0
- sqlglot/typing/bigquery.py +376 -0
- sqlglot/typing/hive.py +12 -0
- sqlglot/typing/presto.py +24 -0
- sqlglot/typing/snowflake.py +505 -0
- sqlglot/typing/spark2.py +58 -0
- sqlglot/typing/tsql.py +9 -0
- {sqlglot-27.29.0.dist-info → sqlglot-28.4.1.dist-info}/METADATA +2 -2
- sqlglot-28.4.1.dist-info/RECORD +92 -0
- sqlglot-27.29.0.dist-info/RECORD +0 -84
- {sqlglot-27.29.0.dist-info → sqlglot-28.4.1.dist-info}/WHEEL +0 -0
- {sqlglot-27.29.0.dist-info → sqlglot-28.4.1.dist-info}/licenses/LICENSE +0 -0
- {sqlglot-27.29.0.dist-info → sqlglot-28.4.1.dist-info}/top_level.txt +0 -0
sqlglot/typing/presto.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from sqlglot import exp
|
|
4
|
+
from sqlglot.typing import EXPRESSION_METADATA
|
|
5
|
+
|
|
6
|
+
# Presto-specific type-annotation metadata, layered on top of the shared
# defaults imported from `sqlglot.typing`. Later entries override earlier ones
# for the same expression class, so the order of the spreads below matters.
EXPRESSION_METADATA = {
    **EXPRESSION_METADATA,
    # Single-argument numeric functions: annotated through `_annotate_by_args`
    # using only their `this` argument.
    **{
        expr_type: {"annotator": lambda self, e: self._annotate_by_args(e, "this")}
        for expr_type in {
            exp.Abs,
            exp.Ceil,
            exp.Floor,
            exp.Round,
            exp.Sign,
        }
    },
    # MOD is annotated from both of its operands.
    exp.Mod: {"annotator": lambda self, e: self._annotate_by_args(e, "this", "expression")},
    # RAND with an argument is annotated from that argument; without one it is
    # typed as DOUBLE.
    exp.Rand: {
        "annotator": lambda self, e: self._annotate_by_args(e, "this")
        if e.this
        else self._set_type(e, exp.DataType.Type.DOUBLE)
    },
}
|
|
@@ -0,0 +1,505 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import typing as t
|
|
4
|
+
|
|
5
|
+
from sqlglot import exp
|
|
6
|
+
from sqlglot.helper import seq_get
|
|
7
|
+
from sqlglot.typing import EXPRESSION_METADATA
|
|
8
|
+
|
|
9
|
+
if t.TYPE_CHECKING:
|
|
10
|
+
from sqlglot.optimizer.annotate_types import TypeAnnotator
|
|
11
|
+
|
|
12
|
+
# Units for which `_annotate_date_or_time_add` annotates a DATE operand from
# its `this` argument; any other unit on a DATE operand is typed TIMESTAMPNTZ.
DATE_PARTS = {"DAY", "WEEK", "MONTH", "QUARTER", "YEAR"}

# Cap applied to NUMBER precision by the MEDIAN and variance annotators.
MAX_PRECISION = 38

# Cap applied to NUMBER scale by the MEDIAN annotator.
MAX_SCALE = 37
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _annotate_reverse(self: TypeAnnotator, expression: exp.Reverse) -> exp.Reverse:
    """Annotate REVERSE from its argument, mapping a NULL result type to VARCHAR.

    The expression is first annotated from its `this` argument. If that yields
    the NULL type, the type is replaced because Snowflake treats REVERSE(NULL)
    as a VARCHAR.
    """
    annotated = self._annotate_by_args(expression, "this")

    if not annotated.is_type(exp.DataType.Type.NULL):
        return annotated

    # Snowflake treats REVERSE(NULL) as a VARCHAR
    self._set_type(annotated, exp.DataType.Type.VARCHAR)
    return annotated
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _annotate_timestamp_from_parts(
    self: TypeAnnotator, expression: exp.TimestampFromParts
) -> exp.TimestampFromParts:
    """Pick the TIMESTAMP_FROM_PARTS result type from the presence of a zone.

    - with a `zone` argument    -> TIMESTAMPTZ
    - without a `zone` argument -> TIMESTAMP (defaults to TIMESTAMP_NTZ)
    """
    zone = expression.args.get("zone")
    result_type = exp.DataType.Type.TIMESTAMPTZ if zone else exp.DataType.Type.TIMESTAMP

    self._set_type(expression, result_type)
    return expression
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _annotate_date_or_time_add(self: TypeAnnotator, expression: exp.Expression) -> exp.Expression:
    """Annotate a date/time addition based on its operand type and unit.

    A DATE operand combined with a unit outside DATE_PARTS is typed as
    TIMESTAMPNTZ. In every other case the expression is annotated from its
    `this` argument.
    """
    operand_is_date = expression.this.is_type(exp.DataType.Type.DATE)

    # The unit is only inspected for DATE operands (short-circuit preserved).
    if not operand_is_date or expression.text("unit").upper() in DATE_PARTS:
        self._annotate_by_args(expression, "this")
    else:
        self._set_type(expression, exp.DataType.Type.TIMESTAMPNTZ)

    return expression
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _annotate_decode_case(self: TypeAnnotator, expression: exp.DecodeCase) -> exp.DecodeCase:
    """Annotate DecodeCase using only the types of its result values.

    The argument layout is DECODE(expr, val1, ret1, val2, ret2, ..., default).
    The compared expression and the search values (val1, val2, ...) are
    ignored; the result values (ret1, ret2, ...) and the optional default are
    folded together with `_maybe_coerce` to obtain the final type.
    """
    args = expression.expressions

    # Result values sit at positions 2, 4, 6, ...
    result_types = [arg.type for arg in args[2::2]]

    # An even argument count means a trailing default is present, e.g.
    #   DECODE(x, 1, 'a', 2, 'b')            -> 5 args, no default
    #   DECODE(x, 1, 'a', 2, 'b', 'default') -> 6 args, default is last
    if len(args) % 2 == 0:
        result_types.append(args[-1].type)

    # Fold all result types into one common type
    common_type = None
    for candidate in result_types:
        seed = common_type or candidate
        common_type = self._maybe_coerce(seed, candidate)

    self._set_type(expression, common_type)
    return expression
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _annotate_arg_max_min(self: TypeAnnotator, expression: exp.Expression) -> exp.Expression:
    """Annotate ARG_MAX / ARG_MIN (registered for exp.ArgMax and exp.ArgMin).

    When the `count` argument is present the expression is typed as ARRAY;
    otherwise it takes the type of its `this` argument.
    """
    if expression.args.get("count"):
        result_type = exp.DataType.Type.ARRAY
    else:
        result_type = expression.this.type

    self._set_type(expression, result_type)
    return expression
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _annotate_within_group(self: TypeAnnotator, expression: exp.WithinGroup) -> exp.WithinGroup:
    """Annotate WITHIN GROUP when it wraps PERCENTILE_DISC / PERCENTILE_CONT.

    If the inner function is PercentileDisc or PercentileCont and the ORDER BY
    clause consists of exactly one Ordered term, the WithinGroup node takes the
    type of that ordered expression. In every other case the node is returned
    untouched.
    """
    if not isinstance(expression.this, (exp.PercentileDisc, exp.PercentileCont)):
        return expression

    ordering = expression.expression
    if not isinstance(ordering, exp.Order) or len(ordering.expressions) != 1:
        return expression

    sort_term = ordering.expressions[0]
    if isinstance(sort_term, exp.Ordered):
        self._set_type(expression, sort_term.this.type)

    return expression
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def _annotate_median(self: TypeAnnotator, expression: exp.Median) -> exp.Median:
    """Annotate MEDIAN with the return type derived from its argument.

    Based on Snowflake documentation:
    - If the argument is FLOAT/DOUBLE -> DOUBLE (FLOAT is a synonym for DOUBLE)
    - If the argument is NUMBER(p, s) -> NUMBER(min(p + 3, 38), min(s + 3, 37))

    A type without explicit precision/scale parameters is treated as having
    precision MAX_PRECISION and scale 0.
    """
    # Annotate from the argument first so that its type is available
    expression = self._annotate_by_args(expression, "this")
    input_type = expression.this.type

    # FLOAT is accepted alongside DOUBLE (as `_annotate_variance` does) so a
    # FLOAT argument is not routed into the NUMBER branch below.
    if input_type.is_type(exp.DataType.Type.FLOAT, exp.DataType.Type.DOUBLE):
        self._set_type(expression, exp.DataType.Type.DOUBLE)
        return expression

    # NUMBER(p, s) -> NUMBER(min(p + 3, 38), min(s + 3, 37))
    exprs = input_type.expressions

    precision_expr = seq_get(exprs, 0)
    precision = precision_expr.this.to_py() if precision_expr else MAX_PRECISION

    scale_expr = seq_get(exprs, 1)
    scale = scale_expr.this.to_py() if scale_expr else 0

    new_precision = min(precision + 3, MAX_PRECISION)
    new_scale = min(scale + 3, MAX_SCALE)

    # Build the widened NUMBER type
    new_type = exp.DataType.build(f"NUMBER({new_precision}, {new_scale})", dialect="snowflake")
    self._set_type(expression, new_type)

    return expression
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def _annotate_variance(self: TypeAnnotator, expression: exp.Expression) -> exp.Expression:
    """Annotate variance functions with the return type derived from the argument.

    Mapping implemented here (based on observed Snowflake behavior):
    - DECFLOAT               -> DECFLOAT
    - FLOAT / DOUBLE         -> DOUBLE
    - scale 0 or unspecified -> NUMBER(38, 6)
    - NUMBER(p, s), s != 0   -> NUMBER(38, max(12, s))
    """
    # Annotate from the argument first so that its type is available
    expression = self._annotate_by_args(expression, "this")
    arg_type = expression.this.type

    if arg_type.is_type(exp.DataType.Type.DECFLOAT):
        result_type = exp.DataType.build("DECFLOAT", dialect="snowflake")
    elif arg_type.is_type(exp.DataType.Type.FLOAT, exp.DataType.Type.DOUBLE):
        result_type = exp.DataType.Type.DOUBLE
    else:
        # Remaining types are handled as NUMBER; only the scale drives the result
        scale_param = seq_get(arg_type.expressions, 1)
        input_scale = scale_param.this.to_py() if scale_param else 0

        # Snowflake appears to assign the scale through the formula MAX(12, s),
        # except for scale 0 where the result scale is 6
        result_scale = max(12, input_scale) if input_scale != 0 else 6

        result_type = exp.DataType.build(
            f"NUMBER({MAX_PRECISION}, {result_scale})", dialect="snowflake"
        )

    self._set_type(expression, result_type)
    return expression
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def _annotate_math_with_float_decfloat(
    self: TypeAnnotator, expression: exp.Expression
) -> exp.Expression:
    """Annotate math functions that keep DECFLOAT and otherwise return DOUBLE.

    The expression is first annotated from its `this` argument, then:
    - a DECFLOAT argument -> the argument's own type is preserved
    - any other argument  -> DOUBLE
    """
    expression = self._annotate_by_args(expression, "this")
    operand = expression.this

    if operand.is_type(exp.DataType.Type.DECFLOAT):
        result_type = operand.type
    else:
        # Integers, decimals, doubles, etc. all map to DOUBLE
        result_type = exp.DataType.Type.DOUBLE

    self._set_type(expression, result_type)
    return expression
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
# Snowflake-specific type-annotation metadata, layered on top of the shared
# defaults imported from `sqlglot.typing`. Each value is either
# {"returns": <type>} for a fixed result type or {"annotator": <callable>} for
# a computed one. Later entries override earlier ones for the same expression
# class, so the order of the spreads below matters.
EXPRESSION_METADATA = {
    **EXPRESSION_METADATA,
    # Annotated through `_annotate_by_args` using only the `this` argument
    **{
        expr_type: {"annotator": lambda self, e: self._annotate_by_args(e, "this")}
        for expr_type in {
            exp.AddMonths,
            exp.Ceil,
            exp.DateTrunc,
            exp.Floor,
            exp.Left,
            exp.Mode,
            exp.Pad,
            exp.Right,
            exp.Round,
            exp.Stuff,
            exp.Substring,
            exp.TimeSlice,
            exp.TimestampTrunc,
        }
    },
    # Fixed return type: ARRAY
    **{
        expr_type: {"returns": exp.DataType.Type.ARRAY}
        for expr_type in (
            exp.ApproxTopK,
            exp.ApproxTopKEstimate,
            exp.ArrayAgg,
            exp.ArrayConstructCompact,
            exp.ArrayUniqueAgg,
            exp.ArrayUnionAgg,
            exp.RegexpExtractAll,
            exp.Split,
            exp.StringToArray,
        )
    },
    # Fixed return type: BIGINT
    **{
        expr_type: {"returns": exp.DataType.Type.BIGINT}
        for expr_type in {
            exp.BitmapBitPosition,
            exp.BitmapBucketNumber,
            exp.BitmapCount,
            exp.Factorial,
            exp.GroupingId,
            exp.MD5NumberLower64,
            exp.MD5NumberUpper64,
            exp.Rand,
            exp.Zipf,
        }
    },
    # Fixed return type: BINARY
    **{
        expr_type: {"returns": exp.DataType.Type.BINARY}
        for expr_type in {
            exp.Base64DecodeBinary,
            exp.BitmapConstructAgg,
            exp.BitmapOrAgg,
            exp.Compress,
            exp.DecompressBinary,
            exp.HexString,
            exp.MD5Digest,
            exp.SHA1Digest,
            exp.SHA2Digest,
            exp.ToBinary,
            exp.TryBase64DecodeBinary,
            exp.TryHexDecodeBinary,
            exp.Unhex,
        }
    },
    # Fixed return type: BOOLEAN
    **{
        expr_type: {"returns": exp.DataType.Type.BOOLEAN}
        for expr_type in {
            exp.Booland,
            exp.Boolnot,
            exp.Boolor,
            exp.BoolxorAgg,
            exp.EqualNull,
            exp.IsNullValue,
            exp.Search,
            exp.SearchIp,
            exp.ToBoolean,
        }
    },
    # Fixed return type: DATE
    **{
        expr_type: {"returns": exp.DataType.Type.DATE}
        for expr_type in {
            exp.NextDay,
            exp.PreviousDay,
        }
    },
    # Typed as Snowflake NUMBER; the type is built on each annotator call
    **{
        expr_type: {
            "annotator": lambda self, e: self._set_type(
                e, exp.DataType.build("NUMBER", dialect="snowflake")
            )
        }
        for expr_type in (
            exp.BitwiseAndAgg,
            exp.BitwiseOrAgg,
            exp.BitwiseXorAgg,
            exp.RegexpCount,
            exp.RegexpInstr,
            exp.ToNumber,
        )
    },
    # Fixed return type: DOUBLE
    **{
        expr_type: {"returns": exp.DataType.Type.DOUBLE}
        for expr_type in {
            exp.ApproxPercentileEstimate,
            exp.ApproximateSimilarity,
            exp.Asinh,
            exp.Atanh,
            exp.Cbrt,
            exp.Cosh,
            exp.CosineDistance,
            exp.DotProduct,
            exp.EuclideanDistance,
            exp.ManhattanDistance,
            exp.MonthsBetween,
            exp.Normal,
            exp.Sinh,
        }
    },
    # Fixed return type: DECFLOAT
    **{
        expr_type: {"returns": exp.DataType.Type.DECFLOAT}
        for expr_type in {
            exp.ToDecfloat,
            exp.TryToDecfloat,
        }
    },
    # DECFLOAT arguments keep their type; everything else is typed DOUBLE
    **{
        expr_type: {"annotator": _annotate_math_with_float_decfloat}
        for expr_type in {
            exp.Acos,
            exp.Asin,
            exp.Atan,
            exp.Atan2,
            exp.Cos,
            exp.Cot,
            exp.Degrees,
            exp.Exp,
            exp.Ln,
            exp.Log,
            exp.Pow,
            exp.Radians,
            exp.RegrAvgx,
            exp.RegrAvgy,
            exp.RegrCount,
            exp.RegrIntercept,
            exp.RegrR2,
            exp.RegrSlope,
            exp.RegrSxx,
            exp.RegrSxy,
            exp.RegrSyy,
            exp.RegrValx,
            exp.RegrValy,
            exp.Sin,
            exp.Sqrt,
            exp.Tan,
            exp.Tanh,
        }
    },
    # Fixed return type: INT
    **{
        expr_type: {"returns": exp.DataType.Type.INT}
        for expr_type in {
            exp.Ascii,
            exp.BitLength,
            exp.ByteLength,
            exp.Getbit,
            exp.Grouping,
            exp.Hour,
            exp.JarowinklerSimilarity,
            exp.Length,
            exp.Levenshtein,
            exp.Minute,
            exp.RtrimmedLength,
            exp.Second,
            exp.StrPosition,
            exp.Unicode,
            exp.WidthBucket,
        }
    },
    # Fixed return type: OBJECT
    **{
        expr_type: {"returns": exp.DataType.Type.OBJECT}
        for expr_type in {
            exp.ApproxPercentileAccumulate,
            exp.ApproxPercentileCombine,
            exp.ApproxTopKAccumulate,
            exp.ApproxTopKCombine,
            exp.ObjectAgg,
            exp.ParseIp,
            exp.ParseUrl,
            exp.XMLGet,
        }
    },
    # Fixed return type: FILE
    **{
        expr_type: {"returns": exp.DataType.Type.FILE}
        for expr_type in {
            exp.ToFile,
        }
    },
    # Fixed return type: TIME
    **{
        expr_type: {"returns": exp.DataType.Type.TIME}
        for expr_type in {
            exp.TimeFromParts,
            exp.TsOrDsToTime,
        }
    },
    # Fixed return type: VARCHAR
    **{
        expr_type: {"returns": exp.DataType.Type.VARCHAR}
        for expr_type in {
            exp.AIAgg,
            exp.AIClassify,
            exp.AISummarizeAgg,
            exp.Base64DecodeString,
            exp.Base64Encode,
            exp.CheckJson,
            exp.CheckXml,
            exp.Chr,
            exp.Collate,
            exp.Collation,
            exp.CurrentAccount,
            exp.CurrentAccountName,
            exp.CurrentAvailableRoles,
            exp.CurrentClient,
            exp.CurrentDatabase,
            exp.CurrentIpAddress,
            exp.CurrentSchemas,
            exp.CurrentSecondaryRoles,
            exp.CurrentSession,
            exp.CurrentStatement,
            exp.CurrentVersion,
            exp.CurrentTransaction,
            exp.CurrentWarehouse,
            exp.CurrentOrganizationUser,
            exp.CurrentRegion,
            exp.CurrentRole,
            exp.CurrentRoleType,
            exp.CurrentOrganizationName,
            exp.DecompressString,
            exp.HexDecodeString,
            exp.HexEncode,
            exp.Initcap,
            exp.MD5,
            exp.Monthname,
            exp.Randstr,
            exp.RegexpExtract,
            exp.RegexpReplace,
            exp.Repeat,
            exp.Replace,
            exp.SHA,
            exp.SHA2,
            exp.Soundex,
            exp.SoundexP123,
            exp.Space,
            exp.SplitPart,
            exp.Translate,
            exp.TryBase64DecodeString,
            exp.TryHexDecodeString,
            exp.Uuid,
        }
    },
    # Fixed return type: VARIANT
    **{
        expr_type: {"returns": exp.DataType.Type.VARIANT}
        for expr_type in {
            exp.Minhash,
            exp.MinhashCombine,
        }
    },
    # Return type computed from the argument type by `_annotate_variance`
    **{
        expr_type: {"annotator": _annotate_variance}
        for expr_type in (
            exp.Variance,
            exp.VariancePop,
        )
    },
    # Individually registered annotators
    exp.ArgMax: {"annotator": _annotate_arg_max_min},
    exp.ArgMin: {"annotator": _annotate_arg_max_min},
    exp.ConcatWs: {"annotator": lambda self, e: self._annotate_by_args(e, "expressions")},
    # TIMESTAMPNTZ when a source time zone is given, TIMESTAMPTZ otherwise
    exp.ConvertTimezone: {
        "annotator": lambda self, e: self._set_type(
            e,
            exp.DataType.Type.TIMESTAMPNTZ
            if e.args.get("source_tz")
            else exp.DataType.Type.TIMESTAMPTZ,
        )
    },
    exp.DateAdd: {"annotator": _annotate_date_or_time_add},
    exp.DecodeCase: {"annotator": _annotate_decode_case},
    exp.GreatestIgnoreNulls: {
        "annotator": lambda self, e: self._annotate_by_args(e, "expressions")
    },
    exp.HashAgg: {
        "annotator": lambda self, e: self._set_type(
            e, exp.DataType.build("NUMBER(19, 0)", dialect="snowflake")
        )
    },
    exp.LeastIgnoreNulls: {"annotator": lambda self, e: self._annotate_by_args(e, "expressions")},
    exp.Median: {"annotator": _annotate_median},
    exp.Reverse: {"annotator": _annotate_reverse},
    exp.TimeAdd: {"annotator": _annotate_date_or_time_add},
    exp.TimestampFromParts: {"annotator": _annotate_timestamp_from_parts},
    exp.WithinGroup: {"annotator": _annotate_within_group},
}
|
sqlglot/typing/spark2.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import typing as t
|
|
4
|
+
|
|
5
|
+
from sqlglot import exp
|
|
6
|
+
from sqlglot.helper import ensure_list
|
|
7
|
+
from sqlglot.typing.hive import EXPRESSION_METADATA as HIVE_EXPRESSION_METADATA
|
|
8
|
+
|
|
9
|
+
if t.TYPE_CHECKING:
|
|
10
|
+
from sqlglot._typing import E
|
|
11
|
+
from sqlglot.optimizer.annotate_types import TypeAnnotator
|
|
12
|
+
from sqlglot.typing import ExpressionMetadataType
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _annotate_by_similar_args(
    self: TypeAnnotator, expression: E, *args: str, target_type: exp.DataType | exp.DataType.Type
) -> E:
    """
    Infers the type of the expression from the named arguments, scanning them in order:
        - As soon as an arg of target_type is found, the expr is inferred as target_type
        - Otherwise, if any arg is of UNKNOWN type, the expr is inferred as UNKNOWN
        - Otherwise, the expr takes the type of the last arg (None if there are no args)
    """
    # Flatten the requested args into one list, dropping missing/falsy entries
    operands: t.List[exp.Expression] = [
        operand
        for arg_name in args
        for operand in ensure_list(expression.args.get(arg_name))
        if operand
    ]

    inferred_type = None
    saw_unknown = False

    for operand in operands:
        if operand.is_type(exp.DataType.Type.UNKNOWN):
            saw_unknown = True
            continue

        if operand.is_type(target_type):
            # A target_type operand wins outright, even over earlier UNKNOWNs
            inferred_type, saw_unknown = target_type, False
            break

        inferred_type = operand.type

    result_type = exp.DataType.Type.UNKNOWN if saw_unknown else inferred_type
    self._set_type(expression, result_type)
    return expression
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
# Spark2 type-annotation metadata: starts from the Hive metadata and overrides
# the entries below. Later keys win over the spread Hive entries.
EXPRESSION_METADATA: ExpressionMetadataType = {
    **HIVE_EXPRESSION_METADATA,
    # SUBSTRING is annotated from its `this` argument
    exp.Substring: {"annotator": lambda self, e: self._annotate_by_args(e, "this")},
    # CONCAT: inferred from all `expressions`, with TEXT as the target type
    exp.Concat: {
        "annotator": lambda self, e: _annotate_by_similar_args(
            self, e, "expressions", target_type=exp.DataType.Type.TEXT
        )
    },
    # PAD: inferred from the padded value and the fill pattern, with TEXT as
    # the target type
    exp.Pad: {
        "annotator": lambda self, e: _annotate_by_similar_args(
            self, e, "this", "fill_pattern", target_type=exp.DataType.Type.TEXT
        )
    },
}
|
sqlglot/typing/tsql.py
ADDED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sqlglot
|
|
3
|
-
Version:
|
|
3
|
+
Version: 28.4.1
|
|
4
4
|
Summary: An easily customizable SQL parser and transpiler
|
|
5
5
|
Author-email: Toby Mao <toby.mao@gmail.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -33,7 +33,7 @@ Requires-Dist: typing_extensions; extra == "dev"
|
|
|
33
33
|
Requires-Dist: maturin<2.0,>=1.4; extra == "dev"
|
|
34
34
|
Requires-Dist: pyperf; extra == "dev"
|
|
35
35
|
Provides-Extra: rs
|
|
36
|
-
Requires-Dist: sqlglotrs==0.
|
|
36
|
+
Requires-Dist: sqlglotrs==0.10.0; extra == "rs"
|
|
37
37
|
Dynamic: license-file
|
|
38
38
|
Dynamic: provides-extra
|
|
39
39
|
|