sqlglot 27.29.0__py3-none-any.whl → 28.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlglot/__main__.py +6 -4
- sqlglot/_version.py +2 -2
- sqlglot/dialects/bigquery.py +116 -295
- sqlglot/dialects/clickhouse.py +67 -2
- sqlglot/dialects/databricks.py +38 -1
- sqlglot/dialects/dialect.py +327 -286
- sqlglot/dialects/dremio.py +4 -1
- sqlglot/dialects/duckdb.py +718 -22
- sqlglot/dialects/exasol.py +243 -10
- sqlglot/dialects/hive.py +8 -8
- sqlglot/dialects/mysql.py +11 -2
- sqlglot/dialects/oracle.py +29 -0
- sqlglot/dialects/postgres.py +46 -24
- sqlglot/dialects/presto.py +47 -16
- sqlglot/dialects/redshift.py +16 -0
- sqlglot/dialects/risingwave.py +3 -0
- sqlglot/dialects/singlestore.py +12 -3
- sqlglot/dialects/snowflake.py +199 -271
- sqlglot/dialects/spark.py +2 -2
- sqlglot/dialects/spark2.py +11 -48
- sqlglot/dialects/sqlite.py +9 -0
- sqlglot/dialects/teradata.py +5 -8
- sqlglot/dialects/trino.py +6 -0
- sqlglot/dialects/tsql.py +61 -25
- sqlglot/diff.py +4 -2
- sqlglot/errors.py +69 -0
- sqlglot/expressions.py +484 -84
- sqlglot/generator.py +143 -41
- sqlglot/helper.py +2 -2
- sqlglot/optimizer/annotate_types.py +247 -140
- sqlglot/optimizer/canonicalize.py +6 -1
- sqlglot/optimizer/eliminate_joins.py +1 -1
- sqlglot/optimizer/eliminate_subqueries.py +2 -2
- sqlglot/optimizer/merge_subqueries.py +5 -5
- sqlglot/optimizer/normalize.py +20 -13
- sqlglot/optimizer/normalize_identifiers.py +17 -3
- sqlglot/optimizer/optimizer.py +4 -0
- sqlglot/optimizer/pushdown_predicates.py +1 -1
- sqlglot/optimizer/qualify.py +14 -6
- sqlglot/optimizer/qualify_columns.py +113 -352
- sqlglot/optimizer/qualify_tables.py +112 -70
- sqlglot/optimizer/resolver.py +374 -0
- sqlglot/optimizer/scope.py +27 -16
- sqlglot/optimizer/simplify.py +1074 -964
- sqlglot/optimizer/unnest_subqueries.py +12 -2
- sqlglot/parser.py +276 -160
- sqlglot/planner.py +2 -2
- sqlglot/schema.py +15 -4
- sqlglot/tokens.py +42 -7
- sqlglot/transforms.py +77 -22
- sqlglot/typing/__init__.py +316 -0
- sqlglot/typing/bigquery.py +376 -0
- sqlglot/typing/hive.py +12 -0
- sqlglot/typing/presto.py +24 -0
- sqlglot/typing/snowflake.py +505 -0
- sqlglot/typing/spark2.py +58 -0
- sqlglot/typing/tsql.py +9 -0
- {sqlglot-27.29.0.dist-info → sqlglot-28.4.1.dist-info}/METADATA +2 -2
- sqlglot-28.4.1.dist-info/RECORD +92 -0
- sqlglot-27.29.0.dist-info/RECORD +0 -84
- {sqlglot-27.29.0.dist-info → sqlglot-28.4.1.dist-info}/WHEEL +0 -0
- {sqlglot-27.29.0.dist-info → sqlglot-28.4.1.dist-info}/licenses/LICENSE +0 -0
- {sqlglot-27.29.0.dist-info → sqlglot-28.4.1.dist-info}/top_level.txt +0 -0
|
sqlglot/typing/__init__.py
ADDED
|
@@ -0,0 +1,316 @@
|
|
|
1
|
+
import typing as t
|
|
2
|
+
|
|
3
|
+
from sqlglot import exp
|
|
4
|
+
from sqlglot.helper import subclasses
|
|
5
|
+
|
|
6
|
+
# Type alias for the registry defined below: expression class -> metadata dict keyed by
# strings such as "returns" or "annotator".
ExpressionMetadataType = t.Dict[type[exp.Expression], t.Dict[str, t.Any]]

# Expression classes registered with a TIMESTAMP return type in EXPRESSION_METADATA.
# Kept as a separate named set so the same group of classes can be reused when building
# another registry.
TIMESTAMP_EXPRESSIONS = {
    exp.CurrentTimestamp,
    exp.StrToTime,
    exp.TimeStrToTime,
    exp.TimestampAdd,
    exp.TimestampSub,
    exp.UnixToTime,
}
|
|
16
|
+
|
|
17
|
+
# Registry mapping expression classes to type-annotation metadata. Each value is a dict that
# holds either a fixed "returns" data type or an "annotator" callable taking (self, e).
# NOTE(review): presumably consumed by the TypeAnnotator in sqlglot.optimizer.annotate_types,
# with `self` being the annotator instance -- confirm against that module.
# This is a single dict literal, so a class that appears more than once keeps its LAST entry;
# the order of the groups below is therefore significant.
EXPRESSION_METADATA: ExpressionMetadataType = {
    # Blanket registration: every Binary subclass is annotated via _annotate_binary.
    **{
        expr_type: {"annotator": lambda self, e: self._annotate_binary(e)}
        for expr_type in subclasses(exp.__name__, exp.Binary)
    },
    # Blanket registration: every Unary / Alias subclass is annotated via _annotate_unary.
    **{
        expr_type: {"annotator": lambda self, e: self._annotate_unary(e)}
        for expr_type in subclasses(exp.__name__, (exp.Unary, exp.Alias))
    },
    # Fixed return type: BIGINT
    **{
        expr_type: {"returns": exp.DataType.Type.BIGINT}
        for expr_type in {
            exp.ApproxDistinct,
            exp.ArraySize,
            exp.CountIf,
            exp.Int64,
            exp.Length,
            exp.UnixDate,
            exp.UnixSeconds,
            exp.UnixMicros,
            exp.UnixMillis,
        }
    },
    # Fixed return type: BINARY
    **{
        expr_type: {"returns": exp.DataType.Type.BINARY}
        for expr_type in {
            exp.FromBase32,
            exp.FromBase64,
        }
    },
    # Fixed return type: BOOLEAN
    **{
        expr_type: {"returns": exp.DataType.Type.BOOLEAN}
        for expr_type in {
            exp.Between,
            exp.Boolean,
            exp.Contains,
            exp.EndsWith,
            exp.In,
            exp.LogicalAnd,
            exp.LogicalOr,
            exp.RegexpLike,
            exp.StartsWith,
        }
    },
    # Fixed return type: DATE
    **{
        expr_type: {"returns": exp.DataType.Type.DATE}
        for expr_type in {
            exp.CurrentDate,
            exp.Date,
            exp.DateFromParts,
            exp.DateStrToDate,
            exp.DiToDate,
            exp.LastDay,
            exp.StrToDate,
            exp.TimeStrToDate,
            exp.TsOrDsToDate,
        }
    },
    # Fixed return type: DATETIME
    **{
        expr_type: {"returns": exp.DataType.Type.DATETIME}
        for expr_type in {
            exp.CurrentDatetime,
            exp.Datetime,
            exp.DatetimeAdd,
            exp.DatetimeSub,
        }
    },
    # Fixed return type: DOUBLE
    **{
        expr_type: {"returns": exp.DataType.Type.DOUBLE}
        for expr_type in {
            exp.ApproxQuantile,
            exp.Avg,
            exp.Exp,
            exp.Ln,
            exp.Log,
            exp.Pi,
            exp.Pow,
            exp.Quantile,
            exp.Radians,
            exp.Round,
            exp.SafeDivide,
            exp.Sqrt,
            exp.Stddev,
            exp.StddevPop,
            exp.StddevSamp,
            exp.ToDouble,
            exp.Variance,
            exp.VariancePop,
            exp.Skewness,
        }
    },
    # Fixed return type: INT
    **{
        expr_type: {"returns": exp.DataType.Type.INT}
        for expr_type in {
            exp.Ascii,
            exp.Ceil,
            exp.DatetimeDiff,
            exp.TimestampDiff,
            exp.TimeDiff,
            exp.Unicode,
            exp.DateToDi,
            exp.Levenshtein,
            exp.Sign,
            exp.StrPosition,
            exp.TsOrDiToDi,
        }
    },
    # Fixed return type: INTERVAL
    **{
        expr_type: {"returns": exp.DataType.Type.INTERVAL}
        for expr_type in {
            exp.Interval,
            exp.JustifyDays,
            exp.JustifyHours,
            exp.JustifyInterval,
            exp.MakeInterval,
        }
    },
    # Fixed return type: JSON
    **{
        expr_type: {"returns": exp.DataType.Type.JSON}
        for expr_type in {
            exp.ParseJSON,
        }
    },
    # Fixed return type: TIME
    **{
        expr_type: {"returns": exp.DataType.Type.TIME}
        for expr_type in {
            exp.CurrentTime,
            exp.Time,
            exp.TimeAdd,
            exp.TimeSub,
        }
    },
    # Fixed return type: TIMESTAMPLTZ
    **{
        expr_type: {"returns": exp.DataType.Type.TIMESTAMPLTZ}
        for expr_type in {
            exp.TimestampLtzFromParts,
        }
    },
    # Fixed return type: TIMESTAMPTZ
    **{
        expr_type: {"returns": exp.DataType.Type.TIMESTAMPTZ}
        for expr_type in {
            exp.CurrentTimestampLTZ,
            exp.TimestampTzFromParts,
        }
    },
    # Fixed return type: TIMESTAMP (members come from the module-level TIMESTAMP_EXPRESSIONS set)
    **{expr_type: {"returns": exp.DataType.Type.TIMESTAMP} for expr_type in TIMESTAMP_EXPRESSIONS},
    # Fixed return type: TINYINT
    **{
        expr_type: {"returns": exp.DataType.Type.TINYINT}
        for expr_type in {
            exp.Day,
            exp.DayOfMonth,
            exp.DayOfWeek,
            exp.DayOfWeekIso,
            exp.DayOfYear,
            exp.Month,
            exp.Quarter,
            exp.Week,
            exp.WeekOfYear,
            exp.Year,
            exp.YearOfWeek,
            exp.YearOfWeekIso,
        }
    },
    # Fixed return type: VARCHAR
    **{
        expr_type: {"returns": exp.DataType.Type.VARCHAR}
        for expr_type in {
            exp.ArrayToString,
            exp.Concat,
            exp.ConcatWs,
            exp.Chr,
            exp.DateToDateStr,
            exp.DPipe,
            exp.GroupConcat,
            exp.Initcap,
            exp.Lower,
            exp.Substring,
            exp.String,
            exp.TimeToStr,
            exp.TimeToTimeStr,
            exp.Trim,
            exp.ToBase32,
            exp.ToBase64,
            exp.TsOrDsToDateStr,
            exp.UnixToStr,
            exp.UnixToTimeStr,
            exp.Upper,
        }
    },
    # Annotated from the "this" argument via _annotate_by_args
    **{
        expr_type: {"annotator": lambda self, e: self._annotate_by_args(e, "this")}
        for expr_type in {
            exp.Abs,
            exp.AnyValue,
            exp.ArrayConcatAgg,
            exp.ArrayReverse,
            exp.ArraySlice,
            exp.Filter,
            exp.HavingMax,
            exp.LastValue,
            exp.Limit,
            exp.Order,
            exp.SortArray,
            exp.Window,
        }
    },
    # Annotated from the "this" and "expressions" arguments via _annotate_by_args
    **{
        expr_type: {"annotator": lambda self, e: self._annotate_by_args(e, "this", "expressions")}
        for expr_type in {
            exp.ArrayConcat,
            exp.Coalesce,
            exp.Greatest,
            exp.Least,
            exp.Max,
            exp.Min,
        }
    },
    # Annotated via _annotate_by_array_element
    **{
        expr_type: {"annotator": lambda self, e: self._annotate_by_array_element(e)}
        for expr_type in {
            exp.ArrayFirst,
            exp.ArrayLast,
        }
    },
    # Explicitly registered as UNKNOWN
    **{
        expr_type: {"returns": exp.DataType.Type.UNKNOWN}
        for expr_type in {
            exp.Anonymous,
            exp.Slice,
        }
    },
    # Annotated via _annotate_timeunit
    **{
        expr_type: {"annotator": lambda self, e: self._annotate_timeunit(e)}
        for expr_type in {
            exp.DateAdd,
            exp.DateSub,
            exp.DateTrunc,
        }
    },
    # Casts take the type stored in their "to" argument
    **{
        expr_type: {"annotator": lambda self, e: self._set_type(e, e.args["to"])}
        for expr_type in {
            exp.Cast,
            exp.TryCast,
        }
    },
    # Annotated via _annotate_map
    **{
        expr_type: {"annotator": lambda self, e: self._annotate_map(e)}
        for expr_type in {
            exp.Map,
            exp.VarMap,
        }
    },
    # Individual registrations. Any of these that was also covered by a group above replaces
    # that earlier entry.
    exp.Array: {"annotator": lambda self, e: self._annotate_by_args(e, "expressions", array=True)},
    exp.ArrayAgg: {"annotator": lambda self, e: self._annotate_by_args(e, "this", array=True)},
    exp.Bracket: {"annotator": lambda self, e: self._annotate_bracket(e)},
    exp.Case: {"annotator": lambda self, e: self._annotate_by_args(e, "default", "ifs")},
    # BIGINT when the "big_int" arg is set, INT otherwise
    exp.Count: {
        "annotator": lambda self, e: self._set_type(
            e, exp.DataType.Type.BIGINT if e.args.get("big_int") else exp.DataType.Type.INT
        )
    },
    # BIGINT when the "big_int" arg is set, INT otherwise
    exp.DateDiff: {
        "annotator": lambda self, e: self._set_type(
            e, exp.DataType.Type.BIGINT if e.args.get("big_int") else exp.DataType.Type.INT
        )
    },
    # A DataType node is typed as a copy of itself
    exp.DataType: {"annotator": lambda self, e: self._set_type(e, e.copy())},
    exp.Div: {"annotator": lambda self, e: self._annotate_div(e)},
    exp.Distinct: {"annotator": lambda self, e: self._annotate_by_args(e, "expressions")},
    exp.Dot: {"annotator": lambda self, e: self._annotate_dot(e)},
    exp.Explode: {"annotator": lambda self, e: self._annotate_explode(e)},
    exp.Extract: {"annotator": lambda self, e: self._annotate_extract(e)},
    exp.GenerateSeries: {
        "annotator": lambda self, e: self._annotate_by_args(e, "start", "end", "step", array=True)
    },
    exp.GenerateDateArray: {
        "annotator": lambda self, e: self._set_type(e, exp.DataType.build("ARRAY<DATE>"))
    },
    exp.GenerateTimestampArray: {
        "annotator": lambda self, e: self._set_type(e, exp.DataType.build("ARRAY<TIMESTAMP>"))
    },
    exp.If: {"annotator": lambda self, e: self._annotate_by_args(e, "true", "false")},
    exp.Literal: {"annotator": lambda self, e: self._annotate_literal(e)},
    exp.Null: {"returns": exp.DataType.Type.NULL},
    exp.Nullif: {"annotator": lambda self, e: self._annotate_by_args(e, "this", "expression")},
    exp.PropertyEQ: {"annotator": lambda self, e: self._annotate_by_args(e, "expression")},
    exp.Struct: {"annotator": lambda self, e: self._annotate_struct(e)},
    exp.Sum: {
        "annotator": lambda self, e: self._annotate_by_args(e, "this", "expressions", promote=True)
    },
    # TIMESTAMPTZ when the "with_tz" arg is set, TIMESTAMP otherwise
    exp.Timestamp: {
        "annotator": lambda self, e: self._set_type(
            e,
            exp.DataType.Type.TIMESTAMPTZ if e.args.get("with_tz") else exp.DataType.Type.TIMESTAMP,
        )
    },
    exp.ToMap: {"annotator": lambda self, e: self._annotate_to_map(e)},
    exp.Unnest: {"annotator": lambda self, e: self._annotate_unnest(e)},
    exp.Subquery: {"annotator": lambda self, e: self._annotate_subquery(e)},
}
|
|
sqlglot/typing/bigquery.py
ADDED
|
@@ -0,0 +1,376 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import typing as t
|
|
4
|
+
|
|
5
|
+
from sqlglot import exp
|
|
6
|
+
from sqlglot.typing import EXPRESSION_METADATA, TIMESTAMP_EXPRESSIONS
|
|
7
|
+
|
|
8
|
+
if t.TYPE_CHECKING:
|
|
9
|
+
from sqlglot.optimizer.annotate_types import TypeAnnotator
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _annotate_math_functions(self: TypeAnnotator, expression: exp.Expression) -> exp.Expression:
    """
    Type a BigQuery math function (CEIL, FLOOR, etc.) from the type of its `this` argument.

    An integer-typed argument produces DOUBLE; any other argument type is carried over as is:

    +---------+---------+---------+------------+---------+
    |  INPUT  | INT64   | NUMERIC | BIGNUMERIC | FLOAT64 |
    +---------+---------+---------+------------+---------+
    |  OUTPUT | FLOAT64 | NUMERIC | BIGNUMERIC | FLOAT64 |
    +---------+---------+---------+------------+---------+

    Returns the same `expression` that was passed in.
    """
    operand: exp.Expression = expression.this

    if operand.is_type(*exp.DataType.INTEGER_TYPES):
        result_type = exp.DataType.Type.DOUBLE
    else:
        # Non-integer inputs keep whatever type the operand already has
        result_type = operand.type

    self._set_type(expression, result_type)
    return expression
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _annotate_safe_divide(self: TypeAnnotator, expression: exp.SafeDivide) -> exp.Expression:
    """
    Type SAFE_DIVIDE: two integer operands give DOUBLE, anything else is coerced pairwise.

    +------------+------------+------------+-------------+---------+
    | INPUT      | INT64      | NUMERIC    | BIGNUMERIC  | FLOAT64 |
    +------------+------------+------------+-------------+---------+
    | INT64      | FLOAT64    | NUMERIC    | BIGNUMERIC  | FLOAT64 |
    | NUMERIC    | NUMERIC    | NUMERIC    | BIGNUMERIC  | FLOAT64 |
    | BIGNUMERIC | BIGNUMERIC | BIGNUMERIC | BIGNUMERIC  | FLOAT64 |
    | FLOAT64    | FLOAT64    | FLOAT64    | FLOAT64     | FLOAT64 |
    +------------+------------+------------+-------------+---------+
    """
    integer_types = exp.DataType.INTEGER_TYPES
    both_integers = expression.this.is_type(*integer_types) and expression.expression.is_type(
        *integer_types
    )

    if not both_integers:
        # Mixed or non-integer operands follow the generic coercion table
        return _annotate_by_args_with_coerce(self, expression)

    return self._set_type(expression, exp.DataType.Type.DOUBLE)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _annotate_by_args_with_coerce(
    self: TypeAnnotator, expression: exp.Expression
) -> exp.Expression:
    """
    Type a two-operand expression as the coercion of its `this` and `expression` types.

    +------------+------------+------------+-------------+---------+
    | INPUT      | INT64      | NUMERIC    | BIGNUMERIC  | FLOAT64 |
    +------------+------------+------------+-------------+---------+
    | INT64      | INT64      | NUMERIC    | BIGNUMERIC  | FLOAT64 |
    | NUMERIC    | NUMERIC    | NUMERIC    | BIGNUMERIC  | FLOAT64 |
    | BIGNUMERIC | BIGNUMERIC | BIGNUMERIC | BIGNUMERIC  | FLOAT64 |
    | FLOAT64    | FLOAT64    | FLOAT64    | FLOAT64     | FLOAT64 |
    +------------+------------+------------+-------------+---------+

    Returns the same `expression` that was passed in.
    """
    left_type = expression.this.type
    right_type = expression.expression.type

    coerced_type = self._maybe_coerce(left_type, right_type)
    self._set_type(expression, coerced_type)

    return expression
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _annotate_by_args_approx_top(self: TypeAnnotator, expression: exp.ApproxTopK) -> exp.ApproxTopK:
    """
    Type the expression as ARRAY<STRUCT<type of `this`, BIGINT>>.

    Returns the same `expression` that was passed in.
    """
    data_type = exp.DataType

    count_type = data_type(this=data_type.Type.BIGINT)
    entry_type = data_type(
        this=data_type.Type.STRUCT,
        expressions=[expression.this.type, count_type],
        nested=True,
    )
    result_type = data_type(this=data_type.Type.ARRAY, expressions=[entry_type], nested=True)

    self._set_type(expression, result_type)
    return expression
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _annotate_concat(self: TypeAnnotator, expression: exp.Concat) -> exp.Concat:
    """
    Type CONCAT from its arguments, falling back to VARCHAR.

    The type inferred from "expressions" is kept only when it is BINARY or UNKNOWN;
    any other inferred type is replaced with VARCHAR.
    """
    result = self._annotate_by_args(expression, "expressions")

    # Args must be BYTES or types that can be cast to STRING, return type is either BYTES or STRING
    # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#concat
    keeps_inferred_type = result.is_type(exp.DataType.Type.BINARY, exp.DataType.Type.UNKNOWN)
    if keeps_inferred_type:
        return result

    self._set_type(result, exp.DataType.Type.VARCHAR)
    return result
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _annotate_array(self: TypeAnnotator, expression: exp.Array) -> exp.Array:
    """
    Type an ARRAY constructor, special-casing a single query argument.

    BigQuery behaves as follows:

        SELECT t, TYPEOF(t) FROM (SELECT 'foo') AS t             -- foo, STRUCT<STRING>
        SELECT ARRAY(SELECT 'foo'), TYPEOF(ARRAY(SELECT 'foo'))  -- foo, ARRAY<STRING>
        ARRAY(SELECT ... UNION ALL SELECT ...)  -- ARRAY<type from coerced projections>

    When there is exactly one argument and it unnests to a SELECT or a set operation for
    which a non-UNKNOWN projection type can be determined, the result is ARRAY<that type>.
    In every other case the work is delegated to
    `_annotate_by_args(expression, "expressions", array=True)`.
    """
    unknown = exp.DataType.Type.UNKNOWN
    items = expression.expressions

    if len(items) != 1:
        return self._annotate_by_args(expression, "expressions", array=True)

    query = items[0].unnest()
    element: t.Optional[exp.DataType | exp.DataType.Type] = None

    if isinstance(query, exp.Select):
        # ARRAY(SELECT ...): a single-field STRUCT stored under meta["query_type"] supplies
        # the projection type
        struct = query.meta.get("query_type")
        if (
            struct is not None
            and struct.is_type(exp.DataType.Type.STRUCT)
            and len(struct.expressions) == 1
        ):
            field = struct.expressions[0]
            if isinstance(field, exp.ColumnDef):
                kind = field.kind
                if kind is not None and not kind.is_type(unknown):
                    element = kind
    elif isinstance(query, exp.SetOperation):
        # ARRAY(SELECT ... UNION ALL SELECT ...): look up the first projection of the left side
        # among the set operation's column types. For ARRAY there should only be one projection:
        # https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/array_functions#array
        setop_types = self._get_setop_column_types(query)
        if setop_types and query.left.selects:
            element = setop_types.get(query.left.selects[0].alias_or_name)

    if element:
        # The type may be either a DataType node or a bare DataType.Type member
        element_is_unknown = (
            isinstance(element, exp.DataType) and element.is_type(unknown)
        ) or element == unknown

        if not element_is_unknown:
            if isinstance(element, exp.DataType):
                inner = element.copy()
            else:
                inner = exp.DataType(this=element)

            return self._set_type(
                expression,
                exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[inner], nested=True),
            )

    return self._annotate_by_args(expression, "expressions", array=True)
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
# Dialect registry: starts from the shared EXPRESSION_METADATA imported from sqlglot.typing and
# layers additional / overriding entries on top. This is one dict literal, so a class that
# appears more than once keeps its LAST entry; the order of the groups below is significant.
EXPRESSION_METADATA = {
    **EXPRESSION_METADATA,
    # Annotated via _annotate_math_functions (integer input -> DOUBLE, otherwise input type)
    **{
        expr_type: {"annotator": lambda self, e: _annotate_math_functions(self, e)}
        for expr_type in {
            exp.Avg,
            exp.Ceil,
            exp.Exp,
            exp.Floor,
            exp.Ln,
            exp.Log,
            exp.Round,
            exp.Sqrt,
        }
    },
    # Annotated from the "this" argument via _annotate_by_args
    **{
        expr_type: {"annotator": lambda self, e: self._annotate_by_args(e, "this")}
        for expr_type in {
            exp.Abs,
            exp.ArgMax,
            exp.ArgMin,
            exp.DateTrunc,
            exp.DatetimeTrunc,
            exp.FirstValue,
            exp.GroupConcat,
            exp.IgnoreNulls,
            exp.JSONExtract,
            exp.Lead,
            exp.Left,
            exp.Lower,
            exp.NthValue,
            exp.Pad,
            exp.PercentileDisc,
            exp.RegexpExtract,
            exp.RegexpReplace,
            exp.Repeat,
            exp.Replace,
            exp.RespectNulls,
            exp.Reverse,
            exp.Right,
            exp.SafeNegate,
            exp.Sign,
            exp.Substring,
            exp.TimestampTrunc,
            exp.Translate,
            exp.Trim,
            exp.Upper,
        }
    },
    # Fixed return type: BIGINT
    **{
        expr_type: {"returns": exp.DataType.Type.BIGINT}
        for expr_type in {
            exp.Ascii,
            exp.BitwiseAndAgg,
            exp.BitwiseCount,
            exp.BitwiseOrAgg,
            exp.BitwiseXorAgg,
            exp.ByteLength,
            exp.DenseRank,
            exp.FarmFingerprint,
            exp.Grouping,
            exp.LaxInt64,
            exp.Length,
            exp.Ntile,
            exp.Rank,
            exp.RangeBucket,
            exp.RegexpInstr,
            exp.RowNumber,
            exp.Unicode,
        }
    },
    # Fixed return type: BINARY
    **{
        expr_type: {"returns": exp.DataType.Type.BINARY}
        for expr_type in {
            exp.ByteString,
            exp.CodePointsToBytes,
            exp.MD5Digest,
            exp.SHA,
            exp.SHA2,
            exp.SHA1Digest,
            exp.SHA2Digest,
            exp.Unhex,
        }
    },
    # Fixed return type: BOOLEAN
    **{
        expr_type: {"returns": exp.DataType.Type.BOOLEAN}
        for expr_type in {
            exp.IsInf,
            exp.IsNan,
            exp.JSONBool,
            exp.LaxBool,
        }
    },
    # Fixed return type: DATETIME
    **{
        expr_type: {"returns": exp.DataType.Type.DATETIME}
        for expr_type in {
            exp.ParseDatetime,
            exp.TimestampFromParts,
        }
    },
    # Fixed return type: DOUBLE
    **{
        expr_type: {"returns": exp.DataType.Type.DOUBLE}
        for expr_type in {
            exp.Acos,
            exp.Acosh,
            exp.Asin,
            exp.Asinh,
            exp.Atan,
            exp.Atan2,
            exp.Atanh,
            exp.Cbrt,
            exp.Corr,
            exp.CosineDistance,
            exp.Cot,
            exp.Coth,
            exp.CovarPop,
            exp.CovarSamp,
            exp.Csc,
            exp.Csch,
            exp.CumeDist,
            exp.EuclideanDistance,
            exp.Float64,
            exp.LaxFloat64,
            exp.PercentRank,
            exp.Rand,
            exp.Sec,
            exp.Sech,
            exp.Sin,
            exp.Sinh,
        }
    },
    # Fixed return type: JSON
    **{
        expr_type: {"returns": exp.DataType.Type.JSON}
        for expr_type in {
            exp.JSONArray,
            exp.JSONArrayAppend,
            exp.JSONArrayInsert,
            exp.JSONObject,
            exp.JSONRemove,
            exp.JSONSet,
            exp.JSONStripNulls,
        }
    },
    # Fixed return type: TIME
    **{
        expr_type: {"returns": exp.DataType.Type.TIME}
        for expr_type in {
            exp.ParseTime,
            exp.TimeFromParts,
            exp.TimeTrunc,
            exp.TsOrDsToTime,
        }
    },
    # Fixed return type: VARCHAR
    **{
        expr_type: {"returns": exp.DataType.Type.VARCHAR}
        for expr_type in {
            exp.CodePointsToString,
            exp.Format,
            exp.JSONExtractScalar,
            exp.JSONType,
            exp.LaxString,
            exp.LowerHex,
            exp.MD5,
            exp.NetHost,
            exp.Normalize,
            exp.SafeConvertBytesToString,
            exp.Soundex,
            exp.Uuid,
        }
    },
    # Annotated via _annotate_by_args_with_coerce (coercion of the two operand types).
    # NOTE: exp.SafeDivide is registered again near the end of this literal with
    # _annotate_safe_divide, and that later entry is the one that takes effect.
    **{
        expr_type: {"annotator": lambda self, e: _annotate_by_args_with_coerce(self, e)}
        for expr_type in {
            exp.PercentileCont,
            exp.SafeAdd,
            exp.SafeDivide,
            exp.SafeMultiply,
            exp.SafeSubtract,
        }
    },
    # Annotated from the "this" argument via _annotate_by_args with array=True
    **{
        expr_type: {"annotator": lambda self, e: self._annotate_by_args(e, "this", array=True)}
        for expr_type in {
            exp.ApproxQuantiles,
            exp.JSONExtractArray,
            exp.RegexpExtractAll,
            exp.Split,
        }
    },
    # The shared TIMESTAMP_EXPRESSIONS group is re-registered here as TIMESTAMPTZ
    **{
        expr_type: {"returns": exp.DataType.Type.TIMESTAMPTZ} for expr_type in TIMESTAMP_EXPRESSIONS
    },
    # Individual registrations. Any of these that was also covered above replaces that entry.
    exp.ApproxTopK: {"annotator": lambda self, e: _annotate_by_args_approx_top(self, e)},
    exp.ApproxTopSum: {"annotator": lambda self, e: _annotate_by_args_approx_top(self, e)},
    exp.Array: {"annotator": _annotate_array},
    exp.ArrayConcat: {
        "annotator": lambda self, e: self._annotate_by_args(e, "this", "expressions")
    },
    exp.Concat: {"annotator": _annotate_concat},
    exp.DateFromUnixDate: {"returns": exp.DataType.Type.DATE},
    # The type strings below are built with dialect="bigquery"
    exp.GenerateTimestampArray: {
        "annotator": lambda self, e: self._set_type(
            e, exp.DataType.build("ARRAY<TIMESTAMP>", dialect="bigquery")
        )
    },
    # JSON when the "to_json" arg is set, VARCHAR otherwise
    exp.JSONFormat: {
        "annotator": lambda self, e: self._set_type(
            e, exp.DataType.Type.JSON if e.args.get("to_json") else exp.DataType.Type.VARCHAR
        )
    },
    exp.JSONKeysAtDepth: {
        "annotator": lambda self, e: self._set_type(
            e, exp.DataType.build("ARRAY<VARCHAR>", dialect="bigquery")
        )
    },
    exp.JSONValueArray: {
        "annotator": lambda self, e: self._set_type(
            e, exp.DataType.build("ARRAY<VARCHAR>", dialect="bigquery")
        )
    },
    exp.Lag: {"annotator": lambda self, e: self._annotate_by_args(e, "this", "default")},
    exp.ParseBignumeric: {"returns": exp.DataType.Type.BIGDECIMAL},
    exp.ParseNumeric: {"returns": exp.DataType.Type.DECIMAL},
    # Replaces the SafeDivide entry from the coerce group above
    exp.SafeDivide: {"annotator": lambda self, e: _annotate_safe_divide(self, e)},
    exp.ToCodePoints: {
        "annotator": lambda self, e: self._set_type(
            e, exp.DataType.build("ARRAY<BIGINT>", dialect="bigquery")
        )
    },
}
|
sqlglot/typing/hive.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from sqlglot import exp
|
|
4
|
+
from sqlglot.typing import EXPRESSION_METADATA
|
|
5
|
+
|
|
6
|
+
def _annotate_if_with_promotion(self, e):
    """Annotate `e` from its "true" and "false" args, forwarding promote=True."""
    return self._annotate_by_args(e, "true", "false", promote=True)


def _annotate_coalesce_with_promotion(self, e):
    """Annotate `e` from its "this" and "expressions" args, forwarding promote=True."""
    return self._annotate_by_args(e, "this", "expressions", promote=True)


# Start from the shared registry imported from sqlglot.typing and replace the entries for
# exp.If and exp.Coalesce with the promoting variants defined above.
EXPRESSION_METADATA = {
    **EXPRESSION_METADATA,
    exp.If: {"annotator": _annotate_if_with_promotion},
    exp.Coalesce: {"annotator": _annotate_coalesce_with_promotion},
}
|