sqlglot 27.29.0__py3-none-any.whl → 28.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. sqlglot/__main__.py +6 -4
  2. sqlglot/_version.py +2 -2
  3. sqlglot/dialects/bigquery.py +116 -295
  4. sqlglot/dialects/clickhouse.py +67 -2
  5. sqlglot/dialects/databricks.py +38 -1
  6. sqlglot/dialects/dialect.py +327 -286
  7. sqlglot/dialects/dremio.py +4 -1
  8. sqlglot/dialects/duckdb.py +718 -22
  9. sqlglot/dialects/exasol.py +243 -10
  10. sqlglot/dialects/hive.py +8 -8
  11. sqlglot/dialects/mysql.py +11 -2
  12. sqlglot/dialects/oracle.py +29 -0
  13. sqlglot/dialects/postgres.py +46 -24
  14. sqlglot/dialects/presto.py +47 -16
  15. sqlglot/dialects/redshift.py +16 -0
  16. sqlglot/dialects/risingwave.py +3 -0
  17. sqlglot/dialects/singlestore.py +12 -3
  18. sqlglot/dialects/snowflake.py +199 -271
  19. sqlglot/dialects/spark.py +2 -2
  20. sqlglot/dialects/spark2.py +11 -48
  21. sqlglot/dialects/sqlite.py +9 -0
  22. sqlglot/dialects/teradata.py +5 -8
  23. sqlglot/dialects/trino.py +6 -0
  24. sqlglot/dialects/tsql.py +61 -25
  25. sqlglot/diff.py +4 -2
  26. sqlglot/errors.py +69 -0
  27. sqlglot/expressions.py +484 -84
  28. sqlglot/generator.py +143 -41
  29. sqlglot/helper.py +2 -2
  30. sqlglot/optimizer/annotate_types.py +247 -140
  31. sqlglot/optimizer/canonicalize.py +6 -1
  32. sqlglot/optimizer/eliminate_joins.py +1 -1
  33. sqlglot/optimizer/eliminate_subqueries.py +2 -2
  34. sqlglot/optimizer/merge_subqueries.py +5 -5
  35. sqlglot/optimizer/normalize.py +20 -13
  36. sqlglot/optimizer/normalize_identifiers.py +17 -3
  37. sqlglot/optimizer/optimizer.py +4 -0
  38. sqlglot/optimizer/pushdown_predicates.py +1 -1
  39. sqlglot/optimizer/qualify.py +14 -6
  40. sqlglot/optimizer/qualify_columns.py +113 -352
  41. sqlglot/optimizer/qualify_tables.py +112 -70
  42. sqlglot/optimizer/resolver.py +374 -0
  43. sqlglot/optimizer/scope.py +27 -16
  44. sqlglot/optimizer/simplify.py +1074 -964
  45. sqlglot/optimizer/unnest_subqueries.py +12 -2
  46. sqlglot/parser.py +276 -160
  47. sqlglot/planner.py +2 -2
  48. sqlglot/schema.py +15 -4
  49. sqlglot/tokens.py +42 -7
  50. sqlglot/transforms.py +77 -22
  51. sqlglot/typing/__init__.py +316 -0
  52. sqlglot/typing/bigquery.py +376 -0
  53. sqlglot/typing/hive.py +12 -0
  54. sqlglot/typing/presto.py +24 -0
  55. sqlglot/typing/snowflake.py +505 -0
  56. sqlglot/typing/spark2.py +58 -0
  57. sqlglot/typing/tsql.py +9 -0
  58. {sqlglot-27.29.0.dist-info → sqlglot-28.4.1.dist-info}/METADATA +2 -2
  59. sqlglot-28.4.1.dist-info/RECORD +92 -0
  60. sqlglot-27.29.0.dist-info/RECORD +0 -84
  61. {sqlglot-27.29.0.dist-info → sqlglot-28.4.1.dist-info}/WHEEL +0 -0
  62. {sqlglot-27.29.0.dist-info → sqlglot-28.4.1.dist-info}/licenses/LICENSE +0 -0
  63. {sqlglot-27.29.0.dist-info → sqlglot-28.4.1.dist-info}/top_level.txt +0 -0
sqlglot/typing/__init__.py ADDED
@@ -0,0 +1,316 @@
1
+ import typing as t
2
+
3
+ from sqlglot import exp
4
+ from sqlglot.helper import subclasses
5
+
6
# Shape of a typing table: expression class -> metadata dict. The tables in
# this module use either a "returns" key (a fixed type) or an "annotator" key
# (a callable taking the annotator instance and the expression).
ExpressionMetadataType = t.Dict[type[exp.Expression], t.Dict[str, t.Any]]

# Expression classes that the default table below annotates as TIMESTAMP.
# Kept as a named set so the same group can be re-mapped as a whole.
TIMESTAMP_EXPRESSIONS = {
    exp.CurrentTimestamp,
    exp.StrToTime,
    exp.TimeStrToTime,
    exp.TimestampAdd,
    exp.TimestampSub,
    exp.UnixToTime,
}
16
+
17
# Default typing table. Entries are merged top to bottom, so when a class
# appears more than once the entry closest to the end of the literal wins.
EXPRESSION_METADATA: ExpressionMetadataType = {
    # --- Generic fallbacks for whole class hierarchies ---------------------
    **{
        expr_cls: {"annotator": lambda self, e: self._annotate_binary(e)}
        for expr_cls in subclasses(exp.__name__, exp.Binary)
    },
    **{
        expr_cls: {"annotator": lambda self, e: self._annotate_unary(e)}
        for expr_cls in subclasses(exp.__name__, (exp.Unary, exp.Alias))
    },
    # --- Fixed return types, grouped by type --------------------------------
    **{
        expr_cls: {"returns": exp.DataType.Type.BIGINT}
        for expr_cls in {
            exp.ApproxDistinct,
            exp.ArraySize,
            exp.CountIf,
            exp.Int64,
            exp.Length,
            exp.UnixDate,
            exp.UnixSeconds,
            exp.UnixMicros,
            exp.UnixMillis,
        }
    },
    **{
        expr_cls: {"returns": exp.DataType.Type.BINARY}
        for expr_cls in {
            exp.FromBase32,
            exp.FromBase64,
        }
    },
    **{
        expr_cls: {"returns": exp.DataType.Type.BOOLEAN}
        for expr_cls in {
            exp.Between,
            exp.Boolean,
            exp.Contains,
            exp.EndsWith,
            exp.In,
            exp.LogicalAnd,
            exp.LogicalOr,
            exp.RegexpLike,
            exp.StartsWith,
        }
    },
    **{
        expr_cls: {"returns": exp.DataType.Type.DATE}
        for expr_cls in {
            exp.CurrentDate,
            exp.Date,
            exp.DateFromParts,
            exp.DateStrToDate,
            exp.DiToDate,
            exp.LastDay,
            exp.StrToDate,
            exp.TimeStrToDate,
            exp.TsOrDsToDate,
        }
    },
    **{
        expr_cls: {"returns": exp.DataType.Type.DATETIME}
        for expr_cls in {
            exp.CurrentDatetime,
            exp.Datetime,
            exp.DatetimeAdd,
            exp.DatetimeSub,
        }
    },
    **{
        expr_cls: {"returns": exp.DataType.Type.DOUBLE}
        for expr_cls in {
            exp.ApproxQuantile,
            exp.Avg,
            exp.Exp,
            exp.Ln,
            exp.Log,
            exp.Pi,
            exp.Pow,
            exp.Quantile,
            exp.Radians,
            exp.Round,
            exp.SafeDivide,
            exp.Sqrt,
            exp.Stddev,
            exp.StddevPop,
            exp.StddevSamp,
            exp.ToDouble,
            exp.Variance,
            exp.VariancePop,
            exp.Skewness,
        }
    },
    **{
        expr_cls: {"returns": exp.DataType.Type.INT}
        for expr_cls in {
            exp.Ascii,
            exp.Ceil,
            exp.DatetimeDiff,
            exp.TimestampDiff,
            exp.TimeDiff,
            exp.Unicode,
            exp.DateToDi,
            exp.Levenshtein,
            exp.Sign,
            exp.StrPosition,
            exp.TsOrDiToDi,
        }
    },
    **{
        expr_cls: {"returns": exp.DataType.Type.INTERVAL}
        for expr_cls in {
            exp.Interval,
            exp.JustifyDays,
            exp.JustifyHours,
            exp.JustifyInterval,
            exp.MakeInterval,
        }
    },
    **{
        expr_cls: {"returns": exp.DataType.Type.JSON}
        for expr_cls in {
            exp.ParseJSON,
        }
    },
    **{
        expr_cls: {"returns": exp.DataType.Type.TIME}
        for expr_cls in {
            exp.CurrentTime,
            exp.Time,
            exp.TimeAdd,
            exp.TimeSub,
        }
    },
    **{
        expr_cls: {"returns": exp.DataType.Type.TIMESTAMPLTZ}
        for expr_cls in {
            exp.TimestampLtzFromParts,
        }
    },
    **{
        expr_cls: {"returns": exp.DataType.Type.TIMESTAMPTZ}
        for expr_cls in {
            exp.CurrentTimestampLTZ,
            exp.TimestampTzFromParts,
        }
    },
    **{expr_cls: {"returns": exp.DataType.Type.TIMESTAMP} for expr_cls in TIMESTAMP_EXPRESSIONS},
    **{
        expr_cls: {"returns": exp.DataType.Type.TINYINT}
        for expr_cls in {
            exp.Day,
            exp.DayOfMonth,
            exp.DayOfWeek,
            exp.DayOfWeekIso,
            exp.DayOfYear,
            exp.Month,
            exp.Quarter,
            exp.Week,
            exp.WeekOfYear,
            exp.Year,
            exp.YearOfWeek,
            exp.YearOfWeekIso,
        }
    },
    **{
        expr_cls: {"returns": exp.DataType.Type.VARCHAR}
        for expr_cls in {
            exp.ArrayToString,
            exp.Concat,
            exp.ConcatWs,
            exp.Chr,
            exp.DateToDateStr,
            exp.DPipe,
            exp.GroupConcat,
            exp.Initcap,
            exp.Lower,
            exp.Substring,
            exp.String,
            exp.TimeToStr,
            exp.TimeToTimeStr,
            exp.Trim,
            exp.ToBase32,
            exp.ToBase64,
            exp.TsOrDsToDateStr,
            exp.UnixToStr,
            exp.UnixToTimeStr,
            exp.Upper,
        }
    },
    # --- Types derived from the expression's own arguments ------------------
    **{
        expr_cls: {"annotator": lambda self, e: self._annotate_by_args(e, "this")}
        for expr_cls in {
            exp.Abs,
            exp.AnyValue,
            exp.ArrayConcatAgg,
            exp.ArrayReverse,
            exp.ArraySlice,
            exp.Filter,
            exp.HavingMax,
            exp.LastValue,
            exp.Limit,
            exp.Order,
            exp.SortArray,
            exp.Window,
        }
    },
    **{
        expr_cls: {"annotator": lambda self, e: self._annotate_by_args(e, "this", "expressions")}
        for expr_cls in {
            exp.ArrayConcat,
            exp.Coalesce,
            exp.Greatest,
            exp.Least,
            exp.Max,
            exp.Min,
        }
    },
    **{
        expr_cls: {"annotator": lambda self, e: self._annotate_by_array_element(e)}
        for expr_cls in {
            exp.ArrayFirst,
            exp.ArrayLast,
        }
    },
    **{
        expr_cls: {"returns": exp.DataType.Type.UNKNOWN}
        for expr_cls in {
            exp.Anonymous,
            exp.Slice,
        }
    },
    **{
        expr_cls: {"annotator": lambda self, e: self._annotate_timeunit(e)}
        for expr_cls in {
            exp.DateAdd,
            exp.DateSub,
            exp.DateTrunc,
        }
    },
    **{
        # The cast target stored under "to" becomes the expression's type.
        expr_cls: {"annotator": lambda self, e: self._set_type(e, e.args["to"])}
        for expr_cls in {
            exp.Cast,
            exp.TryCast,
        }
    },
    **{
        expr_cls: {"annotator": lambda self, e: self._annotate_map(e)}
        for expr_cls in {
            exp.Map,
            exp.VarMap,
        }
    },
    # --- One-off entries -----------------------------------------------------
    exp.Array: {"annotator": lambda self, e: self._annotate_by_args(e, "expressions", array=True)},
    exp.ArrayAgg: {"annotator": lambda self, e: self._annotate_by_args(e, "this", array=True)},
    exp.Bracket: {"annotator": lambda self, e: self._annotate_bracket(e)},
    exp.Case: {"annotator": lambda self, e: self._annotate_by_args(e, "default", "ifs")},
    # Count and DateDiff are BIGINT only when their "big_int" arg is set.
    exp.Count: {
        "annotator": lambda self, e: self._set_type(
            e, exp.DataType.Type.BIGINT if e.args.get("big_int") else exp.DataType.Type.INT
        )
    },
    exp.DateDiff: {
        "annotator": lambda self, e: self._set_type(
            e, exp.DataType.Type.BIGINT if e.args.get("big_int") else exp.DataType.Type.INT
        )
    },
    # A DataType node is typed with a copy of itself.
    exp.DataType: {"annotator": lambda self, e: self._set_type(e, e.copy())},
    exp.Div: {"annotator": lambda self, e: self._annotate_div(e)},
    exp.Distinct: {"annotator": lambda self, e: self._annotate_by_args(e, "expressions")},
    exp.Dot: {"annotator": lambda self, e: self._annotate_dot(e)},
    exp.Explode: {"annotator": lambda self, e: self._annotate_explode(e)},
    exp.Extract: {"annotator": lambda self, e: self._annotate_extract(e)},
    exp.GenerateSeries: {
        "annotator": lambda self, e: self._annotate_by_args(e, "start", "end", "step", array=True)
    },
    exp.GenerateDateArray: {
        "annotator": lambda self, e: self._set_type(e, exp.DataType.build("ARRAY<DATE>"))
    },
    exp.GenerateTimestampArray: {
        "annotator": lambda self, e: self._set_type(e, exp.DataType.build("ARRAY<TIMESTAMP>"))
    },
    exp.If: {"annotator": lambda self, e: self._annotate_by_args(e, "true", "false")},
    exp.Literal: {"annotator": lambda self, e: self._annotate_literal(e)},
    exp.Null: {"returns": exp.DataType.Type.NULL},
    exp.Nullif: {"annotator": lambda self, e: self._annotate_by_args(e, "this", "expression")},
    exp.PropertyEQ: {"annotator": lambda self, e: self._annotate_by_args(e, "expression")},
    exp.Struct: {"annotator": lambda self, e: self._annotate_struct(e)},
    exp.Sum: {
        "annotator": lambda self, e: self._annotate_by_args(e, "this", "expressions", promote=True)
    },
    # TIMESTAMPTZ when the "with_tz" arg is set, plain TIMESTAMP otherwise.
    exp.Timestamp: {
        "annotator": lambda self, e: self._set_type(
            e,
            exp.DataType.Type.TIMESTAMPTZ if e.args.get("with_tz") else exp.DataType.Type.TIMESTAMP,
        )
    },
    exp.ToMap: {"annotator": lambda self, e: self._annotate_to_map(e)},
    exp.Unnest: {"annotator": lambda self, e: self._annotate_unnest(e)},
    exp.Subquery: {"annotator": lambda self, e: self._annotate_subquery(e)},
}
sqlglot/typing/bigquery.py ADDED
@@ -0,0 +1,376 @@
1
+ from __future__ import annotations
2
+
3
+ import typing as t
4
+
5
+ from sqlglot import exp
6
+ from sqlglot.typing import EXPRESSION_METADATA, TIMESTAMP_EXPRESSIONS
7
+
8
+ if t.TYPE_CHECKING:
9
+ from sqlglot.optimizer.annotate_types import TypeAnnotator
10
+
11
+
12
def _annotate_math_functions(self: TypeAnnotator, expression: exp.Expression) -> exp.Expression:
    """
    Annotate a BigQuery math function (CEIL, FLOOR, ...) from the type of its operand.

    An integer operand produces DOUBLE (FLOAT64); any other operand type is reused as is:
    +---------+---------+---------+------------+---------+
    |  INPUT  | INT64   | NUMERIC | BIGNUMERIC | FLOAT64 |
    +---------+---------+---------+------------+---------+
    |  OUTPUT | FLOAT64 | NUMERIC | BIGNUMERIC | FLOAT64 |
    +---------+---------+---------+------------+---------+

    Returns the same `expression` node after its type has been set.
    """
    operand: exp.Expression = expression.this

    if operand.is_type(*exp.DataType.INTEGER_TYPES):
        result_type = exp.DataType.Type.DOUBLE
    else:
        # Non-integer operands keep whatever type was already inferred for them.
        result_type = operand.type

    self._set_type(expression, result_type)
    return expression
28
+
29
+
30
def _annotate_safe_divide(self: TypeAnnotator, expression: exp.SafeDivide) -> exp.Expression:
    """
    Annotate SAFE_DIVIDE from the types of its two operands.

    Two integer operands yield DOUBLE (FLOAT64); every other combination is
    delegated to `_annotate_by_args_with_coerce`:
    +------------+------------+------------+-------------+---------+
    | INPUT      | INT64      | NUMERIC    | BIGNUMERIC  | FLOAT64 |
    +------------+------------+------------+-------------+---------+
    | INT64      | FLOAT64    | NUMERIC    | BIGNUMERIC  | FLOAT64 |
    | NUMERIC    | NUMERIC    | NUMERIC    | BIGNUMERIC  | FLOAT64 |
    | BIGNUMERIC | BIGNUMERIC | BIGNUMERIC | BIGNUMERIC  | FLOAT64 |
    | FLOAT64    | FLOAT64    | FLOAT64    | FLOAT64     | FLOAT64 |
    +------------+------------+------------+-------------+---------+
    """
    integer_types = exp.DataType.INTEGER_TYPES
    both_integers = expression.this.is_type(*integer_types) and expression.expression.is_type(
        *integer_types
    )

    if not both_integers:
        return _annotate_by_args_with_coerce(self, expression)

    return self._set_type(expression, exp.DataType.Type.DOUBLE)
47
+
48
+
49
def _annotate_by_args_with_coerce(
    self: TypeAnnotator, expression: exp.Expression
) -> exp.Expression:
    """
    Annotate a two-operand expression with the coercion of its operand types.

    The types of `this` and `expression` are combined through `self._maybe_coerce`:
    +------------+------------+------------+-------------+---------+
    | INPUT      | INT64      | NUMERIC    | BIGNUMERIC  | FLOAT64 |
    +------------+------------+------------+-------------+---------+
    | INT64      | INT64      | NUMERIC    | BIGNUMERIC  | FLOAT64 |
    | NUMERIC    | NUMERIC    | NUMERIC    | BIGNUMERIC  | FLOAT64 |
    | BIGNUMERIC | BIGNUMERIC | BIGNUMERIC | BIGNUMERIC  | FLOAT64 |
    | FLOAT64    | FLOAT64    | FLOAT64    | FLOAT64     | FLOAT64 |
    +------------+------------+------------+-------------+---------+

    Returns the same `expression` node after its type has been set.
    """
    left_type = expression.this.type
    right_type = expression.expression.type

    self._set_type(expression, self._maybe_coerce(left_type, right_type))
    return expression
64
+
65
+
66
def _annotate_by_args_approx_top(self: TypeAnnotator, expression: exp.ApproxTopK) -> exp.ApproxTopK:
    """
    Annotate the expression as an ARRAY of STRUCTs.

    Each STRUCT has two members: the type of the expression's `this` argument
    followed by BIGINT. Returns the same `expression` node.
    """
    data_type = exp.DataType.Type

    entry_type = exp.DataType(
        this=data_type.STRUCT,
        expressions=[expression.this.type, exp.DataType(this=data_type.BIGINT)],
        nested=True,
    )
    result_type = exp.DataType(this=data_type.ARRAY, expressions=[entry_type], nested=True)

    self._set_type(expression, result_type)
    return expression
78
+
79
+
80
def _annotate_concat(self: TypeAnnotator, expression: exp.Concat) -> exp.Concat:
    """
    Annotate CONCAT: keep a BINARY or UNKNOWN result, otherwise force VARCHAR.

    The type is first inferred from the `expressions` argument. Arguments must be
    BYTES or types that can be cast to STRING, so the result is either BYTES or STRING:
    https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#concat
    """
    result = self._annotate_by_args(expression, "expressions")

    keeps_inferred_type = result.is_type(exp.DataType.Type.BINARY, exp.DataType.Type.UNKNOWN)
    if not keeps_inferred_type:
        self._set_type(result, exp.DataType.Type.VARCHAR)

    return result
89
+
90
+
91
def _annotate_array(self: TypeAnnotator, expression: exp.Array) -> exp.Array:
    """
    Annotate an ARRAY constructor, with special handling for ARRAY(<query>).

    BigQuery behaves as follows:

        SELECT t, TYPEOF(t) FROM (SELECT 'foo') AS t             -- foo, STRUCT<STRING>
        SELECT ARRAY(SELECT 'foo'), TYPEOF(ARRAY(SELECT 'foo'))  -- foo, ARRAY<STRING>
        ARRAY(SELECT ... UNION ALL SELECT ...)  -- ARRAY<type from coerced projections>

    When the single argument is a SELECT or a set operation whose projection
    type is known, the result is ARRAY<projection type>. In every other case
    the type is inferred from the `expressions` argument with `array=True`.
    """
    args = expression.expressions

    if len(args) != 1:
        return self._annotate_by_args(expression, "expressions", array=True)

    data_type = exp.DataType.Type
    query = args[0].unnest()
    element: t.Optional[exp.DataType | exp.DataType.Type] = None

    if isinstance(query, exp.Select):
        # ARRAY(SELECT ...): use the query's type only when it is a STRUCT
        # with exactly one column definition whose kind is known.
        query_type = query.meta.get("query_type")
        if (
            query_type is not None
            and query_type.is_type(data_type.STRUCT)
            and len(query_type.expressions) == 1
        ):
            column = query_type.expressions[0]
            column_type = column.kind if isinstance(column, exp.ColumnDef) else None
            if column_type is not None and not column_type.is_type(data_type.UNKNOWN):
                element = column_type
    elif isinstance(query, exp.SetOperation):
        # ARRAY(SELECT ... UNION ALL SELECT ...): look up the first projection
        # of the left side among the set operation's column types.
        # For the ARRAY constructor, there should only be one projection
        # https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/array_functions#array
        column_types = self._get_setop_column_types(query)
        if column_types and query.left.selects:
            element = column_types.get(query.left.selects[0].alias_or_name)

    # Fall back to argument-based inference when nothing usable was found.
    if (
        not element
        or (isinstance(element, exp.DataType) and element.is_type(data_type.UNKNOWN))
        or element == data_type.UNKNOWN
    ):
        return self._annotate_by_args(expression, "expressions", array=True)

    if isinstance(element, exp.DataType):
        # Copy so the query's own type node is not re-parented under the array type.
        item_type = element.copy()
    else:
        item_type = exp.DataType(this=element)

    return self._set_type(
        expression,
        exp.DataType(this=data_type.ARRAY, expressions=[item_type], nested=True),
    )
146
+
147
+
148
# BigQuery typing table: starts from the imported default table and layers
# the entries below on top of it. Entries are merged top to bottom, so for a
# class listed more than once the entry closest to the end wins.
EXPRESSION_METADATA = {
    **EXPRESSION_METADATA,
    # --- Math functions: integer input -> DOUBLE, otherwise input type ------
    **{
        expr_cls: {"annotator": lambda self, e: _annotate_math_functions(self, e)}
        for expr_cls in {
            exp.Avg,
            exp.Ceil,
            exp.Exp,
            exp.Floor,
            exp.Ln,
            exp.Log,
            exp.Round,
            exp.Sqrt,
        }
    },
    # --- Type taken from the "this" argument --------------------------------
    **{
        expr_cls: {"annotator": lambda self, e: self._annotate_by_args(e, "this")}
        for expr_cls in {
            exp.Abs,
            exp.ArgMax,
            exp.ArgMin,
            exp.DateTrunc,
            exp.DatetimeTrunc,
            exp.FirstValue,
            exp.GroupConcat,
            exp.IgnoreNulls,
            exp.JSONExtract,
            exp.Lead,
            exp.Left,
            exp.Lower,
            exp.NthValue,
            exp.Pad,
            exp.PercentileDisc,
            exp.RegexpExtract,
            exp.RegexpReplace,
            exp.Repeat,
            exp.Replace,
            exp.RespectNulls,
            exp.Reverse,
            exp.Right,
            exp.SafeNegate,
            exp.Sign,
            exp.Substring,
            exp.TimestampTrunc,
            exp.Translate,
            exp.Trim,
            exp.Upper,
        }
    },
    # --- Fixed return types, grouped by type --------------------------------
    **{
        expr_cls: {"returns": exp.DataType.Type.BIGINT}
        for expr_cls in {
            exp.Ascii,
            exp.BitwiseAndAgg,
            exp.BitwiseCount,
            exp.BitwiseOrAgg,
            exp.BitwiseXorAgg,
            exp.ByteLength,
            exp.DenseRank,
            exp.FarmFingerprint,
            exp.Grouping,
            exp.LaxInt64,
            exp.Length,
            exp.Ntile,
            exp.Rank,
            exp.RangeBucket,
            exp.RegexpInstr,
            exp.RowNumber,
            exp.Unicode,
        }
    },
    **{
        expr_cls: {"returns": exp.DataType.Type.BINARY}
        for expr_cls in {
            exp.ByteString,
            exp.CodePointsToBytes,
            exp.MD5Digest,
            exp.SHA,
            exp.SHA2,
            exp.SHA1Digest,
            exp.SHA2Digest,
            exp.Unhex,
        }
    },
    **{
        expr_cls: {"returns": exp.DataType.Type.BOOLEAN}
        for expr_cls in {
            exp.IsInf,
            exp.IsNan,
            exp.JSONBool,
            exp.LaxBool,
        }
    },
    **{
        expr_cls: {"returns": exp.DataType.Type.DATETIME}
        for expr_cls in {
            exp.ParseDatetime,
            exp.TimestampFromParts,
        }
    },
    **{
        expr_cls: {"returns": exp.DataType.Type.DOUBLE}
        for expr_cls in {
            exp.Acos,
            exp.Acosh,
            exp.Asin,
            exp.Asinh,
            exp.Atan,
            exp.Atan2,
            exp.Atanh,
            exp.Cbrt,
            exp.Corr,
            exp.CosineDistance,
            exp.Cot,
            exp.Coth,
            exp.CovarPop,
            exp.CovarSamp,
            exp.Csc,
            exp.Csch,
            exp.CumeDist,
            exp.EuclideanDistance,
            exp.Float64,
            exp.LaxFloat64,
            exp.PercentRank,
            exp.Rand,
            exp.Sec,
            exp.Sech,
            exp.Sin,
            exp.Sinh,
        }
    },
    **{
        expr_cls: {"returns": exp.DataType.Type.JSON}
        for expr_cls in {
            exp.JSONArray,
            exp.JSONArrayAppend,
            exp.JSONArrayInsert,
            exp.JSONObject,
            exp.JSONRemove,
            exp.JSONSet,
            exp.JSONStripNulls,
        }
    },
    **{
        expr_cls: {"returns": exp.DataType.Type.TIME}
        for expr_cls in {
            exp.ParseTime,
            exp.TimeFromParts,
            exp.TimeTrunc,
            exp.TsOrDsToTime,
        }
    },
    **{
        expr_cls: {"returns": exp.DataType.Type.VARCHAR}
        for expr_cls in {
            exp.CodePointsToString,
            exp.Format,
            exp.JSONExtractScalar,
            exp.JSONType,
            exp.LaxString,
            exp.LowerHex,
            exp.MD5,
            exp.NetHost,
            exp.Normalize,
            exp.SafeConvertBytesToString,
            exp.Soundex,
            exp.Uuid,
        }
    },
    # --- Type coerced from both operands -------------------------------------
    # SafeDivide is listed here, but its dedicated entry near the end of this
    # table replaces this one.
    **{
        expr_cls: {"annotator": lambda self, e: _annotate_by_args_with_coerce(self, e)}
        for expr_cls in {
            exp.PercentileCont,
            exp.SafeAdd,
            exp.SafeDivide,
            exp.SafeMultiply,
            exp.SafeSubtract,
        }
    },
    # --- ARRAY of the "this" argument's type ---------------------------------
    **{
        expr_cls: {"annotator": lambda self, e: self._annotate_by_args(e, "this", array=True)}
        for expr_cls in {
            exp.ApproxQuantiles,
            exp.JSONExtractArray,
            exp.RegexpExtractAll,
            exp.Split,
        }
    },
    # The shared timestamp group is TIMESTAMPTZ in this table.
    **{
        expr_cls: {"returns": exp.DataType.Type.TIMESTAMPTZ} for expr_cls in TIMESTAMP_EXPRESSIONS
    },
    # --- One-off entries -----------------------------------------------------
    exp.ApproxTopK: {"annotator": lambda self, e: _annotate_by_args_approx_top(self, e)},
    exp.ApproxTopSum: {"annotator": lambda self, e: _annotate_by_args_approx_top(self, e)},
    exp.Array: {"annotator": _annotate_array},
    exp.ArrayConcat: {
        "annotator": lambda self, e: self._annotate_by_args(e, "this", "expressions")
    },
    exp.Concat: {"annotator": _annotate_concat},
    exp.DateFromUnixDate: {"returns": exp.DataType.Type.DATE},
    exp.GenerateTimestampArray: {
        "annotator": lambda self, e: self._set_type(
            e, exp.DataType.build("ARRAY<TIMESTAMP>", dialect="bigquery")
        )
    },
    # JSON when the "to_json" arg is set, VARCHAR otherwise.
    exp.JSONFormat: {
        "annotator": lambda self, e: self._set_type(
            e, exp.DataType.Type.JSON if e.args.get("to_json") else exp.DataType.Type.VARCHAR
        )
    },
    exp.JSONKeysAtDepth: {
        "annotator": lambda self, e: self._set_type(
            e, exp.DataType.build("ARRAY<VARCHAR>", dialect="bigquery")
        )
    },
    exp.JSONValueArray: {
        "annotator": lambda self, e: self._set_type(
            e, exp.DataType.build("ARRAY<VARCHAR>", dialect="bigquery")
        )
    },
    exp.Lag: {"annotator": lambda self, e: self._annotate_by_args(e, "this", "default")},
    exp.ParseBignumeric: {"returns": exp.DataType.Type.BIGDECIMAL},
    exp.ParseNumeric: {"returns": exp.DataType.Type.DECIMAL},
    exp.SafeDivide: {"annotator": lambda self, e: _annotate_safe_divide(self, e)},
    exp.ToCodePoints: {
        "annotator": lambda self, e: self._set_type(
            e, exp.DataType.build("ARRAY<BIGINT>", dialect="bigquery")
        )
    },
}
sqlglot/typing/hive.py ADDED
@@ -0,0 +1,12 @@
1
+ from __future__ import annotations
2
+
3
+ from sqlglot import exp
4
+ from sqlglot.typing import EXPRESSION_METADATA
5
+
6
# Hive typing table: the imported default table with If and Coalesce replaced
# by variants that pass promote=True to _annotate_by_args.
EXPRESSION_METADATA = {
    **EXPRESSION_METADATA,
    exp.If: {
        "annotator": lambda self, e: self._annotate_by_args(e, "true", "false", promote=True)
    },
    exp.Coalesce: {
        "annotator": lambda self, e: self._annotate_by_args(e, "this", "expressions", promote=True)
    },
}