sqlglot 27.27.0__py3-none-any.whl → 28.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. sqlglot/__init__.py +1 -0
  2. sqlglot/__main__.py +6 -4
  3. sqlglot/_version.py +2 -2
  4. sqlglot/dialects/bigquery.py +118 -279
  5. sqlglot/dialects/clickhouse.py +73 -5
  6. sqlglot/dialects/databricks.py +38 -1
  7. sqlglot/dialects/dialect.py +354 -275
  8. sqlglot/dialects/dremio.py +4 -1
  9. sqlglot/dialects/duckdb.py +754 -25
  10. sqlglot/dialects/exasol.py +243 -10
  11. sqlglot/dialects/hive.py +8 -8
  12. sqlglot/dialects/mysql.py +14 -4
  13. sqlglot/dialects/oracle.py +29 -0
  14. sqlglot/dialects/postgres.py +60 -26
  15. sqlglot/dialects/presto.py +47 -16
  16. sqlglot/dialects/redshift.py +16 -0
  17. sqlglot/dialects/risingwave.py +3 -0
  18. sqlglot/dialects/singlestore.py +12 -3
  19. sqlglot/dialects/snowflake.py +239 -218
  20. sqlglot/dialects/spark.py +15 -4
  21. sqlglot/dialects/spark2.py +11 -48
  22. sqlglot/dialects/sqlite.py +10 -0
  23. sqlglot/dialects/starrocks.py +3 -0
  24. sqlglot/dialects/teradata.py +5 -8
  25. sqlglot/dialects/trino.py +6 -0
  26. sqlglot/dialects/tsql.py +61 -22
  27. sqlglot/diff.py +4 -2
  28. sqlglot/errors.py +69 -0
  29. sqlglot/executor/__init__.py +5 -10
  30. sqlglot/executor/python.py +1 -29
  31. sqlglot/expressions.py +637 -100
  32. sqlglot/generator.py +160 -43
  33. sqlglot/helper.py +2 -44
  34. sqlglot/lineage.py +10 -4
  35. sqlglot/optimizer/annotate_types.py +247 -140
  36. sqlglot/optimizer/canonicalize.py +6 -1
  37. sqlglot/optimizer/eliminate_joins.py +1 -1
  38. sqlglot/optimizer/eliminate_subqueries.py +2 -2
  39. sqlglot/optimizer/merge_subqueries.py +5 -5
  40. sqlglot/optimizer/normalize.py +20 -13
  41. sqlglot/optimizer/normalize_identifiers.py +17 -3
  42. sqlglot/optimizer/optimizer.py +4 -0
  43. sqlglot/optimizer/pushdown_predicates.py +1 -1
  44. sqlglot/optimizer/qualify.py +18 -10
  45. sqlglot/optimizer/qualify_columns.py +122 -275
  46. sqlglot/optimizer/qualify_tables.py +128 -76
  47. sqlglot/optimizer/resolver.py +374 -0
  48. sqlglot/optimizer/scope.py +27 -16
  49. sqlglot/optimizer/simplify.py +1075 -959
  50. sqlglot/optimizer/unnest_subqueries.py +12 -2
  51. sqlglot/parser.py +296 -170
  52. sqlglot/planner.py +2 -2
  53. sqlglot/schema.py +15 -4
  54. sqlglot/tokens.py +42 -7
  55. sqlglot/transforms.py +77 -22
  56. sqlglot/typing/__init__.py +316 -0
  57. sqlglot/typing/bigquery.py +376 -0
  58. sqlglot/typing/hive.py +12 -0
  59. sqlglot/typing/presto.py +24 -0
  60. sqlglot/typing/snowflake.py +505 -0
  61. sqlglot/typing/spark2.py +58 -0
  62. sqlglot/typing/tsql.py +9 -0
  63. {sqlglot-27.27.0.dist-info → sqlglot-28.4.0.dist-info}/METADATA +2 -2
  64. sqlglot-28.4.0.dist-info/RECORD +92 -0
  65. sqlglot-27.27.0.dist-info/RECORD +0 -84
  66. {sqlglot-27.27.0.dist-info → sqlglot-28.4.0.dist-info}/WHEEL +0 -0
  67. {sqlglot-27.27.0.dist-info → sqlglot-28.4.0.dist-info}/licenses/LICENSE +0 -0
  68. {sqlglot-27.27.0.dist-info → sqlglot-28.4.0.dist-info}/top_level.txt +0 -0
sqlglot/__init__.py CHANGED
@@ -29,6 +29,7 @@ from sqlglot.expressions import (
29
29
  condition as condition,
30
30
  delete as delete,
31
31
  except_ as except_,
32
+ find_tables as find_tables,
32
33
  from_ as from_,
33
34
  func as func,
34
35
  insert as insert,
sqlglot/__main__.py CHANGED
@@ -5,6 +5,7 @@ import sys
5
5
  import typing as t
6
6
 
7
7
  import sqlglot
8
+ from sqlglot.helper import to_bool
8
9
 
9
10
  parser = argparse.ArgumentParser(description="Transpile SQL")
10
11
  parser.add_argument(
@@ -28,10 +29,11 @@ parser.add_argument(
28
29
  help="Dialect to write default is generic",
29
30
  )
30
31
  parser.add_argument(
31
- "--no-identify",
32
+ "--identify",
32
33
  dest="identify",
33
- action="store_false",
34
- help="Don't auto identify fields",
34
+ type=str,
35
+ default="safe",
36
+ help="Whether to quote identifiers (safe, true, false)",
35
37
  )
36
38
  parser.add_argument(
37
39
  "--no-pretty",
@@ -87,7 +89,7 @@ else:
87
89
  sql,
88
90
  read=args.read,
89
91
  write=args.write,
90
- identify=args.identify,
92
+ identify="safe" if args.identify == "safe" else to_bool(args.identify),
91
93
  pretty=args.pretty,
92
94
  error_level=error_level,
93
95
  )
sqlglot/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '27.27.0'
32
- __version_tuple__ = version_tuple = (27, 27, 0)
31
+ __version__ = version = '28.4.0'
32
+ __version_tuple__ = version_tuple = (28, 4, 0)
33
33
 
34
34
  __commit_id__ = commit_id = None
sqlglot/dialects/bigquery.py CHANGED
@@ -12,7 +12,6 @@ from sqlglot._typing import E
12
12
  from sqlglot.dialects.dialect import (
13
13
  Dialect,
14
14
  NormalizationStrategy,
15
- annotate_with_type_lambda,
16
15
  arg_max_or_min_no_count,
17
16
  binary_from_function,
18
17
  date_add_interval_sql,
@@ -33,10 +32,12 @@ from sqlglot.dialects.dialect import (
33
32
  unit_to_var,
34
33
  strposition_sql,
35
34
  groupconcat_sql,
35
+ sha2_digest_sql,
36
36
  )
37
+ from sqlglot.generator import unsupported_args
37
38
  from sqlglot.helper import seq_get, split_num_words
38
39
  from sqlglot.tokens import TokenType
39
- from sqlglot.generator import unsupported_args
40
+ from sqlglot.typing.bigquery import EXPRESSION_METADATA
40
41
 
41
42
  if t.TYPE_CHECKING:
42
43
  from sqlglot._typing import Lit
@@ -241,10 +242,28 @@ def _build_datetime(args: t.List) -> exp.Func:
241
242
  return exp.TimestampFromParts.from_arg_list(args)
242
243
 
243
244
 
245
+ def build_date_diff(args: t.List) -> exp.Expression:
246
+ expr = exp.DateDiff(
247
+ this=seq_get(args, 0),
248
+ expression=seq_get(args, 1),
249
+ unit=seq_get(args, 2),
250
+ date_part_boundary=True,
251
+ )
252
+
253
+ # Normalize plain WEEK to WEEK(SUNDAY) to preserve the semantic in the AST to facilitate transpilation
254
+ # This is done post exp.DateDiff construction since the TimeUnit mixin performs canonicalizations in its constructor too
255
+ unit = expr.args.get("unit")
256
+
257
+ if isinstance(unit, exp.Var) and unit.name.upper() == "WEEK":
258
+ expr.set("unit", exp.WeekStart(this=exp.var("SUNDAY")))
259
+
260
+ return expr
261
+
262
+
244
263
  def _build_regexp_extract(
245
264
  expr_type: t.Type[E], default_group: t.Optional[exp.Expression] = None
246
- ) -> t.Callable[[t.List], E]:
247
- def _builder(args: t.List) -> E:
265
+ ) -> t.Callable[[t.List, BigQuery], E]:
266
+ def _builder(args: t.List, dialect: BigQuery) -> E:
248
267
  try:
249
268
  group = re.compile(args[1].name).groups == 1
250
269
  except re.error:
@@ -257,6 +276,11 @@ def _build_regexp_extract(
257
276
  position=seq_get(args, 2),
258
277
  occurrence=seq_get(args, 3),
259
278
  group=exp.Literal.number(1) if group else default_group,
279
+ **(
280
+ {"null_if_pos_overflow": dialect.REGEXP_EXTRACT_POSITION_OVERFLOW_RETURNS_NULL}
281
+ if expr_type is exp.RegexpExtract
282
+ else {}
283
+ ),
260
284
  )
261
285
 
262
286
  return _builder
@@ -290,59 +314,6 @@ def _str_to_datetime_sql(
290
314
  return self.func(f"PARSE_{dtype}", fmt, this, expression.args.get("zone"))
291
315
 
292
316
 
293
- def _annotate_math_functions(self: TypeAnnotator, expression: E) -> E:
294
- """
295
- Many BigQuery math functions such as CEIL, FLOOR etc follow this return type convention:
296
- +---------+---------+---------+------------+---------+
297
- | INPUT | INT64 | NUMERIC | BIGNUMERIC | FLOAT64 |
298
- +---------+---------+---------+------------+---------+
299
- | OUTPUT | FLOAT64 | NUMERIC | BIGNUMERIC | FLOAT64 |
300
- +---------+---------+---------+------------+---------+
301
- """
302
- self._annotate_args(expression)
303
-
304
- this: exp.Expression = expression.this
305
-
306
- self._set_type(
307
- expression,
308
- exp.DataType.Type.DOUBLE if this.is_type(*exp.DataType.INTEGER_TYPES) else this.type,
309
- )
310
- return expression
311
-
312
-
313
- def _annotate_by_args_with_coerce(self: TypeAnnotator, expression: E) -> E:
314
- """
315
- +------------+------------+------------+-------------+---------+
316
- | INPUT | INT64 | NUMERIC | BIGNUMERIC | FLOAT64 |
317
- +------------+------------+------------+-------------+---------+
318
- | INT64 | INT64 | NUMERIC | BIGNUMERIC | FLOAT64 |
319
- | NUMERIC | NUMERIC | NUMERIC | BIGNUMERIC | FLOAT64 |
320
- | BIGNUMERIC | BIGNUMERIC | BIGNUMERIC | BIGNUMERIC | FLOAT64 |
321
- | FLOAT64 | FLOAT64 | FLOAT64 | FLOAT64 | FLOAT64 |
322
- +------------+------------+------------+-------------+---------+
323
- """
324
- self._annotate_args(expression)
325
-
326
- self._set_type(expression, self._maybe_coerce(expression.this.type, expression.expression.type))
327
- return expression
328
-
329
-
330
- def _annotate_by_args_approx_top(self: TypeAnnotator, expression: exp.ApproxTopK) -> exp.ApproxTopK:
331
- self._annotate_args(expression)
332
-
333
- struct_type = exp.DataType(
334
- this=exp.DataType.Type.STRUCT,
335
- expressions=[expression.this.type, exp.DataType(this=exp.DataType.Type.BIGINT)],
336
- nested=True,
337
- )
338
- self._set_type(
339
- expression,
340
- exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[struct_type], nested=True),
341
- )
342
-
343
- return expression
344
-
345
-
346
317
  @unsupported_args("ins_cost", "del_cost", "sub_cost")
347
318
  def _levenshtein_sql(self: BigQuery.Generator, expression: exp.Levenshtein) -> str:
348
319
  max_dist = expression.args.get("max_dist")
@@ -363,11 +334,11 @@ def _build_levenshtein(args: t.List) -> exp.Levenshtein:
363
334
 
364
335
  def _build_format_time(expr_type: t.Type[exp.Expression]) -> t.Callable[[t.List], exp.TimeToStr]:
365
336
  def _builder(args: t.List) -> exp.TimeToStr:
366
- return exp.TimeToStr(
367
- this=expr_type(this=seq_get(args, 1)),
368
- format=seq_get(args, 0),
369
- zone=seq_get(args, 2),
337
+ formatted_time = build_formatted_time(exp.TimeToStr, "bigquery")(
338
+ [expr_type(this=seq_get(args, 1)), seq_get(args, 0)]
370
339
  )
340
+ formatted_time.set("zone", seq_get(args, 2))
341
+ return formatted_time
371
342
 
372
343
  return _builder
373
344
 
@@ -398,44 +369,6 @@ def _json_extract_sql(self: BigQuery.Generator, expression: JSON_EXTRACT_TYPE) -
398
369
  return sql
399
370
 
400
371
 
401
- def _annotate_concat(self: TypeAnnotator, expression: exp.Concat) -> exp.Concat:
402
- annotated = self._annotate_by_args(expression, "expressions")
403
-
404
- # Args must be BYTES or types that can be cast to STRING, return type is either BYTES or STRING
405
- # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#concat
406
- if not annotated.is_type(exp.DataType.Type.BINARY, exp.DataType.Type.UNKNOWN):
407
- annotated.type = exp.DataType.Type.VARCHAR
408
-
409
- return annotated
410
-
411
-
412
- def _annotate_array(self: TypeAnnotator, expression: exp.Array) -> exp.Array:
413
- array_args = expression.expressions
414
-
415
- # BigQuery behaves as follows:
416
- #
417
- # SELECT t, TYPEOF(t) FROM (SELECT 'foo') AS t -- foo, STRUCT<STRING>
418
- # SELECT ARRAY(SELECT 'foo'), TYPEOF(ARRAY(SELECT 'foo')) -- foo, ARRAY<STRING>
419
- if (
420
- len(array_args) == 1
421
- and isinstance(select := array_args[0].unnest(), exp.Select)
422
- and (query_type := select.meta.get("query_type")) is not None
423
- and query_type.is_type(exp.DataType.Type.STRUCT)
424
- and len(query_type.expressions) == 1
425
- and isinstance(col_def := query_type.expressions[0], exp.ColumnDef)
426
- and (projection_type := col_def.kind) is not None
427
- and not projection_type.is_type(exp.DataType.Type.UNKNOWN)
428
- ):
429
- array_type = exp.DataType(
430
- this=exp.DataType.Type.ARRAY,
431
- expressions=[projection_type.copy()],
432
- nested=True,
433
- )
434
- return self._annotate_with_type(expression, array_type)
435
-
436
- return self._annotate_by_args(expression, "expressions", array=True)
437
-
438
-
439
372
  class BigQuery(Dialect):
440
373
  WEEK_OFFSET = -1
441
374
  UNNEST_COLUMN_ONLY = True
@@ -444,8 +377,22 @@ class BigQuery(Dialect):
444
377
  LOG_BASE_FIRST = False
445
378
  HEX_LOWERCASE = True
446
379
  FORCE_EARLY_ALIAS_REF_EXPANSION = True
380
+ EXPAND_ONLY_GROUP_ALIAS_REF = True
447
381
  PRESERVE_ORIGINAL_NAMES = True
448
382
  HEX_STRING_IS_INTEGER_TYPE = True
383
+ BYTE_STRING_IS_BYTES_TYPE = True
384
+ UUID_IS_STRING_TYPE = True
385
+ ANNOTATE_ALL_SCOPES = True
386
+ PROJECTION_ALIASES_SHADOW_SOURCE_NAMES = True
387
+ TABLES_REFERENCEABLE_AS_COLUMNS = True
388
+ SUPPORTS_STRUCT_STAR_EXPANSION = True
389
+ EXCLUDES_PSEUDOCOLUMNS_FROM_STAR = True
390
+ QUERY_RESULTS_ARE_STRUCTS = True
391
+ JSON_EXTRACT_SCALAR_SCALAR_ONLY = True
392
+ DEFAULT_NULL_TYPE = exp.DataType.Type.BIGINT
393
+
394
+ # https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#initcap
395
+ INITCAP_DEFAULT_DELIMITER_CHARS = ' \t\n\r\f\v\\[\\](){}/|<>!?@"^#$&~_,.:;*%+\\-'
449
396
 
450
397
  # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
451
398
  NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE
@@ -455,9 +402,19 @@ class BigQuery(Dialect):
455
402
 
456
403
  # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time
457
404
  TIME_MAPPING = {
405
+ "%x": "%m/%d/%y",
458
406
  "%D": "%m/%d/%y",
459
407
  "%E6S": "%S.%f",
460
408
  "%e": "%-d",
409
+ "%F": "%Y-%m-%d",
410
+ "%T": "%H:%M:%S",
411
+ "%c": "%a %b %e %H:%M:%S %Y",
412
+ }
413
+
414
+ INVERSE_TIME_MAPPING = {
415
+ # Preserve %E6S instead of expanding to %T.%f - since both %E6S & %T.%f are semantically different in BigQuery
416
+ # %E6S is semantically different from %T.%f: %E6S works as a single atomic specifier for seconds with microseconds, while %T.%f expands incorrectly and fails to parse.
417
+ "%H:%M:%S.%f": "%H:%M:%E6S",
461
418
  }
462
419
 
463
420
  FORMAT_MAPPING = {
@@ -480,7 +437,13 @@ class BigQuery(Dialect):
480
437
  # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
481
438
  # https://cloud.google.com/bigquery/docs/querying-wildcard-tables#scanning_a_range_of_tables_using_table_suffix
482
439
  # https://cloud.google.com/bigquery/docs/query-cloud-storage-data#query_the_file_name_pseudo-column
483
- PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE", "_TABLE_SUFFIX", "_FILE_NAME"}
440
+ PSEUDOCOLUMNS = {
441
+ "_PARTITIONTIME",
442
+ "_PARTITIONDATE",
443
+ "_TABLE_SUFFIX",
444
+ "_FILE_NAME",
445
+ "_DBT_MAX_PARTITION",
446
+ }
484
447
 
485
448
  # All set operations require either a DISTINCT or ALL specifier
486
449
  SET_OP_DISTINCT_BY_DEFAULT = dict.fromkeys((exp.Except, exp.Intersect, exp.Union), None)
@@ -492,182 +455,16 @@ class BigQuery(Dialect):
492
455
  }
493
456
  COERCES_TO[exp.DataType.Type.DECIMAL] |= {exp.DataType.Type.BIGDECIMAL}
494
457
  COERCES_TO[exp.DataType.Type.BIGINT] |= {exp.DataType.Type.BIGDECIMAL}
495
-
496
- # BigQuery maps Type.TIMESTAMP to DATETIME, so we need to amend the inferred types
497
- TYPE_TO_EXPRESSIONS = {
498
- **Dialect.TYPE_TO_EXPRESSIONS,
499
- exp.DataType.Type.TIMESTAMPTZ: Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.TIMESTAMP],
500
- }
501
- TYPE_TO_EXPRESSIONS.pop(exp.DataType.Type.TIMESTAMP)
502
-
503
- ANNOTATORS = {
504
- **Dialect.ANNOTATORS,
505
- **{
506
- expr_type: annotate_with_type_lambda(data_type)
507
- for data_type, expressions in TYPE_TO_EXPRESSIONS.items()
508
- for expr_type in expressions
509
- },
510
- **{
511
- expr_type: lambda self, e: _annotate_math_functions(self, e)
512
- for expr_type in (exp.Floor, exp.Ceil, exp.Log, exp.Ln, exp.Sqrt, exp.Exp, exp.Round)
513
- },
514
- **{
515
- expr_type: lambda self, e: self._annotate_by_args(e, "this")
516
- for expr_type in (
517
- exp.Abs,
518
- exp.ArgMax,
519
- exp.ArgMin,
520
- exp.DateTrunc,
521
- exp.DatetimeTrunc,
522
- exp.FirstValue,
523
- exp.GroupConcat,
524
- exp.IgnoreNulls,
525
- exp.JSONExtract,
526
- exp.Lead,
527
- exp.Left,
528
- exp.Lower,
529
- exp.NthValue,
530
- exp.Pad,
531
- exp.PercentileDisc,
532
- exp.RegexpExtract,
533
- exp.RegexpReplace,
534
- exp.Repeat,
535
- exp.Replace,
536
- exp.RespectNulls,
537
- exp.Reverse,
538
- exp.Right,
539
- exp.SafeNegate,
540
- exp.Sign,
541
- exp.Substring,
542
- exp.TimestampTrunc,
543
- exp.Translate,
544
- exp.Trim,
545
- exp.Upper,
546
- )
547
- },
548
- exp.Acos: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
549
- exp.Acosh: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
550
- exp.Asin: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
551
- exp.Asinh: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
552
- exp.Atan: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
553
- exp.Atanh: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
554
- exp.Atan2: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
555
- exp.ApproxTopSum: lambda self, e: _annotate_by_args_approx_top(self, e),
556
- exp.ApproxTopK: lambda self, e: _annotate_by_args_approx_top(self, e),
557
- exp.ApproxQuantiles: lambda self, e: self._annotate_by_args(e, "this", array=True),
558
- exp.Array: _annotate_array,
559
- exp.ArrayConcat: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
560
- exp.Ascii: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
561
- exp.BitwiseAndAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
562
- exp.BitwiseOrAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
563
- exp.BitwiseXorAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
564
- exp.BitwiseCountAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
565
- exp.ByteLength: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
566
- exp.ByteString: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
567
- exp.Cbrt: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
568
- exp.CodePointsToBytes: lambda self, e: self._annotate_with_type(
569
- e, exp.DataType.Type.BINARY
570
- ),
571
- exp.CodePointsToString: lambda self, e: self._annotate_with_type(
572
- e, exp.DataType.Type.VARCHAR
573
- ),
574
- exp.Concat: _annotate_concat,
575
- exp.Corr: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
576
- exp.Cot: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
577
- exp.CosineDistance: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
578
- exp.Coth: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
579
- exp.CovarPop: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
580
- exp.CovarSamp: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
581
- exp.Csc: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
582
- exp.Csch: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
583
- exp.CumeDist: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
584
- exp.DateFromUnixDate: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DATE),
585
- exp.DenseRank: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
586
- exp.EuclideanDistance: lambda self, e: self._annotate_with_type(
587
- e, exp.DataType.Type.DOUBLE
588
- ),
589
- exp.FarmFingerprint: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
590
- exp.Unhex: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
591
- exp.Float64: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
592
- exp.Format: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
593
- exp.GenerateTimestampArray: lambda self, e: self._annotate_with_type(
594
- e, exp.DataType.build("ARRAY<TIMESTAMP>", dialect="bigquery")
595
- ),
596
- exp.Grouping: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
597
- exp.IsInf: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BOOLEAN),
598
- exp.IsNan: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BOOLEAN),
599
- exp.JSONArray: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON),
600
- exp.JSONArrayAppend: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON),
601
- exp.JSONArrayInsert: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON),
602
- exp.JSONBool: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BOOLEAN),
603
- exp.JSONExtractScalar: lambda self, e: self._annotate_with_type(
604
- e, exp.DataType.Type.VARCHAR
605
- ),
606
- exp.JSONExtractArray: lambda self, e: self._annotate_by_args(e, "this", array=True),
607
- exp.JSONFormat: lambda self, e: self._annotate_with_type(
608
- e, exp.DataType.Type.JSON if e.args.get("to_json") else exp.DataType.Type.VARCHAR
609
- ),
610
- exp.JSONKeysAtDepth: lambda self, e: self._annotate_with_type(
611
- e, exp.DataType.build("ARRAY<VARCHAR>", dialect="bigquery")
612
- ),
613
- exp.JSONObject: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON),
614
- exp.JSONRemove: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON),
615
- exp.JSONSet: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON),
616
- exp.JSONStripNulls: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON),
617
- exp.JSONType: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
618
- exp.JSONValueArray: lambda self, e: self._annotate_with_type(
619
- e, exp.DataType.build("ARRAY<VARCHAR>", dialect="bigquery")
620
- ),
621
- exp.Lag: lambda self, e: self._annotate_by_args(e, "this", "default"),
622
- exp.LowerHex: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
623
- exp.LaxBool: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BOOLEAN),
624
- exp.LaxFloat64: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
625
- exp.LaxInt64: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
626
- exp.LaxString: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
627
- exp.MD5Digest: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
628
- exp.Normalize: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
629
- exp.Ntile: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
630
- exp.ParseTime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
631
- exp.ParseDatetime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DATETIME),
632
- exp.ParseBignumeric: lambda self, e: self._annotate_with_type(
633
- e, exp.DataType.Type.BIGDECIMAL
634
- ),
635
- exp.ParseNumeric: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DECIMAL),
636
- exp.PercentileCont: lambda self, e: _annotate_by_args_with_coerce(self, e),
637
- exp.PercentRank: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
638
- exp.Rank: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
639
- exp.RangeBucket: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
640
- exp.RegexpExtractAll: lambda self, e: self._annotate_by_args(e, "this", array=True),
641
- exp.RegexpInstr: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
642
- exp.RowNumber: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
643
- exp.Rand: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
644
- exp.SafeConvertBytesToString: lambda self, e: self._annotate_with_type(
645
- e, exp.DataType.Type.VARCHAR
646
- ),
647
- exp.SafeAdd: lambda self, e: _annotate_by_args_with_coerce(self, e),
648
- exp.SafeMultiply: lambda self, e: _annotate_by_args_with_coerce(self, e),
649
- exp.SafeSubtract: lambda self, e: _annotate_by_args_with_coerce(self, e),
650
- exp.Sec: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
651
- exp.Sech: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
652
- exp.Soundex: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
653
- exp.SHA: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
654
- exp.SHA2: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
655
- exp.Sin: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
656
- exp.Sinh: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
657
- exp.Split: lambda self, e: self._annotate_by_args(e, "this", array=True),
658
- exp.TimestampFromParts: lambda self, e: self._annotate_with_type(
659
- e, exp.DataType.Type.DATETIME
660
- ),
661
- exp.TimeFromParts: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
662
- exp.TimeTrunc: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
663
- exp.ToCodePoints: lambda self, e: self._annotate_with_type(
664
- e, exp.DataType.build("ARRAY<BIGINT>", dialect="bigquery")
665
- ),
666
- exp.TsOrDsToTime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
667
- exp.Unicode: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
668
- exp.Uuid: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
458
+ COERCES_TO[exp.DataType.Type.VARCHAR] |= {
459
+ exp.DataType.Type.DATE,
460
+ exp.DataType.Type.DATETIME,
461
+ exp.DataType.Type.TIME,
462
+ exp.DataType.Type.TIMESTAMP,
463
+ exp.DataType.Type.TIMESTAMPTZ,
669
464
  }
670
465
 
466
+ EXPRESSION_METADATA = EXPRESSION_METADATA.copy()
467
+
671
468
  def normalize_identifier(self, expression: E) -> E:
672
469
  if (
673
470
  isinstance(expression, exp.Identifier)
@@ -786,11 +583,12 @@ class BigQuery(Dialect):
786
583
  "BIT_AND": exp.BitwiseAndAgg.from_arg_list,
787
584
  "BIT_OR": exp.BitwiseOrAgg.from_arg_list,
788
585
  "BIT_XOR": exp.BitwiseXorAgg.from_arg_list,
789
- "BIT_COUNT": exp.BitwiseCountAgg.from_arg_list,
586
+ "BIT_COUNT": exp.BitwiseCount.from_arg_list,
790
587
  "BOOL": exp.JSONBool.from_arg_list,
791
588
  "CONTAINS_SUBSTR": _build_contains_substring,
792
589
  "DATE": _build_date,
793
590
  "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
591
+ "DATE_DIFF": build_date_diff,
794
592
  "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
795
593
  "DATE_TRUNC": lambda args: exp.DateTrunc(
796
594
  unit=seq_get(args, 1),
@@ -804,6 +602,12 @@ class BigQuery(Dialect):
804
602
  "EDIT_DISTANCE": _build_levenshtein,
805
603
  "FORMAT_DATE": _build_format_time(exp.TsOrDsToDate),
806
604
  "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
605
+ "GREATEST": lambda args: exp.Greatest(
606
+ this=seq_get(args, 0), expressions=args[1:], null_if_any_null=True
607
+ ),
608
+ "LEAST": lambda args: exp.Least(
609
+ this=seq_get(args, 0), expressions=args[1:], null_if_any_null=True
610
+ ),
807
611
  "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar),
808
612
  "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray),
809
613
  "JSON_EXTRACT_STRING_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray),
@@ -815,6 +619,7 @@ class BigQuery(Dialect):
815
619
  "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray),
816
620
  "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
817
621
  "MD5": exp.MD5Digest.from_arg_list,
622
+ "SHA1": exp.SHA1Digest.from_arg_list,
818
623
  "NORMALIZE_AND_CASEFOLD": lambda args: exp.Normalize(
819
624
  this=seq_get(args, 0), form=seq_get(args, 1), is_casefold=True
820
625
  ),
@@ -836,7 +641,9 @@ class BigQuery(Dialect):
836
641
  "REGEXP_EXTRACT_ALL": _build_regexp_extract(
837
642
  exp.RegexpExtractAll, default_group=exp.Literal.number(0)
838
643
  ),
839
- "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
644
+ "SHA256": lambda args: exp.SHA2Digest(
645
+ this=seq_get(args, 0), length=exp.Literal.number(256)
646
+ ),
840
647
  "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
841
648
  "SPLIT": lambda args: exp.Split(
842
649
  # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
@@ -872,7 +679,11 @@ class BigQuery(Dialect):
872
679
 
873
680
  FUNCTION_PARSERS = {
874
681
  **parser.Parser.FUNCTION_PARSERS,
875
- "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
682
+ "ARRAY": lambda self: self.expression(
683
+ exp.Array,
684
+ expressions=[self._parse_statement()],
685
+ struct_name_inheritance=True,
686
+ ),
876
687
  "JSON_ARRAY": lambda self: self.expression(
877
688
  exp.JSONArray, expressions=self._parse_csv(self._parse_bitwise)
878
689
  ),
@@ -1108,6 +919,9 @@ class BigQuery(Dialect):
1108
919
  ) -> t.Optional[exp.Expression]:
1109
920
  bracket = super()._parse_bracket(this)
1110
921
 
922
+ if isinstance(bracket, exp.Array):
923
+ bracket.set("struct_name_inheritance", True)
924
+
1111
925
  if this is bracket:
1112
926
  return bracket
1113
927
 
@@ -1254,6 +1068,24 @@ class BigQuery(Dialect):
1254
1068
  this=self._match_text_seq("AS") and self._parse_select(),
1255
1069
  )
1256
1070
 
1071
+ def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
1072
+ this = super()._parse_column_ops(this)
1073
+
1074
+ if isinstance(this, exp.Dot):
1075
+ prefix_name = this.this.name.upper()
1076
+ func_name = this.name.upper()
1077
+ if prefix_name == "NET":
1078
+ if func_name == "HOST":
1079
+ this = self.expression(
1080
+ exp.NetHost, this=seq_get(this.expression.expressions, 0)
1081
+ )
1082
+ elif prefix_name == "SAFE":
1083
+ if func_name == "TIMESTAMP":
1084
+ this = _build_timestamp(this.expression.expressions)
1085
+ this.set("safe", True)
1086
+
1087
+ return this
1088
+
1257
1089
  class Generator(generator.Generator):
1258
1090
  INTERVAL_ALLOWS_PLURAL_FORM = False
1259
1091
  JOIN_HINTS = False
@@ -1302,7 +1134,7 @@ class BigQuery(Dialect):
1302
1134
  exp.BitwiseAndAgg: rename_func("BIT_AND"),
1303
1135
  exp.BitwiseOrAgg: rename_func("BIT_OR"),
1304
1136
  exp.BitwiseXorAgg: rename_func("BIT_XOR"),
1305
- exp.BitwiseCountAgg: rename_func("BIT_COUNT"),
1137
+ exp.BitwiseCount: rename_func("BIT_COUNT"),
1306
1138
  exp.ByteLength: rename_func("BYTE_LENGTH"),
1307
1139
  exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
1308
1140
  exp.CollateProperty: lambda self, e: (
@@ -1329,7 +1161,7 @@ class BigQuery(Dialect):
1329
1161
  ),
1330
1162
  exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
1331
1163
  exp.GroupConcat: lambda self, e: groupconcat_sql(
1332
- self, e, func_name="STRING_AGG", within_group=False
1164
+ self, e, func_name="STRING_AGG", within_group=False, sep=None
1333
1165
  ),
1334
1166
  exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))),
1335
1167
  exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"),
@@ -1388,6 +1220,8 @@ class BigQuery(Dialect):
1388
1220
  ),
1389
1221
  exp.SHA: rename_func("SHA1"),
1390
1222
  exp.SHA2: sha256_sql,
1223
+ exp.SHA1Digest: rename_func("SHA1"),
1224
+ exp.SHA2Digest: sha2_digest_sql,
1391
1225
  exp.StabilityProperty: lambda self, e: (
1392
1226
  "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
1393
1227
  ),
@@ -1399,6 +1233,7 @@ class BigQuery(Dialect):
1399
1233
  ),
1400
1234
  exp.StrToDate: _str_to_datetime_sql,
1401
1235
  exp.StrToTime: _str_to_datetime_sql,
1236
+ exp.SessionUser: lambda *_: "SESSION_USER()",
1402
1237
  exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
1403
1238
  exp.TimeFromParts: rename_func("TIME"),
1404
1239
  exp.TimestampFromParts: rename_func("DATETIME"),
@@ -1716,3 +1551,7 @@ class BigQuery(Dialect):
1716
1551
  kind = f" {kind}" if kind else ""
1717
1552
 
1718
1553
  return f"{variables}{kind}{default}"
1554
+
1555
+ def timestamp_sql(self, expression: exp.Timestamp) -> str:
1556
+ prefix = "SAFE." if expression.args.get("safe") else ""
1557
+ return self.func(f"{prefix}TIMESTAMP", expression.this, expression.args.get("zone"))