sqlglot 27.29.0__py3-none-any.whl → 28.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. sqlglot/__main__.py +6 -4
  2. sqlglot/_version.py +2 -2
  3. sqlglot/dialects/bigquery.py +116 -295
  4. sqlglot/dialects/clickhouse.py +67 -2
  5. sqlglot/dialects/databricks.py +38 -1
  6. sqlglot/dialects/dialect.py +327 -286
  7. sqlglot/dialects/dremio.py +4 -1
  8. sqlglot/dialects/duckdb.py +718 -22
  9. sqlglot/dialects/exasol.py +243 -10
  10. sqlglot/dialects/hive.py +8 -8
  11. sqlglot/dialects/mysql.py +11 -2
  12. sqlglot/dialects/oracle.py +29 -0
  13. sqlglot/dialects/postgres.py +46 -24
  14. sqlglot/dialects/presto.py +47 -16
  15. sqlglot/dialects/redshift.py +16 -0
  16. sqlglot/dialects/risingwave.py +3 -0
  17. sqlglot/dialects/singlestore.py +12 -3
  18. sqlglot/dialects/snowflake.py +199 -271
  19. sqlglot/dialects/spark.py +2 -2
  20. sqlglot/dialects/spark2.py +11 -48
  21. sqlglot/dialects/sqlite.py +9 -0
  22. sqlglot/dialects/teradata.py +5 -8
  23. sqlglot/dialects/trino.py +6 -0
  24. sqlglot/dialects/tsql.py +61 -25
  25. sqlglot/diff.py +4 -2
  26. sqlglot/errors.py +69 -0
  27. sqlglot/expressions.py +484 -84
  28. sqlglot/generator.py +143 -41
  29. sqlglot/helper.py +2 -2
  30. sqlglot/optimizer/annotate_types.py +247 -140
  31. sqlglot/optimizer/canonicalize.py +6 -1
  32. sqlglot/optimizer/eliminate_joins.py +1 -1
  33. sqlglot/optimizer/eliminate_subqueries.py +2 -2
  34. sqlglot/optimizer/merge_subqueries.py +5 -5
  35. sqlglot/optimizer/normalize.py +20 -13
  36. sqlglot/optimizer/normalize_identifiers.py +17 -3
  37. sqlglot/optimizer/optimizer.py +4 -0
  38. sqlglot/optimizer/pushdown_predicates.py +1 -1
  39. sqlglot/optimizer/qualify.py +14 -6
  40. sqlglot/optimizer/qualify_columns.py +113 -352
  41. sqlglot/optimizer/qualify_tables.py +112 -70
  42. sqlglot/optimizer/resolver.py +374 -0
  43. sqlglot/optimizer/scope.py +27 -16
  44. sqlglot/optimizer/simplify.py +1074 -964
  45. sqlglot/optimizer/unnest_subqueries.py +12 -2
  46. sqlglot/parser.py +276 -160
  47. sqlglot/planner.py +2 -2
  48. sqlglot/schema.py +15 -4
  49. sqlglot/tokens.py +42 -7
  50. sqlglot/transforms.py +77 -22
  51. sqlglot/typing/__init__.py +316 -0
  52. sqlglot/typing/bigquery.py +376 -0
  53. sqlglot/typing/hive.py +12 -0
  54. sqlglot/typing/presto.py +24 -0
  55. sqlglot/typing/snowflake.py +505 -0
  56. sqlglot/typing/spark2.py +58 -0
  57. sqlglot/typing/tsql.py +9 -0
  58. {sqlglot-27.29.0.dist-info → sqlglot-28.4.0.dist-info}/METADATA +2 -2
  59. sqlglot-28.4.0.dist-info/RECORD +92 -0
  60. sqlglot-27.29.0.dist-info/RECORD +0 -84
  61. {sqlglot-27.29.0.dist-info → sqlglot-28.4.0.dist-info}/WHEEL +0 -0
  62. {sqlglot-27.29.0.dist-info → sqlglot-28.4.0.dist-info}/licenses/LICENSE +0 -0
  63. {sqlglot-27.29.0.dist-info → sqlglot-28.4.0.dist-info}/top_level.txt +0 -0
sqlglot/__main__.py CHANGED
@@ -5,6 +5,7 @@ import sys
5
5
  import typing as t
6
6
 
7
7
  import sqlglot
8
+ from sqlglot.helper import to_bool
8
9
 
9
10
  parser = argparse.ArgumentParser(description="Transpile SQL")
10
11
  parser.add_argument(
@@ -28,10 +29,11 @@ parser.add_argument(
28
29
  help="Dialect to write default is generic",
29
30
  )
30
31
  parser.add_argument(
31
- "--no-identify",
32
+ "--identify",
32
33
  dest="identify",
33
- action="store_false",
34
- help="Don't auto identify fields",
34
+ type=str,
35
+ default="safe",
36
+ help="Whether to quote identifiers (safe, true, false)",
35
37
  )
36
38
  parser.add_argument(
37
39
  "--no-pretty",
@@ -87,7 +89,7 @@ else:
87
89
  sql,
88
90
  read=args.read,
89
91
  write=args.write,
90
- identify=args.identify,
92
+ identify="safe" if args.identify == "safe" else to_bool(args.identify),
91
93
  pretty=args.pretty,
92
94
  error_level=error_level,
93
95
  )
sqlglot/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '27.29.0'
32
- __version_tuple__ = version_tuple = (27, 29, 0)
31
+ __version__ = version = '28.4.0'
32
+ __version_tuple__ = version_tuple = (28, 4, 0)
33
33
 
34
34
  __commit_id__ = commit_id = None
@@ -12,7 +12,6 @@ from sqlglot._typing import E
12
12
  from sqlglot.dialects.dialect import (
13
13
  Dialect,
14
14
  NormalizationStrategy,
15
- annotate_with_type_lambda,
16
15
  arg_max_or_min_no_count,
17
16
  binary_from_function,
18
17
  date_add_interval_sql,
@@ -33,10 +32,12 @@ from sqlglot.dialects.dialect import (
33
32
  unit_to_var,
34
33
  strposition_sql,
35
34
  groupconcat_sql,
35
+ sha2_digest_sql,
36
36
  )
37
+ from sqlglot.generator import unsupported_args
37
38
  from sqlglot.helper import seq_get, split_num_words
38
39
  from sqlglot.tokens import TokenType
39
- from sqlglot.generator import unsupported_args
40
+ from sqlglot.typing.bigquery import EXPRESSION_METADATA
40
41
 
41
42
  if t.TYPE_CHECKING:
42
43
  from sqlglot._typing import Lit
@@ -241,10 +242,28 @@ def _build_datetime(args: t.List) -> exp.Func:
241
242
  return exp.TimestampFromParts.from_arg_list(args)
242
243
 
243
244
 
245
+ def build_date_diff(args: t.List) -> exp.Expression:
246
+ expr = exp.DateDiff(
247
+ this=seq_get(args, 0),
248
+ expression=seq_get(args, 1),
249
+ unit=seq_get(args, 2),
250
+ date_part_boundary=True,
251
+ )
252
+
253
+ # Normalize plain WEEK to WEEK(SUNDAY) to preserve the semantic in the AST to facilitate transpilation
254
+ # This is done post exp.DateDiff construction since the TimeUnit mixin performs canonicalizations in its constructor too
255
+ unit = expr.args.get("unit")
256
+
257
+ if isinstance(unit, exp.Var) and unit.name.upper() == "WEEK":
258
+ expr.set("unit", exp.WeekStart(this=exp.var("SUNDAY")))
259
+
260
+ return expr
261
+
262
+
244
263
  def _build_regexp_extract(
245
264
  expr_type: t.Type[E], default_group: t.Optional[exp.Expression] = None
246
- ) -> t.Callable[[t.List], E]:
247
- def _builder(args: t.List) -> E:
265
+ ) -> t.Callable[[t.List, BigQuery], E]:
266
+ def _builder(args: t.List, dialect: BigQuery) -> E:
248
267
  try:
249
268
  group = re.compile(args[1].name).groups == 1
250
269
  except re.error:
@@ -257,6 +276,11 @@ def _build_regexp_extract(
257
276
  position=seq_get(args, 2),
258
277
  occurrence=seq_get(args, 3),
259
278
  group=exp.Literal.number(1) if group else default_group,
279
+ **(
280
+ {"null_if_pos_overflow": dialect.REGEXP_EXTRACT_POSITION_OVERFLOW_RETURNS_NULL}
281
+ if expr_type is exp.RegexpExtract
282
+ else {}
283
+ ),
260
284
  )
261
285
 
262
286
  return _builder
@@ -290,59 +314,6 @@ def _str_to_datetime_sql(
290
314
  return self.func(f"PARSE_{dtype}", fmt, this, expression.args.get("zone"))
291
315
 
292
316
 
293
- def _annotate_math_functions(self: TypeAnnotator, expression: E) -> E:
294
- """
295
- Many BigQuery math functions such as CEIL, FLOOR etc follow this return type convention:
296
- +---------+---------+---------+------------+---------+
297
- | INPUT | INT64 | NUMERIC | BIGNUMERIC | FLOAT64 |
298
- +---------+---------+---------+------------+---------+
299
- | OUTPUT | FLOAT64 | NUMERIC | BIGNUMERIC | FLOAT64 |
300
- +---------+---------+---------+------------+---------+
301
- """
302
- self._annotate_args(expression)
303
-
304
- this: exp.Expression = expression.this
305
-
306
- self._set_type(
307
- expression,
308
- exp.DataType.Type.DOUBLE if this.is_type(*exp.DataType.INTEGER_TYPES) else this.type,
309
- )
310
- return expression
311
-
312
-
313
- def _annotate_by_args_with_coerce(self: TypeAnnotator, expression: E) -> E:
314
- """
315
- +------------+------------+------------+-------------+---------+
316
- | INPUT | INT64 | NUMERIC | BIGNUMERIC | FLOAT64 |
317
- +------------+------------+------------+-------------+---------+
318
- | INT64 | INT64 | NUMERIC | BIGNUMERIC | FLOAT64 |
319
- | NUMERIC | NUMERIC | NUMERIC | BIGNUMERIC | FLOAT64 |
320
- | BIGNUMERIC | BIGNUMERIC | BIGNUMERIC | BIGNUMERIC | FLOAT64 |
321
- | FLOAT64 | FLOAT64 | FLOAT64 | FLOAT64 | FLOAT64 |
322
- +------------+------------+------------+-------------+---------+
323
- """
324
- self._annotate_args(expression)
325
-
326
- self._set_type(expression, self._maybe_coerce(expression.this.type, expression.expression.type))
327
- return expression
328
-
329
-
330
- def _annotate_by_args_approx_top(self: TypeAnnotator, expression: exp.ApproxTopK) -> exp.ApproxTopK:
331
- self._annotate_args(expression)
332
-
333
- struct_type = exp.DataType(
334
- this=exp.DataType.Type.STRUCT,
335
- expressions=[expression.this.type, exp.DataType(this=exp.DataType.Type.BIGINT)],
336
- nested=True,
337
- )
338
- self._set_type(
339
- expression,
340
- exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[struct_type], nested=True),
341
- )
342
-
343
- return expression
344
-
345
-
346
317
  @unsupported_args("ins_cost", "del_cost", "sub_cost")
347
318
  def _levenshtein_sql(self: BigQuery.Generator, expression: exp.Levenshtein) -> str:
348
319
  max_dist = expression.args.get("max_dist")
@@ -363,11 +334,11 @@ def _build_levenshtein(args: t.List) -> exp.Levenshtein:
363
334
 
364
335
  def _build_format_time(expr_type: t.Type[exp.Expression]) -> t.Callable[[t.List], exp.TimeToStr]:
365
336
  def _builder(args: t.List) -> exp.TimeToStr:
366
- return exp.TimeToStr(
367
- this=expr_type(this=seq_get(args, 1)),
368
- format=seq_get(args, 0),
369
- zone=seq_get(args, 2),
337
+ formatted_time = build_formatted_time(exp.TimeToStr, "bigquery")(
338
+ [expr_type(this=seq_get(args, 1)), seq_get(args, 0)]
370
339
  )
340
+ formatted_time.set("zone", seq_get(args, 2))
341
+ return formatted_time
371
342
 
372
343
  return _builder
373
344
 
@@ -398,44 +369,6 @@ def _json_extract_sql(self: BigQuery.Generator, expression: JSON_EXTRACT_TYPE) -
398
369
  return sql
399
370
 
400
371
 
401
- def _annotate_concat(self: TypeAnnotator, expression: exp.Concat) -> exp.Concat:
402
- annotated = self._annotate_by_args(expression, "expressions")
403
-
404
- # Args must be BYTES or types that can be cast to STRING, return type is either BYTES or STRING
405
- # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#concat
406
- if not annotated.is_type(exp.DataType.Type.BINARY, exp.DataType.Type.UNKNOWN):
407
- annotated.type = exp.DataType.Type.VARCHAR
408
-
409
- return annotated
410
-
411
-
412
- def _annotate_array(self: TypeAnnotator, expression: exp.Array) -> exp.Array:
413
- array_args = expression.expressions
414
-
415
- # BigQuery behaves as follows:
416
- #
417
- # SELECT t, TYPEOF(t) FROM (SELECT 'foo') AS t -- foo, STRUCT<STRING>
418
- # SELECT ARRAY(SELECT 'foo'), TYPEOF(ARRAY(SELECT 'foo')) -- foo, ARRAY<STRING>
419
- if (
420
- len(array_args) == 1
421
- and isinstance(select := array_args[0].unnest(), exp.Select)
422
- and (query_type := select.meta.get("query_type")) is not None
423
- and query_type.is_type(exp.DataType.Type.STRUCT)
424
- and len(query_type.expressions) == 1
425
- and isinstance(col_def := query_type.expressions[0], exp.ColumnDef)
426
- and (projection_type := col_def.kind) is not None
427
- and not projection_type.is_type(exp.DataType.Type.UNKNOWN)
428
- ):
429
- array_type = exp.DataType(
430
- this=exp.DataType.Type.ARRAY,
431
- expressions=[projection_type.copy()],
432
- nested=True,
433
- )
434
- return self._annotate_with_type(expression, array_type)
435
-
436
- return self._annotate_by_args(expression, "expressions", array=True)
437
-
438
-
439
372
  class BigQuery(Dialect):
440
373
  WEEK_OFFSET = -1
441
374
  UNNEST_COLUMN_ONLY = True
@@ -444,8 +377,22 @@ class BigQuery(Dialect):
444
377
  LOG_BASE_FIRST = False
445
378
  HEX_LOWERCASE = True
446
379
  FORCE_EARLY_ALIAS_REF_EXPANSION = True
380
+ EXPAND_ONLY_GROUP_ALIAS_REF = True
447
381
  PRESERVE_ORIGINAL_NAMES = True
448
382
  HEX_STRING_IS_INTEGER_TYPE = True
383
+ BYTE_STRING_IS_BYTES_TYPE = True
384
+ UUID_IS_STRING_TYPE = True
385
+ ANNOTATE_ALL_SCOPES = True
386
+ PROJECTION_ALIASES_SHADOW_SOURCE_NAMES = True
387
+ TABLES_REFERENCEABLE_AS_COLUMNS = True
388
+ SUPPORTS_STRUCT_STAR_EXPANSION = True
389
+ EXCLUDES_PSEUDOCOLUMNS_FROM_STAR = True
390
+ QUERY_RESULTS_ARE_STRUCTS = True
391
+ JSON_EXTRACT_SCALAR_SCALAR_ONLY = True
392
+ DEFAULT_NULL_TYPE = exp.DataType.Type.BIGINT
393
+
394
+ # https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#initcap
395
+ INITCAP_DEFAULT_DELIMITER_CHARS = ' \t\n\r\f\v\\[\\](){}/|<>!?@"^#$&~_,.:;*%+\\-'
449
396
 
450
397
  # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
451
398
  NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE
@@ -455,9 +402,19 @@ class BigQuery(Dialect):
455
402
 
456
403
  # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time
457
404
  TIME_MAPPING = {
405
+ "%x": "%m/%d/%y",
458
406
  "%D": "%m/%d/%y",
459
407
  "%E6S": "%S.%f",
460
408
  "%e": "%-d",
409
+ "%F": "%Y-%m-%d",
410
+ "%T": "%H:%M:%S",
411
+ "%c": "%a %b %e %H:%M:%S %Y",
412
+ }
413
+
414
+ INVERSE_TIME_MAPPING = {
415
+ # Preserve %E6S instead of expanding to %T.%f - since both %E6S & %T.%f are semantically different in BigQuery
416
+ # %E6S is semantically different from %T.%f: %E6S works as a single atomic specifier for seconds with microseconds, while %T.%f expands incorrectly and fails to parse.
417
+ "%H:%M:%S.%f": "%H:%M:%E6S",
461
418
  }
462
419
 
463
420
  FORMAT_MAPPING = {
@@ -480,7 +437,13 @@ class BigQuery(Dialect):
480
437
  # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
481
438
  # https://cloud.google.com/bigquery/docs/querying-wildcard-tables#scanning_a_range_of_tables_using_table_suffix
482
439
  # https://cloud.google.com/bigquery/docs/query-cloud-storage-data#query_the_file_name_pseudo-column
483
- PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE", "_TABLE_SUFFIX", "_FILE_NAME"}
440
+ PSEUDOCOLUMNS = {
441
+ "_PARTITIONTIME",
442
+ "_PARTITIONDATE",
443
+ "_TABLE_SUFFIX",
444
+ "_FILE_NAME",
445
+ "_DBT_MAX_PARTITION",
446
+ }
484
447
 
485
448
  # All set operations require either a DISTINCT or ALL specifier
486
449
  SET_OP_DISTINCT_BY_DEFAULT = dict.fromkeys((exp.Except, exp.Intersect, exp.Union), None)
@@ -492,200 +455,16 @@ class BigQuery(Dialect):
492
455
  }
493
456
  COERCES_TO[exp.DataType.Type.DECIMAL] |= {exp.DataType.Type.BIGDECIMAL}
494
457
  COERCES_TO[exp.DataType.Type.BIGINT] |= {exp.DataType.Type.BIGDECIMAL}
495
-
496
- # BigQuery maps Type.TIMESTAMP to DATETIME, so we need to amend the inferred types
497
- TYPE_TO_EXPRESSIONS = {
498
- **Dialect.TYPE_TO_EXPRESSIONS,
499
- exp.DataType.Type.BIGINT: {
500
- *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.BIGINT],
501
- exp.Ascii,
502
- exp.BitwiseAndAgg,
503
- exp.BitwiseOrAgg,
504
- exp.BitwiseXorAgg,
505
- exp.BitwiseCount,
506
- exp.ByteLength,
507
- exp.DenseRank,
508
- exp.FarmFingerprint,
509
- exp.Grouping,
510
- exp.LaxInt64,
511
- exp.Length,
512
- exp.Ntile,
513
- exp.Rank,
514
- exp.RangeBucket,
515
- exp.RegexpInstr,
516
- exp.RowNumber,
517
- exp.Unicode,
518
- },
519
- exp.DataType.Type.BINARY: {
520
- *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.BINARY],
521
- exp.ByteString,
522
- exp.CodePointsToBytes,
523
- exp.MD5Digest,
524
- exp.SHA,
525
- exp.SHA2,
526
- exp.Unhex,
527
- },
528
- exp.DataType.Type.BOOLEAN: {
529
- *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.BOOLEAN],
530
- exp.IsInf,
531
- exp.IsNan,
532
- exp.JSONBool,
533
- exp.LaxBool,
534
- },
535
- exp.DataType.Type.DATE: {
536
- *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.DATE],
537
- exp.DateFromUnixDate,
538
- },
539
- exp.DataType.Type.DATETIME: {
540
- *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.DATETIME],
541
- exp.ParseDatetime,
542
- exp.TimestampFromParts,
543
- },
544
- exp.DataType.Type.DECIMAL: {
545
- exp.ParseNumeric,
546
- },
547
- exp.DataType.Type.BIGDECIMAL: {
548
- exp.ParseBignumeric,
549
- },
550
- exp.DataType.Type.DOUBLE: {
551
- *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.DOUBLE],
552
- exp.Acos,
553
- exp.Acosh,
554
- exp.Asin,
555
- exp.Asinh,
556
- exp.Atan,
557
- exp.Atanh,
558
- exp.Atan2,
559
- exp.Cbrt,
560
- exp.Corr,
561
- exp.Cot,
562
- exp.CosineDistance,
563
- exp.Coth,
564
- exp.CovarPop,
565
- exp.CovarSamp,
566
- exp.Csc,
567
- exp.Csch,
568
- exp.CumeDist,
569
- exp.EuclideanDistance,
570
- exp.Float64,
571
- exp.LaxFloat64,
572
- exp.PercentRank,
573
- exp.Rand,
574
- exp.Sec,
575
- exp.Sech,
576
- exp.Sin,
577
- exp.Sinh,
578
- },
579
- exp.DataType.Type.JSON: {
580
- *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.JSON],
581
- exp.JSONArray,
582
- exp.JSONArrayAppend,
583
- exp.JSONArrayInsert,
584
- exp.JSONObject,
585
- exp.JSONRemove,
586
- exp.JSONSet,
587
- exp.JSONStripNulls,
588
- },
589
- exp.DataType.Type.TIME: {
590
- *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.TIME],
591
- exp.ParseTime,
592
- exp.TimeFromParts,
593
- exp.TimeTrunc,
594
- exp.TsOrDsToTime,
595
- },
596
- exp.DataType.Type.VARCHAR: {
597
- *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.VARCHAR],
598
- exp.CodePointsToString,
599
- exp.Format,
600
- exp.JSONExtractScalar,
601
- exp.JSONType,
602
- exp.LaxString,
603
- exp.LowerHex,
604
- exp.Normalize,
605
- exp.SafeConvertBytesToString,
606
- exp.Soundex,
607
- exp.Uuid,
608
- },
609
- exp.DataType.Type.TIMESTAMPTZ: Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.TIMESTAMP],
610
- }
611
- TYPE_TO_EXPRESSIONS.pop(exp.DataType.Type.TIMESTAMP)
612
-
613
- ANNOTATORS = {
614
- **Dialect.ANNOTATORS,
615
- **{
616
- expr_type: annotate_with_type_lambda(data_type)
617
- for data_type, expressions in TYPE_TO_EXPRESSIONS.items()
618
- for expr_type in expressions
619
- },
620
- **{
621
- expr_type: lambda self, e: _annotate_math_functions(self, e)
622
- for expr_type in (exp.Floor, exp.Ceil, exp.Log, exp.Ln, exp.Sqrt, exp.Exp, exp.Round)
623
- },
624
- **{
625
- expr_type: lambda self, e: self._annotate_by_args(e, "this")
626
- for expr_type in (
627
- exp.Abs,
628
- exp.ArgMax,
629
- exp.ArgMin,
630
- exp.DateTrunc,
631
- exp.DatetimeTrunc,
632
- exp.FirstValue,
633
- exp.GroupConcat,
634
- exp.IgnoreNulls,
635
- exp.JSONExtract,
636
- exp.Lead,
637
- exp.Left,
638
- exp.Lower,
639
- exp.NthValue,
640
- exp.Pad,
641
- exp.PercentileDisc,
642
- exp.RegexpExtract,
643
- exp.RegexpReplace,
644
- exp.Repeat,
645
- exp.Replace,
646
- exp.RespectNulls,
647
- exp.Reverse,
648
- exp.Right,
649
- exp.SafeNegate,
650
- exp.Sign,
651
- exp.Substring,
652
- exp.TimestampTrunc,
653
- exp.Translate,
654
- exp.Trim,
655
- exp.Upper,
656
- )
657
- },
658
- exp.ApproxTopSum: lambda self, e: _annotate_by_args_approx_top(self, e),
659
- exp.ApproxTopK: lambda self, e: _annotate_by_args_approx_top(self, e),
660
- exp.ApproxQuantiles: lambda self, e: self._annotate_by_args(e, "this", array=True),
661
- exp.Array: _annotate_array,
662
- exp.ArrayConcat: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
663
- exp.Concat: _annotate_concat,
664
- exp.GenerateTimestampArray: lambda self, e: self._annotate_with_type(
665
- e, exp.DataType.build("ARRAY<TIMESTAMP>", dialect="bigquery")
666
- ),
667
- exp.JSONExtractArray: lambda self, e: self._annotate_by_args(e, "this", array=True),
668
- exp.JSONFormat: lambda self, e: self._annotate_with_type(
669
- e, exp.DataType.Type.JSON if e.args.get("to_json") else exp.DataType.Type.VARCHAR
670
- ),
671
- exp.JSONKeysAtDepth: lambda self, e: self._annotate_with_type(
672
- e, exp.DataType.build("ARRAY<VARCHAR>", dialect="bigquery")
673
- ),
674
- exp.JSONValueArray: lambda self, e: self._annotate_with_type(
675
- e, exp.DataType.build("ARRAY<VARCHAR>", dialect="bigquery")
676
- ),
677
- exp.Lag: lambda self, e: self._annotate_by_args(e, "this", "default"),
678
- exp.PercentileCont: lambda self, e: _annotate_by_args_with_coerce(self, e),
679
- exp.RegexpExtractAll: lambda self, e: self._annotate_by_args(e, "this", array=True),
680
- exp.SafeAdd: lambda self, e: _annotate_by_args_with_coerce(self, e),
681
- exp.SafeMultiply: lambda self, e: _annotate_by_args_with_coerce(self, e),
682
- exp.SafeSubtract: lambda self, e: _annotate_by_args_with_coerce(self, e),
683
- exp.Split: lambda self, e: self._annotate_by_args(e, "this", array=True),
684
- exp.ToCodePoints: lambda self, e: self._annotate_with_type(
685
- e, exp.DataType.build("ARRAY<BIGINT>", dialect="bigquery")
686
- ),
458
+ COERCES_TO[exp.DataType.Type.VARCHAR] |= {
459
+ exp.DataType.Type.DATE,
460
+ exp.DataType.Type.DATETIME,
461
+ exp.DataType.Type.TIME,
462
+ exp.DataType.Type.TIMESTAMP,
463
+ exp.DataType.Type.TIMESTAMPTZ,
687
464
  }
688
465
 
466
+ EXPRESSION_METADATA = EXPRESSION_METADATA.copy()
467
+
689
468
  def normalize_identifier(self, expression: E) -> E:
690
469
  if (
691
470
  isinstance(expression, exp.Identifier)
@@ -809,6 +588,7 @@ class BigQuery(Dialect):
809
588
  "CONTAINS_SUBSTR": _build_contains_substring,
810
589
  "DATE": _build_date,
811
590
  "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
591
+ "DATE_DIFF": build_date_diff,
812
592
  "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
813
593
  "DATE_TRUNC": lambda args: exp.DateTrunc(
814
594
  unit=seq_get(args, 1),
@@ -822,6 +602,12 @@ class BigQuery(Dialect):
822
602
  "EDIT_DISTANCE": _build_levenshtein,
823
603
  "FORMAT_DATE": _build_format_time(exp.TsOrDsToDate),
824
604
  "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
605
+ "GREATEST": lambda args: exp.Greatest(
606
+ this=seq_get(args, 0), expressions=args[1:], null_if_any_null=True
607
+ ),
608
+ "LEAST": lambda args: exp.Least(
609
+ this=seq_get(args, 0), expressions=args[1:], null_if_any_null=True
610
+ ),
825
611
  "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar),
826
612
  "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray),
827
613
  "JSON_EXTRACT_STRING_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray),
@@ -833,6 +619,7 @@ class BigQuery(Dialect):
833
619
  "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray),
834
620
  "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
835
621
  "MD5": exp.MD5Digest.from_arg_list,
622
+ "SHA1": exp.SHA1Digest.from_arg_list,
836
623
  "NORMALIZE_AND_CASEFOLD": lambda args: exp.Normalize(
837
624
  this=seq_get(args, 0), form=seq_get(args, 1), is_casefold=True
838
625
  ),
@@ -854,7 +641,9 @@ class BigQuery(Dialect):
854
641
  "REGEXP_EXTRACT_ALL": _build_regexp_extract(
855
642
  exp.RegexpExtractAll, default_group=exp.Literal.number(0)
856
643
  ),
857
- "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
644
+ "SHA256": lambda args: exp.SHA2Digest(
645
+ this=seq_get(args, 0), length=exp.Literal.number(256)
646
+ ),
858
647
  "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
859
648
  "SPLIT": lambda args: exp.Split(
860
649
  # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
@@ -890,7 +679,11 @@ class BigQuery(Dialect):
890
679
 
891
680
  FUNCTION_PARSERS = {
892
681
  **parser.Parser.FUNCTION_PARSERS,
893
- "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
682
+ "ARRAY": lambda self: self.expression(
683
+ exp.Array,
684
+ expressions=[self._parse_statement()],
685
+ struct_name_inheritance=True,
686
+ ),
894
687
  "JSON_ARRAY": lambda self: self.expression(
895
688
  exp.JSONArray, expressions=self._parse_csv(self._parse_bitwise)
896
689
  ),
@@ -1126,6 +919,9 @@ class BigQuery(Dialect):
1126
919
  ) -> t.Optional[exp.Expression]:
1127
920
  bracket = super()._parse_bracket(this)
1128
921
 
922
+ if isinstance(bracket, exp.Array):
923
+ bracket.set("struct_name_inheritance", True)
924
+
1129
925
  if this is bracket:
1130
926
  return bracket
1131
927
 
@@ -1272,6 +1068,24 @@ class BigQuery(Dialect):
1272
1068
  this=self._match_text_seq("AS") and self._parse_select(),
1273
1069
  )
1274
1070
 
1071
+ def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
1072
+ this = super()._parse_column_ops(this)
1073
+
1074
+ if isinstance(this, exp.Dot):
1075
+ prefix_name = this.this.name.upper()
1076
+ func_name = this.name.upper()
1077
+ if prefix_name == "NET":
1078
+ if func_name == "HOST":
1079
+ this = self.expression(
1080
+ exp.NetHost, this=seq_get(this.expression.expressions, 0)
1081
+ )
1082
+ elif prefix_name == "SAFE":
1083
+ if func_name == "TIMESTAMP":
1084
+ this = _build_timestamp(this.expression.expressions)
1085
+ this.set("safe", True)
1086
+
1087
+ return this
1088
+
1275
1089
  class Generator(generator.Generator):
1276
1090
  INTERVAL_ALLOWS_PLURAL_FORM = False
1277
1091
  JOIN_HINTS = False
@@ -1347,7 +1161,7 @@ class BigQuery(Dialect):
1347
1161
  ),
1348
1162
  exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
1349
1163
  exp.GroupConcat: lambda self, e: groupconcat_sql(
1350
- self, e, func_name="STRING_AGG", within_group=False
1164
+ self, e, func_name="STRING_AGG", within_group=False, sep=None
1351
1165
  ),
1352
1166
  exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))),
1353
1167
  exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"),
@@ -1406,6 +1220,8 @@ class BigQuery(Dialect):
1406
1220
  ),
1407
1221
  exp.SHA: rename_func("SHA1"),
1408
1222
  exp.SHA2: sha256_sql,
1223
+ exp.SHA1Digest: rename_func("SHA1"),
1224
+ exp.SHA2Digest: sha2_digest_sql,
1409
1225
  exp.StabilityProperty: lambda self, e: (
1410
1226
  "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
1411
1227
  ),
@@ -1417,6 +1233,7 @@ class BigQuery(Dialect):
1417
1233
  ),
1418
1234
  exp.StrToDate: _str_to_datetime_sql,
1419
1235
  exp.StrToTime: _str_to_datetime_sql,
1236
+ exp.SessionUser: lambda *_: "SESSION_USER()",
1420
1237
  exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
1421
1238
  exp.TimeFromParts: rename_func("TIME"),
1422
1239
  exp.TimestampFromParts: rename_func("DATETIME"),
@@ -1734,3 +1551,7 @@ class BigQuery(Dialect):
1734
1551
  kind = f" {kind}" if kind else ""
1735
1552
 
1736
1553
  return f"{variables}{kind}{default}"
1554
+
1555
+ def timestamp_sql(self, expression: exp.Timestamp) -> str:
1556
+ prefix = "SAFE." if expression.args.get("safe") else ""
1557
+ return self.func(f"{prefix}TIMESTAMP", expression.this, expression.args.get("zone"))