sqlglot 27.29.0__py3-none-any.whl → 28.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. sqlglot/__main__.py +6 -4
  2. sqlglot/_version.py +2 -2
  3. sqlglot/dialects/bigquery.py +116 -295
  4. sqlglot/dialects/clickhouse.py +67 -2
  5. sqlglot/dialects/databricks.py +38 -1
  6. sqlglot/dialects/dialect.py +327 -286
  7. sqlglot/dialects/dremio.py +4 -1
  8. sqlglot/dialects/duckdb.py +718 -22
  9. sqlglot/dialects/exasol.py +243 -10
  10. sqlglot/dialects/hive.py +8 -8
  11. sqlglot/dialects/mysql.py +11 -2
  12. sqlglot/dialects/oracle.py +29 -0
  13. sqlglot/dialects/postgres.py +46 -24
  14. sqlglot/dialects/presto.py +47 -16
  15. sqlglot/dialects/redshift.py +16 -0
  16. sqlglot/dialects/risingwave.py +3 -0
  17. sqlglot/dialects/singlestore.py +12 -3
  18. sqlglot/dialects/snowflake.py +199 -271
  19. sqlglot/dialects/spark.py +2 -2
  20. sqlglot/dialects/spark2.py +11 -48
  21. sqlglot/dialects/sqlite.py +9 -0
  22. sqlglot/dialects/teradata.py +5 -8
  23. sqlglot/dialects/trino.py +6 -0
  24. sqlglot/dialects/tsql.py +61 -25
  25. sqlglot/diff.py +4 -2
  26. sqlglot/errors.py +69 -0
  27. sqlglot/expressions.py +484 -84
  28. sqlglot/generator.py +143 -41
  29. sqlglot/helper.py +2 -2
  30. sqlglot/optimizer/annotate_types.py +247 -140
  31. sqlglot/optimizer/canonicalize.py +6 -1
  32. sqlglot/optimizer/eliminate_joins.py +1 -1
  33. sqlglot/optimizer/eliminate_subqueries.py +2 -2
  34. sqlglot/optimizer/merge_subqueries.py +5 -5
  35. sqlglot/optimizer/normalize.py +20 -13
  36. sqlglot/optimizer/normalize_identifiers.py +17 -3
  37. sqlglot/optimizer/optimizer.py +4 -0
  38. sqlglot/optimizer/pushdown_predicates.py +1 -1
  39. sqlglot/optimizer/qualify.py +14 -6
  40. sqlglot/optimizer/qualify_columns.py +113 -352
  41. sqlglot/optimizer/qualify_tables.py +112 -70
  42. sqlglot/optimizer/resolver.py +374 -0
  43. sqlglot/optimizer/scope.py +27 -16
  44. sqlglot/optimizer/simplify.py +1074 -964
  45. sqlglot/optimizer/unnest_subqueries.py +12 -2
  46. sqlglot/parser.py +276 -160
  47. sqlglot/planner.py +2 -2
  48. sqlglot/schema.py +15 -4
  49. sqlglot/tokens.py +42 -7
  50. sqlglot/transforms.py +77 -22
  51. sqlglot/typing/__init__.py +316 -0
  52. sqlglot/typing/bigquery.py +376 -0
  53. sqlglot/typing/hive.py +12 -0
  54. sqlglot/typing/presto.py +24 -0
  55. sqlglot/typing/snowflake.py +505 -0
  56. sqlglot/typing/spark2.py +58 -0
  57. sqlglot/typing/tsql.py +9 -0
  58. {sqlglot-27.29.0.dist-info → sqlglot-28.4.0.dist-info}/METADATA +2 -2
  59. sqlglot-28.4.0.dist-info/RECORD +92 -0
  60. sqlglot-27.29.0.dist-info/RECORD +0 -84
  61. {sqlglot-27.29.0.dist-info → sqlglot-28.4.0.dist-info}/WHEEL +0 -0
  62. {sqlglot-27.29.0.dist-info → sqlglot-28.4.0.dist-info}/licenses/LICENSE +0 -0
  63. {sqlglot-27.29.0.dist-info → sqlglot-28.4.0.dist-info}/top_level.txt +0 -0
@@ -6,7 +6,6 @@ from sqlglot import exp, generator, jsonpath, parser, tokens, transforms
 from sqlglot.dialects.dialect import (
     Dialect,
     NormalizationStrategy,
-    annotate_with_type_lambda,
     build_timetostr_or_tochar,
     build_like,
     binary_from_function,
@@ -34,17 +33,14 @@ from sqlglot.dialects.dialect import (
 )
 from sqlglot.generator import unsupported_args
 from sqlglot.helper import find_new_name, flatten, is_float, is_int, seq_get
-from sqlglot.optimizer.annotate_types import TypeAnnotator
 from sqlglot.optimizer.scope import build_scope, find_all_in_scope
 from sqlglot.tokens import TokenType
+from sqlglot.typing.snowflake import EXPRESSION_METADATA

 if t.TYPE_CHECKING:
     from sqlglot._typing import E, B


-DATE_PARTS = ["YEAR", "QUARTER", "MONTH", "WEEK", "DAY"]
-
-
 def _build_strtok(args: t.List) -> exp.SplitPart:
     # Add default delimiter (space) if missing - per Snowflake docs
     if len(args) == 1:
@@ -57,6 +53,21 @@ def _build_strtok(args: t.List) -> exp.SplitPart:
     return exp.SplitPart.from_arg_list(args)


+def _build_approx_top_k(args: t.List) -> exp.ApproxTopK:
+    """
+    Normalizes APPROX_TOP_K arguments to match Snowflake semantics.
+
+    Snowflake APPROX_TOP_K signature: APPROX_TOP_K(column [, k] [, counters])
+    - k defaults to 1 if omitted (per Snowflake documentation)
+    - counters is optional precision parameter
+    """
+    # Add default k=1 if only column is provided
+    if len(args) == 1:
+        args.append(exp.Literal.number(1))
+
+    return exp.ApproxTopK.from_arg_list(args)
+
+
 def _build_datetime(
     name: str, kind: exp.DataType.Type, safe: bool = False
 ) -> t.Callable[[t.List], exp.Func]:
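
A minimal usage sketch (not part of the diff), assuming sqlglot 28.4.0 with the builder above wired into the Snowflake parser: a single-argument APPROX_TOP_K call should come back with the default k of 1 attached.

    import sqlglot
    from sqlglot import exp

    # Parse a one-argument APPROX_TOP_K; _build_approx_top_k should inject k=1.
    select = sqlglot.parse_one("SELECT APPROX_TOP_K(col) FROM t", read="snowflake")
    top_k = select.find(exp.ApproxTopK)
    print(top_k.args)                        # expect the injected literal 1 among the args
    print(select.sql(dialect="snowflake"))   # round-tripped Snowflake SQL
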
@@ -132,6 +143,11 @@ def _build_date_time_add(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
 def _build_bitwise(expr_type: t.Type[B], name: str) -> t.Callable[[t.List], B | exp.Anonymous]:
     def _builder(args: t.List) -> B | exp.Anonymous:
         if len(args) == 3:
+            # Special handling for bitwise operations with padside argument
+            if expr_type in (exp.BitwiseAnd, exp.BitwiseOr, exp.BitwiseXor):
+                return expr_type(
+                    this=seq_get(args, 0), expression=seq_get(args, 1), padside=seq_get(args, 2)
+                )
             return exp.Anonymous(this=name, expressions=args)

         return binary_from_function(expr_type)(args)
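
A hedged sketch of the new three-argument form (not part of the diff): Snowflake's BITAND/BITOR/BITXOR accept an optional padside argument, and the builder above now keeps such calls as typed bitwise expressions instead of falling back to exp.Anonymous.

    import sqlglot
    from sqlglot import exp

    # Three-argument BITAND should now parse into exp.BitwiseAnd with a padside arg.
    tree = sqlglot.parse_one("SELECT BITAND(a, b, 'LEFT')", read="snowflake")
    node = tree.find(exp.BitwiseAnd)
    print(node and node.args.get("padside"))  # expect the 'LEFT' literal, not None
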
@@ -348,8 +364,8 @@ def _transform_generate_date_array(expression: exp.Expression) -> exp.Expression
     return expression


-def _build_regexp_extract(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
-    def _builder(args: t.List) -> E:
+def _build_regexp_extract(expr_type: t.Type[E]) -> t.Callable[[t.List, Snowflake], E]:
+    def _builder(args: t.List, dialect: Snowflake) -> E:
         return expr_type(
             this=seq_get(args, 0),
             expression=seq_get(args, 1),
@@ -357,6 +373,11 @@ def _build_regexp_extract(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
             occurrence=seq_get(args, 3),
             parameters=seq_get(args, 4),
             group=seq_get(args, 5) or exp.Literal.number(0),
+            **(
+                {"null_if_pos_overflow": dialect.REGEXP_EXTRACT_POSITION_OVERFLOW_RETURNS_NULL}
+                if expr_type is exp.RegexpExtract
+                else {}
+            ),
         )

     return _builder
@@ -533,32 +554,6 @@ def _eliminate_dot_variant_lookup(expression: exp.Expression) -> exp.Expression:
     return expression


-def _annotate_reverse(self: TypeAnnotator, expression: exp.Reverse) -> exp.Reverse:
-    expression = self._annotate_by_args(expression, "this")
-    if expression.is_type(exp.DataType.Type.NULL):
-        # Snowflake treats REVERSE(NULL) as a VARCHAR
-        self._set_type(expression, exp.DataType.Type.VARCHAR)
-
-    return expression
-
-
-def _annotate_timestamp_from_parts(
-    self: TypeAnnotator, expression: exp.TimestampFromParts
-) -> exp.TimestampFromParts:
-    """Annotate TimestampFromParts with correct type based on arguments.
-    TIMESTAMP_FROM_PARTS with time_zone -> TIMESTAMPTZ
-    TIMESTAMP_FROM_PARTS without time_zone -> TIMESTAMP (defaults to TIMESTAMP_NTZ)
-    """
-    self._annotate_args(expression)
-
-    if expression.args.get("zone"):
-        self._set_type(expression, exp.DataType.Type.TIMESTAMPTZ)
-    else:
-        self._set_type(expression, exp.DataType.Type.TIMESTAMP)
-
-    return expression
-
-
 def _build_timestamp_from_parts(args: t.List) -> exp.Func:
     """Build TimestampFromParts with support for both syntaxes:
     1. TIMESTAMP_FROM_PARTS(year, month, day, hour, minute, second [, nanosecond] [, time_zone])
@@ -570,19 +565,46 @@ def _build_timestamp_from_parts(args: t.List) -> exp.Func:
     return exp.TimestampFromParts.from_arg_list(args)


-def _annotate_date_or_time_add(self: TypeAnnotator, expression: E) -> E:
-    self._annotate_args(expression)
+def _build_round(args: t.List) -> exp.Round:
+    """
+    Build Round expression, unwrapping Snowflake's named parameters.
+
+    Maps EXPR => this, SCALE => decimals, ROUNDING_MODE => truncate.

-    if (
-        expression.this.is_type(exp.DataType.Type.DATE)
-        and expression.text("unit").upper() not in DATE_PARTS
-    ):
-        self._set_type(expression, exp.DataType.Type.TIMESTAMPNTZ)
-    else:
-        self._annotate_by_args(expression, "this")
+    Note: Snowflake does not support mixing named and positional arguments.
+    Arguments are either all named or all positional.
+    """
+    kwarg_map = {"EXPR": "this", "SCALE": "decimals", "ROUNDING_MODE": "truncate"}
+    round_args = {}
+    positional_keys = ["this", "decimals", "truncate"]
+    positional_idx = 0
+
+    for arg in args:
+        if isinstance(arg, exp.Kwarg):
+            key = arg.this.name.upper()
+            round_key = kwarg_map.get(key)
+            if round_key:
+                round_args[round_key] = arg.expression
+        else:
+            if positional_idx < len(positional_keys):
+                round_args[positional_keys[positional_idx]] = arg
+                positional_idx += 1
+
+    expression = exp.Round(**round_args)
+    expression.set("casts_non_integer_decimals", True)
     return expression


+def _build_try_to_number(args: t.List[exp.Expression]) -> exp.Expression:
+    return exp.ToNumber(
+        this=seq_get(args, 0),
+        format=seq_get(args, 1),
+        precision=seq_get(args, 2),
+        scale=seq_get(args, 3),
+        safe=True,
+    )
+
+
 class Snowflake(Dialect):
     # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
     NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
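
A hedged sketch of the two builders above (not part of the diff), assuming named `=>` arguments parse into exp.Kwarg as _build_round expects:

    import sqlglot
    from sqlglot import exp

    # TRY_TO_DECIMAL/TRY_TO_NUMBER/TRY_TO_NUMERIC all build exp.ToNumber with safe=True.
    num = sqlglot.parse_one("SELECT TRY_TO_DECIMAL(x, '99.99', 10, 2)", read="snowflake")
    print(num.find(exp.ToNumber).args.get("safe"))  # expect True

    # ROUND with Snowflake's named parameters is unwrapped into positional args.
    rnd = sqlglot.parse_one("SELECT ROUND(EXPR => x, SCALE => 2)", read="snowflake")
    print(rnd.sql(dialect="snowflake"))
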
@@ -596,175 +618,12 @@ class Snowflake(Dialect):
     ARRAY_AGG_INCLUDES_NULLS = None
     ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = False
     TRY_CAST_REQUIRES_STRING = True
+    SUPPORTS_ALIAS_REFS_IN_JOIN_CONDITIONS = True

-    TYPE_TO_EXPRESSIONS = {
-        **Dialect.TYPE_TO_EXPRESSIONS,
-        exp.DataType.Type.DOUBLE: {
-            *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.DOUBLE],
-            exp.Cos,
-            exp.Cosh,
-            exp.Cot,
-            exp.Degrees,
-            exp.Exp,
-            exp.MonthsBetween,
-            exp.RegrValx,
-            exp.RegrValy,
-            exp.Sin,
-            exp.Sinh,
-            exp.Tan,
-            exp.Tanh,
-            exp.Asin,
-            exp.Asinh,
-            exp.Atan,
-            exp.Atan2,
-            exp.Atanh,
-            exp.Cbrt,
-        },
-        exp.DataType.Type.INT: {
-            *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.INT],
-            exp.Ascii,
-            exp.ByteLength,
-            exp.Length,
-            exp.RtrimmedLength,
-            exp.BitLength,
-            exp.Hour,
-            exp.Levenshtein,
-            exp.JarowinklerSimilarity,
-            exp.Minute,
-            exp.Second,
-            exp.StrPosition,
-            exp.Unicode,
-            exp.WidthBucket,
-        },
-        exp.DataType.Type.VARCHAR: {
-            *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.VARCHAR],
-            exp.Base64DecodeString,
-            exp.TryBase64DecodeString,
-            exp.Base64Encode,
-            exp.DecompressString,
-            exp.MD5,
-            exp.AIAgg,
-            exp.AIClassify,
-            exp.AISummarizeAgg,
-            exp.Chr,
-            exp.Collate,
-            exp.Collation,
-            exp.HexDecodeString,
-            exp.TryHexDecodeString,
-            exp.HexEncode,
-            exp.Initcap,
-            exp.Monthname,
-            exp.RegexpExtract,
-            exp.RegexpReplace,
-            exp.Repeat,
-            exp.Replace,
-            exp.SHA,
-            exp.SHA2,
-            exp.Soundex,
-            exp.SoundexP123,
-            exp.Space,
-            exp.SplitPart,
-            exp.Translate,
-            exp.Uuid,
-        },
-        exp.DataType.Type.BINARY: {
-            *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.BINARY],
-            exp.Base64DecodeBinary,
-            exp.TryBase64DecodeBinary,
-            exp.TryHexDecodeBinary,
-            exp.Compress,
-            exp.DecompressBinary,
-            exp.MD5Digest,
-            exp.SHA1Digest,
-            exp.SHA2Digest,
-            exp.Unhex,
-        },
-        exp.DataType.Type.BIGINT: {
-            *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.BIGINT],
-            exp.Factorial,
-            exp.MD5NumberLower64,
-            exp.MD5NumberUpper64,
-            exp.Rand,
-        },
-        exp.DataType.Type.ARRAY: {
-            exp.Split,
-            exp.RegexpExtractAll,
-            exp.StringToArray,
-        },
-        exp.DataType.Type.OBJECT: {
-            exp.ParseUrl,
-            exp.ParseIp,
-        },
-        exp.DataType.Type.DECIMAL: {
-            exp.RegexpCount,
-        },
-        exp.DataType.Type.BOOLEAN: {
-            *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.BOOLEAN],
-            exp.Boolnot,
-            exp.Booland,
-            exp.Boolor,
-            exp.EqualNull,
-            exp.IsNullValue,
-            exp.Search,
-        },
-        exp.DataType.Type.DATE: {
-            *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.DATE],
-            exp.NextDay,
-            exp.PreviousDay,
-        },
-        exp.DataType.Type.TIME: {
-            *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.TIME],
-            exp.TimeFromParts,
-        },
-    }
+    EXPRESSION_METADATA = EXPRESSION_METADATA.copy()

-    ANNOTATORS = {
-        **Dialect.ANNOTATORS,
-        **{
-            expr_type: annotate_with_type_lambda(data_type)
-            for data_type, expressions in TYPE_TO_EXPRESSIONS.items()
-            for expr_type in expressions
-        },
-        **{
-            expr_type: lambda self, e: self._annotate_by_args(e, "this")
-            for expr_type in (
-                exp.AddMonths,
-                exp.Floor,
-                exp.Left,
-                exp.Pad,
-                exp.Right,
-                exp.Stuff,
-                exp.Substring,
-                exp.Round,
-                exp.Ceil,
-                exp.DateTrunc,
-                exp.TimeSlice,
-                exp.TimestampTrunc,
-            )
-        },
-        **{
-            expr_type: lambda self, e: self._annotate_with_type(
-                e, exp.DataType.build("NUMBER", dialect="snowflake")
-            )
-            for expr_type in (
-                exp.RegexpCount,
-                exp.RegexpInstr,
-            )
-        },
-        exp.ConcatWs: lambda self, e: self._annotate_by_args(e, "expressions"),
-        exp.ConvertTimezone: lambda self, e: self._annotate_with_type(
-            e,
-            exp.DataType.Type.TIMESTAMPNTZ
-            if e.args.get("source_tz")
-            else exp.DataType.Type.TIMESTAMPTZ,
-        ),
-        exp.DateAdd: _annotate_date_or_time_add,
-        exp.TimeAdd: _annotate_date_or_time_add,
-        exp.GreatestIgnoreNulls: lambda self, e: self._annotate_by_args(e, "expressions"),
-        exp.LeastIgnoreNulls: lambda self, e: self._annotate_by_args(e, "expressions"),
-        exp.Reverse: _annotate_reverse,
-        exp.TimestampFromParts: _annotate_timestamp_from_parts,
-    }
+    # https://docs.snowflake.com/en/en/sql-reference/functions/initcap
+    INITCAP_DEFAULT_DELIMITER_CHARS = ' \t\n\r\f\v!?@"^#$&~_,.:;+\\-*%/|\\[\\](){}<>'

     TIME_MAPPING = {
         "YYYY": "%Y",
@@ -798,17 +657,16 @@ class Snowflake(Dialect):
         "ISOWEEK": "WEEKISO",
     }

-    def quote_identifier(self, expression: E, identify: bool = True) -> E:
+    PSEUDOCOLUMNS = {"LEVEL"}
+
+    def can_quote(self, identifier: exp.Identifier, identify: str | bool = "safe") -> bool:
         # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
         # unquoted DUAL keyword in a special way and does not map it to a user-defined table
-        if (
-            isinstance(expression, exp.Identifier)
-            and isinstance(expression.parent, exp.Table)
-            and expression.name.lower() == "dual"
-        ):
-            return expression  # type: ignore
-
-        return super().quote_identifier(expression, identify=identify)
+        return super().can_quote(identifier, identify) and not (
+            isinstance(identifier.parent, exp.Table)
+            and not identifier.quoted
+            and identifier.name.lower() == "dual"
+        )

     class JSONPathTokenizer(jsonpath.JSONPathTokenizer):
         SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy()
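
A hedged sketch of the quote_identifier -> can_quote change (not part of the diff): even with identifier quoting forced on, an unquoted DUAL table reference should stay bare.

    import sqlglot

    # With identify=True every identifier is normally quoted; DUAL should be the exception.
    print(sqlglot.transpile("SELECT 1 FROM dual", read="snowflake", write="snowflake", identify=True)[0])
    print(sqlglot.transpile("SELECT c FROM t", read="snowflake", write="snowflake", identify=True)[0])
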
@@ -834,6 +692,7 @@ class Snowflake(Dialect):
         FUNCTIONS = {
             **parser.Parser.FUNCTIONS,
             "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
+            "APPROX_TOP_K": _build_approx_top_k,
             "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args),
             "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
                 this=seq_get(args, 1), expression=seq_get(args, 0), ensure_variant=False
@@ -845,6 +704,10 @@ class Snowflake(Dialect):
                 step=seq_get(args, 2),
             ),
             "ARRAY_SORT": exp.SortArray.from_arg_list,
+            "BITAND": _build_bitwise(exp.BitwiseAnd, "BITAND"),
+            "BIT_AND": _build_bitwise(exp.BitwiseAnd, "BITAND"),
+            "BITNOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)),
+            "BIT_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)),
             "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
             "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
             "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"),
@@ -865,6 +728,7 @@ class Snowflake(Dialect):
             "BITXOR_AGG": exp.BitwiseXorAgg.from_arg_list,
             "BIT_XOR_AGG": exp.BitwiseXorAgg.from_arg_list,
             "BIT_XORAGG": exp.BitwiseXorAgg.from_arg_list,
+            "BITMAP_OR_AGG": exp.BitmapOrAgg.from_arg_list,
             "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"),
             "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
             "DATE_TRUNC": _date_trunc_to_time,
@@ -878,6 +742,7 @@ class Snowflake(Dialect):
             ),
             "FLATTEN": exp.Explode.from_arg_list,
             "GET": exp.GetExtract.from_arg_list,
+            "GETDATE": exp.CurrentTimestamp.from_arg_list,
             "GET_PATH": lambda args, dialect: exp.JSONExtract(
                 this=seq_get(args, 0),
                 expression=dialect.to_json_path(seq_get(args, 1)),
@@ -906,6 +771,7 @@ class Snowflake(Dialect):
             "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll),
             "REPLACE": build_replace_with_optional_replacement,
             "RLIKE": exp.RegexpLike.from_arg_list,
+            "ROUND": _build_round,
             "SHA1_BINARY": exp.SHA1Digest.from_arg_list,
             "SHA1_HEX": exp.SHA.from_arg_list,
             "SHA2_BINARY": exp.SHA2Digest.from_arg_list,
@@ -922,18 +788,34 @@ class Snowflake(Dialect):
             "TIMESTAMPNTZFROMPARTS": _build_timestamp_from_parts,
             "TIMESTAMP_NTZ_FROM_PARTS": _build_timestamp_from_parts,
             "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True),
+            "TRY_TO_BINARY": lambda args: exp.ToBinary(
+                this=seq_get(args, 0), format=seq_get(args, 1), safe=True
+            ),
+            "TRY_TO_BOOLEAN": lambda args: exp.ToBoolean(this=seq_get(args, 0), safe=True),
             "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True),
+            **dict.fromkeys(
+                ("TRY_TO_DECIMAL", "TRY_TO_NUMBER", "TRY_TO_NUMERIC"), _build_try_to_number
+            ),
+            "TRY_TO_DOUBLE": lambda args: exp.ToDouble(
+                this=seq_get(args, 0), format=seq_get(args, 1), safe=True
+            ),
+            "TRY_TO_FILE": lambda args: exp.ToFile(
+                this=seq_get(args, 0), path=seq_get(args, 1), safe=True
+            ),
             "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True),
             "TRY_TO_TIMESTAMP": _build_datetime(
                 "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True
             ),
             "TO_CHAR": build_timetostr_or_tochar,
             "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE),
-            "TO_NUMBER": lambda args: exp.ToNumber(
-                this=seq_get(args, 0),
-                format=seq_get(args, 1),
-                precision=seq_get(args, 2),
-                scale=seq_get(args, 3),
+            **dict.fromkeys(
+                ("TO_DECIMAL", "TO_NUMBER", "TO_NUMERIC"),
+                lambda args: exp.ToNumber(
+                    this=seq_get(args, 0),
+                    format=seq_get(args, 1),
+                    precision=seq_get(args, 2),
+                    scale=seq_get(args, 3),
+                ),
             ),
             "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME),
             "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP),
@@ -942,11 +824,16 @@ class Snowflake(Dialect):
             "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
             "TO_VARCHAR": build_timetostr_or_tochar,
             "TO_JSON": exp.JSONFormat.from_arg_list,
+            "VECTOR_COSINE_SIMILARITY": exp.CosineDistance.from_arg_list,
+            "VECTOR_INNER_PRODUCT": exp.DotProduct.from_arg_list,
+            "VECTOR_L1_DISTANCE": exp.ManhattanDistance.from_arg_list,
             "VECTOR_L2_DISTANCE": exp.EuclideanDistance.from_arg_list,
             "ZEROIFNULL": _build_if_from_zeroifnull,
             "LIKE": build_like(exp.Like),
             "ILIKE": build_like(exp.ILike),
             "SEARCH": _build_search,
+            "SKEW": exp.Skewness.from_arg_list,
+            "SYSTIMESTAMP": exp.CurrentTimestamp.from_arg_list,
             "WEEKISO": exp.WeekOfYear.from_arg_list,
             "WEEKOFYEAR": exp.Week.from_arg_list,
         }
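
A hedged sketch of the new vector function mappings (not part of the diff): the Snowflake names now land on the generic distance expressions, so they can be inspected or transpiled uniformly.

    import sqlglot
    from sqlglot import exp

    tree = sqlglot.parse_one("SELECT VECTOR_COSINE_SIMILARITY(a, b)", read="snowflake")
    print(type(tree.find(exp.CosineDistance)).__name__)  # expect CosineDistance
    print(tree.sql(dialect="snowflake"))                  # expect the Snowflake name back
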
@@ -1158,29 +1045,7 @@ class Snowflake(Dialect):
             expression = (
                 self._match_set((TokenType.FROM, TokenType.COMMA)) and self._parse_bitwise()
             )
-
-            this = map_date_part(this)
-            name = this.name.upper()
-
-            if name.startswith("EPOCH"):
-                if name == "EPOCH_MILLISECOND":
-                    scale = 10**3
-                elif name == "EPOCH_MICROSECOND":
-                    scale = 10**6
-                elif name == "EPOCH_NANOSECOND":
-                    scale = 10**9
-                else:
-                    scale = None
-
-                ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
-                to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)
-
-                if scale:
-                    to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))
-
-                return to_unix
-
-            return self.expression(exp.Extract, this=this, expression=expression)
+            return self.expression(exp.Extract, this=map_date_part(this), expression=expression)

         def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
             if is_map:
@@ -1316,19 +1181,17 @@ class Snowflake(Dialect):

            return self.expression(
                exp.Show,
-                **{
-                    "terse": terse,
-                    "this": this,
-                    "history": history,
-                    "like": like,
-                    "scope": scope,
-                    "scope_kind": scope_kind,
-                    "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
-                    "limit": self._parse_limit(),
-                    "from": self._parse_string() if self._match(TokenType.FROM) else None,
-                    "privileges": self._match_text_seq("WITH", "PRIVILEGES")
-                    and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)),
-                },
+                terse=terse,
+                this=this,
+                history=history,
+                like=like,
+                scope=scope,
+                scope_kind=scope_kind,
+                starts_with=self._match_text_seq("STARTS", "WITH") and self._parse_string(),
+                limit=self._parse_limit(),
+                from_=self._parse_string() if self._match(TokenType.FROM) else None,
+                privileges=self._match_text_seq("WITH", "PRIVILEGES")
+                and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)),
            )

        def _parse_put(self) -> exp.Put | exp.Command:
@@ -1432,15 +1295,26 @@ class Snowflake(Dialect):
             kwargs: t.Dict[str, t.Any] = {"this": self._parse_table_parts()}

             while self._curr and not self._match(TokenType.R_PAREN, advance=False):
-                if self._match_text_seq("DIMENSIONS"):
-                    kwargs["dimensions"] = self._parse_csv(self._parse_disjunction)
-                if self._match_text_seq("METRICS"):
-                    kwargs["metrics"] = self._parse_csv(self._parse_disjunction)
-                if self._match_text_seq("WHERE"):
+                if self._match_texts(("DIMENSIONS", "METRICS", "FACTS")):
+                    keyword = self._prev.text.lower()
+                    kwargs[keyword] = self._parse_csv(self._parse_disjunction)
+                elif self._match_text_seq("WHERE"):
                     kwargs["where"] = self._parse_expression()
+                else:
+                    self.raise_error("Expecting ) or encountered unexpected keyword")
+                    break

             return self.expression(exp.SemanticView, **kwargs)

+        def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
+            set = super()._parse_set(unset=unset, tag=tag)
+
+            if isinstance(set, exp.Set):
+                for expr in set.expressions:
+                    if isinstance(expr, exp.SetItem):
+                        expr.set("kind", "VARIABLE")
+            return set
+
     class Tokenizer(tokens.Tokenizer):
         STRING_ESCAPES = ["\\", "'"]
         HEX_STRINGS = [("x'", "'"), ("X'", "'")]
@@ -1472,6 +1346,9 @@ class Snowflake(Dialect):
             "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
             "TOP": TokenType.TOP,
             "WAREHOUSE": TokenType.WAREHOUSE,
+            # https://docs.snowflake.com/en/sql-reference/data-types-numeric#float
+            # FLOAT is a synonym for DOUBLE in Snowflake
+            "FLOAT": TokenType.DOUBLE,
         }
         KEYWORDS.pop("/*+")

@@ -1516,6 +1393,7 @@ class Snowflake(Dialect):
             exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
             exp.ArgMax: rename_func("MAX_BY"),
             exp.ArgMin: rename_func("MIN_BY"),
+            exp.Array: transforms.preprocess([transforms.inherit_struct_field_names]),
             exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"),
             exp.ArrayContains: lambda self, e: self.func(
                 "ARRAY_CONTAINS",
@@ -1547,10 +1425,12 @@ class Snowflake(Dialect):
             exp.DayOfWeek: rename_func("DAYOFWEEK"),
             exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"),
             exp.DayOfYear: rename_func("DAYOFYEAR"),
+            exp.DotProduct: rename_func("VECTOR_INNER_PRODUCT"),
             exp.Explode: rename_func("FLATTEN"),
             exp.Extract: lambda self, e: self.func(
                 "DATE_PART", map_date_part(e.this, self.dialect), e.expression
             ),
+            exp.CosineDistance: rename_func("VECTOR_COSINE_SIMILARITY"),
             exp.EuclideanDistance: rename_func("VECTOR_L2_DISTANCE"),
             exp.FileFormatProperty: lambda self,
             e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})",
@@ -1577,11 +1457,31 @@ class Snowflake(Dialect):
             exp.LogicalAnd: rename_func("BOOLAND_AGG"),
             exp.LogicalOr: rename_func("BOOLOR_AGG"),
             exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
+            exp.ManhattanDistance: rename_func("VECTOR_L1_DISTANCE"),
             exp.MakeInterval: no_make_interval_sql,
             exp.Max: max_or_greatest,
             exp.Min: min_or_least,
             exp.ParseJSON: lambda self, e: self.func(
-                "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this
+                f"{'TRY_' if e.args.get('safe') else ''}PARSE_JSON", e.this
+            ),
+            exp.ToBinary: lambda self, e: self.func(
+                f"{'TRY_' if e.args.get('safe') else ''}TO_BINARY", e.this, e.args.get("format")
+            ),
+            exp.ToBoolean: lambda self, e: self.func(
+                f"{'TRY_' if e.args.get('safe') else ''}TO_BOOLEAN", e.this
+            ),
+            exp.ToDouble: lambda self, e: self.func(
+                f"{'TRY_' if e.args.get('safe') else ''}TO_DOUBLE", e.this, e.args.get("format")
+            ),
+            exp.ToFile: lambda self, e: self.func(
+                f"{'TRY_' if e.args.get('safe') else ''}TO_FILE", e.this, e.args.get("path")
+            ),
+            exp.ToNumber: lambda self, e: self.func(
+                f"{'TRY_' if e.args.get('safe') else ''}TO_NUMBER",
+                e.this,
+                e.args.get("format"),
+                e.args.get("precision"),
+                e.args.get("scale"),
             ),
             exp.JSONFormat: rename_func("TO_JSON"),
             exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
@@ -1608,11 +1508,13 @@ class Snowflake(Dialect):
                 ]
             ),
             exp.SHA: rename_func("SHA1"),
+            exp.SHA1Digest: rename_func("SHA1_BINARY"),
             exp.MD5Digest: rename_func("MD5_BINARY"),
             exp.MD5NumberLower64: rename_func("MD5_NUMBER_LOWER64"),
             exp.MD5NumberUpper64: rename_func("MD5_NUMBER_UPPER64"),
             exp.LowerHex: rename_func("TO_CHAR"),
             exp.SortArray: rename_func("ARRAY_SORT"),
+            exp.Skewness: rename_func("SKEW"),
             exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
             exp.StartsWith: rename_func("STARTSWITH"),
             exp.EndsWith: rename_func("ENDSWITH"),
@@ -1641,14 +1543,13 @@ class Snowflake(Dialect):
             exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
             exp.ToArray: rename_func("TO_ARRAY"),
             exp.ToChar: lambda self, e: self.function_fallback_sql(e),
-            exp.ToDouble: rename_func("TO_DOUBLE"),
             exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
             exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
             exp.TsOrDsToDate: lambda self, e: self.func(
-                "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e)
+                f"{'TRY_' if e.args.get('safe') else ''}TO_DATE", e.this, self.format_time(e)
             ),
             exp.TsOrDsToTime: lambda self, e: self.func(
-                "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e)
+                f"{'TRY_' if e.args.get('safe') else ''}TO_TIME", e.this, self.format_time(e)
             ),
             exp.Unhex: rename_func("HEX_DECODE_BINARY"),
             exp.UnixToTime: rename_func("TO_TIMESTAMP"),
@@ -1664,6 +1565,9 @@ class Snowflake(Dialect):
             exp.ArrayConcatAgg: lambda self, e: self.func(
                 "ARRAY_FLATTEN", exp.ArrayAgg(this=e.this)
             ),
+            exp.SHA2Digest: lambda self, e: self.func(
+                "SHA2_BINARY", e.this, e.args.get("length") or exp.Literal.number(256)
+            ),
         }

         SUPPORTED_JSON_PATH_PARTS = {
@@ -1712,6 +1616,15 @@ class Snowflake(Dialect):
             return super().values_sql(expression, values_as_table=values_as_table)

         def datatype_sql(self, expression: exp.DataType) -> str:
+            # Check if this is a FLOAT type nested inside a VECTOR type
+            # VECTOR only accepts FLOAT (not DOUBLE), INT, and STRING as element types
+            # https://docs.snowflake.com/en/sql-reference/data-types-vector
+            if expression.is_type(exp.DataType.Type.DOUBLE):
+                parent = expression.parent
+                if isinstance(parent, exp.DataType) and parent.is_type(exp.DataType.Type.VECTOR):
+                    # Preserve FLOAT for VECTOR types instead of mapping to synonym DOUBLE
+                    return "FLOAT"
+
             expressions = expression.expressions
             if expressions and expression.is_type(*exp.DataType.STRUCT_TYPES):
                 for field_type in expressions:
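
A hedged sketch combining the FLOAT keyword change in the Tokenizer with the datatype_sql override above (not part of the diff): bare FLOAT normalizes to DOUBLE, while a VECTOR element type stays FLOAT.

    import sqlglot

    ddl = "CREATE TABLE t (a FLOAT, b VECTOR(FLOAT, 256))"
    # Column a is expected to render as DOUBLE (FLOAT is a synonym in Snowflake),
    # while the VECTOR element type is expected to be preserved as FLOAT.
    print(sqlglot.transpile(ddl, read="snowflake", write="snowflake")[0])
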
@@ -1841,7 +1754,7 @@ class Snowflake(Dialect):

             limit = self.sql(expression, "limit")

-            from_ = self.sql(expression, "from")
+            from_ = self.sql(expression, "from_")
             if from_:
                 from_ = f" FROM {from_}"

@@ -1917,9 +1830,10 @@ class Snowflake(Dialect):
             return f"SET{exprs}{file_format}{copy_options}{tag}"

         def strtotime_sql(self, expression: exp.StrToTime):
-            safe_prefix = "TRY_" if expression.args.get("safe") else ""
             return self.func(
-                f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression)
+                f"{'TRY_' if expression.args.get('safe') else ''}TO_TIMESTAMP",
+                expression.this,
+                self.format_time(expression),
             )

         def timestampsub_sql(self, expression: exp.TimestampSub):
@@ -2077,3 +1991,17 @@ class Snowflake(Dialect):
                 expression.set("part_index", exp.Literal.number(1))

             return rename_func("SPLIT_PART")(self, expression)
+
+        def uniform_sql(self, expression: exp.Uniform) -> str:
+            gen = expression.args.get("gen")
+            seed = expression.args.get("seed")
+
+            # From Databricks UNIFORM(min, max, seed) -> Wrap gen in RANDOM(seed)
+            if seed:
+                gen = exp.Rand(this=seed)
+
+            # No gen argument (from Databricks 2-arg UNIFORM(min, max)) -> Add RANDOM()
+            if not gen:
+                gen = exp.Rand()
+
+            return self.func("UNIFORM", expression.this, expression.expression, gen)
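
A hedged sketch of uniform_sql (not part of the diff), assuming the Databricks dialect in this release parses UNIFORM(min, max [, seed]) into exp.Uniform: Snowflake's UNIFORM requires a generator expression, so one is wrapped in via RANDOM([seed]).

    import sqlglot

    # Two-argument form: expect a bare RANDOM() generator to be added.
    print(sqlglot.transpile("SELECT UNIFORM(1, 10)", read="databricks", write="snowflake")[0])
    # Three-argument form: expect the seed to be wrapped as RANDOM(42).
    print(sqlglot.transpile("SELECT UNIFORM(1, 10, 42)", read="databricks", write="snowflake")[0])
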