PyPI - sqlglot - Versions diffs - 27.10.0__py3-none-any.whl → 27.12.0__py3-none-any.whl - Mend

sqlglot 27.10.0__py3-none-any.whl → 27.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

sqlglot/_version.py +2 -2
sqlglot/dialects/bigquery.py +136 -15
sqlglot/dialects/clickhouse.py +4 -0
sqlglot/dialects/databricks.py +1 -0
sqlglot/dialects/doris.py +2 -0
sqlglot/dialects/dremio.py +29 -4
sqlglot/dialects/duckdb.py +4 -1
sqlglot/dialects/hive.py +11 -5
sqlglot/dialects/oracle.py +2 -0
sqlglot/dialects/singlestore.py +216 -1
sqlglot/dialects/snowflake.py +8 -4
sqlglot/dialects/spark.py +6 -0
sqlglot/dialects/spark2.py +2 -0
sqlglot/dialects/sqlite.py +1 -0
sqlglot/expressions.py +200 -2
sqlglot/generator.py +26 -4
sqlglot/optimizer/qualify_columns.py +18 -3
sqlglot/parser.py +34 -10
sqlglot/tokens.py +5 -1
{sqlglot-27.10.0.dist-info → sqlglot-27.12.0.dist-info}/METADATA +2 -2
{sqlglot-27.10.0.dist-info → sqlglot-27.12.0.dist-info}/RECORD +24 -24
{sqlglot-27.10.0.dist-info → sqlglot-27.12.0.dist-info}/WHEEL +0 -0
{sqlglot-27.10.0.dist-info → sqlglot-27.12.0.dist-info}/licenses/LICENSE +0 -0
{sqlglot-27.10.0.dist-info → sqlglot-27.12.0.dist-info}/top_level.txt +0 -0

sqlglot/_version.py CHANGED Viewed

@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
-__version__ = version = '27.10.0'
-__version_tuple__ = version_tuple = (27, 10, 0)
+__version__ = version = '27.12.0'
+__version_tuple__ = version_tuple = (27, 12, 0)
 __commit_id__ = commit_id = None

sqlglot/dialects/bigquery.py CHANGED Viewed

@@ -4,6 +4,9 @@ import logging
 import re
 import typing as t
+from sqlglot.optimizer.annotate_types import TypeAnnotator
 from sqlglot import exp, generator, jsonpath, parser, tokens, transforms
 from sqlglot._typing import E
 from sqlglot.dialects.dialect import (
@@ -172,6 +175,18 @@ def _build_to_hex(args: t.List) -> exp.Hex | exp.MD5:
     return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.LowerHex(this=arg)
+def _build_json_strip_nulls(args: t.List) -> exp.JSONStripNulls:
+    expression = exp.JSONStripNulls(this=seq_get(args, 0))
+    for arg in args[1:]:
+        if isinstance(arg, exp.Kwarg):
+            expression.set(arg.this.name.lower(), arg)
+        else:
+            expression.set("expression", arg)
+    return expression
 def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
     return self.sql(
         exp.Exists(
@@ -295,6 +310,23 @@ def _annotate_math_functions(self: TypeAnnotator, expression: E) -> E:
     return expression
+def _annotate_by_args_with_coerce(self: TypeAnnotator, expression: E) -> E:
+    """
+    +------------+------------+------------+-------------+---------+
+    | INPUT      | INT64      | NUMERIC    | BIGNUMERIC  | FLOAT64 |
+    +------------+------------+------------+-------------+---------+
+    | INT64      | INT64      | NUMERIC    | BIGNUMERIC  | FLOAT64 |
+    | NUMERIC    | NUMERIC    | NUMERIC    | BIGNUMERIC  | FLOAT64 |
+    | BIGNUMERIC | BIGNUMERIC | BIGNUMERIC | BIGNUMERIC  | FLOAT64 |
+    | FLOAT64    | FLOAT64    | FLOAT64    | FLOAT64     | FLOAT64 |
+    +------------+------------+------------+-------------+---------+
+    """
+    self._annotate_args(expression)
+    self._set_type(expression, self._maybe_coerce(expression.this.type, expression.expression.type))
+    return expression
 def _annotate_by_args_approx_top(self: TypeAnnotator, expression: exp.ApproxTopK) -> exp.ApproxTopK:
     self._annotate_args(expression)
@@ -453,6 +485,14 @@ class BigQuery(Dialect):
     # All set operations require either a DISTINCT or ALL specifier
     SET_OP_DISTINCT_BY_DEFAULT = dict.fromkeys((exp.Except, exp.Intersect, exp.Union), None)
+    # https://cloud.google.com/bigquery/docs/reference/standard-sql/navigation_functions#percentile_cont
+    COERCES_TO = {
+        **TypeAnnotator.COERCES_TO,
+        exp.DataType.Type.BIGDECIMAL: {exp.DataType.Type.DOUBLE},
+    }
+    COERCES_TO[exp.DataType.Type.DECIMAL] |= {exp.DataType.Type.BIGDECIMAL}
+    COERCES_TO[exp.DataType.Type.BIGINT] |= {exp.DataType.Type.BIGDECIMAL}
     # BigQuery maps Type.TIMESTAMP to DATETIME, so we need to amend the inferred types
     TYPE_TO_EXPRESSIONS = {
         **Dialect.TYPE_TO_EXPRESSIONS,
@@ -474,23 +514,47 @@ class BigQuery(Dialect):
         **{
             expr_type: lambda self, e: self._annotate_by_args(e, "this")
             for expr_type in (
+                exp.Abs,
+                exp.ArgMax,
+                exp.ArgMin,
+                exp.DateTrunc,
+                exp.DatetimeTrunc,
+                exp.FirstValue,
+                exp.GroupConcat,
+                exp.IgnoreNulls,
+                exp.JSONExtract,
+                exp.Lead,
                 exp.Left,
-                exp.Right,
                 exp.Lower,
-                exp.Upper,
+                exp.NthValue,
                 exp.Pad,
-                exp.Trim,
+                exp.PercentileDisc,
                 exp.RegexpExtract,
                 exp.RegexpReplace,
                 exp.Repeat,
+                exp.Replace,
+                exp.RespectNulls,
+                exp.Reverse,
+                exp.Right,
+                exp.SafeNegate,
+                exp.Sign,
                 exp.Substring,
+                exp.TimestampTrunc,
+                exp.Translate,
+                exp.Trim,
+                exp.Upper,
             )
         },
+        exp.Acos: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
+        exp.Acosh: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
+        exp.Asin: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
+        exp.Asinh: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
+        exp.Atan: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
+        exp.Atanh: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
+        exp.Atan2: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
         exp.ApproxTopSum: lambda self, e: _annotate_by_args_approx_top(self, e),
         exp.ApproxTopK: lambda self, e: _annotate_by_args_approx_top(self, e),
         exp.ApproxQuantiles: lambda self, e: self._annotate_by_args(e, "this", array=True),
-        exp.ArgMax: lambda self, e: self._annotate_by_args(e, "this"),
-        exp.ArgMin: lambda self, e: self._annotate_by_args(e, "this"),
         exp.Array: _annotate_array,
         exp.ArrayConcat: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
         exp.Ascii: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
@@ -500,6 +564,7 @@ class BigQuery(Dialect):
         exp.BitwiseCountAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
         exp.ByteLength: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
         exp.ByteString: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
+        exp.Cbrt: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
         exp.CodePointsToBytes: lambda self, e: self._annotate_with_type(
             e, exp.DataType.Type.BINARY
         ),
@@ -509,59 +574,99 @@ class BigQuery(Dialect):
         exp.Concat: _annotate_concat,
         exp.Contains: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BOOLEAN),
         exp.Corr: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
+        exp.Cot: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
+        exp.CosineDistance: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
+        exp.Coth: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
         exp.CovarPop: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
         exp.CovarSamp: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
+        exp.Csc: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
+        exp.Csch: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
+        exp.CumeDist: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
         exp.DateFromUnixDate: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DATE),
-        exp.DateTrunc: lambda self, e: self._annotate_by_args(e, "this"),
+        exp.DenseRank: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
+        exp.EuclideanDistance: lambda self, e: self._annotate_with_type(
+            e, exp.DataType.Type.DOUBLE
+        ),
         exp.FarmFingerprint: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
         exp.Unhex: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
         exp.Float64: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
+        exp.Format: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
         exp.GenerateTimestampArray: lambda self, e: self._annotate_with_type(
             e, exp.DataType.build("ARRAY<TIMESTAMP>", dialect="bigquery")
         ),
         exp.Grouping: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
+        exp.IsInf: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BOOLEAN),
+        exp.IsNan: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BOOLEAN),
         exp.JSONArray: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON),
+        exp.JSONArrayAppend: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON),
+        exp.JSONArrayInsert: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON),
         exp.JSONBool: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BOOLEAN),
         exp.JSONExtractScalar: lambda self, e: self._annotate_with_type(
             e, exp.DataType.Type.VARCHAR
         ),
-        exp.JSONValueArray: lambda self, e: self._annotate_with_type(
-            e, exp.DataType.build("ARRAY<VARCHAR>")
+        exp.JSONExtractArray: lambda self, e: self._annotate_by_args(e, "this", array=True),
+        exp.JSONFormat: lambda self, e: self._annotate_with_type(
+            e, exp.DataType.Type.JSON if e.args.get("to_json") else exp.DataType.Type.VARCHAR
+        ),
+        exp.JSONKeysAtDepth: lambda self, e: self._annotate_with_type(
+            e, exp.DataType.build("ARRAY<VARCHAR>", dialect="bigquery")
         ),
+        exp.JSONObject: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON),
+        exp.JSONRemove: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON),
+        exp.JSONSet: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON),
+        exp.JSONStripNulls: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON),
         exp.JSONType: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
+        exp.JSONValueArray: lambda self, e: self._annotate_with_type(
+            e, exp.DataType.build("ARRAY<VARCHAR>", dialect="bigquery")
+        ),
         exp.Lag: lambda self, e: self._annotate_by_args(e, "this", "default"),
         exp.LowerHex: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
+        exp.LaxBool: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BOOLEAN),
+        exp.LaxFloat64: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
+        exp.LaxInt64: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
+        exp.LaxString: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
         exp.MD5Digest: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
         exp.Normalize: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
+        exp.Ntile: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
         exp.ParseTime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
         exp.ParseDatetime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DATETIME),
         exp.ParseBignumeric: lambda self, e: self._annotate_with_type(
             e, exp.DataType.Type.BIGDECIMAL
         ),
         exp.ParseNumeric: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DECIMAL),
+        exp.PercentileCont: lambda self, e: _annotate_by_args_with_coerce(self, e),
+        exp.PercentRank: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
+        exp.Rank: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
+        exp.RangeBucket: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
         exp.RegexpExtractAll: lambda self, e: self._annotate_by_args(e, "this", array=True),
-        exp.Replace: lambda self, e: self._annotate_by_args(e, "this"),
-        exp.Reverse: lambda self, e: self._annotate_by_args(e, "this"),
+        exp.RegexpInstr: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
+        exp.RowNumber: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
+        exp.Rand: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
         exp.SafeConvertBytesToString: lambda self, e: self._annotate_with_type(
             e, exp.DataType.Type.VARCHAR
         ),
+        exp.SafeAdd: lambda self, e: _annotate_by_args_with_coerce(self, e),
+        exp.SafeMultiply: lambda self, e: _annotate_by_args_with_coerce(self, e),
+        exp.SafeSubtract: lambda self, e: _annotate_by_args_with_coerce(self, e),
+        exp.Sec: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
+        exp.Sech: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
         exp.Soundex: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
         exp.SHA: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
         exp.SHA2: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
-        exp.Sign: lambda self, e: self._annotate_by_args(e, "this"),
+        exp.Sin: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
+        exp.Sinh: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
         exp.Split: lambda self, e: self._annotate_by_args(e, "this", array=True),
         exp.TimestampFromParts: lambda self, e: self._annotate_with_type(
             e, exp.DataType.Type.DATETIME
         ),
-        exp.TimestampTrunc: lambda self, e: self._annotate_by_args(e, "this"),
         exp.TimeFromParts: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
         exp.TimeTrunc: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
         exp.ToCodePoints: lambda self, e: self._annotate_with_type(
             e, exp.DataType.build("ARRAY<BIGINT>", dialect="bigquery")
         ),
         exp.TsOrDsToTime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
-        exp.Translate: lambda self, e: self._annotate_by_args(e, "this"),
         exp.Unicode: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
+        exp.Uuid: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
     }
     def normalize_identifier(self, expression: E) -> E:
@@ -682,8 +787,11 @@ class BigQuery(Dialect):
             "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
             "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar),
             "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray),
+            "JSON_EXTRACT_STRING_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray),
+            "JSON_KEYS": exp.JSONKeysAtDepth.from_arg_list,
             "JSON_QUERY": parser.build_extract_json_with_path(exp.JSONExtract),
             "JSON_QUERY_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray),
+            "JSON_STRIP_NULLS": _build_json_strip_nulls,
             "JSON_VALUE": _build_extract_json_with_default_path(exp.JSONExtractScalar),
             "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray),
             "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
@@ -730,6 +838,9 @@ class BigQuery(Dialect):
                 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
             ),
             "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
+            "TO_JSON": lambda args: exp.JSONFormat(
+                this=seq_get(args, 0), options=seq_get(args, 1), to_json=True
+            ),
             "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
             "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime),
             "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp),
@@ -798,9 +909,13 @@ class BigQuery(Dialect):
             "SAFE_ORDINAL": (1, True),
         }
-        def _parse_for_in(self) -> exp.ForIn:
+        def _parse_for_in(self) -> t.Union[exp.ForIn, exp.Command]:
+            index = self._index
             this = self._parse_range()
             self._match_text_seq("DO")
+            if self._match(TokenType.COMMAND):
+                self._retreat(index)
+                return self._parse_as_command(self._prev)
             return self.expression(exp.ForIn, this=this, expression=self._parse_statement())
         def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
@@ -1196,7 +1311,13 @@ class BigQuery(Dialect):
             exp.JSONExtract: _json_extract_sql,
             exp.JSONExtractArray: _json_extract_sql,
             exp.JSONExtractScalar: _json_extract_sql,
-            exp.JSONFormat: rename_func("TO_JSON_STRING"),
+            exp.JSONFormat: lambda self, e: self.func(
+                "TO_JSON" if e.args.get("to_json") else "TO_JSON_STRING",
+                e.this,
+                e.args.get("options"),
+            ),
+            exp.JSONKeysAtDepth: rename_func("JSON_KEYS"),
+            exp.JSONValueArray: rename_func("JSON_VALUE_ARRAY"),
             exp.Levenshtein: _levenshtein_sql,
             exp.Max: max_or_greatest,
             exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),

sqlglot/dialects/clickhouse.py CHANGED Viewed

@@ -312,6 +312,7 @@ class ClickHouse(Dialect):
             "ARRAYREVERSE": exp.ArrayReverse.from_arg_list,
             "ARRAYSLICE": exp.ArraySlice.from_arg_list,
             "COUNTIF": _build_count_if,
+            "COSINEDISTANCE": exp.CosineDistance.from_arg_list,
             "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None),
             "DATEADD": build_date_delta(exp.DateAdd, default_unit=None),
             "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None, supports_timezone=True),
@@ -324,6 +325,7 @@ class ClickHouse(Dialect):
                 exp.JSONExtractScalar, zero_based_indexing=False
             ),
             "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
+            "L2Distance": exp.EuclideanDistance.from_arg_list,
             "MAP": parser.build_var_map,
             "MATCH": exp.RegexpLike.from_arg_list,
             "PARSEDATETIME": _build_datetime_format(exp.ParseDatetime),
@@ -1094,6 +1096,7 @@ class ClickHouse(Dialect):
             exp.Array: inline_array_sql,
             exp.CastToStrType: rename_func("CAST"),
             exp.CountIf: rename_func("countIf"),
+            exp.CosineDistance: rename_func("cosineDistance"),
             exp.CompressColumnConstraint: lambda self,
             e: f"CODEC({self.expressions(e, key='this', flat=True)})",
             exp.ComputedColumnConstraint: lambda self,
@@ -1123,6 +1126,7 @@ class ClickHouse(Dialect):
             exp.Rand: rename_func("randCanonical"),
             exp.StartsWith: rename_func("startsWith"),
             exp.EndsWith: rename_func("endsWith"),
+            exp.EuclideanDistance: rename_func("L2Distance"),
             exp.StrPosition: lambda self, e: strposition_sql(
                 self,
                 e,

sqlglot/dialects/databricks.py CHANGED Viewed

@@ -106,6 +106,7 @@ class Databricks(Spark):
             ),
         }
+        TRANSFORMS.pop(exp.RegexpLike)
         TRANSFORMS.pop(exp.TryCast)
         TYPE_MAPPING = {

sqlglot/dialects/doris.py CHANGED Viewed

@@ -50,6 +50,7 @@ class Doris(MySQL):
             **MySQL.Parser.FUNCTIONS,
             "COLLECT_SET": exp.ArrayUniqueAgg.from_arg_list,
             "DATE_TRUNC": _build_date_trunc,
+            "L2_DISTANCE": exp.EuclideanDistance.from_arg_list,
             "MONTHS_ADD": exp.AddMonths.from_arg_list,
             "REGEXP": exp.RegexpLike.from_arg_list,
             "TO_DATE": exp.TsOrDsToDate.from_arg_list,
@@ -210,6 +211,7 @@ class Doris(MySQL):
             exp.CurrentDate: lambda self, _: self.func("CURRENT_DATE"),
             exp.CurrentTimestamp: lambda self, _: self.func("NOW"),
             exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, unit_to_str(e)),
+            exp.EuclideanDistance: rename_func("L2_DISTANCE"),
             exp.GroupConcat: lambda self, e: self.func(
                 "GROUP_CONCAT", e.this, e.args.get("separator") or exp.Literal.string(",")
             ),

sqlglot/dialects/dremio.py CHANGED Viewed

@@ -74,6 +74,27 @@ def build_date_delta_with_cast_interval(
     return _builder
+def datetype_handler(args: t.List[exp.Expression], dialect: DialectType) -> exp.Expression:
+    year, month, day = args
+    if all(isinstance(arg, exp.Literal) and arg.is_int for arg in (year, month, day)):
+        date_str = f"{int(year.this):04d}-{int(month.this):02d}-{int(day.this):02d}"
+        return exp.Date(this=exp.Literal.string(date_str))
+    return exp.Cast(
+        this=exp.Concat(
+            expressions=[
+                year,
+                exp.Literal.string("-"),
+                month,
+                exp.Literal.string("-"),
+                day,
+            ]
+        ),
+        to=exp.DataType.build("DATE"),
+    )
 class Dremio(Dialect):
     SUPPORTS_USER_DEFINED_TYPES = False
     CONCAT_COALESCE = True
@@ -145,12 +166,16 @@ class Dremio(Dialect):
         FUNCTIONS = {
             **parser.Parser.FUNCTIONS,
-            "TO_CHAR": to_char_is_numeric_handler,
-            "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "dremio"),
-            "TO_DATE": build_formatted_time(exp.TsOrDsToDate, "dremio"),
+            "ARRAY_GENERATE_RANGE": exp.GenerateSeries.from_arg_list,
             "DATE_ADD": build_date_delta_with_cast_interval(exp.DateAdd),
+            "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "dremio"),
             "DATE_SUB": build_date_delta_with_cast_interval(exp.DateSub),
-            "ARRAY_GENERATE_RANGE": exp.GenerateSeries.from_arg_list,
+            "REGEXP_MATCHES": exp.RegexpLike.from_arg_list,
+            "REPEATSTR": exp.Repeat.from_arg_list,
+            "TO_CHAR": to_char_is_numeric_handler,
+            "TO_DATE": build_formatted_time(exp.TsOrDsToDate, "dremio"),
+            "DATE_PART": exp.Extract.from_arg_list,
+            "DATETYPE": datetype_handler,
         }
         def _parse_current_date_utc(self) -> exp.Cast:

sqlglot/dialects/duckdb.py CHANGED Viewed

@@ -304,7 +304,6 @@ class DuckDB(Dialect):
             "CHAR": TokenType.TEXT,
             "DATETIME": TokenType.TIMESTAMPNTZ,
             "DETACH": TokenType.DETACH,
-            "EXCLUDE": TokenType.EXCEPT,
             "LOGICAL": TokenType.BOOLEAN,
             "ONLY": TokenType.ONLY,
             "PIVOT_WIDER": TokenType.PIVOT,
@@ -386,6 +385,8 @@ class DuckDB(Dialect):
             "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract),
             "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar),
             "LIST_CONTAINS": exp.ArrayContains.from_arg_list,
+            "LIST_COSINE_DISTANCE": exp.CosineDistance.from_arg_list,
+            "LIST_DISTANCE": exp.EuclideanDistance.from_arg_list,
             "LIST_FILTER": exp.ArrayFilter.from_arg_list,
             "LIST_HAS": exp.ArrayContains.from_arg_list,
             "LIST_HAS_ANY": exp.ArrayOverlaps.from_arg_list,
@@ -650,6 +651,7 @@ class DuckDB(Dialect):
             ),
             exp.BitwiseXor: rename_func("XOR"),
             exp.CommentColumnConstraint: no_comment_column_constraint_sql,
+            exp.CosineDistance: rename_func("LIST_COSINE_DISTANCE"),
             exp.CurrentDate: lambda *_: "CURRENT_DATE",
             exp.CurrentTime: lambda *_: "CURRENT_TIME",
             exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
@@ -673,6 +675,7 @@ class DuckDB(Dialect):
             exp.DiToDate: lambda self,
             e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)",
             exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
+            exp.EuclideanDistance: rename_func("LIST_DISTANCE"),
             exp.GenerateDateArray: _generate_datetime_array_sql,
             exp.GenerateTimestampArray: _generate_datetime_array_sql,
             exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, within_group=False),

sqlglot/dialects/hive.py CHANGED Viewed

@@ -194,6 +194,16 @@ def _build_to_date(args: t.List) -> exp.TsOrDsToDate:
     return expr
+def _build_date_add(args: t.List) -> exp.TsOrDsAdd:
+    expression = seq_get(args, 1)
+    if expression:
+        expression = expression * -1
+    return exp.TsOrDsAdd(
+        this=seq_get(args, 0), expression=expression, unit=exp.Literal.string("DAY")
+    )
 class Hive(Dialect):
     ALIAS_POST_TABLESAMPLE = True
     IDENTIFIERS_CAN_START_WITH_DIGIT = True
@@ -314,11 +324,7 @@ class Hive(Dialect):
                     seq_get(args, 1),
                 ]
             ),
-            "DATE_SUB": lambda args: exp.TsOrDsAdd(
-                this=seq_get(args, 0),
-                expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)),
-                unit=exp.Literal.string("DAY"),
-            ),
+            "DATE_SUB": _build_date_add,
             "DATEDIFF": lambda args: exp.DateDiff(
                 this=exp.TsOrDsToDate(this=seq_get(args, 0)),
                 expression=exp.TsOrDsToDate(this=seq_get(args, 1)),

sqlglot/dialects/oracle.py CHANGED Viewed

@@ -107,6 +107,7 @@ class Oracle(Dialect):
         FUNCTIONS = {
             **parser.Parser.FUNCTIONS,
             "CONVERT": exp.ConvertToCharset.from_arg_list,
+            "L2_DISTANCE": exp.EuclideanDistance.from_arg_list,
             "NVL": lambda args: build_coalesce(args, is_nvl=True),
             "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
             "TO_CHAR": build_timetostr_or_tochar,
@@ -305,6 +306,7 @@ class Oracle(Dialect):
                 "TO_DATE", e.this, exp.Literal.string("YYYY-MM-DD")
             ),
             exp.DateTrunc: lambda self, e: self.func("TRUNC", e.this, e.unit),
+            exp.EuclideanDistance: rename_func("L2_DISTANCE"),
             exp.Group: transforms.preprocess([transforms.unalias_group]),
             exp.ILike: no_ilike_sql,
             exp.LogicalOr: rename_func("MAX"),

sqlglot 27.10.0__py3-none-any.whl → 27.12.0__py3-none-any.whl

sqlglot 27.10.0__py3-none-any.whl → 27.12.0__py3-none-any.whl