sqlglot 28.4.1__py3-none-any.whl → 28.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. sqlglot/_version.py +2 -2
  2. sqlglot/dialects/bigquery.py +20 -23
  3. sqlglot/dialects/clickhouse.py +2 -0
  4. sqlglot/dialects/dialect.py +355 -18
  5. sqlglot/dialects/doris.py +38 -90
  6. sqlglot/dialects/druid.py +1 -0
  7. sqlglot/dialects/duckdb.py +1739 -163
  8. sqlglot/dialects/exasol.py +17 -1
  9. sqlglot/dialects/hive.py +27 -2
  10. sqlglot/dialects/mysql.py +103 -11
  11. sqlglot/dialects/oracle.py +38 -1
  12. sqlglot/dialects/postgres.py +142 -33
  13. sqlglot/dialects/presto.py +6 -2
  14. sqlglot/dialects/redshift.py +7 -1
  15. sqlglot/dialects/singlestore.py +13 -3
  16. sqlglot/dialects/snowflake.py +271 -21
  17. sqlglot/dialects/spark.py +25 -0
  18. sqlglot/dialects/spark2.py +4 -3
  19. sqlglot/dialects/starrocks.py +152 -17
  20. sqlglot/dialects/trino.py +1 -0
  21. sqlglot/dialects/tsql.py +5 -0
  22. sqlglot/diff.py +1 -1
  23. sqlglot/expressions.py +239 -47
  24. sqlglot/generator.py +173 -44
  25. sqlglot/optimizer/annotate_types.py +129 -60
  26. sqlglot/optimizer/merge_subqueries.py +13 -2
  27. sqlglot/optimizer/qualify_columns.py +7 -0
  28. sqlglot/optimizer/resolver.py +19 -0
  29. sqlglot/optimizer/scope.py +12 -0
  30. sqlglot/optimizer/unnest_subqueries.py +7 -0
  31. sqlglot/parser.py +251 -58
  32. sqlglot/schema.py +186 -14
  33. sqlglot/tokens.py +36 -6
  34. sqlglot/transforms.py +6 -5
  35. sqlglot/typing/__init__.py +29 -10
  36. sqlglot/typing/bigquery.py +5 -10
  37. sqlglot/typing/duckdb.py +39 -0
  38. sqlglot/typing/hive.py +50 -1
  39. sqlglot/typing/mysql.py +32 -0
  40. sqlglot/typing/presto.py +0 -1
  41. sqlglot/typing/snowflake.py +80 -17
  42. sqlglot/typing/spark.py +29 -0
  43. sqlglot/typing/spark2.py +9 -1
  44. sqlglot/typing/tsql.py +21 -0
  45. {sqlglot-28.4.1.dist-info → sqlglot-28.8.0.dist-info}/METADATA +47 -2
  46. sqlglot-28.8.0.dist-info/RECORD +95 -0
  47. {sqlglot-28.4.1.dist-info → sqlglot-28.8.0.dist-info}/WHEEL +1 -1
  48. sqlglot-28.4.1.dist-info/RECORD +0 -92
  49. {sqlglot-28.4.1.dist-info → sqlglot-28.8.0.dist-info}/licenses/LICENSE +0 -0
  50. {sqlglot-28.4.1.dist-info → sqlglot-28.8.0.dist-info}/top_level.txt +0 -0
sqlglot/schema.py CHANGED
@@ -111,6 +111,25 @@ class Schema(abc.ABC):
         name = column if isinstance(column, str) else column.name
         return name in self.column_names(table, dialect=dialect, normalize=normalize)

+    def get_udf_type(
+        self,
+        udf: exp.Anonymous | str,
+        dialect: DialectType = None,
+        normalize: t.Optional[bool] = None,
+    ) -> exp.DataType:
+        """
+        Get the return type of a UDF.
+
+        Args:
+            udf: the UDF expression or string.
+            dialect: the SQL dialect for parsing string arguments.
+            normalize: whether to normalize identifiers.
+
+        Returns:
+            The return type as a DataType, or UNKNOWN if not found.
+        """
+        return exp.DataType.build("unknown")
+
     @property
     @abc.abstractmethod
     def supported_table_args(self) -> t.Tuple[str, ...]:
@@ -128,11 +147,18 @@ class AbstractMappingSchema:
     def __init__(
         self,
         mapping: t.Optional[t.Dict] = None,
+        udf_mapping: t.Optional[t.Dict] = None,
     ) -> None:
         self.mapping = mapping or {}
         self.mapping_trie = new_trie(
             tuple(reversed(t)) for t in flatten_schema(self.mapping, depth=self.depth())
         )
+
+        self.udf_mapping = udf_mapping or {}
+        self.udf_trie = new_trie(
+            tuple(reversed(t)) for t in flatten_schema(self.udf_mapping, depth=self.udf_depth())
+        )
+
         self._supported_table_args: t.Tuple[str, ...] = tuple()

     @property
@@ -142,6 +168,9 @@ class AbstractMappingSchema:
     def depth(self) -> int:
         return dict_depth(self.mapping)

+    def udf_depth(self) -> int:
+        return dict_depth(self.udf_mapping)
+
     @property
     def supported_table_args(self) -> t.Tuple[str, ...]:
         if not self._supported_table_args and self.mapping:
@@ -157,7 +186,39 @@ class AbstractMappingSchema:
         return self._supported_table_args

     def table_parts(self, table: exp.Table) -> t.List[str]:
-        return [part.name for part in reversed(table.parts)]
+        return [p.name for p in reversed(table.parts)]
+
+    def udf_parts(self, udf: exp.Anonymous) -> t.List[str]:
+        # a.b.c(...) is represented as Dot(Dot(a, b), Anonymous(c, ...))
+        parent = udf.parent
+        parts = [p.name for p in parent.flatten()] if isinstance(parent, exp.Dot) else [udf.name]
+        return list(reversed(parts))[0 : self.udf_depth()]
+
+    def _find_in_trie(
+        self,
+        parts: t.List[str],
+        trie: t.Dict,
+        raise_on_missing: bool,
+    ) -> t.Optional[t.List[str]]:
+        value, trie = in_trie(trie, parts)
+
+        if value == TrieResult.FAILED:
+            return None
+
+        if value == TrieResult.PREFIX:
+            possibilities = flatten_schema(trie)
+
+            if len(possibilities) == 1:
+                parts.extend(possibilities[0])
+            else:
+                if raise_on_missing:
+                    joined_parts = ".".join(parts)
+                    message = ", ".join(".".join(p) for p in possibilities)
+                    raise SchemaError(f"Ambiguous mapping for {joined_parts}: {message}.")
+
+                return None
+
+        return parts

     def find(
         self, table: exp.Table, raise_on_missing: bool = True, ensure_data_types: bool = False
@@ -174,23 +235,35 @@ class AbstractMappingSchema:
             The schema of the target table.
         """
         parts = self.table_parts(table)[0 : len(self.supported_table_args)]
-        value, trie = in_trie(self.mapping_trie, parts)
+        resolved_parts = self._find_in_trie(parts, self.mapping_trie, raise_on_missing)

-        if value == TrieResult.FAILED:
+        if resolved_parts is None:
             return None

-        if value == TrieResult.PREFIX:
-            possibilities = flatten_schema(trie)
+        return self.nested_get(resolved_parts, raise_on_missing=raise_on_missing)

-            if len(possibilities) == 1:
-                parts.extend(possibilities[0])
-            else:
-                message = ", ".join(".".join(parts) for parts in possibilities)
-                if raise_on_missing:
-                    raise SchemaError(f"Ambiguous mapping for {table}: {message}.")
-                return None
+    def find_udf(self, udf: exp.Anonymous, raise_on_missing: bool = False) -> t.Optional[t.Any]:
+        """
+        Returns the return type of a given UDF.
+
+        Args:
+            udf: the target UDF expression.
+            raise_on_missing: whether to raise if the UDF is not found.
+
+        Returns:
+            The return type of the UDF, or None if not found.
+        """
+        parts = self.udf_parts(udf)
+        resolved_parts = self._find_in_trie(parts, self.udf_trie, raise_on_missing)

-        return self.nested_get(parts, raise_on_missing=raise_on_missing)
+        if resolved_parts is None:
+            return None
+
+        return nested_get(
+            self.udf_mapping,
+            *zip(resolved_parts, reversed(resolved_parts)),
+            raise_on_missing=raise_on_missing,
+        )

     def nested_get(
         self, parts: t.Sequence[str], d: t.Optional[t.Dict] = None, raise_on_missing=True
@@ -227,6 +300,7 @@ class MappingSchema(AbstractMappingSchema, Schema):
         visible: t.Optional[t.Dict] = None,
         dialect: DialectType = None,
         normalize: bool = True,
+        udf_mapping: t.Optional[t.Dict] = None,
     ) -> None:
         self.visible = {} if visible is None else visible
         self.normalize = normalize
@@ -234,8 +308,12 @@ class MappingSchema(AbstractMappingSchema, Schema):
         self._type_mapping_cache: t.Dict[str, exp.DataType] = {}
         self._depth = 0
         schema = {} if schema is None else schema
+        udf_mapping = {} if udf_mapping is None else udf_mapping

-        super().__init__(self._normalize(schema) if self.normalize else schema)
+        super().__init__(
+            self._normalize(schema) if self.normalize else schema,
+            self._normalize_udfs(udf_mapping) if self.normalize else udf_mapping,
+        )

     @property
     def dialect(self) -> Dialect:
@@ -249,6 +327,7 @@ class MappingSchema(AbstractMappingSchema, Schema):
             visible=mapping_schema.visible,
             dialect=mapping_schema.dialect,
             normalize=mapping_schema.normalize,
+            udf_mapping=mapping_schema.udf_mapping,
         )

     def find(
@@ -272,6 +351,7 @@ class MappingSchema(AbstractMappingSchema, Schema):
                 "visible": self.visible.copy(),
                 "dialect": self.dialect,
                 "normalize": self.normalize,
+                "udf_mapping": self.udf_mapping.copy(),
                 **kwargs,
             }
@@ -360,6 +440,42 @@ class MappingSchema(AbstractMappingSchema, Schema):

         return exp.DataType.build("unknown")

+    def get_udf_type(
+        self,
+        udf: exp.Anonymous | str,
+        dialect: DialectType = None,
+        normalize: t.Optional[bool] = None,
+    ) -> exp.DataType:
+        """
+        Get the return type of a UDF.
+
+        Args:
+            udf: the UDF expression or string (e.g., "db.my_func()").
+            dialect: the SQL dialect for parsing string arguments.
+            normalize: whether to normalize identifiers.
+
+        Returns:
+            The return type as a DataType, or UNKNOWN if not found.
+        """
+        parts = self._normalize_udf(udf, dialect=dialect, normalize=normalize)
+        resolved_parts = self._find_in_trie(parts, self.udf_trie, raise_on_missing=False)
+
+        if resolved_parts is None:
+            return exp.DataType.build("unknown")
+
+        udf_type = nested_get(
+            self.udf_mapping,
+            *zip(resolved_parts, reversed(resolved_parts)),
+            raise_on_missing=False,
+        )
+
+        if isinstance(udf_type, exp.DataType):
+            return udf_type
+        elif isinstance(udf_type, str):
+            return self._to_data_type(udf_type, dialect=dialect)
+
+        return exp.DataType.build("unknown")
+
     def has_column(
         self,
         table: exp.Table | str,
@@ -414,6 +530,61 @@ class MappingSchema(AbstractMappingSchema, Schema):

         return normalized_mapping

+    def _normalize_udfs(self, udfs: t.Dict) -> t.Dict:
+        """
+        Normalizes all identifiers in the UDF mapping.
+
+        Args:
+            udfs: the UDF mapping to normalize.
+
+        Returns:
+            The normalized UDF mapping.
+        """
+        normalized_mapping: t.Dict = {}
+
+        for keys in flatten_schema(udfs, depth=dict_depth(udfs)):
+            udf_type = nested_get(udfs, *zip(keys, keys))
+            normalized_keys = [self._normalize_name(key, is_table=True) for key in keys]
+            nested_set(normalized_mapping, normalized_keys, udf_type)
+
+        return normalized_mapping
+
+    def _normalize_udf(
+        self,
+        udf: exp.Anonymous | str,
+        dialect: DialectType = None,
+        normalize: t.Optional[bool] = None,
+    ) -> t.List[str]:
+        """
+        Extract and normalize UDF parts for lookup.
+
+        Args:
+            udf: the UDF expression or qualified string (e.g., "db.my_func()").
+            dialect: the SQL dialect for parsing.
+            normalize: whether to normalize identifiers.
+
+        Returns:
+            A list of normalized UDF parts (reversed for trie lookup).
+        """
+        dialect = dialect or self.dialect
+        normalize = self.normalize if normalize is None else normalize
+
+        if isinstance(udf, str):
+            parsed: exp.Expression = exp.maybe_parse(udf, dialect=dialect)
+
+            if isinstance(parsed, exp.Anonymous):
+                udf = parsed
+            elif isinstance(parsed, exp.Dot) and isinstance(parsed.expression, exp.Anonymous):
+                udf = parsed.expression
+            else:
+                raise SchemaError(f"Unable to parse UDF from: {udf!r}")
+        parts = self.udf_parts(udf)
+
+        if normalize:
+            parts = [self._normalize_name(part, dialect=dialect, is_table=True) for part in parts]
+
+        return parts
+
     def _normalize_table(
         self,
         table: exp.Table | str,
@@ -471,6 +642,7 @@ class MappingSchema(AbstractMappingSchema, Schema):

         try:
             expression = exp.DataType.build(schema_type, dialect=dialect, udt=udt)
+            expression.transform(dialect.normalize_identifier, copy=False)
             self._type_mapping_cache[schema_type] = expression
         except AttributeError:
             in_dialect = f" in dialect {dialect}" if dialect else ""
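
Usage sketch (illustrative, not part of the diff): a MappingSchema can now carry a udf_mapping alongside the table schema, and get_udf_type resolves a UDF's return type through the new udf_trie, falling back to UNKNOWN. The mapping and UDF names below are hypothetical.

    from sqlglot import exp
    from sqlglot.schema import MappingSchema

    schema = MappingSchema(
        schema={"db": {"t": {"x": "int"}}},
        udf_mapping={"db": {"my_func": "bigint"}},  # hypothetical UDF entry
    )

    print(schema.get_udf_type("db.my_func()").sql())  # expected: BIGINT
    print(schema.get_udf_type("db.missing()").sql())  # expected: UNKNOWN
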
sqlglot/tokens.py CHANGED
@@ -68,7 +68,7 @@ class TokenType(AutoName):
     DPIPE_SLASH = auto()
     CARET = auto()
     CARET_AT = auto()
-    TILDA = auto()
+    TILDE = auto()
     ARROW = auto()
     DARROW = auto()
     FARROW = auto()
@@ -87,6 +87,7 @@ class TokenType(AutoName):
     DAMP = auto()
     AMP_LT = auto()
     AMP_GT = auto()
+    ADJACENT = auto()
     XOR = auto()
     DSTAR = auto()
     QMARK_AMP = auto()
@@ -207,6 +208,7 @@ class TokenType(AutoName):
     LINESTRING = auto()
     LOCALTIME = auto()
     LOCALTIMESTAMP = auto()
+    SYSTIMESTAMP = auto()
     MULTILINESTRING = auto()
     POLYGON = auto()
     MULTIPOLYGON = auto()
@@ -370,6 +372,8 @@ class TokenType(AutoName):
     ORDER_SIBLINGS_BY = auto()
     ORDERED = auto()
     ORDINALITY = auto()
+    OUT = auto()
+    INOUT = auto()
     OUTER = auto()
     OVER = auto()
     OVERLAPS = auto()
@@ -436,6 +440,7 @@ class TokenType(AutoName):
     USE = auto()
     USING = auto()
     VALUES = auto()
+    VARIADIC = auto()
     VIEW = auto()
     SEMANTIC_VIEW = auto()
     VOLATILE = auto()
@@ -552,7 +557,11 @@ class _Tokenizer(type):
             **_quotes_to_format(TokenType.UNICODE_STRING, klass.UNICODE_STRINGS),
         }

+        if "BYTE_STRING_ESCAPES" not in klass.__dict__:
+            klass.BYTE_STRING_ESCAPES = klass.STRING_ESCAPES.copy()
+
         klass._STRING_ESCAPES = set(klass.STRING_ESCAPES)
+        klass._BYTE_STRING_ESCAPES = set(klass.BYTE_STRING_ESCAPES)
         klass._ESCAPE_FOLLOW_CHARS = set(klass.ESCAPE_FOLLOW_CHARS)
         klass._IDENTIFIER_ESCAPES = set(klass.IDENTIFIER_ESCAPES)
         klass._COMMENTS = {
@@ -585,6 +594,7 @@ class _Tokenizer(type):
                 identifiers=klass._IDENTIFIERS,
                 identifier_escapes=klass._IDENTIFIER_ESCAPES,
                 string_escapes=klass._STRING_ESCAPES,
+                byte_string_escapes=klass._BYTE_STRING_ESCAPES,
                 quotes=klass._QUOTES,
                 format_strings={
                     k: (v1, _TOKEN_TYPE_TO_INDEX[v2])
@@ -609,6 +619,7 @@ class _Tokenizer(type):
             )
             token_types = RsTokenTypeSettings(
                 bit_string=_TOKEN_TYPE_TO_INDEX[TokenType.BIT_STRING],
+                byte_string=_TOKEN_TYPE_TO_INDEX[TokenType.BYTE_STRING],
                 break_=_TOKEN_TYPE_TO_INDEX[TokenType.BREAK],
                 dcolon=_TOKEN_TYPE_TO_INDEX[TokenType.DCOLON],
                 heredoc_string=_TOKEN_TYPE_TO_INDEX[TokenType.HEREDOC_STRING],
@@ -655,7 +666,7 @@ class Tokenizer(metaclass=_Tokenizer):
         "/": TokenType.SLASH,
         "\\": TokenType.BACKSLASH,
         "*": TokenType.STAR,
-        "~": TokenType.TILDA,
+        "~": TokenType.TILDE,
         "?": TokenType.PLACEHOLDER,
         "@": TokenType.PARAMETER,
         "#": TokenType.HASH,
@@ -674,6 +685,7 @@ class Tokenizer(metaclass=_Tokenizer):
     IDENTIFIERS: t.List[str | t.Tuple[str, str]] = ['"']
     QUOTES: t.List[t.Tuple[str, str] | str] = ["'"]
     STRING_ESCAPES = ["'"]
+    BYTE_STRING_ESCAPES: t.List[str] = []
     VAR_SINGLE_TOKENS: t.Set[str] = set()
     ESCAPE_FOLLOW_CHARS: t.List[str] = []
@@ -704,6 +716,7 @@ class Tokenizer(metaclass=_Tokenizer):
     _IDENTIFIER_ESCAPES: t.Set[str] = set()
     _QUOTES: t.Dict[str, str] = {}
     _STRING_ESCAPES: t.Set[str] = set()
+    _BYTE_STRING_ESCAPES: t.Set[str] = set()
     _KEYWORD_TRIE: t.Dict = {}
     _RS_TOKENIZER: t.Optional[t.Any] = None
     _ESCAPE_FOLLOW_CHARS: t.Set[str] = set()
@@ -714,6 +727,8 @@ class Tokenizer(metaclass=_Tokenizer):
         **{f"{{{{{postfix}": TokenType.BLOCK_START for postfix in ("+", "-")},
         **{f"{prefix}}}}}": TokenType.BLOCK_END for prefix in ("+", "-")},
         HINT_START: TokenType.HINT,
+        "&<": TokenType.AMP_LT,
+        "&>": TokenType.AMP_GT,
         "==": TokenType.EQ,
         "::": TokenType.DCOLON,
         "?::": TokenType.QDCOLON,
@@ -737,6 +752,7 @@ class Tokenizer(metaclass=_Tokenizer):
         "~~": TokenType.LIKE,
         "~~*": TokenType.ILIKE,
         "~*": TokenType.IRLIKE,
+        "-|-": TokenType.ADJACENT,
         "ALL": TokenType.ALL,
         "AND": TokenType.AND,
         "ANTI": TokenType.ANTI,
@@ -837,6 +853,7 @@ class Tokenizer(metaclass=_Tokenizer):
         "XOR": TokenType.XOR,
         "ORDER BY": TokenType.ORDER_BY,
         "ORDINALITY": TokenType.ORDINALITY,
+        "OUT": TokenType.OUT,
         "OUTER": TokenType.OUTER,
         "OVER": TokenType.OVER,
         "OVERLAPS": TokenType.OVERLAPS,
@@ -850,6 +867,7 @@ class Tokenizer(metaclass=_Tokenizer):
         "PRAGMA": TokenType.PRAGMA,
         "PRIMARY KEY": TokenType.PRIMARY_KEY,
         "PROCEDURE": TokenType.PROCEDURE,
+        "OPERATOR": TokenType.OPERATOR,
         "QUALIFY": TokenType.QUALIFY,
         "RANGE": TokenType.RANGE,
         "RECURSIVE": TokenType.RECURSIVE,
@@ -1363,8 +1381,12 @@ class Tokenizer(metaclass=_Tokenizer):
                 decimal = True
                 self._advance()
             elif self._peek in ("-", "+") and scientific == 1:
-                scientific += 1
-                self._advance()
+                # Only consume +/- if followed by a digit
+                if self._current + 1 < self.size and self.sql[self._current + 1].isdigit():
+                    scientific += 1
+                    self._advance()
+                else:
+                    return self._add(TokenType.NUMBER)
             elif self._peek.upper() == "E" and not scientific:
                 scientific += 1
                 self._advance()
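
The effect of the number-scanning fix above, sketched with the public tokenize entry point (the leading SELECT token is sliced off; exact output depends on the active tokenizer):

    import sqlglot

    # "+" followed by a digit is still consumed into the scientific literal:
    print([(t.token_type.name, t.text) for t in sqlglot.tokenize("SELECT 1e+2")][1:])
    # expected: [('NUMBER', '1e+2')]

    # "+" not followed by a digit now ends the number instead of being swallowed:
    print([(t.token_type.name, t.text) for t in sqlglot.tokenize("SELECT 1e+x")][1:])
    # expected: [('NUMBER', '1e'), ('PLUS', '+'), ('VAR', 'x')]
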
@@ -1464,7 +1486,15 @@ class Tokenizer(metaclass=_Tokenizer):
             return False

         self._advance(len(start))
-        text = self._extract_string(end, raw_string=token_type == TokenType.RAW_STRING)
+        text = self._extract_string(
+            end,
+            escapes=(
+                self._BYTE_STRING_ESCAPES
+                if token_type == TokenType.BYTE_STRING
+                else self._STRING_ESCAPES
+            ),
+            raw_string=token_type == TokenType.RAW_STRING,
+        )

         if base and text:
             try:
@@ -1514,7 +1544,7 @@ class Tokenizer(metaclass=_Tokenizer):
                 not raw_string
                 and self.dialect.UNESCAPED_SEQUENCES
                 and self._peek
-                and self._char in self.STRING_ESCAPES
+                and self._char in escapes
             ):
                 unescaped_sequence = self.dialect.UNESCAPED_SEQUENCES.get(self._char + self._peek)
                 if unescaped_sequence:
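
Taken together, the BYTE_STRING_ESCAPES changes let a dialect scope escape characters to byte strings only; since the attribute defaults to a copy of STRING_ESCAPES, dialects that don't set it behave as before. A minimal sketch with a hypothetical tokenizer subclass:

    from sqlglot.tokens import Tokenizer

    class MyTokenizer(Tokenizer):  # hypothetical dialect tokenizer
        BYTE_STRINGS = [("b'", "'")]    # tokenize b'...' as BYTE_STRING
        BYTE_STRING_ESCAPES = ["\\"]    # backslash escapes apply only inside byte strings
        # STRING_ESCAPES is untouched, so regular '...' literals keep "'" as their escape
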
sqlglot/transforms.py CHANGED
@@ -1042,12 +1042,13 @@ def inherit_struct_field_names(expression: exp.Expression) -> exp.Expression:
         new_expressions = []
         for i, expr in enumerate(struct.expressions):
             if not isinstance(expr, exp.PropertyEQ):
-                # Create PropertyEQ: field_name := value
-                new_expressions.append(
-                    exp.PropertyEQ(
-                        this=exp.Identifier(this=field_names[i].copy()), expression=expr
-                    )
+                # Create PropertyEQ: field_name := value, preserving the type from the inner expression
+                property_eq = exp.PropertyEQ(
+                    this=exp.Identifier(this=field_names[i].copy()),
+                    expression=expr,
                 )
+                property_eq.type = expr.type
+                new_expressions.append(property_eq)
             else:
                 new_expressions.append(expr)

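The observable effect of this transforms.py change, sketched outside the transform with the plain expressions API: a PropertyEQ synthesized for an unnamed struct field now carries the type of the expression it wraps.

    from sqlglot import exp

    value = exp.Literal.number(1)
    value.type = exp.DataType.build("int")

    prop = exp.PropertyEQ(this=exp.Identifier(this="f"), expression=value)
    prop.type = value.type  # mirrors what inherit_struct_field_names now does
    assert prop.type.is_type("int")
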
sqlglot/typing/__init__.py CHANGED
@@ -30,7 +30,6 @@ EXPRESSION_METADATA: ExpressionMetadataType = {
             exp.ArraySize,
             exp.CountIf,
             exp.Int64,
-            exp.Length,
             exp.UnixDate,
             exp.UnixSeconds,
             exp.UnixMicros,
@@ -47,11 +46,16 @@ EXPRESSION_METADATA: ExpressionMetadataType = {
     **{
         expr_type: {"returns": exp.DataType.Type.BOOLEAN}
         for expr_type in {
+            exp.All,
+            exp.Any,
             exp.Between,
             exp.Boolean,
             exp.Contains,
             exp.EndsWith,
+            exp.Exists,
             exp.In,
+            exp.IsInf,
+            exp.IsNan,
             exp.LogicalAnd,
             exp.LogicalOr,
             exp.RegexpLike,
@@ -86,7 +90,9 @@ EXPRESSION_METADATA: ExpressionMetadataType = {
         for expr_type in {
             exp.ApproxQuantile,
             exp.Avg,
+            exp.Cbrt,
             exp.Exp,
+            exp.Kurtosis,
             exp.Ln,
             exp.Log,
             exp.Pi,
@@ -109,16 +115,20 @@ EXPRESSION_METADATA: ExpressionMetadataType = {
         expr_type: {"returns": exp.DataType.Type.INT}
         for expr_type in {
             exp.Ascii,
+            exp.BitLength,
             exp.Ceil,
             exp.DatetimeDiff,
+            exp.Getbit,
             exp.TimestampDiff,
             exp.TimeDiff,
             exp.Unicode,
             exp.DateToDi,
             exp.Levenshtein,
+            exp.Length,
             exp.Sign,
             exp.StrPosition,
             exp.TsOrDiToDi,
+            exp.Quarter,
         }
     },
     **{
@@ -141,6 +151,7 @@ EXPRESSION_METADATA: ExpressionMetadataType = {
         expr_type: {"returns": exp.DataType.Type.TIME}
         for expr_type in {
             exp.CurrentTime,
+            exp.Localtime,
             exp.Time,
             exp.TimeAdd,
             exp.TimeSub,
@@ -169,7 +180,6 @@ EXPRESSION_METADATA: ExpressionMetadataType = {
             exp.DayOfWeekIso,
             exp.DayOfYear,
             exp.Month,
-            exp.Quarter,
             exp.Week,
             exp.WeekOfYear,
             exp.Year,
@@ -184,11 +194,14 @@ EXPRESSION_METADATA: ExpressionMetadataType = {
             exp.Concat,
             exp.ConcatWs,
             exp.Chr,
+            exp.Dayname,
             exp.DateToDateStr,
             exp.DPipe,
             exp.GroupConcat,
             exp.Initcap,
             exp.Lower,
+            exp.SHA,
+            exp.SHA2,
             exp.Substring,
             exp.String,
             exp.TimeToStr,
@@ -200,6 +213,8 @@ EXPRESSION_METADATA: ExpressionMetadataType = {
             exp.UnixToStr,
             exp.UnixToTimeStr,
             exp.Upper,
+            exp.RawString,
+            exp.Space,
         }
     },
     **{
@@ -237,13 +252,7 @@ EXPRESSION_METADATA: ExpressionMetadataType = {
             exp.ArrayLast,
         }
     },
-    **{
-        expr_type: {"returns": exp.DataType.Type.UNKNOWN}
-        for expr_type in {
-            exp.Anonymous,
-            exp.Slice,
-        }
-    },
+    exp.Anonymous: {"annotator": lambda self, e: self._set_type(e, self.schema.get_udf_type(e))},
     **{
         expr_type: {"annotator": lambda self, e: self._annotate_timeunit(e)}
         for expr_type in {
@@ -269,7 +278,11 @@ EXPRESSION_METADATA: ExpressionMetadataType = {
     exp.Array: {"annotator": lambda self, e: self._annotate_by_args(e, "expressions", array=True)},
     exp.ArrayAgg: {"annotator": lambda self, e: self._annotate_by_args(e, "this", array=True)},
     exp.Bracket: {"annotator": lambda self, e: self._annotate_bracket(e)},
-    exp.Case: {"annotator": lambda self, e: self._annotate_by_args(e, "default", "ifs")},
+    exp.Case: {
+        "annotator": lambda self, e: self._annotate_by_args(
+            e, *[if_expr.args["true"] for if_expr in e.args["ifs"]], "default"
+        )
+    },
     exp.Count: {
         "annotator": lambda self, e: self._set_type(
             e, exp.DataType.Type.BIGINT if e.args.get("big_int") else exp.DataType.Type.INT
@@ -286,6 +299,12 @@ EXPRESSION_METADATA: ExpressionMetadataType = {
     exp.Dot: {"annotator": lambda self, e: self._annotate_dot(e)},
     exp.Explode: {"annotator": lambda self, e: self._annotate_explode(e)},
     exp.Extract: {"annotator": lambda self, e: self._annotate_extract(e)},
+    exp.HexString: {
+        "annotator": lambda self, e: self._set_type(
+            e,
+            exp.DataType.Type.BIGINT if e.args.get("is_integer") else exp.DataType.Type.BINARY,
+        )
+    },
     exp.GenerateSeries: {
         "annotator": lambda self, e: self._annotate_by_args(e, "start", "end", "step", array=True)
     },
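
With exp.Anonymous now routed through schema.get_udf_type instead of a hard-coded UNKNOWN, type annotation can resolve UDF return types from a udf_mapping. A sketch using a hypothetical UDF name:

    import sqlglot
    from sqlglot.optimizer.annotate_types import annotate_types
    from sqlglot.schema import MappingSchema

    schema = MappingSchema(schema={"t": {"x": "int"}}, udf_mapping={"my_udf": "double"})
    expr = annotate_types(sqlglot.parse_one("SELECT my_udf(x) FROM t"), schema=schema)
    print(expr.selects[0].type)  # expected: DOUBLE
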
sqlglot/typing/bigquery.py CHANGED
@@ -163,9 +163,9 @@ EXPRESSION_METADATA = {
     **{
         expr_type: {"annotator": lambda self, e: self._annotate_by_args(e, "this")}
         for expr_type in {
-            exp.Abs,
             exp.ArgMax,
             exp.ArgMin,
+            exp.DateAdd,
             exp.DateTrunc,
             exp.DatetimeTrunc,
             exp.FirstValue,
@@ -175,6 +175,7 @@ EXPRESSION_METADATA = {
             exp.Lead,
             exp.Left,
             exp.Lower,
+            exp.NetFunc,
             exp.NthValue,
             exp.Pad,
             exp.PercentileDisc,
@@ -185,6 +186,7 @@ EXPRESSION_METADATA = {
             exp.RespectNulls,
             exp.Reverse,
             exp.Right,
+            exp.SafeFunc,
             exp.SafeNegate,
             exp.Sign,
             exp.Substring,
@@ -197,7 +199,6 @@ EXPRESSION_METADATA = {
     **{
         expr_type: {"returns": exp.DataType.Type.BIGINT}
         for expr_type in {
-            exp.Ascii,
             exp.BitwiseAndAgg,
             exp.BitwiseCount,
             exp.BitwiseOrAgg,
@@ -213,7 +214,6 @@ EXPRESSION_METADATA = {
             exp.RangeBucket,
             exp.RegexpInstr,
             exp.RowNumber,
-            exp.Unicode,
         }
     },
     **{
@@ -232,8 +232,6 @@ EXPRESSION_METADATA = {
     **{
         expr_type: {"returns": exp.DataType.Type.BOOLEAN}
         for expr_type in {
-            exp.IsInf,
-            exp.IsNan,
             exp.JSONBool,
             exp.LaxBool,
         }
@@ -255,7 +253,6 @@ EXPRESSION_METADATA = {
             exp.Atan,
             exp.Atan2,
             exp.Atanh,
-            exp.Cbrt,
             exp.Corr,
             exp.CosineDistance,
             exp.Cot,
@@ -302,13 +299,14 @@ EXPRESSION_METADATA = {
         for expr_type in {
             exp.CodePointsToString,
             exp.Format,
+            exp.Host,
             exp.JSONExtractScalar,
             exp.JSONType,
             exp.LaxString,
             exp.LowerHex,
             exp.MD5,
-            exp.NetHost,
             exp.Normalize,
+            exp.RegDomain,
             exp.SafeConvertBytesToString,
             exp.Soundex,
             exp.Uuid,
@@ -339,9 +337,6 @@ EXPRESSION_METADATA = {
     exp.ApproxTopK: {"annotator": lambda self, e: _annotate_by_args_approx_top(self, e)},
     exp.ApproxTopSum: {"annotator": lambda self, e: _annotate_by_args_approx_top(self, e)},
     exp.Array: {"annotator": _annotate_array},
-    exp.ArrayConcat: {
-        "annotator": lambda self, e: self._annotate_by_args(e, "this", "expressions")
-    },
     exp.Concat: {"annotator": _annotate_concat},
     exp.DateFromUnixDate: {"returns": exp.DataType.Type.DATE},
     exp.GenerateTimestampArray: {
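
One observable effect of moving exp.DateAdd into the _annotate_by_args(e, "this") group for BigQuery: DATE_ADD now inherits its type from its first argument. A sketch (table and column names hypothetical):

    import sqlglot
    from sqlglot.optimizer.annotate_types import annotate_types

    expr = annotate_types(
        sqlglot.parse_one("SELECT DATE_ADD(d, INTERVAL 1 DAY) FROM t", read="bigquery"),
        schema={"t": {"d": "date"}},
        dialect="bigquery",
    )
    print(expr.selects[0].type)  # expected: DATE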