PyPI - sqlglot - Versions diffs - 27.27.0__py3-none-any.whl → 28.4.0__py3-none-any.whl - Mend

sqlglot 27.27.0__py3-none-any.whl → 28.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (68)

sqlglot/__init__.py +1 -0
sqlglot/__main__.py +6 -4
sqlglot/_version.py +2 -2
sqlglot/dialects/bigquery.py +118 -279
sqlglot/dialects/clickhouse.py +73 -5
sqlglot/dialects/databricks.py +38 -1
sqlglot/dialects/dialect.py +354 -275
sqlglot/dialects/dremio.py +4 -1
sqlglot/dialects/duckdb.py +754 -25
sqlglot/dialects/exasol.py +243 -10
sqlglot/dialects/hive.py +8 -8
sqlglot/dialects/mysql.py +14 -4
sqlglot/dialects/oracle.py +29 -0
sqlglot/dialects/postgres.py +60 -26
sqlglot/dialects/presto.py +47 -16
sqlglot/dialects/redshift.py +16 -0
sqlglot/dialects/risingwave.py +3 -0
sqlglot/dialects/singlestore.py +12 -3
sqlglot/dialects/snowflake.py +239 -218
sqlglot/dialects/spark.py +15 -4
sqlglot/dialects/spark2.py +11 -48
sqlglot/dialects/sqlite.py +10 -0
sqlglot/dialects/starrocks.py +3 -0
sqlglot/dialects/teradata.py +5 -8
sqlglot/dialects/trino.py +6 -0
sqlglot/dialects/tsql.py +61 -22
sqlglot/diff.py +4 -2
sqlglot/errors.py +69 -0
sqlglot/executor/__init__.py +5 -10
sqlglot/executor/python.py +1 -29
sqlglot/expressions.py +637 -100
sqlglot/generator.py +160 -43
sqlglot/helper.py +2 -44
sqlglot/lineage.py +10 -4
sqlglot/optimizer/annotate_types.py +247 -140
sqlglot/optimizer/canonicalize.py +6 -1
sqlglot/optimizer/eliminate_joins.py +1 -1
sqlglot/optimizer/eliminate_subqueries.py +2 -2
sqlglot/optimizer/merge_subqueries.py +5 -5
sqlglot/optimizer/normalize.py +20 -13
sqlglot/optimizer/normalize_identifiers.py +17 -3
sqlglot/optimizer/optimizer.py +4 -0
sqlglot/optimizer/pushdown_predicates.py +1 -1
sqlglot/optimizer/qualify.py +18 -10
sqlglot/optimizer/qualify_columns.py +122 -275
sqlglot/optimizer/qualify_tables.py +128 -76
sqlglot/optimizer/resolver.py +374 -0
sqlglot/optimizer/scope.py +27 -16
sqlglot/optimizer/simplify.py +1075 -959
sqlglot/optimizer/unnest_subqueries.py +12 -2
sqlglot/parser.py +296 -170
sqlglot/planner.py +2 -2
sqlglot/schema.py +15 -4
sqlglot/tokens.py +42 -7
sqlglot/transforms.py +77 -22
sqlglot/typing/__init__.py +316 -0
sqlglot/typing/bigquery.py +376 -0
sqlglot/typing/hive.py +12 -0
sqlglot/typing/presto.py +24 -0
sqlglot/typing/snowflake.py +505 -0
sqlglot/typing/spark2.py +58 -0
sqlglot/typing/tsql.py +9 -0
{sqlglot-27.27.0.dist-info → sqlglot-28.4.0.dist-info}/METADATA +2 -2
sqlglot-28.4.0.dist-info/RECORD +92 -0
sqlglot-27.27.0.dist-info/RECORD +0 -84
{sqlglot-27.27.0.dist-info → sqlglot-28.4.0.dist-info}/WHEEL +0 -0
{sqlglot-27.27.0.dist-info → sqlglot-28.4.0.dist-info}/licenses/LICENSE +0 -0
{sqlglot-27.27.0.dist-info → sqlglot-28.4.0.dist-info}/top_level.txt +0 -0

sqlglot/__init__.py CHANGED Viewed

@@ -29,6 +29,7 @@ from sqlglot.expressions import (
     condition as condition,
     delete as delete,
     except_ as except_,
+    find_tables as find_tables,
     from_ as from_,
     func as func,
     insert as insert,

sqlglot/__main__.py CHANGED Viewed

@@ -5,6 +5,7 @@ import sys
 import typing as t
 import sqlglot
+from sqlglot.helper import to_bool
 parser = argparse.ArgumentParser(description="Transpile SQL")
 parser.add_argument(
@@ -28,10 +29,11 @@ parser.add_argument(
     help="Dialect to write default is generic",
 )
 parser.add_argument(
-    "--no-identify",
+    "--identify",
     dest="identify",
-    action="store_false",
-    help="Don't auto identify fields",
+    type=str,
+    default="safe",
+    help="Whether to quote identifiers (safe, true, false)",
 )
 parser.add_argument(
     "--no-pretty",
@@ -87,7 +89,7 @@ else:
         sql,
         read=args.read,
         write=args.write,
-        identify=args.identify,
+        identify="safe" if args.identify == "safe" else to_bool(args.identify),
         pretty=args.pretty,
         error_level=error_level,
     )

sqlglot/_version.py CHANGED Viewed

@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
-__version__ = version = '27.27.0'
-__version_tuple__ = version_tuple = (27, 27, 0)
+__version__ = version = '28.4.0'
+__version_tuple__ = version_tuple = (28, 4, 0)
 __commit_id__ = commit_id = None

sqlglot/dialects/bigquery.py CHANGED Viewed

@@ -12,7 +12,6 @@ from sqlglot._typing import E
 from sqlglot.dialects.dialect import (
     Dialect,
     NormalizationStrategy,
-    annotate_with_type_lambda,
     arg_max_or_min_no_count,
     binary_from_function,
     date_add_interval_sql,
@@ -33,10 +32,12 @@ from sqlglot.dialects.dialect import (
     unit_to_var,
     strposition_sql,
     groupconcat_sql,
+    sha2_digest_sql,
 )
+from sqlglot.generator import unsupported_args
 from sqlglot.helper import seq_get, split_num_words
 from sqlglot.tokens import TokenType
-from sqlglot.generator import unsupported_args
+from sqlglot.typing.bigquery import EXPRESSION_METADATA
 if t.TYPE_CHECKING:
     from sqlglot._typing import Lit
@@ -241,10 +242,28 @@ def _build_datetime(args: t.List) -> exp.Func:
     return exp.TimestampFromParts.from_arg_list(args)
+def build_date_diff(args: t.List) -> exp.Expression:
+    expr = exp.DateDiff(
+        this=seq_get(args, 0),
+        expression=seq_get(args, 1),
+        unit=seq_get(args, 2),
+        date_part_boundary=True,
+    )
+    # Normalize plain WEEK to WEEK(SUNDAY) to preserve the semantic in the AST to facilitate transpilation
+    # This is done post exp.DateDiff construction since the TimeUnit mixin performs canonicalizations in its constructor too
+    unit = expr.args.get("unit")
+    if isinstance(unit, exp.Var) and unit.name.upper() == "WEEK":
+        expr.set("unit", exp.WeekStart(this=exp.var("SUNDAY")))
+    return expr
 def _build_regexp_extract(
     expr_type: t.Type[E], default_group: t.Optional[exp.Expression] = None
-) -> t.Callable[[t.List], E]:
-    def _builder(args: t.List) -> E:
+) -> t.Callable[[t.List, BigQuery], E]:
+    def _builder(args: t.List, dialect: BigQuery) -> E:
         try:
             group = re.compile(args[1].name).groups == 1
         except re.error:
@@ -257,6 +276,11 @@ def _build_regexp_extract(
             position=seq_get(args, 2),
             occurrence=seq_get(args, 3),
             group=exp.Literal.number(1) if group else default_group,
+            **(
+                {"null_if_pos_overflow": dialect.REGEXP_EXTRACT_POSITION_OVERFLOW_RETURNS_NULL}
+                if expr_type is exp.RegexpExtract
+                else {}
+            ),
         )
     return _builder
@@ -290,59 +314,6 @@ def _str_to_datetime_sql(
     return self.func(f"PARSE_{dtype}", fmt, this, expression.args.get("zone"))
-def _annotate_math_functions(self: TypeAnnotator, expression: E) -> E:
-    """
-    Many BigQuery math functions such as CEIL, FLOOR etc follow this return type convention:
-    +---------+---------+---------+------------+---------+
-    |  INPUT  | INT64   | NUMERIC | BIGNUMERIC | FLOAT64 |
-    +---------+---------+---------+------------+---------+
-    |  OUTPUT | FLOAT64 | NUMERIC | BIGNUMERIC | FLOAT64 |
-    +---------+---------+---------+------------+---------+
-    """
-    self._annotate_args(expression)
-    this: exp.Expression = expression.this
-    self._set_type(
-        expression,
-        exp.DataType.Type.DOUBLE if this.is_type(*exp.DataType.INTEGER_TYPES) else this.type,
-    )
-    return expression
-def _annotate_by_args_with_coerce(self: TypeAnnotator, expression: E) -> E:
-    """
-    +------------+------------+------------+-------------+---------+
-    | INPUT      | INT64      | NUMERIC    | BIGNUMERIC  | FLOAT64 |
-    +------------+------------+------------+-------------+---------+
-    | INT64      | INT64      | NUMERIC    | BIGNUMERIC  | FLOAT64 |
-    | NUMERIC    | NUMERIC    | NUMERIC    | BIGNUMERIC  | FLOAT64 |
-    | BIGNUMERIC | BIGNUMERIC | BIGNUMERIC | BIGNUMERIC  | FLOAT64 |
-    | FLOAT64    | FLOAT64    | FLOAT64    | FLOAT64     | FLOAT64 |
-    +------------+------------+------------+-------------+---------+
-    """
-    self._annotate_args(expression)
-    self._set_type(expression, self._maybe_coerce(expression.this.type, expression.expression.type))
-    return expression
-def _annotate_by_args_approx_top(self: TypeAnnotator, expression: exp.ApproxTopK) -> exp.ApproxTopK:
-    self._annotate_args(expression)
-    struct_type = exp.DataType(
-        this=exp.DataType.Type.STRUCT,
-        expressions=[expression.this.type, exp.DataType(this=exp.DataType.Type.BIGINT)],
-        nested=True,
-    )
-    self._set_type(
-        expression,
-        exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[struct_type], nested=True),
-    )
-    return expression
 @unsupported_args("ins_cost", "del_cost", "sub_cost")
 def _levenshtein_sql(self: BigQuery.Generator, expression: exp.Levenshtein) -> str:
     max_dist = expression.args.get("max_dist")
@@ -363,11 +334,11 @@ def _build_levenshtein(args: t.List) -> exp.Levenshtein:
 def _build_format_time(expr_type: t.Type[exp.Expression]) -> t.Callable[[t.List], exp.TimeToStr]:
     def _builder(args: t.List) -> exp.TimeToStr:
-        return exp.TimeToStr(
-            this=expr_type(this=seq_get(args, 1)),
-            format=seq_get(args, 0),
-            zone=seq_get(args, 2),
+        formatted_time = build_formatted_time(exp.TimeToStr, "bigquery")(
+            [expr_type(this=seq_get(args, 1)), seq_get(args, 0)]
         )
+        formatted_time.set("zone", seq_get(args, 2))
+        return formatted_time
     return _builder
@@ -398,44 +369,6 @@ def _json_extract_sql(self: BigQuery.Generator, expression: JSON_EXTRACT_TYPE) -
     return sql
-def _annotate_concat(self: TypeAnnotator, expression: exp.Concat) -> exp.Concat:
-    annotated = self._annotate_by_args(expression, "expressions")
-    # Args must be BYTES or types that can be cast to STRING, return type is either BYTES or STRING
-    # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#concat
-    if not annotated.is_type(exp.DataType.Type.BINARY, exp.DataType.Type.UNKNOWN):
-        annotated.type = exp.DataType.Type.VARCHAR
-    return annotated
-def _annotate_array(self: TypeAnnotator, expression: exp.Array) -> exp.Array:
-    array_args = expression.expressions
-    # BigQuery behaves as follows:
-    #
-    # SELECT t, TYPEOF(t) FROM (SELECT 'foo') AS t            -- foo, STRUCT<STRING>
-    # SELECT ARRAY(SELECT 'foo'), TYPEOF(ARRAY(SELECT 'foo')) -- foo, ARRAY<STRING>
-    if (
-        len(array_args) == 1
-        and isinstance(select := array_args[0].unnest(), exp.Select)
-        and (query_type := select.meta.get("query_type")) is not None
-        and query_type.is_type(exp.DataType.Type.STRUCT)
-        and len(query_type.expressions) == 1
-        and isinstance(col_def := query_type.expressions[0], exp.ColumnDef)
-        and (projection_type := col_def.kind) is not None
-        and not projection_type.is_type(exp.DataType.Type.UNKNOWN)
-    ):
-        array_type = exp.DataType(
-            this=exp.DataType.Type.ARRAY,
-            expressions=[projection_type.copy()],
-            nested=True,
-        )
-        return self._annotate_with_type(expression, array_type)
-    return self._annotate_by_args(expression, "expressions", array=True)
 class BigQuery(Dialect):
     WEEK_OFFSET = -1
     UNNEST_COLUMN_ONLY = True
@@ -444,8 +377,22 @@ class BigQuery(Dialect):
     LOG_BASE_FIRST = False
     HEX_LOWERCASE = True
     FORCE_EARLY_ALIAS_REF_EXPANSION = True
+    EXPAND_ONLY_GROUP_ALIAS_REF = True
     PRESERVE_ORIGINAL_NAMES = True
     HEX_STRING_IS_INTEGER_TYPE = True
+    BYTE_STRING_IS_BYTES_TYPE = True
+    UUID_IS_STRING_TYPE = True
+    ANNOTATE_ALL_SCOPES = True
+    PROJECTION_ALIASES_SHADOW_SOURCE_NAMES = True
+    TABLES_REFERENCEABLE_AS_COLUMNS = True
+    SUPPORTS_STRUCT_STAR_EXPANSION = True
+    EXCLUDES_PSEUDOCOLUMNS_FROM_STAR = True
+    QUERY_RESULTS_ARE_STRUCTS = True
+    JSON_EXTRACT_SCALAR_SCALAR_ONLY = True
+    DEFAULT_NULL_TYPE = exp.DataType.Type.BIGINT
+    # https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#initcap
+    INITCAP_DEFAULT_DELIMITER_CHARS = ' \t\n\r\f\v\\[\\](){}/|<>!?@"^#$&~_,.:;*%+\\-'
     # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
     NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE
@@ -455,9 +402,19 @@ class BigQuery(Dialect):
     # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time
     TIME_MAPPING = {
+        "%x": "%m/%d/%y",
         "%D": "%m/%d/%y",
         "%E6S": "%S.%f",
         "%e": "%-d",
+        "%F": "%Y-%m-%d",
+        "%T": "%H:%M:%S",
+        "%c": "%a %b %e %H:%M:%S %Y",
+    }
+    INVERSE_TIME_MAPPING = {
+        # Preserve %E6S instead of expanding to %T.%f - since both %E6S & %T.%f are semantically different in BigQuery
+        # %E6S is semantically different from %T.%f: %E6S works as a single atomic specifier for seconds with microseconds, while %T.%f expands incorrectly and fails to parse.
+        "%H:%M:%S.%f": "%H:%M:%E6S",
     }
     FORMAT_MAPPING = {
@@ -480,7 +437,13 @@ class BigQuery(Dialect):
     # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
     # https://cloud.google.com/bigquery/docs/querying-wildcard-tables#scanning_a_range_of_tables_using_table_suffix
     # https://cloud.google.com/bigquery/docs/query-cloud-storage-data#query_the_file_name_pseudo-column
-    PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE", "_TABLE_SUFFIX", "_FILE_NAME"}
+    PSEUDOCOLUMNS = {
+        "_PARTITIONTIME",
+        "_PARTITIONDATE",
+        "_TABLE_SUFFIX",
+        "_FILE_NAME",
+        "_DBT_MAX_PARTITION",
+    }
     # All set operations require either a DISTINCT or ALL specifier
     SET_OP_DISTINCT_BY_DEFAULT = dict.fromkeys((exp.Except, exp.Intersect, exp.Union), None)
@@ -492,182 +455,16 @@ class BigQuery(Dialect):
     }
     COERCES_TO[exp.DataType.Type.DECIMAL] |= {exp.DataType.Type.BIGDECIMAL}
     COERCES_TO[exp.DataType.Type.BIGINT] |= {exp.DataType.Type.BIGDECIMAL}
-    # BigQuery maps Type.TIMESTAMP to DATETIME, so we need to amend the inferred types
-    TYPE_TO_EXPRESSIONS = {
-        **Dialect.TYPE_TO_EXPRESSIONS,
-        exp.DataType.Type.TIMESTAMPTZ: Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.TIMESTAMP],
-    }
-    TYPE_TO_EXPRESSIONS.pop(exp.DataType.Type.TIMESTAMP)
-    ANNOTATORS = {
-        **Dialect.ANNOTATORS,
-        **{
-            expr_type: annotate_with_type_lambda(data_type)
-            for data_type, expressions in TYPE_TO_EXPRESSIONS.items()
-            for expr_type in expressions
-        },
-        **{
-            expr_type: lambda self, e: _annotate_math_functions(self, e)
-            for expr_type in (exp.Floor, exp.Ceil, exp.Log, exp.Ln, exp.Sqrt, exp.Exp, exp.Round)
-        },
-        **{
-            expr_type: lambda self, e: self._annotate_by_args(e, "this")
-            for expr_type in (
-                exp.Abs,
-                exp.ArgMax,
-                exp.ArgMin,
-                exp.DateTrunc,
-                exp.DatetimeTrunc,
-                exp.FirstValue,
-                exp.GroupConcat,
-                exp.IgnoreNulls,
-                exp.JSONExtract,
-                exp.Lead,
-                exp.Left,
-                exp.Lower,
-                exp.NthValue,
-                exp.Pad,
-                exp.PercentileDisc,
-                exp.RegexpExtract,
-                exp.RegexpReplace,
-                exp.Repeat,
-                exp.Replace,
-                exp.RespectNulls,
-                exp.Reverse,
-                exp.Right,
-                exp.SafeNegate,
-                exp.Sign,
-                exp.Substring,
-                exp.TimestampTrunc,
-                exp.Translate,
-                exp.Trim,
-                exp.Upper,
-            )
-        },
-        exp.Acos: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
-        exp.Acosh: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
-        exp.Asin: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
-        exp.Asinh: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
-        exp.Atan: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
-        exp.Atanh: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
-        exp.Atan2: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
-        exp.ApproxTopSum: lambda self, e: _annotate_by_args_approx_top(self, e),
-        exp.ApproxTopK: lambda self, e: _annotate_by_args_approx_top(self, e),
-        exp.ApproxQuantiles: lambda self, e: self._annotate_by_args(e, "this", array=True),
-        exp.Array: _annotate_array,
-        exp.ArrayConcat: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
-        exp.Ascii: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
-        exp.BitwiseAndAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
-        exp.BitwiseOrAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
-        exp.BitwiseXorAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
-        exp.BitwiseCountAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
-        exp.ByteLength: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
-        exp.ByteString: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
-        exp.Cbrt: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
-        exp.CodePointsToBytes: lambda self, e: self._annotate_with_type(
-            e, exp.DataType.Type.BINARY
-        ),
-        exp.CodePointsToString: lambda self, e: self._annotate_with_type(
-            e, exp.DataType.Type.VARCHAR
-        ),
-        exp.Concat: _annotate_concat,
-        exp.Corr: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
-        exp.Cot: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
-        exp.CosineDistance: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
-        exp.Coth: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
-        exp.CovarPop: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
-        exp.CovarSamp: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
-        exp.Csc: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
-        exp.Csch: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
-        exp.CumeDist: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
-        exp.DateFromUnixDate: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DATE),
-        exp.DenseRank: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
-        exp.EuclideanDistance: lambda self, e: self._annotate_with_type(
-            e, exp.DataType.Type.DOUBLE
-        ),
-        exp.FarmFingerprint: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
-        exp.Unhex: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
-        exp.Float64: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
-        exp.Format: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
-        exp.GenerateTimestampArray: lambda self, e: self._annotate_with_type(
-            e, exp.DataType.build("ARRAY<TIMESTAMP>", dialect="bigquery")
-        ),
-        exp.Grouping: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
-        exp.IsInf: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BOOLEAN),
-        exp.IsNan: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BOOLEAN),
-        exp.JSONArray: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON),
-        exp.JSONArrayAppend: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON),
-        exp.JSONArrayInsert: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON),
-        exp.JSONBool: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BOOLEAN),
-        exp.JSONExtractScalar: lambda self, e: self._annotate_with_type(
-            e, exp.DataType.Type.VARCHAR
-        ),
-        exp.JSONExtractArray: lambda self, e: self._annotate_by_args(e, "this", array=True),
-        exp.JSONFormat: lambda self, e: self._annotate_with_type(
-            e, exp.DataType.Type.JSON if e.args.get("to_json") else exp.DataType.Type.VARCHAR
-        ),
-        exp.JSONKeysAtDepth: lambda self, e: self._annotate_with_type(
-            e, exp.DataType.build("ARRAY<VARCHAR>", dialect="bigquery")
-        ),
-        exp.JSONObject: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON),
-        exp.JSONRemove: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON),
-        exp.JSONSet: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON),
-        exp.JSONStripNulls: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON),
-        exp.JSONType: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
-        exp.JSONValueArray: lambda self, e: self._annotate_with_type(
-            e, exp.DataType.build("ARRAY<VARCHAR>", dialect="bigquery")
-        ),
-        exp.Lag: lambda self, e: self._annotate_by_args(e, "this", "default"),
-        exp.LowerHex: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
-        exp.LaxBool: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BOOLEAN),
-        exp.LaxFloat64: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
-        exp.LaxInt64: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
-        exp.LaxString: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
-        exp.MD5Digest: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
-        exp.Normalize: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
-        exp.Ntile: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
-        exp.ParseTime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
-        exp.ParseDatetime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DATETIME),
-        exp.ParseBignumeric: lambda self, e: self._annotate_with_type(
-            e, exp.DataType.Type.BIGDECIMAL
-        ),
-        exp.ParseNumeric: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DECIMAL),
-        exp.PercentileCont: lambda self, e: _annotate_by_args_with_coerce(self, e),
-        exp.PercentRank: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
-        exp.Rank: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
-        exp.RangeBucket: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
-        exp.RegexpExtractAll: lambda self, e: self._annotate_by_args(e, "this", array=True),
-        exp.RegexpInstr: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
-        exp.RowNumber: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
-        exp.Rand: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
-        exp.SafeConvertBytesToString: lambda self, e: self._annotate_with_type(
-            e, exp.DataType.Type.VARCHAR
-        ),
-        exp.SafeAdd: lambda self, e: _annotate_by_args_with_coerce(self, e),
-        exp.SafeMultiply: lambda self, e: _annotate_by_args_with_coerce(self, e),
-        exp.SafeSubtract: lambda self, e: _annotate_by_args_with_coerce(self, e),
-        exp.Sec: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
-        exp.Sech: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
-        exp.Soundex: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
-        exp.SHA: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
-        exp.SHA2: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
-        exp.Sin: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
-        exp.Sinh: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
-        exp.Split: lambda self, e: self._annotate_by_args(e, "this", array=True),
-        exp.TimestampFromParts: lambda self, e: self._annotate_with_type(
-            e, exp.DataType.Type.DATETIME
-        ),
-        exp.TimeFromParts: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
-        exp.TimeTrunc: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
-        exp.ToCodePoints: lambda self, e: self._annotate_with_type(
-            e, exp.DataType.build("ARRAY<BIGINT>", dialect="bigquery")
-        ),
-        exp.TsOrDsToTime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
-        exp.Unicode: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
-        exp.Uuid: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
+    COERCES_TO[exp.DataType.Type.VARCHAR] |= {
+        exp.DataType.Type.DATE,
+        exp.DataType.Type.DATETIME,
+        exp.DataType.Type.TIME,
+        exp.DataType.Type.TIMESTAMP,
+        exp.DataType.Type.TIMESTAMPTZ,
     }
+    EXPRESSION_METADATA = EXPRESSION_METADATA.copy()
     def normalize_identifier(self, expression: E) -> E:
         if (
             isinstance(expression, exp.Identifier)
@@ -786,11 +583,12 @@ class BigQuery(Dialect):
             "BIT_AND": exp.BitwiseAndAgg.from_arg_list,
             "BIT_OR": exp.BitwiseOrAgg.from_arg_list,
             "BIT_XOR": exp.BitwiseXorAgg.from_arg_list,
-            "BIT_COUNT": exp.BitwiseCountAgg.from_arg_list,
+            "BIT_COUNT": exp.BitwiseCount.from_arg_list,
             "BOOL": exp.JSONBool.from_arg_list,
             "CONTAINS_SUBSTR": _build_contains_substring,
             "DATE": _build_date,
             "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
+            "DATE_DIFF": build_date_diff,
             "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
             "DATE_TRUNC": lambda args: exp.DateTrunc(
                 unit=seq_get(args, 1),
@@ -804,6 +602,12 @@ class BigQuery(Dialect):
             "EDIT_DISTANCE": _build_levenshtein,
             "FORMAT_DATE": _build_format_time(exp.TsOrDsToDate),
             "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
+            "GREATEST": lambda args: exp.Greatest(
+                this=seq_get(args, 0), expressions=args[1:], null_if_any_null=True
+            ),
+            "LEAST": lambda args: exp.Least(
+                this=seq_get(args, 0), expressions=args[1:], null_if_any_null=True
+            ),
             "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar),
             "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray),
             "JSON_EXTRACT_STRING_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray),
@@ -815,6 +619,7 @@ class BigQuery(Dialect):
             "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray),
             "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
             "MD5": exp.MD5Digest.from_arg_list,
+            "SHA1": exp.SHA1Digest.from_arg_list,
             "NORMALIZE_AND_CASEFOLD": lambda args: exp.Normalize(
                 this=seq_get(args, 0), form=seq_get(args, 1), is_casefold=True
             ),
@@ -836,7 +641,9 @@ class BigQuery(Dialect):
             "REGEXP_EXTRACT_ALL": _build_regexp_extract(
                 exp.RegexpExtractAll, default_group=exp.Literal.number(0)
             ),
-            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
+            "SHA256": lambda args: exp.SHA2Digest(
+                this=seq_get(args, 0), length=exp.Literal.number(256)
+            ),
             "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
             "SPLIT": lambda args: exp.Split(
                 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
@@ -872,7 +679,11 @@ class BigQuery(Dialect):
         FUNCTION_PARSERS = {
             **parser.Parser.FUNCTION_PARSERS,
-            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
+            "ARRAY": lambda self: self.expression(
+                exp.Array,
+                expressions=[self._parse_statement()],
+                struct_name_inheritance=True,
+            ),
             "JSON_ARRAY": lambda self: self.expression(
                 exp.JSONArray, expressions=self._parse_csv(self._parse_bitwise)
             ),
@@ -1108,6 +919,9 @@ class BigQuery(Dialect):
         ) -> t.Optional[exp.Expression]:
             bracket = super()._parse_bracket(this)
+            if isinstance(bracket, exp.Array):
+                bracket.set("struct_name_inheritance", True)
             if this is bracket:
                 return bracket
@@ -1254,6 +1068,24 @@ class BigQuery(Dialect):
                 this=self._match_text_seq("AS") and self._parse_select(),
             )
+        def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
+            this = super()._parse_column_ops(this)
+            if isinstance(this, exp.Dot):
+                prefix_name = this.this.name.upper()
+                func_name = this.name.upper()
+                if prefix_name == "NET":
+                    if func_name == "HOST":
+                        this = self.expression(
+                            exp.NetHost, this=seq_get(this.expression.expressions, 0)
+                        )
+                elif prefix_name == "SAFE":
+                    if func_name == "TIMESTAMP":
+                        this = _build_timestamp(this.expression.expressions)
+                        this.set("safe", True)
+            return this
     class Generator(generator.Generator):
         INTERVAL_ALLOWS_PLURAL_FORM = False
         JOIN_HINTS = False
@@ -1302,7 +1134,7 @@ class BigQuery(Dialect):
             exp.BitwiseAndAgg: rename_func("BIT_AND"),
             exp.BitwiseOrAgg: rename_func("BIT_OR"),
             exp.BitwiseXorAgg: rename_func("BIT_XOR"),
-            exp.BitwiseCountAgg: rename_func("BIT_COUNT"),
+            exp.BitwiseCount: rename_func("BIT_COUNT"),
             exp.ByteLength: rename_func("BYTE_LENGTH"),
             exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
             exp.CollateProperty: lambda self, e: (
@@ -1329,7 +1161,7 @@ class BigQuery(Dialect):
             ),
             exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
             exp.GroupConcat: lambda self, e: groupconcat_sql(
-                self, e, func_name="STRING_AGG", within_group=False
+                self, e, func_name="STRING_AGG", within_group=False, sep=None
             ),
             exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))),
             exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"),
@@ -1388,6 +1220,8 @@ class BigQuery(Dialect):
             ),
             exp.SHA: rename_func("SHA1"),
             exp.SHA2: sha256_sql,
+            exp.SHA1Digest: rename_func("SHA1"),
+            exp.SHA2Digest: sha2_digest_sql,
             exp.StabilityProperty: lambda self, e: (
                 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
             ),
@@ -1399,6 +1233,7 @@ class BigQuery(Dialect):
             ),
             exp.StrToDate: _str_to_datetime_sql,
             exp.StrToTime: _str_to_datetime_sql,
+            exp.SessionUser: lambda *_: "SESSION_USER()",
             exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
             exp.TimeFromParts: rename_func("TIME"),
             exp.TimestampFromParts: rename_func("DATETIME"),
@@ -1716,3 +1551,7 @@ class BigQuery(Dialect):
             kind = f" {kind}" if kind else ""
             return f"{variables}{kind}{default}"
+        def timestamp_sql(self, expression: exp.Timestamp) -> str:
+            prefix = "SAFE." if expression.args.get("safe") else ""
+            return self.func(f"{prefix}TIMESTAMP", expression.this, expression.args.get("zone"))

sqlglot 27.27.0__py3-none-any.whl → 28.4.0__py3-none-any.whl

sqlglot 27.27.0__py3-none-any.whl → 28.4.0__py3-none-any.whl