PyPI - sqlglot - Versions diffs - 27.29.0__py3-none-any.whl → 28.4.0__py3-none-any.whl - Mend

sqlglot 27.29.0__py3-none-any.whl → 28.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (63)

sqlglot/__main__.py +6 -4
sqlglot/_version.py +2 -2
sqlglot/dialects/bigquery.py +116 -295
sqlglot/dialects/clickhouse.py +67 -2
sqlglot/dialects/databricks.py +38 -1
sqlglot/dialects/dialect.py +327 -286
sqlglot/dialects/dremio.py +4 -1
sqlglot/dialects/duckdb.py +718 -22
sqlglot/dialects/exasol.py +243 -10
sqlglot/dialects/hive.py +8 -8
sqlglot/dialects/mysql.py +11 -2
sqlglot/dialects/oracle.py +29 -0
sqlglot/dialects/postgres.py +46 -24
sqlglot/dialects/presto.py +47 -16
sqlglot/dialects/redshift.py +16 -0
sqlglot/dialects/risingwave.py +3 -0
sqlglot/dialects/singlestore.py +12 -3
sqlglot/dialects/snowflake.py +199 -271
sqlglot/dialects/spark.py +2 -2
sqlglot/dialects/spark2.py +11 -48
sqlglot/dialects/sqlite.py +9 -0
sqlglot/dialects/teradata.py +5 -8
sqlglot/dialects/trino.py +6 -0
sqlglot/dialects/tsql.py +61 -25
sqlglot/diff.py +4 -2
sqlglot/errors.py +69 -0
sqlglot/expressions.py +484 -84
sqlglot/generator.py +143 -41
sqlglot/helper.py +2 -2
sqlglot/optimizer/annotate_types.py +247 -140
sqlglot/optimizer/canonicalize.py +6 -1
sqlglot/optimizer/eliminate_joins.py +1 -1
sqlglot/optimizer/eliminate_subqueries.py +2 -2
sqlglot/optimizer/merge_subqueries.py +5 -5
sqlglot/optimizer/normalize.py +20 -13
sqlglot/optimizer/normalize_identifiers.py +17 -3
sqlglot/optimizer/optimizer.py +4 -0
sqlglot/optimizer/pushdown_predicates.py +1 -1
sqlglot/optimizer/qualify.py +14 -6
sqlglot/optimizer/qualify_columns.py +113 -352
sqlglot/optimizer/qualify_tables.py +112 -70
sqlglot/optimizer/resolver.py +374 -0
sqlglot/optimizer/scope.py +27 -16
sqlglot/optimizer/simplify.py +1074 -964
sqlglot/optimizer/unnest_subqueries.py +12 -2
sqlglot/parser.py +276 -160
sqlglot/planner.py +2 -2
sqlglot/schema.py +15 -4
sqlglot/tokens.py +42 -7
sqlglot/transforms.py +77 -22
sqlglot/typing/__init__.py +316 -0
sqlglot/typing/bigquery.py +376 -0
sqlglot/typing/hive.py +12 -0
sqlglot/typing/presto.py +24 -0
sqlglot/typing/snowflake.py +505 -0
sqlglot/typing/spark2.py +58 -0
sqlglot/typing/tsql.py +9 -0
{sqlglot-27.29.0.dist-info → sqlglot-28.4.0.dist-info}/METADATA +2 -2
sqlglot-28.4.0.dist-info/RECORD +92 -0
sqlglot-27.29.0.dist-info/RECORD +0 -84
{sqlglot-27.29.0.dist-info → sqlglot-28.4.0.dist-info}/WHEEL +0 -0
{sqlglot-27.29.0.dist-info → sqlglot-28.4.0.dist-info}/licenses/LICENSE +0 -0
{sqlglot-27.29.0.dist-info → sqlglot-28.4.0.dist-info}/top_level.txt +0 -0

sqlglot/dialects/clickhouse.py CHANGED Viewed

@@ -9,6 +9,7 @@ from sqlglot.dialects.dialect import (
     arg_max_or_min_no_count,
     build_date_delta,
     build_formatted_time,
+    build_like,
     inline_array_sql,
     json_extract_segments,
     json_path_key_only_name,
@@ -23,6 +24,7 @@ from sqlglot.dialects.dialect import (
     timestamptrunc_sql,
     unit_to_var,
     trim_sql,
+    sha2_digest_sql,
 )
 from sqlglot.generator import Generator
 from sqlglot.helper import is_int, seq_get
@@ -188,6 +190,43 @@ def _map_sql(self: ClickHouse.Generator, expression: exp.Map | exp.VarMap) -> st
     return f"{{{csv_args}}}"
+def _build_timestamp_trunc(unit: str) -> t.Callable[[t.List], exp.TimestampTrunc]:
+    return lambda args: exp.TimestampTrunc(
+        this=seq_get(args, 0), unit=exp.var(unit), zone=seq_get(args, 1)
+    )
+def _build_split_by_char(args: t.List) -> exp.Split | exp.Anonymous:
+    sep = seq_get(args, 0)
+    if isinstance(sep, exp.Literal):
+        sep_value = sep.to_py()
+        if isinstance(sep_value, str) and len(sep_value.encode("utf-8")) == 1:
+            return _build_split(exp.Split)(args)
+    return exp.Anonymous(this="splitByChar", expressions=args)
+def _build_split(exp_class: t.Type[E]) -> t.Callable[[t.List], E]:
+    return lambda args: exp_class(
+        this=seq_get(args, 1), expression=seq_get(args, 0), limit=seq_get(args, 2)
+    )
+# Skip the 'week' unit since ClickHouse's toStartOfWeek
+# uses an extra mode argument to specify the first day of the week
+TIMESTAMP_TRUNC_UNITS = {
+    "MICROSECOND",
+    "MILLISECOND",
+    "SECOND",
+    "MINUTE",
+    "HOUR",
+    "DAY",
+    "MONTH",
+    "QUARTER",
+    "YEAR",
+}
 class ClickHouse(Dialect):
     INDEX_OFFSET = 1
     NORMALIZE_FUNCTIONS: bool | str = False
@@ -308,10 +347,16 @@ class ClickHouse(Dialect):
         FUNCTIONS = {
             **parser.Parser.FUNCTIONS,
+            **{
+                f"TOSTARTOF{unit}": _build_timestamp_trunc(unit=unit)
+                for unit in TIMESTAMP_TRUNC_UNITS
+            },
             "ANY": exp.AnyValue.from_arg_list,
             "ARRAYSUM": exp.ArraySum.from_arg_list,
             "ARRAYREVERSE": exp.ArrayReverse.from_arg_list,
             "ARRAYSLICE": exp.ArraySlice.from_arg_list,
+            "CURRENTDATABASE": exp.CurrentDatabase.from_arg_list,
+            "CURRENTSCHEMAS": exp.CurrentSchemas.from_arg_list,
             "COUNTIF": _build_count_if,
             "COSINEDISTANCE": exp.CosineDistance.from_arg_list,
             "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None),
@@ -322,13 +367,17 @@ class ClickHouse(Dialect):
             "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None),
             "DATESUB": build_date_delta(exp.DateSub, default_unit=None),
             "FORMATDATETIME": _build_datetime_format(exp.TimeToStr),
+            "HAS": exp.ArrayContains.from_arg_list,
+            "ILIKE": build_like(exp.ILike),
             "JSONEXTRACTSTRING": build_json_extract_path(
                 exp.JSONExtractScalar, zero_based_indexing=False
             ),
             "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
+            "LIKE": build_like(exp.Like),
             "L2Distance": exp.EuclideanDistance.from_arg_list,
             "MAP": parser.build_var_map,
             "MATCH": exp.RegexpLike.from_arg_list,
+            "NOTLIKE": build_like(exp.Like, not_like=True),
             "PARSEDATETIME": _build_datetime_format(exp.ParseDatetime),
             "RANDCANONICAL": exp.Rand.from_arg_list,
             "STR_TO_DATE": _build_str_to_date,
@@ -336,11 +385,15 @@ class ClickHouse(Dialect):
             "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None),
             "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None),
             "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None),
+            "TOMONDAY": _build_timestamp_trunc("WEEK"),
             "UNIQ": exp.ApproxDistinct.from_arg_list,
             "XOR": lambda args: exp.Xor(expressions=args),
             "MD5": exp.MD5Digest.from_arg_list,
             "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
             "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
+            "SPLITBYCHAR": _build_split_by_char,
+            "SPLITBYREGEXP": _build_split(exp.RegexpSplit),
+            "SPLITBYSTRING": _build_split(exp.Split),
             "SUBSTRINGINDEX": exp.SubstringIndex.from_arg_list,
             "TOTYPENAME": exp.Typeof.from_arg_list,
             "EDITDISTANCE": exp.Levenshtein.from_arg_list,
@@ -418,6 +471,7 @@ class ClickHouse(Dialect):
             "quantiles",
             "quantileExact",
             "quantilesExact",
+            "quantilesExactExclusive",
             "quantileExactLow",
             "quantilesExactLow",
             "quantileExactHigh",
@@ -771,7 +825,7 @@ class ClickHouse(Dialect):
             if join:
                 method = join.args.get("method")
                 join.set("method", None)
-                join.set("global", method)
+                join.set("global_", method)
                 # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table`
                 # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join
@@ -1087,6 +1141,7 @@ class ClickHouse(Dialect):
             exp.AnyValue: rename_func("any"),
             exp.ApproxDistinct: rename_func("uniq"),
             exp.ArrayConcat: rename_func("arrayConcat"),
+            exp.ArrayContains: rename_func("has"),
             exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
             exp.ArrayRemove: remove_from_array_using_filter,
             exp.ArrayReverse: rename_func("arrayReverse"),
@@ -1096,6 +1151,8 @@ class ClickHouse(Dialect):
             exp.ArgMin: arg_max_or_min_no_count("argMin"),
             exp.Array: inline_array_sql,
             exp.CastToStrType: rename_func("CAST"),
+            exp.CurrentDatabase: rename_func("CURRENT_DATABASE"),
+            exp.CurrentSchemas: rename_func("CURRENT_SCHEMAS"),
             exp.CountIf: rename_func("countIf"),
             exp.CosineDistance: rename_func("cosineDistance"),
             exp.CompressColumnConstraint: lambda self,
@@ -1148,9 +1205,17 @@ class ClickHouse(Dialect):
             exp.MD5Digest: rename_func("MD5"),
             exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))),
             exp.SHA: rename_func("SHA1"),
+            exp.SHA1Digest: rename_func("SHA1"),
             exp.SHA2: sha256_sql,
+            exp.SHA2Digest: sha2_digest_sql,
+            exp.Split: lambda self, e: self.func(
+                "splitByString", e.args.get("expression"), e.this, e.args.get("limit")
+            ),
+            exp.RegexpSplit: lambda self, e: self.func(
+                "splitByRegexp", e.args.get("expression"), e.this, e.args.get("limit")
+            ),
             exp.UnixToTime: _unix_to_time_sql,
-            exp.TimestampTrunc: timestamptrunc_sql(zone=True),
+            exp.TimestampTrunc: timestamptrunc_sql(func="dateTrunc", zone=True),
             exp.Trim: lambda self, e: trim_sql(self, e, default_trim_type="BOTH"),
             exp.Variance: rename_func("varSamp"),
             exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),

sqlglot/dialects/databricks.py CHANGED Viewed

@@ -3,7 +3,7 @@ from __future__ import annotations
 from copy import deepcopy
 from collections import defaultdict
-from sqlglot import exp, transforms, jsonpath
+from sqlglot import exp, transforms, jsonpath, parser
 from sqlglot.dialects.dialect import (
     date_delta_sql,
     build_date_delta,
@@ -12,6 +12,7 @@ from sqlglot.dialects.dialect import (
     groupconcat_sql,
 )
 from sqlglot.dialects.spark import Spark
+from sqlglot.helper import seq_get
 from sqlglot.tokens import TokenType
 from sqlglot.optimizer.annotate_types import TypeAnnotator
@@ -54,11 +55,21 @@ class Databricks(Spark):
         FUNCTIONS = {
             **Spark.Parser.FUNCTIONS,
+            "GETDATE": exp.CurrentTimestamp.from_arg_list,
             "DATEADD": build_date_delta(exp.DateAdd),
             "DATE_ADD": build_date_delta(exp.DateAdd),
             "DATEDIFF": build_date_delta(exp.DateDiff),
             "DATE_DIFF": build_date_delta(exp.DateDiff),
+            "NOW": exp.CurrentTimestamp.from_arg_list,
             "TO_DATE": build_formatted_time(exp.TsOrDsToDate, "databricks"),
+            "UNIFORM": lambda args: exp.Uniform(
+                this=seq_get(args, 0), expression=seq_get(args, 1), seed=seq_get(args, 2)
+            ),
+        }
+        NO_PAREN_FUNCTION_PARSERS = {
+            **Spark.Parser.NO_PAREN_FUNCTION_PARSERS,
+            "CURDATE": lambda self: self._parse_curdate(),
         }
         FACTOR = {
@@ -66,6 +77,21 @@ class Databricks(Spark):
             TokenType.COLON: exp.JSONExtract,
         }
+        COLUMN_OPERATORS = {
+            **parser.Parser.COLUMN_OPERATORS,
+            TokenType.QDCOLON: lambda self, this, to: self.expression(
+                exp.TryCast,
+                this=this,
+                to=to,
+            ),
+        }
+        def _parse_curdate(self) -> exp.CurrentDate:
+            # CURDATE, an alias for CURRENT_DATE, has optional parentheses
+            if self._match(TokenType.L_PAREN):
+                self._match_r_paren()
+            return self.expression(exp.CurrentDate)
     class Generator(Spark.Generator):
         TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
         COPY_PARAMS_ARE_WRAPPED = False
@@ -104,6 +130,7 @@ class Databricks(Spark):
                 if e.args.get("is_numeric")
                 else self.function_fallback_sql(e)
             ),
+            exp.CurrentCatalog: lambda *_: "CURRENT_CATALOG()",
         }
         TRANSFORMS.pop(exp.RegexpLike)
@@ -136,3 +163,13 @@ class Databricks(Spark):
         def jsonpath_sql(self, expression: exp.JSONPath) -> str:
             expression.set("escape", None)
             return super().jsonpath_sql(expression)
+        def uniform_sql(self, expression: exp.Uniform) -> str:
+            gen = expression.args.get("gen")
+            seed = expression.args.get("seed")
+            # From Snowflake UNIFORM(min, max, gen) as RANDOM(), RANDOM(seed), or constant value -> Extract seed
+            if gen:
+                seed = gen.this
+            return self.func("UNIFORM", expression.this, expression.expression, seed)

sqlglot 27.29.0__py3-none-any.whl → 28.4.0__py3-none-any.whl

sqlglot 27.29.0__py3-none-any.whl → 28.4.0__py3-none-any.whl