sqlglot 27.29.0__py3-none-any.whl → 28.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. sqlglot/__main__.py +6 -4
  2. sqlglot/_version.py +2 -2
  3. sqlglot/dialects/bigquery.py +116 -295
  4. sqlglot/dialects/clickhouse.py +67 -2
  5. sqlglot/dialects/databricks.py +38 -1
  6. sqlglot/dialects/dialect.py +327 -286
  7. sqlglot/dialects/dremio.py +4 -1
  8. sqlglot/dialects/duckdb.py +718 -22
  9. sqlglot/dialects/exasol.py +243 -10
  10. sqlglot/dialects/hive.py +8 -8
  11. sqlglot/dialects/mysql.py +11 -2
  12. sqlglot/dialects/oracle.py +29 -0
  13. sqlglot/dialects/postgres.py +46 -24
  14. sqlglot/dialects/presto.py +47 -16
  15. sqlglot/dialects/redshift.py +16 -0
  16. sqlglot/dialects/risingwave.py +3 -0
  17. sqlglot/dialects/singlestore.py +12 -3
  18. sqlglot/dialects/snowflake.py +199 -271
  19. sqlglot/dialects/spark.py +2 -2
  20. sqlglot/dialects/spark2.py +11 -48
  21. sqlglot/dialects/sqlite.py +9 -0
  22. sqlglot/dialects/teradata.py +5 -8
  23. sqlglot/dialects/trino.py +6 -0
  24. sqlglot/dialects/tsql.py +61 -25
  25. sqlglot/diff.py +4 -2
  26. sqlglot/errors.py +69 -0
  27. sqlglot/expressions.py +484 -84
  28. sqlglot/generator.py +143 -41
  29. sqlglot/helper.py +2 -2
  30. sqlglot/optimizer/annotate_types.py +247 -140
  31. sqlglot/optimizer/canonicalize.py +6 -1
  32. sqlglot/optimizer/eliminate_joins.py +1 -1
  33. sqlglot/optimizer/eliminate_subqueries.py +2 -2
  34. sqlglot/optimizer/merge_subqueries.py +5 -5
  35. sqlglot/optimizer/normalize.py +20 -13
  36. sqlglot/optimizer/normalize_identifiers.py +17 -3
  37. sqlglot/optimizer/optimizer.py +4 -0
  38. sqlglot/optimizer/pushdown_predicates.py +1 -1
  39. sqlglot/optimizer/qualify.py +14 -6
  40. sqlglot/optimizer/qualify_columns.py +113 -352
  41. sqlglot/optimizer/qualify_tables.py +112 -70
  42. sqlglot/optimizer/resolver.py +374 -0
  43. sqlglot/optimizer/scope.py +27 -16
  44. sqlglot/optimizer/simplify.py +1074 -964
  45. sqlglot/optimizer/unnest_subqueries.py +12 -2
  46. sqlglot/parser.py +276 -160
  47. sqlglot/planner.py +2 -2
  48. sqlglot/schema.py +15 -4
  49. sqlglot/tokens.py +42 -7
  50. sqlglot/transforms.py +77 -22
  51. sqlglot/typing/__init__.py +316 -0
  52. sqlglot/typing/bigquery.py +376 -0
  53. sqlglot/typing/hive.py +12 -0
  54. sqlglot/typing/presto.py +24 -0
  55. sqlglot/typing/snowflake.py +505 -0
  56. sqlglot/typing/spark2.py +58 -0
  57. sqlglot/typing/tsql.py +9 -0
  58. {sqlglot-27.29.0.dist-info → sqlglot-28.4.0.dist-info}/METADATA +2 -2
  59. sqlglot-28.4.0.dist-info/RECORD +92 -0
  60. sqlglot-27.29.0.dist-info/RECORD +0 -84
  61. {sqlglot-27.29.0.dist-info → sqlglot-28.4.0.dist-info}/WHEEL +0 -0
  62. {sqlglot-27.29.0.dist-info → sqlglot-28.4.0.dist-info}/licenses/LICENSE +0 -0
  63. {sqlglot-27.29.0.dist-info → sqlglot-28.4.0.dist-info}/top_level.txt +0 -0
@@ -65,7 +65,7 @@ def eliminate_subqueries(expression: exp.Expression) -> exp.Expression:
65
65
  # Existing CTES in the root expression. We'll use this for deduplication.
66
66
  existing_ctes: ExistingCTEsMapping = {}
67
67
 
68
- with_ = root.expression.args.get("with")
68
+ with_ = root.expression.args.get("with_")
69
69
  recursive = False
70
70
  if with_:
71
71
  recursive = with_.args.get("recursive")
@@ -97,7 +97,7 @@ def eliminate_subqueries(expression: exp.Expression) -> exp.Expression:
97
97
 
98
98
  if new_ctes:
99
99
  query = expression.expression if isinstance(expression, exp.DDL) else expression
100
- query.set("with", exp.With(expressions=new_ctes, recursive=recursive))
100
+ query.set("with_", exp.With(expressions=new_ctes, recursive=recursive))
101
101
 
102
102
  return expression
103
103
 
@@ -48,7 +48,7 @@ def merge_subqueries(expression: E, leave_tables_isolated: bool = False) -> E:
48
48
  # If a derived table has these Select args, it can't be merged
49
49
  UNMERGABLE_ARGS = set(exp.Select.arg_types) - {
50
50
  "expressions",
51
- "from",
51
+ "from_",
52
52
  "joins",
53
53
  "where",
54
54
  "order",
@@ -165,7 +165,7 @@ def _mergeable(
165
165
  if not on:
166
166
  return False
167
167
  selections = [c.name for c in on.find_all(exp.Column) if c.table == alias]
168
- inner_from = inner_scope.expression.args.get("from")
168
+ inner_from = inner_scope.expression.args.get("from_")
169
169
  if not inner_from:
170
170
  return False
171
171
  inner_from_table = inner_from.alias_or_name
@@ -197,7 +197,7 @@ def _mergeable(
197
197
  and not outer_scope.expression.is_star
198
198
  and isinstance(inner_select, exp.Select)
199
199
  and not any(inner_select.args.get(arg) for arg in UNMERGABLE_ARGS)
200
- and inner_select.args.get("from") is not None
200
+ and inner_select.args.get("from_") is not None
201
201
  and not outer_scope.pivots
202
202
  and not any(e.find(exp.AggFunc, exp.Select, exp.Explode) for e in inner_select.expressions)
203
203
  and not (leave_tables_isolated and len(outer_scope.selected_sources) > 1)
@@ -261,7 +261,7 @@ def _merge_from(
261
261
  """
262
262
  Merge FROM clause of inner query into outer query.
263
263
  """
264
- new_subquery = inner_scope.expression.args["from"].this
264
+ new_subquery = inner_scope.expression.args["from_"].this
265
265
  new_subquery.set("joins", node_to_replace.args.get("joins"))
266
266
  node_to_replace.replace(new_subquery)
267
267
  for join_hint in outer_scope.join_hints:
@@ -357,7 +357,7 @@ def _merge_where(outer_scope: Scope, inner_scope: Scope, from_or_join: FromOrJoi
357
357
  if isinstance(from_or_join, exp.Join):
358
358
  # Merge predicates from an outer join to the ON clause
359
359
  # if it only has columns that are already joined
360
- from_ = expression.args.get("from")
360
+ from_ = expression.args.get("from_")
361
361
  sources = {from_.alias_or_name} if from_ else set()
362
362
 
363
363
  for join in expression.args["joins"]:
@@ -6,7 +6,7 @@ from sqlglot import exp
6
6
  from sqlglot.errors import OptimizeError
7
7
  from sqlglot.helper import while_changing
8
8
  from sqlglot.optimizer.scope import find_all_in_scope
9
- from sqlglot.optimizer.simplify import flatten, rewrite_between, uniq_sort
9
+ from sqlglot.optimizer.simplify import Simplifier, flatten
10
10
 
11
11
  logger = logging.getLogger("sqlglot")
12
12
 
@@ -28,6 +28,8 @@ def normalize(expression: exp.Expression, dnf: bool = False, max_distance: int =
28
28
  Returns:
29
29
  sqlglot.Expression: normalized expression
30
30
  """
31
+ simplifier = Simplifier(annotate_new_expressions=False)
32
+
31
33
  for node in tuple(expression.walk(prune=lambda e: isinstance(e, exp.Connector))):
32
34
  if isinstance(node, exp.Connector):
33
35
  if normalized(node, dnf=dnf):
@@ -35,7 +37,7 @@ def normalize(expression: exp.Expression, dnf: bool = False, max_distance: int =
35
37
  root = node is expression
36
38
  original = node.copy()
37
39
 
38
- node.transform(rewrite_between, copy=False)
40
+ node.transform(simplifier.rewrite_between, copy=False)
39
41
  distance = normalization_distance(node, dnf=dnf, max_=max_distance)
40
42
 
41
43
  if distance > max_distance:
@@ -46,7 +48,10 @@ def normalize(expression: exp.Expression, dnf: bool = False, max_distance: int =
46
48
 
47
49
  try:
48
50
  node = node.replace(
49
- while_changing(node, lambda e: distributive_law(e, dnf, max_distance))
51
+ while_changing(
52
+ node,
53
+ lambda e: distributive_law(e, dnf, max_distance, simplifier=simplifier),
54
+ )
50
55
  )
51
56
  except OptimizeError as e:
52
57
  logger.info(e)
@@ -146,7 +151,7 @@ def _predicate_lengths(expression, dnf, max_=float("inf"), depth=0):
146
151
  yield from _predicate_lengths(right, dnf, max_, depth)
147
152
 
148
153
 
149
- def distributive_law(expression, dnf, max_distance):
154
+ def distributive_law(expression, dnf, max_distance, simplifier=None):
150
155
  """
151
156
  x OR (y AND z) -> (x OR y) AND (x OR z)
152
157
  (x AND y) OR (y AND z) -> (x OR y) AND (x OR z) AND (y OR y) AND (y OR z)
@@ -168,32 +173,34 @@ def distributive_law(expression, dnf, max_distance):
168
173
  from_func = exp.and_ if from_exp == exp.And else exp.or_
169
174
  to_func = exp.and_ if to_exp == exp.And else exp.or_
170
175
 
176
+ simplifier = simplifier or Simplifier(annotate_new_expressions=False)
177
+
171
178
  if isinstance(a, to_exp) and isinstance(b, to_exp):
172
179
  if len(tuple(a.find_all(exp.Connector))) > len(tuple(b.find_all(exp.Connector))):
173
- return _distribute(a, b, from_func, to_func)
174
- return _distribute(b, a, from_func, to_func)
180
+ return _distribute(a, b, from_func, to_func, simplifier)
181
+ return _distribute(b, a, from_func, to_func, simplifier)
175
182
  if isinstance(a, to_exp):
176
- return _distribute(b, a, from_func, to_func)
183
+ return _distribute(b, a, from_func, to_func, simplifier)
177
184
  if isinstance(b, to_exp):
178
- return _distribute(a, b, from_func, to_func)
185
+ return _distribute(a, b, from_func, to_func, simplifier)
179
186
 
180
187
  return expression
181
188
 
182
189
 
183
- def _distribute(a, b, from_func, to_func):
190
+ def _distribute(a, b, from_func, to_func, simplifier):
184
191
  if isinstance(a, exp.Connector):
185
192
  exp.replace_children(
186
193
  a,
187
194
  lambda c: to_func(
188
- uniq_sort(flatten(from_func(c, b.left))),
189
- uniq_sort(flatten(from_func(c, b.right))),
195
+ simplifier.uniq_sort(flatten(from_func(c, b.left))),
196
+ simplifier.uniq_sort(flatten(from_func(c, b.right))),
190
197
  copy=False,
191
198
  ),
192
199
  )
193
200
  else:
194
201
  a = to_func(
195
- uniq_sort(flatten(from_func(a, b.left))),
196
- uniq_sort(flatten(from_func(a, b.right))),
202
+ simplifier.uniq_sort(flatten(from_func(a, b.left))),
203
+ simplifier.uniq_sort(flatten(from_func(a, b.right))),
197
204
  copy=False,
198
205
  )
199
206
 
@@ -10,14 +10,18 @@ if t.TYPE_CHECKING:
10
10
 
11
11
 
12
12
  @t.overload
13
- def normalize_identifiers(expression: E, dialect: DialectType = None) -> E: ...
13
+ def normalize_identifiers(
14
+ expression: E, dialect: DialectType = None, store_original_column_identifiers: bool = False
15
+ ) -> E: ...
14
16
 
15
17
 
16
18
  @t.overload
17
- def normalize_identifiers(expression: str, dialect: DialectType = None) -> exp.Identifier: ...
19
+ def normalize_identifiers(
20
+ expression: str, dialect: DialectType = None, store_original_column_identifiers: bool = False
21
+ ) -> exp.Identifier: ...
18
22
 
19
23
 
20
- def normalize_identifiers(expression, dialect=None):
24
+ def normalize_identifiers(expression, dialect=None, store_original_column_identifiers=False):
21
25
  """
22
26
  Normalize identifiers by converting them to either lower or upper case,
23
27
  ensuring the semantics are preserved in each case (e.g. by respecting
@@ -48,6 +52,8 @@ def normalize_identifiers(expression, dialect=None):
48
52
  Args:
49
53
  expression: The expression to transform.
50
54
  dialect: The dialect to use in order to decide how to normalize identifiers.
55
+ store_original_column_identifiers: Whether to store the original column identifiers in
56
+ the meta data of the expression in case we want to undo the normalization at a later point.
51
57
 
52
58
  Returns:
53
59
  The transformed expression.
@@ -59,6 +65,14 @@ def normalize_identifiers(expression, dialect=None):
59
65
 
60
66
  for node in expression.walk(prune=lambda n: n.meta.get("case_sensitive")):
61
67
  if not node.meta.get("case_sensitive"):
68
+ if store_original_column_identifiers and isinstance(node, exp.Column):
69
+ # TODO: This does not handle non-column cases, e.g PARSE_JSON(...).key
70
+ parent = node
71
+ while parent and isinstance(parent.parent, exp.Dot):
72
+ parent = parent.parent
73
+
74
+ node.meta["dot_parts"] = [p.name for p in parent.parts]
75
+
62
76
  dialect.normalize_identifier(node)
63
77
 
64
78
  return expression
@@ -46,6 +46,7 @@ def optimize(
46
46
  catalog: t.Optional[str | exp.Identifier] = None,
47
47
  dialect: DialectType = None,
48
48
  rules: t.Sequence[t.Callable] = RULES,
49
+ sql: t.Optional[str] = None,
49
50
  **kwargs,
50
51
  ) -> exp.Expression:
51
52
  """
@@ -66,6 +67,8 @@ def optimize(
66
67
  rules: sequence of optimizer rules to use.
67
68
  Many of the rules require tables and columns to be qualified.
68
69
  Do not remove `qualify` from the sequence of rules unless you know what you're doing!
70
+ sql: Original SQL string for error highlighting. If not provided, errors will not include
71
+ highlighting. Requires that the expression has position metadata from parsing.
69
72
  **kwargs: If a rule has a keyword argument with a same name in **kwargs, it will be passed in.
70
73
 
71
74
  Returns:
@@ -77,6 +80,7 @@ def optimize(
77
80
  "catalog": catalog,
78
81
  "schema": schema,
79
82
  "dialect": dialect,
83
+ "sql": sql,
80
84
  "isolate_tables": True, # needed for other optimizations to perform well
81
85
  "quote_identifiers": False,
82
86
  **kwargs,
@@ -181,7 +181,7 @@ def nodes_for_predicate(predicate, sources, scope_ref_count):
181
181
 
182
182
  # a node can reference a CTE which should be pushed down
183
183
  if isinstance(node, exp.From) and not isinstance(source, exp.Table):
184
- with_ = source.parent.expression.args.get("with")
184
+ with_ = source.parent.expression.args.get("with_")
185
185
  if with_ and with_.recursive:
186
186
  return {}
187
187
  node = source.expression
@@ -7,7 +7,6 @@ from sqlglot.dialects.dialect import Dialect, DialectType
7
7
  from sqlglot.optimizer.isolate_table_selects import isolate_table_selects
8
8
  from sqlglot.optimizer.normalize_identifiers import normalize_identifiers
9
9
  from sqlglot.optimizer.qualify_columns import (
10
- pushdown_cte_alias_columns as pushdown_cte_alias_columns_func,
11
10
  qualify_columns as qualify_columns_func,
12
11
  quote_identifiers as quote_identifiers_func,
13
12
  validate_qualify_columns as validate_qualify_columns_func,
@@ -31,7 +30,9 @@ def qualify(
31
30
  validate_qualify_columns: bool = True,
32
31
  quote_identifiers: bool = True,
33
32
  identify: bool = True,
33
+ canonicalize_table_aliases: bool = False,
34
34
  on_qualify: t.Optional[t.Callable[[exp.Expression], None]] = None,
35
+ sql: t.Optional[str] = None,
35
36
  ) -> exp.Expression:
36
37
  """
37
38
  Rewrite sqlglot AST to have normalized and qualified tables and columns.
@@ -63,28 +64,35 @@ def qualify(
63
64
  This step is necessary to ensure correctness for case sensitive queries.
64
65
  But this flag is provided in case this step is performed at a later time.
65
66
  identify: If True, quote all identifiers, else only necessary ones.
67
+ canonicalize_table_aliases: Whether to use canonical aliases (_0, _1, ...) for all sources
68
+ instead of preserving table names.
66
69
  on_qualify: Callback after a table has been qualified.
70
+ sql: Original SQL string for error highlighting. If not provided, errors will not include
71
+ highlighting. Requires that the expression has position metadata from parsing.
67
72
 
68
73
  Returns:
69
74
  The qualified expression.
70
75
  """
71
76
  schema = ensure_schema(schema, dialect=dialect)
77
+ dialect = Dialect.get_or_raise(dialect)
72
78
 
73
- expression = normalize_identifiers(expression, dialect=dialect)
79
+ expression = normalize_identifiers(
80
+ expression,
81
+ dialect=dialect,
82
+ store_original_column_identifiers=True,
83
+ )
74
84
  expression = qualify_tables(
75
85
  expression,
76
86
  db=db,
77
87
  catalog=catalog,
78
88
  dialect=dialect,
79
89
  on_qualify=on_qualify,
90
+ canonicalize_table_aliases=canonicalize_table_aliases,
80
91
  )
81
92
 
82
93
  if isolate_tables:
83
94
  expression = isolate_table_selects(expression, schema=schema)
84
95
 
85
- if Dialect.get_or_raise(dialect).PREFER_CTE_ALIAS_COLUMN:
86
- expression = pushdown_cte_alias_columns_func(expression)
87
-
88
96
  if qualify_columns:
89
97
  expression = qualify_columns_func(
90
98
  expression,
@@ -99,6 +107,6 @@ def qualify(
99
107
  expression = quote_identifiers_func(expression, dialect=dialect, identify=identify)
100
108
 
101
109
  if validate_qualify_columns:
102
- validate_qualify_columns_func(expression)
110
+ validate_qualify_columns_func(expression, sql=sql)
103
111
 
104
112
  return expression