PyPI - sqlglot - Versions diffs - 26.28.1__py3-none-any.whl → 26.29.0__py3-none-any.whl - Mend

sqlglot 26.28.1-py3-none-any.whl → 26.29.0-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (17)

sqlglot/_version.py +2 -2
sqlglot/dialects/athena.py +1 -0
sqlglot/dialects/dialect.py +4 -1
sqlglot/dialects/duckdb.py +7 -0
sqlglot/dialects/snowflake.py +38 -1
sqlglot/dialects/tsql.py +2 -2
sqlglot/expressions.py +7 -0
sqlglot/generator.py +20 -3
sqlglot/optimizer/annotate_types.py +44 -1
sqlglot/optimizer/qualify_columns.py +7 -0
sqlglot/optimizer/scope.py +14 -1
sqlglot/parser.py +155 -72
{sqlglot-26.28.1.dist-info → sqlglot-26.29.0.dist-info}/METADATA +1 -1
{sqlglot-26.28.1.dist-info → sqlglot-26.29.0.dist-info}/RECORD +17 -17
{sqlglot-26.28.1.dist-info → sqlglot-26.29.0.dist-info}/WHEEL +0 -0
{sqlglot-26.28.1.dist-info → sqlglot-26.29.0.dist-info}/licenses/LICENSE +0 -0
{sqlglot-26.28.1.dist-info → sqlglot-26.29.0.dist-info}/top_level.txt +0 -0

sqlglot/_version.py CHANGED Viewed

@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
-__version__ = version = '26.28.1'
-__version_tuple__ = version_tuple = (26, 28, 1)
+__version__ = version = '26.29.0'
+__version_tuple__ = version_tuple = (26, 29, 0)

sqlglot/dialects/athena.py CHANGED Viewed

@@ -108,6 +108,7 @@ class Athena(Trino):
         """
         IDENTIFIERS = ['"', "`"]
+        STRING_ESCAPES = ["'", "\\"]
         KEYWORDS = {
             **Hive.Tokenizer.KEYWORDS,
             **Trino.Tokenizer.KEYWORDS,

sqlglot/dialects/dialect.py CHANGED Viewed

@@ -1621,7 +1621,10 @@ def map_date_part(part, dialect: DialectType = Dialect):
     mapped = (
         Dialect.get_or_raise(dialect).DATE_PART_MAPPING.get(part.name.upper()) if part else None
     )
-    return exp.var(mapped) if mapped else part
+    if mapped:
+        return exp.Literal.string(mapped) if part.is_string else exp.var(mapped)
+    return part
 def no_last_day_sql(self: Generator, expression: exp.LastDay) -> str:

sqlglot/dialects/duckdb.py CHANGED Viewed

@@ -290,6 +290,12 @@ class DuckDB(Dialect):
     # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table
     NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE
+    DATE_PART_MAPPING = {
+        **Dialect.DATE_PART_MAPPING,
+        "DAYOFWEEKISO": "ISODOW",
+    }
+    DATE_PART_MAPPING.pop("WEEKDAY")
     def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
         if isinstance(path, exp.Literal):
             # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
@@ -620,6 +626,7 @@ class DuckDB(Dialect):
         PAD_FILL_PATTERN_IS_REQUIRED = True
         ARRAY_CONCAT_IS_VAR_LEN = False
         ARRAY_SIZE_DIM_REQUIRED = False
+        NORMALIZE_EXTRACT_DATE_PARTS = True
         TRANSFORMS = {
             **generator.Generator.TRANSFORMS,

sqlglot/dialects/snowflake.py CHANGED Viewed

@@ -31,6 +31,7 @@ from sqlglot.dialects.dialect import (
 )
 from sqlglot.generator import unsupported_args
 from sqlglot.helper import flatten, is_float, is_int, seq_get
+from sqlglot.optimizer.scope import find_all_in_scope
 from sqlglot.tokens import TokenType
 if t.TYPE_CHECKING:
@@ -333,6 +334,34 @@ def _json_extract_value_array_sql(
     return self.func("TRANSFORM", json_extract, transform_lambda)
+def _eliminate_dot_variant_lookup(expression: exp.Expression) -> exp.Expression:
+    if isinstance(expression, exp.Select):
+        # This transformation is used to facilitate transpilation of BigQuery `UNNEST` operations
+        # to Snowflake. It should not affect roundtrip because `Unnest` nodes cannot be produced
+        # by Snowflake's parser.
+        #
+        # Additionally, at the time of writing this, BigQuery is the only dialect that produces a
+        # `TableAlias` node that only fills `columns` and not `this`, due to `UNNEST_COLUMN_ONLY`.
+        unnest_aliases = set()
+        for unnest in find_all_in_scope(expression, exp.Unnest):
+            unnest_alias = unnest.args.get("alias")
+            if (
+                isinstance(unnest_alias, exp.TableAlias)
+                and not unnest_alias.this
+                and len(unnest_alias.columns) == 1
+            ):
+                unnest_aliases.add(unnest_alias.columns[0].name)
+        if unnest_aliases:
+            for c in find_all_in_scope(expression, exp.Column):
+                if c.table in unnest_aliases:
+                    bracket_lhs = c.args["table"]
+                    bracket_rhs = exp.Literal.string(c.name)
+                    c.replace(exp.Bracket(this=bracket_lhs, expressions=[bracket_rhs]))
+    return expression
 class Snowflake(Dialect):
     # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
     NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
@@ -1096,6 +1125,7 @@ class Snowflake(Dialect):
                     transforms.explode_projection_to_unnest(),
                     transforms.eliminate_semi_and_anti_joins,
                     _transform_generate_date_array,
+                    _eliminate_dot_variant_lookup,
                 ]
             ),
             exp.SHA: rename_func("SHA1"),
@@ -1314,7 +1344,14 @@ class Snowflake(Dialect):
             start = f" START {start}" if start else ""
             increment = expression.args.get("increment")
             increment = f" INCREMENT {increment}" if increment else ""
-            return f"AUTOINCREMENT{start}{increment}"
+            order = expression.args.get("order")
+            if order is not None:
+                order_clause = " ORDER" if order else " NOORDER"
+            else:
+                order_clause = ""
+            return f"AUTOINCREMENT{start}{increment}{order_clause}"
         def cluster_sql(self, expression: exp.Cluster) -> str:
             return f"CLUSTER BY ({self.expressions(expression, flat=True)})"

sqlglot/dialects/tsql.py CHANGED Viewed

@@ -1224,8 +1224,6 @@ class TSQL(Dialect):
                     # to amend the AST by moving the CTEs to the CREATE VIEW statement's query.
                     ctas_expression.set("with", with_.pop())
-            sql = super().create_sql(expression)
             table = expression.find(exp.Table)
             # Convert CTAS statement to SELECT .. INTO ..
@@ -1243,6 +1241,8 @@ class TSQL(Dialect):
                     select_into.limit(0, copy=False)
                 sql = self.sql(select_into)
+            else:
+                sql = super().create_sql(expression)
             if exists:
                 identifier = self.sql(exp.Literal.string(exp.table_name(table) if table else ""))

sqlglot/expressions.py CHANGED Viewed

@@ -1947,6 +1947,7 @@ class GeneratedAsIdentityColumnConstraint(ColumnConstraintKind):
         "minvalue": False,
         "maxvalue": False,
         "cycle": False,
+        "order": False,
     }
@@ -7044,6 +7045,12 @@ class Semicolon(Expression):
     arg_types = {}
+# BigQuery allows SELECT t FROM t and treats the projection as a struct value. This expression
+# type is intended to be constructed by qualify so that we can properly annotate its type later
+class TableColumn(Expression):
+    pass
 def _norm_arg(arg):
     return arg.lower() if type(arg) is str else arg

sqlglot/generator.py CHANGED Viewed

@@ -201,6 +201,7 @@ class Generator(metaclass=_Generator):
         exp.StreamingTableProperty: lambda *_: "STREAMING",
         exp.StrictProperty: lambda *_: "STRICT",
         exp.SwapTable: lambda self, e: f"SWAP WITH {self.sql(e, 'this')}",
+        exp.TableColumn: lambda self, e: self.sql(e.this),
         exp.Tags: lambda self, e: f"TAG ({self.expressions(e, flat=True)})",
         exp.TemporaryProperty: lambda *_: "TEMPORARY",
         exp.TitleColumnConstraint: lambda self, e: f"TITLE {self.sql(e, 'this')}",
@@ -463,6 +464,11 @@ class Generator(metaclass=_Generator):
     # Whether to wrap <props> in `AlterSet`, e.g., ALTER ... SET (<props>)
     ALTER_SET_WRAPPED = False
+    # Whether to normalize the date parts in EXTRACT(<date_part> FROM <expr>) into a common representation
+    # For instance, to extract the day of week in ISO semantics, one can use ISODOW, DAYOFWEEKISO etc depending on the dialect.
+    # TODO: The normalization should be done by default once we've tested it across all dialects.
+    NORMALIZE_EXTRACT_DATE_PARTS = False
     # The name to generate for the JSONPath expression. If `None`, only `this` will be generated
     PARSE_JSON_NAME: t.Optional[str] = "PARSE_JSON"
@@ -2909,9 +2915,17 @@ class Generator(metaclass=_Generator):
         return f"NEXT VALUE FOR {self.sql(expression, 'this')}{order}"
     def extract_sql(self, expression: exp.Extract) -> str:
-        this = self.sql(expression, "this") if self.EXTRACT_ALLOWS_QUOTES else expression.this.name
+        from sqlglot.dialects.dialect import map_date_part
+        this = (
+            map_date_part(expression.this, self.dialect)
+            if self.NORMALIZE_EXTRACT_DATE_PARTS
+            else expression.this
+        )
+        this_sql = self.sql(this) if self.EXTRACT_ALLOWS_QUOTES else this.name
         expression_sql = self.sql(expression, "expression")
-        return f"EXTRACT({this} FROM {expression_sql})"
+        return f"EXTRACT({this_sql} FROM {expression_sql})"
     def trim_sql(self, expression: exp.Trim) -> str:
         trim_type = self.sql(expression, "position")
@@ -4766,7 +4780,10 @@ class Generator(metaclass=_Generator):
     def detach_sql(self, expression: exp.Detach) -> str:
         this = self.sql(expression, "this")
-        exists_sql = " IF EXISTS" if expression.args.get("exists") else ""
+        # the DATABASE keyword is required if IF EXISTS is set
+        # without it, DuckDB throws an error: Parser Error: syntax error at or near "exists" (Line Number: 1)
+        # ref: https://duckdb.org/docs/stable/sql/statements/attach.html#detach-syntax
+        exists_sql = " DATABASE IF EXISTS" if expression.args.get("exists") else ""
         return f"DETACH{exists_sql} {this}"

sqlglot/optimizer/annotate_types.py CHANGED Viewed

@@ -12,7 +12,7 @@ from sqlglot.helper import (
     seq_get,
 )
 from sqlglot.optimizer.scope import Scope, traverse_scope
-from sqlglot.schema import Schema, ensure_schema
+from sqlglot.schema import MappingSchema, Schema, ensure_schema
 from sqlglot.dialects.dialect import Dialect
 if t.TYPE_CHECKING:
@@ -290,9 +290,52 @@ class TypeAnnotator(metaclass=_TypeAnnotator):
                 elif isinstance(source.expression, exp.Unnest):
                     self._set_type(col, source.expression.type)
+        if isinstance(self.schema, MappingSchema):
+            for table_column in scope.table_columns:
+                source = scope.sources.get(table_column.name)
+                if isinstance(source, exp.Table):
+                    schema = self.schema.find(
+                        source, raise_on_missing=False, ensure_data_types=True
+                    )
+                    if not isinstance(schema, dict):
+                        continue
+                    struct_type = exp.DataType(
+                        this=exp.DataType.Type.STRUCT,
+                        expressions=[
+                            exp.ColumnDef(this=exp.to_identifier(c), kind=kind)
+                            for c, kind in schema.items()
+                        ],
+                        nested=True,
+                    )
+                    self._set_type(table_column, struct_type)
+                elif (
+                    isinstance(source, Scope)
+                    and isinstance(source.expression, exp.Query)
+                    and source.expression.is_type(exp.DataType.Type.STRUCT)
+                ):
+                    self._set_type(table_column, source.expression.type)
         # Then (possibly) annotate the remaining expressions in the scope
         self._maybe_annotate(scope.expression)
+        if self.schema.dialect == "bigquery" and isinstance(scope.expression, exp.Query):
+            struct_type = exp.DataType(
+                this=exp.DataType.Type.STRUCT,
+                expressions=[
+                    exp.ColumnDef(this=exp.to_identifier(select.output_name), kind=select.type)
+                    for select in scope.expression.selects
+                ],
+                nested=True,
+            )
+            if not any(
+                cd.kind.is_type(exp.DataType.Type.UNKNOWN)
+                for cd in struct_type.expressions
+                if cd.kind
+            ):
+                self._set_type(scope.expression, struct_type)
     def _maybe_annotate(self, expression: E) -> E:
         if id(expression) in self._visited:
             return expression  # We've already inferred the expression's type

sqlglot/optimizer/qualify_columns.py CHANGED Viewed

@@ -529,6 +529,13 @@ def _qualify_columns(scope: Scope, resolver: Resolver, allow_partial_qualificati
             column_table = resolver.get_table(column_name)
             if column_table:
                 column.set("table", column_table)
+            elif (
+                resolver.schema.dialect == "bigquery"
+                and len(column.parts) == 1
+                and column_name in scope.selected_sources
+            ):
+                # BigQuery allows tables to be referenced as columns, treating them as structs
+                scope.replace(column, exp.TableColumn(this=column.this))
     for pivot in scope.pivots:
         for column in pivot.find_all(exp.Column):

sqlglot/optimizer/scope.py CHANGED Viewed

@@ -88,6 +88,7 @@ class Scope:
     def clear_cache(self):
         self._collected = False
         self._raw_columns = None
+        self._table_columns = None
         self._stars = None
         self._derived_tables = None
         self._udtfs = None
@@ -125,6 +126,7 @@ class Scope:
         self._derived_tables = []
         self._udtfs = []
         self._raw_columns = []
+        self._table_columns = []
         self._stars = []
         self._join_hints = []
         self._semi_anti_join_tables = set()
@@ -156,6 +158,8 @@ class Scope:
                 self._derived_tables.append(node)
             elif isinstance(node, exp.UNWRAPPED_QUERIES):
                 self._subqueries.append(node)
+            elif isinstance(node, exp.TableColumn):
+                self._table_columns.append(node)
         self._collected = True
@@ -309,6 +313,13 @@ class Scope:
         return self._columns
+    @property
+    def table_columns(self):
+        if self._table_columns is None:
+            self._ensure_collected()
+        return self._table_columns
     @property
     def selected_sources(self):
         """
@@ -849,12 +860,14 @@ def walk_in_scope(expression, bfs=True, prune=None):
         if node is expression:
             continue
         if (
             isinstance(node, exp.CTE)
             or (
                 isinstance(node.parent, (exp.From, exp.Join, exp.Subquery))
-                and (_is_derived_table(node) or isinstance(node, exp.UDTF))
+                and _is_derived_table(node)
             )
+            or (isinstance(node.parent, exp.UDTF) and isinstance(node, exp.Query))
             or isinstance(node, exp.UNWRAPPED_QUERIES)
         ):
             crossed_scope_boundary = True

sqlglot/parser.py CHANGED Viewed

@@ -931,15 +931,22 @@ class Parser(metaclass=_Parser):
     }
     PIPE_SYNTAX_TRANSFORM_PARSERS = {
-        "SELECT": lambda self, query: self._parse_pipe_syntax_select(query),
-        "WHERE": lambda self, query: query.where(self._parse_where(), copy=False),
+        "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query),
+        "AS": lambda self, query: self._build_pipe_cte(
+            query, [exp.Star()], self._parse_table_alias()
+        ),
+        "DROP": lambda self, query: self._parse_pipe_syntax_drop(query),
+        "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query),
+        "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query),
         "ORDER BY": lambda self, query: query.order_by(
             self._parse_order(), append=False, copy=False
         ),
-        "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query),
-        "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query),
         "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
+        "SELECT": lambda self, query: self._parse_pipe_syntax_select(query),
+        "SET": lambda self, query: self._parse_pipe_syntax_set(query),
+        "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query),
         "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
+        "WHERE": lambda self, query: query.where(self._parse_where(), copy=False),
     }
     PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
@@ -3252,11 +3259,9 @@ class Parser(metaclass=_Parser):
         elif self._match(TokenType.VALUES, advance=False):
             this = self._parse_derived_table_values()
         elif from_:
-            if self._match(TokenType.PIPE_GT, advance=False):
-                return self._parse_pipe_syntax_query(
-                    exp.Select().from_(from_.this, append=False, copy=False)
-                )
             this = exp.select("*").from_(from_.this, copy=False)
+            if self._match(TokenType.PIPE_GT, advance=False):
+                return self._parse_pipe_syntax_query(this)
         elif self._match(TokenType.SUMMARIZE):
             table = self._match(TokenType.TABLE)
             this = self._parse_select() or self._parse_string() or self._parse_table()
@@ -5543,6 +5548,37 @@ class Parser(metaclass=_Parser):
         return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this
+    def _parse_paren(self) -> t.Optional[exp.Expression]:
+        if not self._match(TokenType.L_PAREN):
+            return None
+        comments = self._prev_comments
+        query = self._parse_select()
+        if query:
+            expressions = [query]
+        else:
+            expressions = self._parse_expressions()
+        this = self._parse_query_modifiers(seq_get(expressions, 0))
+        if not this and self._match(TokenType.R_PAREN, advance=False):
+            this = self.expression(exp.Tuple)
+        elif isinstance(this, exp.UNWRAPPED_QUERIES):
+            this = self._parse_subquery(this=this, parse_alias=False)
+        elif isinstance(this, exp.Subquery):
+            this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False)
+        elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
+            this = self.expression(exp.Tuple, expressions=expressions)
+        else:
+            this = self.expression(exp.Paren, this=this)
+        if this:
+            this.add_comments(comments)
+        self._match_r_paren(expression=this)
+        return this
     def _parse_primary(self) -> t.Optional[exp.Expression]:
         if self._match_set(self.PRIMARY_PARSERS):
             token_type = self._prev.token_type
@@ -5561,37 +5597,7 @@ class Parser(metaclass=_Parser):
         if self._match_pair(TokenType.DOT, TokenType.NUMBER):
             return exp.Literal.number(f"0.{self._prev.text}")
-        if self._match(TokenType.L_PAREN):
-            comments = self._prev_comments
-            query = self._parse_select()
-            if query:
-                expressions = [query]
-            else:
-                expressions = self._parse_expressions()
-            this = self._parse_query_modifiers(seq_get(expressions, 0))
-            if not this and self._match(TokenType.R_PAREN, advance=False):
-                this = self.expression(exp.Tuple)
-            elif isinstance(this, exp.UNWRAPPED_QUERIES):
-                this = self._parse_subquery(this=this, parse_alias=False)
-            elif isinstance(this, exp.Subquery):
-                this = self._parse_subquery(
-                    this=self._parse_set_operations(this), parse_alias=False
-                )
-            elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
-                this = self.expression(exp.Tuple, expressions=expressions)
-            else:
-                this = self.expression(exp.Paren, this=this)
-            if this:
-                this.add_comments(comments)
-            self._match_r_paren(expression=this)
-            return this
-        return None
+        return self._parse_paren()
     def _parse_field(
         self,
@@ -5913,6 +5919,7 @@ class Parser(metaclass=_Parser):
     ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
         start = None
         increment = None
+        order = None
         if self._match(TokenType.L_PAREN, advance=False):
             args = self._parse_wrapped_csv(self._parse_bitwise)
@@ -5922,10 +5929,14 @@ class Parser(metaclass=_Parser):
             start = self._parse_bitwise()
             self._match_text_seq("INCREMENT")
             increment = self._parse_bitwise()
+            if self._match_text_seq("ORDER"):
+                order = True
+            elif self._match_text_seq("NOORDER"):
+                order = False
         if start and increment:
             return exp.GeneratedAsIdentityColumnConstraint(
-                start=start, increment=increment, this=False
+                start=start, increment=increment, this=False, order=order
             )
         return exp.AutoIncrementColumnConstraint()
@@ -8328,12 +8339,18 @@ class Parser(metaclass=_Parser):
         expression.update_positions(token)
         return expression
-    def _build_pipe_cte(self, query: exp.Query, expressions: t.List[exp.Expression]) -> exp.Select:
-        if not query.selects:
-            query = query.select("*", copy=False)
-        self._pipe_cte_counter += 1
-        new_cte = f"__tmp{self._pipe_cte_counter}"
+    def _build_pipe_cte(
+        self,
+        query: exp.Query,
+        expressions: t.List[exp.Expression],
+        alias_cte: t.Optional[exp.TableAlias] = None,
+    ) -> exp.Select:
+        new_cte: t.Optional[t.Union[str, exp.TableAlias]]
+        if alias_cte:
+            new_cte = alias_cte
+        else:
+            self._pipe_cte_counter += 1
+            new_cte = f"__tmp{self._pipe_cte_counter}"
         with_ = query.args.get("with")
         ctes = with_.pop() if with_ else None
@@ -8344,15 +8361,34 @@ class Parser(metaclass=_Parser):
         return new_select.with_(new_cte, as_=query, copy=False)
+    def _build_pipe_ctes(
+        self,
+        query: exp.Select,
+        expressions: t.List[exp.Expression],
+        alias_cte: t.Optional[exp.TableAlias] = None,
+    ) -> exp.Select:
+        select = query.selects[0].assert_is(exp.Star)
+        if select.args.get("except") or select.args.get("replace"):
+            query = self._build_pipe_cte(
+                query=query.select(
+                    *[expr for expr in expressions if not expr.is_star and expr.args.get("alias")],
+                    copy=False,
+                ),
+                expressions=[
+                    projection.args.get("alias", projection) for projection in expressions
+                ],
+            )
+        else:
+            query.select(*expressions, append=False, copy=False)
+        return self._build_pipe_cte(query=query, expressions=[exp.Star()], alias_cte=alias_cte)
     def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select:
         select = self._parse_select()
         if not select:
             return query
-        if not query.selects:
-            return self._build_pipe_cte(query.select(*select.expressions), [exp.Star()])
-        return self._build_pipe_cte(query, select.expressions)
+        return self._build_pipe_ctes(query=query, expressions=select.expressions)
     def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select:
         limit = self._parse_limit()
@@ -8396,12 +8432,12 @@ class Parser(metaclass=_Parser):
             aggregates_or_groups.append(this)
         if group_by_exists:
-            query = query.select(*aggregates_or_groups, copy=False).group_by(
+            query.select(*aggregates_or_groups, copy=False).group_by(
                 *[projection.args.get("alias", projection) for projection in aggregates_or_groups],
                 copy=False,
             )
         else:
-            query = query.select(*aggregates_or_groups, copy=False)
+            query.select(*aggregates_or_groups, copy=False)
         if orders:
             return query.order_by(*orders, append=False, copy=False)
@@ -8417,34 +8453,40 @@ class Parser(metaclass=_Parser):
         ):
             query = self._parse_pipe_syntax_aggregate_group_order_by(query)
-        return self._build_pipe_cte(query, [exp.Star()])
+        return self._build_pipe_ctes(
+            query=query, expressions=[expr for expr in query.selects if not expr.is_star]
+        )
-    def _parse_pipe_syntax_set_operator(
-        self, query: t.Optional[exp.Query]
-    ) -> t.Optional[exp.Select]:
+    def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Select]:
         first_setop = self.parse_set_operation(this=query)
-        if not first_setop or not query:
+        if not first_setop:
             return None
+        def _parse_and_unwrap_query() -> t.Optional[exp.Select]:
+            expr = self._parse_paren()
+            return expr.assert_is(exp.Subquery).unnest() if expr else None
         first_setop.this.pop()
-        distinct = first_setop.args.pop("distinct")
-        setops = [first_setop.expression.pop(), *self._parse_expressions()]
-        query = self._build_pipe_cte(query, [exp.Star()])
+        setops = [
+            first_setop.expression.pop().assert_is(exp.Subquery).unnest(),
+            *self._parse_csv(_parse_and_unwrap_query),
+        ]
+        query = self._build_pipe_cte(query=query, expressions=[exp.Star()])
         with_ = query.args.get("with")
         ctes = with_.pop() if with_ else None
         if isinstance(first_setop, exp.Union):
-            query = query.union(*setops, distinct=distinct, copy=False, **first_setop.args)
+            query = query.union(*setops, copy=False, **first_setop.args)
         elif isinstance(first_setop, exp.Except):
-            query = query.except_(*setops, distinct=distinct, copy=False, **first_setop.args)
+            query = query.except_(*setops, copy=False, **first_setop.args)
         else:
-            query = query.intersect(*setops, distinct=distinct, copy=False, **first_setop.args)
+            query = query.intersect(*setops, copy=False, **first_setop.args)
         query.set("with", ctes)
-        return self._build_pipe_cte(query, [exp.Star()])
+        return self._build_pipe_cte(query=query, expressions=[exp.Star()])
     def _parse_pipe_syntax_join(self, query: exp.Select) -> t.Optional[exp.Select]:
         join = self._parse_join()
@@ -8462,16 +8504,60 @@ class Parser(metaclass=_Parser):
         if from_:
             from_.this.set("pivots", pivots)
-        return self._build_pipe_cte(query, [exp.Star()])
+        return self._build_pipe_ctes(query=query, expressions=[exp.Star()])
+    def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select:
+        self._match_text_seq("EXTEND")
+        return self._build_pipe_ctes(
+            query=query,
+            expressions=[query.selects[0].assert_is(exp.Star), *self._parse_expressions()],
+        )
+    def _parse_pipe_syntax_drop(self, query: exp.Select) -> exp.Select:
+        self._match_text_seq("DROP")
+        dropped_columns = self._parse_csv(self._parse_assignment)
+        select = query.selects[0].assert_is(exp.Star)
+        except_ = select.args.get("except") or []
+        select.set("except", [*except_, *dropped_columns])
+        return query
+    def _parse_pipe_syntax_set(self, query: exp.Select) -> exp.Select:
+        self._match_text_seq("SET")
+        replaced_columns = [
+            self.expression(exp.Alias, this=expr.expression, alias=expr.this)
+            for expr in self._parse_csv(self._parse_assignment)
+        ]
+        select = query.selects[0].assert_is(exp.Star)
+        replace_ = select.args.get("replace") or []
+        select.set("replace", [*replace_, *replaced_columns])
+        return query
+    def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select:
+        sample = self._parse_table_sample()
+        with_ = query.args.get("with")
+        if with_:
+            with_.expressions[-1].this.set("sample", sample)
+        else:
+            query.set("sample", sample)
+        return query
     def _parse_pipe_syntax_query(self, query: exp.Select) -> t.Optional[exp.Select]:
         while self._match(TokenType.PIPE_GT):
             start = self._curr
             parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper())
             if not parser:
-                parsed_query = self._parse_pipe_syntax_set_operator(
-                    query
-                ) or self._parse_pipe_syntax_join(query)
+                # The set operators (UNION, etc) and the JOIN operator have a few common starting
+                # keywords, making it tricky to disambiguate them without lookahead. The approach
+                # here is to try and parse a set operation and if that fails, then try to parse a
+                # join operator. If that fails as well, then the operator is not supported.
+                parsed_query = self._parse_pipe_syntax_set_operator(query)
+                parsed_query = parsed_query or self._parse_pipe_syntax_join(query)
                 if not parsed_query:
                     self._retreat(start)
                     self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.")
@@ -8480,7 +8566,4 @@ class Parser(metaclass=_Parser):
             else:
                 query = parser(self, query)
-        if query and not query.selects:
-            return query.select("*", copy=False)
         return query

{sqlglot-26.28.1.dist-info → sqlglot-26.29.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sqlglot
-Version: 26.28.1
+Version: 26.29.0
 Summary: An easily customizable SQL parser and transpiler
 Author-email: Toby Mao <toby.mao@gmail.com>
 License: MIT License

{sqlglot-26.28.1.dist-info → sqlglot-26.29.0.dist-info}/RECORD RENAMED Viewed

@@ -1,15 +1,15 @@
 sqlglot/__init__.py,sha256=za08rtdPh2v7dOpGdNomttlIVGgTrKja7rPd6sQwaTg,5391
 sqlglot/__main__.py,sha256=022c173KqxsiABWTEpUIq_tJUxuNiW7a7ABsxBXqvu8,2069
 sqlglot/_typing.py,sha256=-1HPyr3w5COlSJWqlgt8jhFk2dyMvBuvVBqIX1wyVCM,642
-sqlglot/_version.py,sha256=lTxpjPlB8VNbv3452Opk2GFByRI5SLtHItxl9sne84Q,515
+sqlglot/_version.py,sha256=gOnetX1YzVEd7bBCS3U4KJPt8DHhhNA_iqdIiN8DYk4,515
 sqlglot/diff.py,sha256=PtOllQMQa1Sw1-V2Y8eypmDqGujXYPaTOp_WLsWkAWk,17314
 sqlglot/errors.py,sha256=QNKMr-pzLUDR-tuMmn_GK6iMHUIVdb_YSJ_BhGEvuso,2126
-sqlglot/expressions.py,sha256=oE7OmkFEstTWoPqM7yCls2I2JNyia8Spr-jVi3n77-A,242992
-sqlglot/generator.py,sha256=4iJ0BxkzinmosIhfhb34xjxaFpzw3Zo7fvmknaf5uRs,212432
+sqlglot/expressions.py,sha256=r3WkNufDInSqIoMasryY4W_XUV7DyIFU2G29jglFPqQ,243249
+sqlglot/generator.py,sha256=E1LjyN49nX9XfK-hysHWvpw7-qtws4xeb85sZi5x3M0,213345
 sqlglot/helper.py,sha256=9nZjFVRBtMKFC3EdzpDQ6jkazFO19po6BF8xHiNGZIo,15111
 sqlglot/jsonpath.py,sha256=dKdI3PNINNGimmSse2IIv-GbPN_3lXncXh_70QH7Lss,7664
 sqlglot/lineage.py,sha256=kXBDSErmZZluZx_kkrMj4MPEOAbkvcbX1tbOW7Bpl-U,15303
-sqlglot/parser.py,sha256=TksM9cVq6bbbyM0sgglcOb-p6_1_Xk6EPIS2Buj-048,320530
+sqlglot/parser.py,sha256=IXOPic_GfVXDaNRna9JbxmG-l2FjxZTIlV0wtWvWnqM,323926
 sqlglot/planner.py,sha256=ql7Li-bWJRcyXzNaZy_n6bQ6B2ZfunEIB8Ztv2xaxq4,14634
 sqlglot/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sqlglot/schema.py,sha256=13H2qKQs27EKdTpDLOvcNnSTDAUbYNKjWtJs4aQCSOA,20509
@@ -19,15 +19,15 @@ sqlglot/tokens.py,sha256=R0B8GQSbQ9GoDc0NlaT5Tc8RjgEOx2IYIkYU5rY8Rg8,48742
 sqlglot/transforms.py,sha256=3jpbHeVTLK9hmQi5f3_vmK-5jZB32_ittCkO7poxCs4,40631
 sqlglot/trie.py,sha256=v27uXMrHfqrXlJ6GmeTSMovsB_3o0ctnlKhdNt7W6fI,2245
 sqlglot/dialects/__init__.py,sha256=aZTLpe2SwgWqiVrRabmfV8TVLPVHFydGwb_zhcVhRss,3499
-sqlglot/dialects/athena.py,sha256=xjy75ej0T3douCUfFKhE1I3kqvPEuQY29x24WG1--Vw,6307
+sqlglot/dialects/athena.py,sha256=gPE9ybRcbd6dVa1mrTFB_eVjsjQG36hErq5EpHyQmXo,6344
 sqlglot/dialects/bigquery.py,sha256=PIRhlNIj6I5iXPxR2_9q1OWXvy4ovVB_ae5qe8SWV80,52713
 sqlglot/dialects/clickhouse.py,sha256=0ahX0zjIwN9-RzfNyITBHs9PsgQXjL0uMRlRgYz9crI,56520
 sqlglot/dialects/databricks.py,sha256=8PoaiP8PfiBjpheRiua-rO_HzX2TRUXqc3DnlQ8zYrg,4481
-sqlglot/dialects/dialect.py,sha256=uuek7l3vUf8OB987UUxzNqdsZdrSj1TtmImVyxbI7Go,68463
+sqlglot/dialects/dialect.py,sha256=uiRHCJ2pjIea3EnRXhizNni1o-d31X02CRBuvXXne7U,68529
 sqlglot/dialects/doris.py,sha256=eC7Ct-iz7p4Usz659NkelUFhm-GmVolIZy5uaBvgjaA,14397
 sqlglot/dialects/drill.py,sha256=FOh7_KjPx_77pv0DiHKZog0CcmzqeF9_PEmGnJ1ESSM,5825
 sqlglot/dialects/druid.py,sha256=kh3snZtneehNOWqs3XcPjsrhNaRbkCQ8E4hHbWJ1fHM,690
-sqlglot/dialects/duckdb.py,sha256=alEYXBW5uUApRC8IRYnsapeiJq7JJwUmrK18C56RYsg,47780
+sqlglot/dialects/duckdb.py,sha256=rARz845jDTzx8WUncAYHZeoBcVi7WvIJlGbjnNHaxZM,47965
 sqlglot/dialects/dune.py,sha256=gALut-fFfN2qMsr8LvZ1NQK3F3W9z2f4PwMvTMXVVVg,375
 sqlglot/dialects/hive.py,sha256=PO6DLT1kHL-U2kFfV1CsNgQFT7A32LuGN71gnTXEOfY,31728
 sqlglot/dialects/materialize.py,sha256=_DPLPt8YrdQIIXNrGJw1IMcGOoAEJ9NO9X9pDfy4hxs,3494
@@ -38,7 +38,7 @@ sqlglot/dialects/presto.py,sha256=ltKbQ44efeq1HM0T8Qq0rsBSx6B6bF9RoKtUBVeoz70,33
 sqlglot/dialects/prql.py,sha256=OF2LfDb4uzKIF7kpCfpL5G7VP1pnzLbjfW5QFUnuPvo,7803
 sqlglot/dialects/redshift.py,sha256=H8H8lGizHIAd4qLoPeFchyiGZKO1I8U_B058woukuGw,15366
 sqlglot/dialects/risingwave.py,sha256=hwEOPjMw0ZM_3fjQcBUE00oy6I8V6mzYOOYmcwwS8mw,2898
-sqlglot/dialects/snowflake.py,sha256=m4Gekw4NhoD3q4WF1TJhetRmmwkh8XG9Rqq8mL3P31E,61761
+sqlglot/dialects/snowflake.py,sha256=dP5o1sH0q5UDMxPoI5vYp1_2FQyBU7VbeYdxF1HVyEs,63398
 sqlglot/dialects/spark.py,sha256=fbmiTKAQiKqG9yE_HAxYGgQiOjdxB9tJyjOtgdqF100,7645
 sqlglot/dialects/spark2.py,sha256=8er7nHDm5Wc57m9AOxKN0sd_DVzbhAL44H_udlFh9O8,14258
 sqlglot/dialects/sqlite.py,sha256=UzJwIdY1PsLArMxNt5lKvk8COHvXeo4FoqW41LqVmM8,12440
@@ -46,14 +46,14 @@ sqlglot/dialects/starrocks.py,sha256=fHNgvq5Nz7dI4QUWCTOO5VDOYjasBxRRlcg9TbY0UZE
 sqlglot/dialects/tableau.py,sha256=oIawDzUITxGCWaEMB8OaNMPWhbC3U-2y09pYPm4eazc,2190
 sqlglot/dialects/teradata.py,sha256=xWa-9kSTsT-eM1NePi_oIM1dPHmXW89GLU5Uda3_6Ao,14036
 sqlglot/dialects/trino.py,sha256=wgLsiX1NQvjGny_rgrU1e2r6kK1LD0KgaSdIDrYmjD0,4285
-sqlglot/dialects/tsql.py,sha256=Kpakg5NXC2Gwzr8Su2Uotmi2Bmc2dUe8T2u7aeNe-us,54376
+sqlglot/dialects/tsql.py,sha256=kMa8hYAXp3D2-g4HzkuzHDsWeXU1WgbyZm2sNl2a8rE,54397
 sqlglot/executor/__init__.py,sha256=FslewzYQtQdDNg_0Ju2UaiP4vo4IMUgkfkmFsYUhcN0,2958
 sqlglot/executor/context.py,sha256=WJHJdYQCOeVXwLw0uSSrWSc25eBMn5Ix108RCvdsKRQ,3386
 sqlglot/executor/env.py,sha256=tQhU5PpTBMcxgZIFddFqxWMNPtHN0vOOz72voncY3KY,8276
 sqlglot/executor/python.py,sha256=09GYRzrPn3lZGfDJY9pbONOvmYxsRyeSWjUiqkSRHGo,16661
 sqlglot/executor/table.py,sha256=xkuJlgLVNYUXsSUaX0zTcnFekldXLLU8LqDyjR5K9wY,4419
 sqlglot/optimizer/__init__.py,sha256=FdAvVz6rQLLkiiH21-SD4RxB5zS3WDeU-s03PZkJ-F4,343
-sqlglot/optimizer/annotate_types.py,sha256=RxplZctzmrtTbAX2YoU6T-rVHqbyl4clRRisIb0iwOQ,22278
+sqlglot/optimizer/annotate_types.py,sha256=-JkNgc5R1jYh130D8lGv5nYSmPddv4Naf3BZiD5ZuTs,24137
 sqlglot/optimizer/canonicalize.py,sha256=RJpUbWDudjknRMtO_Kf8MGZ5Hv1twpPWac2u5kpV4Vw,7719
 sqlglot/optimizer/eliminate_ctes.py,sha256=fUBM0RUnPrm2sYptEWBux98B7fcx7W-BM1zVqfgDz9c,1448
 sqlglot/optimizer/eliminate_joins.py,sha256=5Whliegc7U8BnS6tlrl9wkeAgyP1NpgCCAPxChHzFfw,5874
@@ -67,13 +67,13 @@ sqlglot/optimizer/optimizer.py,sha256=vXEXDWHvbO-vJmSI7UqJuydM2WrD1xko7rETq2EtVJ
 sqlglot/optimizer/pushdown_predicates.py,sha256=H4lFc9Dsds8W7FOsE4wbK6PHJBu6SjgQU7mVtl4laps,8357
 sqlglot/optimizer/pushdown_projections.py,sha256=7NoK5NAUVYVhs0YnYyo6WuXfaO-BShSwS6lA8Y-ATQ4,6668
 sqlglot/optimizer/qualify.py,sha256=oAPfwub7dEkrlCrsptcJWpLya4BgKhN6M5SwIs_86LY,4002
-sqlglot/optimizer/qualify_columns.py,sha256=X2Iydssan_Fw84cd-mrzqxG3eRfRdpP6HVRofSbfHlg,40515
+sqlglot/optimizer/qualify_columns.py,sha256=77aScPakXYaiagnoCWk2qwMxlKuRGsFTAK9sOQuR2vY,40872
 sqlglot/optimizer/qualify_tables.py,sha256=5f5enBAh-bpNB9ewF97W9fx9h1TGXj1Ih5fncvH42sY,6486
-sqlglot/optimizer/scope.py,sha256=lZWJsR1k-vx1VdxOn0yvbF_LcviXbK357WlrgOLXGEs,30123
+sqlglot/optimizer/scope.py,sha256=r-2PaO7-woaIWaWrKC88J9eTgdQardNYQ1rIXXaPr1w,30501
 sqlglot/optimizer/simplify.py,sha256=S0Blqg5Mq2KRRWhWz-Eivch9sBjBhg9fRJA6EdBzj2g,50704
 sqlglot/optimizer/unnest_subqueries.py,sha256=kzWUVDlxs8z9nmRx-8U-pHXPtVZhEIwkKqmKhr2QLvc,10908
-sqlglot-26.28.1.dist-info/licenses/LICENSE,sha256=AI3__mHZfOtzY3EluR_pIYBm3_pE7TbVx7qaHxoZ114,1065
-sqlglot-26.28.1.dist-info/METADATA,sha256=ElrNZkPPdEmAmU1gVJgndWkFCWlhnYqLLkGB4562Bd4,20732
-sqlglot-26.28.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-sqlglot-26.28.1.dist-info/top_level.txt,sha256=5kRskCGA_gVADF9rSfSzPdLHXqvfMusDYeHePfNY2nQ,8
-sqlglot-26.28.1.dist-info/RECORD,,
+sqlglot-26.29.0.dist-info/licenses/LICENSE,sha256=AI3__mHZfOtzY3EluR_pIYBm3_pE7TbVx7qaHxoZ114,1065
+sqlglot-26.29.0.dist-info/METADATA,sha256=rc1ouFaDp1lgiQ2W3jRFM2VNj7RUrf2drwij1_rajpg,20732
+sqlglot-26.29.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+sqlglot-26.29.0.dist-info/top_level.txt,sha256=5kRskCGA_gVADF9rSfSzPdLHXqvfMusDYeHePfNY2nQ,8
+sqlglot-26.29.0.dist-info/RECORD,,

{sqlglot-26.28.1.dist-info → sqlglot-26.29.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{sqlglot-26.28.1.dist-info → sqlglot-26.29.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{sqlglot-26.28.1.dist-info → sqlglot-26.29.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

sqlglot 26.28.1__py3-none-any.whl → 26.29.0__py3-none-any.whl

sqlglot 26.28.1-py3-none-any.whl → 26.29.0-py3-none-any.whl