sqlglot 26.28.1__py3-none-any.whl → 26.29.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sqlglot/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '26.28.1'
21
- __version_tuple__ = version_tuple = (26, 28, 1)
20
+ __version__ = version = '26.29.0'
21
+ __version_tuple__ = version_tuple = (26, 29, 0)
@@ -108,6 +108,7 @@ class Athena(Trino):
108
108
  """
109
109
 
110
110
  IDENTIFIERS = ['"', "`"]
111
+ STRING_ESCAPES = ["'", "\\"]
111
112
  KEYWORDS = {
112
113
  **Hive.Tokenizer.KEYWORDS,
113
114
  **Trino.Tokenizer.KEYWORDS,
@@ -1621,7 +1621,10 @@ def map_date_part(part, dialect: DialectType = Dialect):
1621
1621
  mapped = (
1622
1622
  Dialect.get_or_raise(dialect).DATE_PART_MAPPING.get(part.name.upper()) if part else None
1623
1623
  )
1624
- return exp.var(mapped) if mapped else part
1624
+ if mapped:
1625
+ return exp.Literal.string(mapped) if part.is_string else exp.var(mapped)
1626
+
1627
+ return part
1625
1628
 
1626
1629
 
1627
1630
  def no_last_day_sql(self: Generator, expression: exp.LastDay) -> str:
@@ -290,6 +290,12 @@ class DuckDB(Dialect):
290
290
  # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table
291
291
  NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE
292
292
 
293
+ DATE_PART_MAPPING = {
294
+ **Dialect.DATE_PART_MAPPING,
295
+ "DAYOFWEEKISO": "ISODOW",
296
+ }
297
+ DATE_PART_MAPPING.pop("WEEKDAY")
298
+
293
299
  def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
294
300
  if isinstance(path, exp.Literal):
295
301
  # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
@@ -620,6 +626,7 @@ class DuckDB(Dialect):
620
626
  PAD_FILL_PATTERN_IS_REQUIRED = True
621
627
  ARRAY_CONCAT_IS_VAR_LEN = False
622
628
  ARRAY_SIZE_DIM_REQUIRED = False
629
+ NORMALIZE_EXTRACT_DATE_PARTS = True
623
630
 
624
631
  TRANSFORMS = {
625
632
  **generator.Generator.TRANSFORMS,
@@ -31,6 +31,7 @@ from sqlglot.dialects.dialect import (
31
31
  )
32
32
  from sqlglot.generator import unsupported_args
33
33
  from sqlglot.helper import flatten, is_float, is_int, seq_get
34
+ from sqlglot.optimizer.scope import find_all_in_scope
34
35
  from sqlglot.tokens import TokenType
35
36
 
36
37
  if t.TYPE_CHECKING:
@@ -333,6 +334,34 @@ def _json_extract_value_array_sql(
333
334
  return self.func("TRANSFORM", json_extract, transform_lambda)
334
335
 
335
336
 
337
+ def _eliminate_dot_variant_lookup(expression: exp.Expression) -> exp.Expression:
338
+ if isinstance(expression, exp.Select):
339
+ # This transformation is used to facilitate transpilation of BigQuery `UNNEST` operations
340
+ # to Snowflake. It should not affect roundtrip because `Unnest` nodes cannot be produced
341
+ # by Snowflake's parser.
342
+ #
343
+ # Additionally, at the time of writing this, BigQuery is the only dialect that produces a
344
+ # `TableAlias` node that only fills `columns` and not `this`, due to `UNNEST_COLUMN_ONLY`.
345
+ unnest_aliases = set()
346
+ for unnest in find_all_in_scope(expression, exp.Unnest):
347
+ unnest_alias = unnest.args.get("alias")
348
+ if (
349
+ isinstance(unnest_alias, exp.TableAlias)
350
+ and not unnest_alias.this
351
+ and len(unnest_alias.columns) == 1
352
+ ):
353
+ unnest_aliases.add(unnest_alias.columns[0].name)
354
+
355
+ if unnest_aliases:
356
+ for c in find_all_in_scope(expression, exp.Column):
357
+ if c.table in unnest_aliases:
358
+ bracket_lhs = c.args["table"]
359
+ bracket_rhs = exp.Literal.string(c.name)
360
+ c.replace(exp.Bracket(this=bracket_lhs, expressions=[bracket_rhs]))
361
+
362
+ return expression
363
+
364
+
336
365
  class Snowflake(Dialect):
337
366
  # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
338
367
  NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
@@ -1096,6 +1125,7 @@ class Snowflake(Dialect):
1096
1125
  transforms.explode_projection_to_unnest(),
1097
1126
  transforms.eliminate_semi_and_anti_joins,
1098
1127
  _transform_generate_date_array,
1128
+ _eliminate_dot_variant_lookup,
1099
1129
  ]
1100
1130
  ),
1101
1131
  exp.SHA: rename_func("SHA1"),
@@ -1314,7 +1344,14 @@ class Snowflake(Dialect):
1314
1344
  start = f" START {start}" if start else ""
1315
1345
  increment = expression.args.get("increment")
1316
1346
  increment = f" INCREMENT {increment}" if increment else ""
1317
- return f"AUTOINCREMENT{start}{increment}"
1347
+
1348
+ order = expression.args.get("order")
1349
+ if order is not None:
1350
+ order_clause = " ORDER" if order else " NOORDER"
1351
+ else:
1352
+ order_clause = ""
1353
+
1354
+ return f"AUTOINCREMENT{start}{increment}{order_clause}"
1318
1355
 
1319
1356
  def cluster_sql(self, expression: exp.Cluster) -> str:
1320
1357
  return f"CLUSTER BY ({self.expressions(expression, flat=True)})"
sqlglot/dialects/tsql.py CHANGED
@@ -1224,8 +1224,6 @@ class TSQL(Dialect):
1224
1224
  # to amend the AST by moving the CTEs to the CREATE VIEW statement's query.
1225
1225
  ctas_expression.set("with", with_.pop())
1226
1226
 
1227
- sql = super().create_sql(expression)
1228
-
1229
1227
  table = expression.find(exp.Table)
1230
1228
 
1231
1229
  # Convert CTAS statement to SELECT .. INTO ..
@@ -1243,6 +1241,8 @@ class TSQL(Dialect):
1243
1241
  select_into.limit(0, copy=False)
1244
1242
 
1245
1243
  sql = self.sql(select_into)
1244
+ else:
1245
+ sql = super().create_sql(expression)
1246
1246
 
1247
1247
  if exists:
1248
1248
  identifier = self.sql(exp.Literal.string(exp.table_name(table) if table else ""))
sqlglot/expressions.py CHANGED
@@ -1947,6 +1947,7 @@ class GeneratedAsIdentityColumnConstraint(ColumnConstraintKind):
1947
1947
  "minvalue": False,
1948
1948
  "maxvalue": False,
1949
1949
  "cycle": False,
1950
+ "order": False,
1950
1951
  }
1951
1952
 
1952
1953
 
@@ -7044,6 +7045,12 @@ class Semicolon(Expression):
7044
7045
  arg_types = {}
7045
7046
 
7046
7047
 
7048
+ # BigQuery allows SELECT t FROM t and treats the projection as a struct value. This expression
7049
+ # type is intended to be constructed by qualify so that we can properly annotate its type later
7050
+ class TableColumn(Expression):
7051
+ pass
7052
+
7053
+
7047
7054
  def _norm_arg(arg):
7048
7055
  return arg.lower() if type(arg) is str else arg
7049
7056
 
sqlglot/generator.py CHANGED
@@ -201,6 +201,7 @@ class Generator(metaclass=_Generator):
201
201
  exp.StreamingTableProperty: lambda *_: "STREAMING",
202
202
  exp.StrictProperty: lambda *_: "STRICT",
203
203
  exp.SwapTable: lambda self, e: f"SWAP WITH {self.sql(e, 'this')}",
204
+ exp.TableColumn: lambda self, e: self.sql(e.this),
204
205
  exp.Tags: lambda self, e: f"TAG ({self.expressions(e, flat=True)})",
205
206
  exp.TemporaryProperty: lambda *_: "TEMPORARY",
206
207
  exp.TitleColumnConstraint: lambda self, e: f"TITLE {self.sql(e, 'this')}",
@@ -463,6 +464,11 @@ class Generator(metaclass=_Generator):
463
464
  # Whether to wrap <props> in `AlterSet`, e.g., ALTER ... SET (<props>)
464
465
  ALTER_SET_WRAPPED = False
465
466
 
467
+ # Whether to normalize the date parts in EXTRACT(<date_part> FROM <expr>) into a common representation
468
+ # For instance, to extract the day of week in ISO semantics, one can use ISODOW, DAYOFWEEKISO etc depending on the dialect.
469
+ # TODO: The normalization should be done by default once we've tested it across all dialects.
470
+ NORMALIZE_EXTRACT_DATE_PARTS = False
471
+
466
472
  # The name to generate for the JSONPath expression. If `None`, only `this` will be generated
467
473
  PARSE_JSON_NAME: t.Optional[str] = "PARSE_JSON"
468
474
 
@@ -2909,9 +2915,17 @@ class Generator(metaclass=_Generator):
2909
2915
  return f"NEXT VALUE FOR {self.sql(expression, 'this')}{order}"
2910
2916
 
2911
2917
  def extract_sql(self, expression: exp.Extract) -> str:
2912
- this = self.sql(expression, "this") if self.EXTRACT_ALLOWS_QUOTES else expression.this.name
2918
+ from sqlglot.dialects.dialect import map_date_part
2919
+
2920
+ this = (
2921
+ map_date_part(expression.this, self.dialect)
2922
+ if self.NORMALIZE_EXTRACT_DATE_PARTS
2923
+ else expression.this
2924
+ )
2925
+ this_sql = self.sql(this) if self.EXTRACT_ALLOWS_QUOTES else this.name
2913
2926
  expression_sql = self.sql(expression, "expression")
2914
- return f"EXTRACT({this} FROM {expression_sql})"
2927
+
2928
+ return f"EXTRACT({this_sql} FROM {expression_sql})"
2915
2929
 
2916
2930
  def trim_sql(self, expression: exp.Trim) -> str:
2917
2931
  trim_type = self.sql(expression, "position")
@@ -4766,7 +4780,10 @@ class Generator(metaclass=_Generator):
4766
4780
 
4767
4781
  def detach_sql(self, expression: exp.Detach) -> str:
4768
4782
  this = self.sql(expression, "this")
4769
- exists_sql = " IF EXISTS" if expression.args.get("exists") else ""
4783
+ # the DATABASE keyword is required if IF EXISTS is set
4784
+ # without it, DuckDB throws an error: Parser Error: syntax error at or near "exists" (Line Number: 1)
4785
+ # ref: https://duckdb.org/docs/stable/sql/statements/attach.html#detach-syntax
4786
+ exists_sql = " DATABASE IF EXISTS" if expression.args.get("exists") else ""
4770
4787
 
4771
4788
  return f"DETACH{exists_sql} {this}"
4772
4789
 
@@ -12,7 +12,7 @@ from sqlglot.helper import (
12
12
  seq_get,
13
13
  )
14
14
  from sqlglot.optimizer.scope import Scope, traverse_scope
15
- from sqlglot.schema import Schema, ensure_schema
15
+ from sqlglot.schema import MappingSchema, Schema, ensure_schema
16
16
  from sqlglot.dialects.dialect import Dialect
17
17
 
18
18
  if t.TYPE_CHECKING:
@@ -290,9 +290,52 @@ class TypeAnnotator(metaclass=_TypeAnnotator):
290
290
  elif isinstance(source.expression, exp.Unnest):
291
291
  self._set_type(col, source.expression.type)
292
292
 
293
+ if isinstance(self.schema, MappingSchema):
294
+ for table_column in scope.table_columns:
295
+ source = scope.sources.get(table_column.name)
296
+
297
+ if isinstance(source, exp.Table):
298
+ schema = self.schema.find(
299
+ source, raise_on_missing=False, ensure_data_types=True
300
+ )
301
+ if not isinstance(schema, dict):
302
+ continue
303
+
304
+ struct_type = exp.DataType(
305
+ this=exp.DataType.Type.STRUCT,
306
+ expressions=[
307
+ exp.ColumnDef(this=exp.to_identifier(c), kind=kind)
308
+ for c, kind in schema.items()
309
+ ],
310
+ nested=True,
311
+ )
312
+ self._set_type(table_column, struct_type)
313
+ elif (
314
+ isinstance(source, Scope)
315
+ and isinstance(source.expression, exp.Query)
316
+ and source.expression.is_type(exp.DataType.Type.STRUCT)
317
+ ):
318
+ self._set_type(table_column, source.expression.type)
319
+
293
320
  # Then (possibly) annotate the remaining expressions in the scope
294
321
  self._maybe_annotate(scope.expression)
295
322
 
323
+ if self.schema.dialect == "bigquery" and isinstance(scope.expression, exp.Query):
324
+ struct_type = exp.DataType(
325
+ this=exp.DataType.Type.STRUCT,
326
+ expressions=[
327
+ exp.ColumnDef(this=exp.to_identifier(select.output_name), kind=select.type)
328
+ for select in scope.expression.selects
329
+ ],
330
+ nested=True,
331
+ )
332
+ if not any(
333
+ cd.kind.is_type(exp.DataType.Type.UNKNOWN)
334
+ for cd in struct_type.expressions
335
+ if cd.kind
336
+ ):
337
+ self._set_type(scope.expression, struct_type)
338
+
296
339
  def _maybe_annotate(self, expression: E) -> E:
297
340
  if id(expression) in self._visited:
298
341
  return expression # We've already inferred the expression's type
@@ -529,6 +529,13 @@ def _qualify_columns(scope: Scope, resolver: Resolver, allow_partial_qualificati
529
529
  column_table = resolver.get_table(column_name)
530
530
  if column_table:
531
531
  column.set("table", column_table)
532
+ elif (
533
+ resolver.schema.dialect == "bigquery"
534
+ and len(column.parts) == 1
535
+ and column_name in scope.selected_sources
536
+ ):
537
+ # BigQuery allows tables to be referenced as columns, treating them as structs
538
+ scope.replace(column, exp.TableColumn(this=column.this))
532
539
 
533
540
  for pivot in scope.pivots:
534
541
  for column in pivot.find_all(exp.Column):
@@ -88,6 +88,7 @@ class Scope:
88
88
  def clear_cache(self):
89
89
  self._collected = False
90
90
  self._raw_columns = None
91
+ self._table_columns = None
91
92
  self._stars = None
92
93
  self._derived_tables = None
93
94
  self._udtfs = None
@@ -125,6 +126,7 @@ class Scope:
125
126
  self._derived_tables = []
126
127
  self._udtfs = []
127
128
  self._raw_columns = []
129
+ self._table_columns = []
128
130
  self._stars = []
129
131
  self._join_hints = []
130
132
  self._semi_anti_join_tables = set()
@@ -156,6 +158,8 @@ class Scope:
156
158
  self._derived_tables.append(node)
157
159
  elif isinstance(node, exp.UNWRAPPED_QUERIES):
158
160
  self._subqueries.append(node)
161
+ elif isinstance(node, exp.TableColumn):
162
+ self._table_columns.append(node)
159
163
 
160
164
  self._collected = True
161
165
 
@@ -309,6 +313,13 @@ class Scope:
309
313
 
310
314
  return self._columns
311
315
 
316
+ @property
317
+ def table_columns(self):
318
+ if self._table_columns is None:
319
+ self._ensure_collected()
320
+
321
+ return self._table_columns
322
+
312
323
  @property
313
324
  def selected_sources(self):
314
325
  """
@@ -849,12 +860,14 @@ def walk_in_scope(expression, bfs=True, prune=None):
849
860
 
850
861
  if node is expression:
851
862
  continue
863
+
852
864
  if (
853
865
  isinstance(node, exp.CTE)
854
866
  or (
855
867
  isinstance(node.parent, (exp.From, exp.Join, exp.Subquery))
856
- and (_is_derived_table(node) or isinstance(node, exp.UDTF))
868
+ and _is_derived_table(node)
857
869
  )
870
+ or (isinstance(node.parent, exp.UDTF) and isinstance(node, exp.Query))
858
871
  or isinstance(node, exp.UNWRAPPED_QUERIES)
859
872
  ):
860
873
  crossed_scope_boundary = True
sqlglot/parser.py CHANGED
@@ -931,15 +931,22 @@ class Parser(metaclass=_Parser):
931
931
  }
932
932
 
933
933
  PIPE_SYNTAX_TRANSFORM_PARSERS = {
934
- "SELECT": lambda self, query: self._parse_pipe_syntax_select(query),
935
- "WHERE": lambda self, query: query.where(self._parse_where(), copy=False),
934
+ "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query),
935
+ "AS": lambda self, query: self._build_pipe_cte(
936
+ query, [exp.Star()], self._parse_table_alias()
937
+ ),
938
+ "DROP": lambda self, query: self._parse_pipe_syntax_drop(query),
939
+ "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query),
940
+ "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query),
936
941
  "ORDER BY": lambda self, query: query.order_by(
937
942
  self._parse_order(), append=False, copy=False
938
943
  ),
939
- "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query),
940
- "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query),
941
944
  "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
945
+ "SELECT": lambda self, query: self._parse_pipe_syntax_select(query),
946
+ "SET": lambda self, query: self._parse_pipe_syntax_set(query),
947
+ "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query),
942
948
  "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
949
+ "WHERE": lambda self, query: query.where(self._parse_where(), copy=False),
943
950
  }
944
951
 
945
952
  PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
@@ -3252,11 +3259,9 @@ class Parser(metaclass=_Parser):
3252
3259
  elif self._match(TokenType.VALUES, advance=False):
3253
3260
  this = self._parse_derived_table_values()
3254
3261
  elif from_:
3255
- if self._match(TokenType.PIPE_GT, advance=False):
3256
- return self._parse_pipe_syntax_query(
3257
- exp.Select().from_(from_.this, append=False, copy=False)
3258
- )
3259
3262
  this = exp.select("*").from_(from_.this, copy=False)
3263
+ if self._match(TokenType.PIPE_GT, advance=False):
3264
+ return self._parse_pipe_syntax_query(this)
3260
3265
  elif self._match(TokenType.SUMMARIZE):
3261
3266
  table = self._match(TokenType.TABLE)
3262
3267
  this = self._parse_select() or self._parse_string() or self._parse_table()
@@ -5543,6 +5548,37 @@ class Parser(metaclass=_Parser):
5543
5548
 
5544
5549
  return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this
5545
5550
 
5551
+ def _parse_paren(self) -> t.Optional[exp.Expression]:
5552
+ if not self._match(TokenType.L_PAREN):
5553
+ return None
5554
+
5555
+ comments = self._prev_comments
5556
+ query = self._parse_select()
5557
+
5558
+ if query:
5559
+ expressions = [query]
5560
+ else:
5561
+ expressions = self._parse_expressions()
5562
+
5563
+ this = self._parse_query_modifiers(seq_get(expressions, 0))
5564
+
5565
+ if not this and self._match(TokenType.R_PAREN, advance=False):
5566
+ this = self.expression(exp.Tuple)
5567
+ elif isinstance(this, exp.UNWRAPPED_QUERIES):
5568
+ this = self._parse_subquery(this=this, parse_alias=False)
5569
+ elif isinstance(this, exp.Subquery):
5570
+ this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False)
5571
+ elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
5572
+ this = self.expression(exp.Tuple, expressions=expressions)
5573
+ else:
5574
+ this = self.expression(exp.Paren, this=this)
5575
+
5576
+ if this:
5577
+ this.add_comments(comments)
5578
+
5579
+ self._match_r_paren(expression=this)
5580
+ return this
5581
+
5546
5582
  def _parse_primary(self) -> t.Optional[exp.Expression]:
5547
5583
  if self._match_set(self.PRIMARY_PARSERS):
5548
5584
  token_type = self._prev.token_type
@@ -5561,37 +5597,7 @@ class Parser(metaclass=_Parser):
5561
5597
  if self._match_pair(TokenType.DOT, TokenType.NUMBER):
5562
5598
  return exp.Literal.number(f"0.{self._prev.text}")
5563
5599
 
5564
- if self._match(TokenType.L_PAREN):
5565
- comments = self._prev_comments
5566
- query = self._parse_select()
5567
-
5568
- if query:
5569
- expressions = [query]
5570
- else:
5571
- expressions = self._parse_expressions()
5572
-
5573
- this = self._parse_query_modifiers(seq_get(expressions, 0))
5574
-
5575
- if not this and self._match(TokenType.R_PAREN, advance=False):
5576
- this = self.expression(exp.Tuple)
5577
- elif isinstance(this, exp.UNWRAPPED_QUERIES):
5578
- this = self._parse_subquery(this=this, parse_alias=False)
5579
- elif isinstance(this, exp.Subquery):
5580
- this = self._parse_subquery(
5581
- this=self._parse_set_operations(this), parse_alias=False
5582
- )
5583
- elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
5584
- this = self.expression(exp.Tuple, expressions=expressions)
5585
- else:
5586
- this = self.expression(exp.Paren, this=this)
5587
-
5588
- if this:
5589
- this.add_comments(comments)
5590
-
5591
- self._match_r_paren(expression=this)
5592
- return this
5593
-
5594
- return None
5600
+ return self._parse_paren()
5595
5601
 
5596
5602
  def _parse_field(
5597
5603
  self,
@@ -5913,6 +5919,7 @@ class Parser(metaclass=_Parser):
5913
5919
  ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
5914
5920
  start = None
5915
5921
  increment = None
5922
+ order = None
5916
5923
 
5917
5924
  if self._match(TokenType.L_PAREN, advance=False):
5918
5925
  args = self._parse_wrapped_csv(self._parse_bitwise)
@@ -5922,10 +5929,14 @@ class Parser(metaclass=_Parser):
5922
5929
  start = self._parse_bitwise()
5923
5930
  self._match_text_seq("INCREMENT")
5924
5931
  increment = self._parse_bitwise()
5932
+ if self._match_text_seq("ORDER"):
5933
+ order = True
5934
+ elif self._match_text_seq("NOORDER"):
5935
+ order = False
5925
5936
 
5926
5937
  if start and increment:
5927
5938
  return exp.GeneratedAsIdentityColumnConstraint(
5928
- start=start, increment=increment, this=False
5939
+ start=start, increment=increment, this=False, order=order
5929
5940
  )
5930
5941
 
5931
5942
  return exp.AutoIncrementColumnConstraint()
@@ -8328,12 +8339,18 @@ class Parser(metaclass=_Parser):
8328
8339
  expression.update_positions(token)
8329
8340
  return expression
8330
8341
 
8331
- def _build_pipe_cte(self, query: exp.Query, expressions: t.List[exp.Expression]) -> exp.Select:
8332
- if not query.selects:
8333
- query = query.select("*", copy=False)
8334
-
8335
- self._pipe_cte_counter += 1
8336
- new_cte = f"__tmp{self._pipe_cte_counter}"
8342
+ def _build_pipe_cte(
8343
+ self,
8344
+ query: exp.Query,
8345
+ expressions: t.List[exp.Expression],
8346
+ alias_cte: t.Optional[exp.TableAlias] = None,
8347
+ ) -> exp.Select:
8348
+ new_cte: t.Optional[t.Union[str, exp.TableAlias]]
8349
+ if alias_cte:
8350
+ new_cte = alias_cte
8351
+ else:
8352
+ self._pipe_cte_counter += 1
8353
+ new_cte = f"__tmp{self._pipe_cte_counter}"
8337
8354
 
8338
8355
  with_ = query.args.get("with")
8339
8356
  ctes = with_.pop() if with_ else None
@@ -8344,15 +8361,34 @@ class Parser(metaclass=_Parser):
8344
8361
 
8345
8362
  return new_select.with_(new_cte, as_=query, copy=False)
8346
8363
 
8364
+ def _build_pipe_ctes(
8365
+ self,
8366
+ query: exp.Select,
8367
+ expressions: t.List[exp.Expression],
8368
+ alias_cte: t.Optional[exp.TableAlias] = None,
8369
+ ) -> exp.Select:
8370
+ select = query.selects[0].assert_is(exp.Star)
8371
+ if select.args.get("except") or select.args.get("replace"):
8372
+ query = self._build_pipe_cte(
8373
+ query=query.select(
8374
+ *[expr for expr in expressions if not expr.is_star and expr.args.get("alias")],
8375
+ copy=False,
8376
+ ),
8377
+ expressions=[
8378
+ projection.args.get("alias", projection) for projection in expressions
8379
+ ],
8380
+ )
8381
+ else:
8382
+ query.select(*expressions, append=False, copy=False)
8383
+
8384
+ return self._build_pipe_cte(query=query, expressions=[exp.Star()], alias_cte=alias_cte)
8385
+
8347
8386
  def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select:
8348
8387
  select = self._parse_select()
8349
8388
  if not select:
8350
8389
  return query
8351
8390
 
8352
- if not query.selects:
8353
- return self._build_pipe_cte(query.select(*select.expressions), [exp.Star()])
8354
-
8355
- return self._build_pipe_cte(query, select.expressions)
8391
+ return self._build_pipe_ctes(query=query, expressions=select.expressions)
8356
8392
 
8357
8393
  def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select:
8358
8394
  limit = self._parse_limit()
@@ -8396,12 +8432,12 @@ class Parser(metaclass=_Parser):
8396
8432
  aggregates_or_groups.append(this)
8397
8433
 
8398
8434
  if group_by_exists:
8399
- query = query.select(*aggregates_or_groups, copy=False).group_by(
8435
+ query.select(*aggregates_or_groups, copy=False).group_by(
8400
8436
  *[projection.args.get("alias", projection) for projection in aggregates_or_groups],
8401
8437
  copy=False,
8402
8438
  )
8403
8439
  else:
8404
- query = query.select(*aggregates_or_groups, copy=False)
8440
+ query.select(*aggregates_or_groups, copy=False)
8405
8441
 
8406
8442
  if orders:
8407
8443
  return query.order_by(*orders, append=False, copy=False)
@@ -8417,34 +8453,40 @@ class Parser(metaclass=_Parser):
8417
8453
  ):
8418
8454
  query = self._parse_pipe_syntax_aggregate_group_order_by(query)
8419
8455
 
8420
- return self._build_pipe_cte(query, [exp.Star()])
8456
+ return self._build_pipe_ctes(
8457
+ query=query, expressions=[expr for expr in query.selects if not expr.is_star]
8458
+ )
8421
8459
 
8422
- def _parse_pipe_syntax_set_operator(
8423
- self, query: t.Optional[exp.Query]
8424
- ) -> t.Optional[exp.Select]:
8460
+ def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Select]:
8425
8461
  first_setop = self.parse_set_operation(this=query)
8426
-
8427
- if not first_setop or not query:
8462
+ if not first_setop:
8428
8463
  return None
8429
8464
 
8465
+ def _parse_and_unwrap_query() -> t.Optional[exp.Select]:
8466
+ expr = self._parse_paren()
8467
+ return expr.assert_is(exp.Subquery).unnest() if expr else None
8468
+
8430
8469
  first_setop.this.pop()
8431
- distinct = first_setop.args.pop("distinct")
8432
- setops = [first_setop.expression.pop(), *self._parse_expressions()]
8433
8470
 
8434
- query = self._build_pipe_cte(query, [exp.Star()])
8471
+ setops = [
8472
+ first_setop.expression.pop().assert_is(exp.Subquery).unnest(),
8473
+ *self._parse_csv(_parse_and_unwrap_query),
8474
+ ]
8475
+
8476
+ query = self._build_pipe_cte(query=query, expressions=[exp.Star()])
8435
8477
  with_ = query.args.get("with")
8436
8478
  ctes = with_.pop() if with_ else None
8437
8479
 
8438
8480
  if isinstance(first_setop, exp.Union):
8439
- query = query.union(*setops, distinct=distinct, copy=False, **first_setop.args)
8481
+ query = query.union(*setops, copy=False, **first_setop.args)
8440
8482
  elif isinstance(first_setop, exp.Except):
8441
- query = query.except_(*setops, distinct=distinct, copy=False, **first_setop.args)
8483
+ query = query.except_(*setops, copy=False, **first_setop.args)
8442
8484
  else:
8443
- query = query.intersect(*setops, distinct=distinct, copy=False, **first_setop.args)
8485
+ query = query.intersect(*setops, copy=False, **first_setop.args)
8444
8486
 
8445
8487
  query.set("with", ctes)
8446
8488
 
8447
- return self._build_pipe_cte(query, [exp.Star()])
8489
+ return self._build_pipe_cte(query=query, expressions=[exp.Star()])
8448
8490
 
8449
8491
  def _parse_pipe_syntax_join(self, query: exp.Select) -> t.Optional[exp.Select]:
8450
8492
  join = self._parse_join()
@@ -8462,16 +8504,60 @@ class Parser(metaclass=_Parser):
8462
8504
  if from_:
8463
8505
  from_.this.set("pivots", pivots)
8464
8506
 
8465
- return self._build_pipe_cte(query, [exp.Star()])
8507
+ return self._build_pipe_ctes(query=query, expressions=[exp.Star()])
8508
+
8509
+ def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select:
8510
+ self._match_text_seq("EXTEND")
8511
+ return self._build_pipe_ctes(
8512
+ query=query,
8513
+ expressions=[query.selects[0].assert_is(exp.Star), *self._parse_expressions()],
8514
+ )
8515
+
8516
+ def _parse_pipe_syntax_drop(self, query: exp.Select) -> exp.Select:
8517
+ self._match_text_seq("DROP")
8518
+ dropped_columns = self._parse_csv(self._parse_assignment)
8519
+
8520
+ select = query.selects[0].assert_is(exp.Star)
8521
+ except_ = select.args.get("except") or []
8522
+ select.set("except", [*except_, *dropped_columns])
8523
+
8524
+ return query
8525
+
8526
+ def _parse_pipe_syntax_set(self, query: exp.Select) -> exp.Select:
8527
+ self._match_text_seq("SET")
8528
+ replaced_columns = [
8529
+ self.expression(exp.Alias, this=expr.expression, alias=expr.this)
8530
+ for expr in self._parse_csv(self._parse_assignment)
8531
+ ]
8532
+
8533
+ select = query.selects[0].assert_is(exp.Star)
8534
+ replace_ = select.args.get("replace") or []
8535
+ select.set("replace", [*replace_, *replaced_columns])
8536
+
8537
+ return query
8538
+
8539
+ def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select:
8540
+ sample = self._parse_table_sample()
8541
+
8542
+ with_ = query.args.get("with")
8543
+ if with_:
8544
+ with_.expressions[-1].this.set("sample", sample)
8545
+ else:
8546
+ query.set("sample", sample)
8547
+
8548
+ return query
8466
8549
 
8467
8550
  def _parse_pipe_syntax_query(self, query: exp.Select) -> t.Optional[exp.Select]:
8468
8551
  while self._match(TokenType.PIPE_GT):
8469
8552
  start = self._curr
8470
8553
  parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper())
8471
8554
  if not parser:
8472
- parsed_query = self._parse_pipe_syntax_set_operator(
8473
- query
8474
- ) or self._parse_pipe_syntax_join(query)
8555
+ # The set operators (UNION, etc) and the JOIN operator have a few common starting
8556
+ # keywords, making it tricky to disambiguate them without lookahead. The approach
8557
+ # here is to try and parse a set operation and if that fails, then try to parse a
8558
+ # join operator. If that fails as well, then the operator is not supported.
8559
+ parsed_query = self._parse_pipe_syntax_set_operator(query)
8560
+ parsed_query = parsed_query or self._parse_pipe_syntax_join(query)
8475
8561
  if not parsed_query:
8476
8562
  self._retreat(start)
8477
8563
  self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.")
@@ -8480,7 +8566,4 @@ class Parser(metaclass=_Parser):
8480
8566
  else:
8481
8567
  query = parser(self, query)
8482
8568
 
8483
- if query and not query.selects:
8484
- return query.select("*", copy=False)
8485
-
8486
8569
  return query
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sqlglot
3
- Version: 26.28.1
3
+ Version: 26.29.0
4
4
  Summary: An easily customizable SQL parser and transpiler
5
5
  Author-email: Toby Mao <toby.mao@gmail.com>
6
6
  License: MIT License
@@ -1,15 +1,15 @@
1
1
  sqlglot/__init__.py,sha256=za08rtdPh2v7dOpGdNomttlIVGgTrKja7rPd6sQwaTg,5391
2
2
  sqlglot/__main__.py,sha256=022c173KqxsiABWTEpUIq_tJUxuNiW7a7ABsxBXqvu8,2069
3
3
  sqlglot/_typing.py,sha256=-1HPyr3w5COlSJWqlgt8jhFk2dyMvBuvVBqIX1wyVCM,642
4
- sqlglot/_version.py,sha256=lTxpjPlB8VNbv3452Opk2GFByRI5SLtHItxl9sne84Q,515
4
+ sqlglot/_version.py,sha256=gOnetX1YzVEd7bBCS3U4KJPt8DHhhNA_iqdIiN8DYk4,515
5
5
  sqlglot/diff.py,sha256=PtOllQMQa1Sw1-V2Y8eypmDqGujXYPaTOp_WLsWkAWk,17314
6
6
  sqlglot/errors.py,sha256=QNKMr-pzLUDR-tuMmn_GK6iMHUIVdb_YSJ_BhGEvuso,2126
7
- sqlglot/expressions.py,sha256=oE7OmkFEstTWoPqM7yCls2I2JNyia8Spr-jVi3n77-A,242992
8
- sqlglot/generator.py,sha256=4iJ0BxkzinmosIhfhb34xjxaFpzw3Zo7fvmknaf5uRs,212432
7
+ sqlglot/expressions.py,sha256=r3WkNufDInSqIoMasryY4W_XUV7DyIFU2G29jglFPqQ,243249
8
+ sqlglot/generator.py,sha256=E1LjyN49nX9XfK-hysHWvpw7-qtws4xeb85sZi5x3M0,213345
9
9
  sqlglot/helper.py,sha256=9nZjFVRBtMKFC3EdzpDQ6jkazFO19po6BF8xHiNGZIo,15111
10
10
  sqlglot/jsonpath.py,sha256=dKdI3PNINNGimmSse2IIv-GbPN_3lXncXh_70QH7Lss,7664
11
11
  sqlglot/lineage.py,sha256=kXBDSErmZZluZx_kkrMj4MPEOAbkvcbX1tbOW7Bpl-U,15303
12
- sqlglot/parser.py,sha256=TksM9cVq6bbbyM0sgglcOb-p6_1_Xk6EPIS2Buj-048,320530
12
+ sqlglot/parser.py,sha256=IXOPic_GfVXDaNRna9JbxmG-l2FjxZTIlV0wtWvWnqM,323926
13
13
  sqlglot/planner.py,sha256=ql7Li-bWJRcyXzNaZy_n6bQ6B2ZfunEIB8Ztv2xaxq4,14634
14
14
  sqlglot/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
15
  sqlglot/schema.py,sha256=13H2qKQs27EKdTpDLOvcNnSTDAUbYNKjWtJs4aQCSOA,20509
@@ -19,15 +19,15 @@ sqlglot/tokens.py,sha256=R0B8GQSbQ9GoDc0NlaT5Tc8RjgEOx2IYIkYU5rY8Rg8,48742
19
19
  sqlglot/transforms.py,sha256=3jpbHeVTLK9hmQi5f3_vmK-5jZB32_ittCkO7poxCs4,40631
20
20
  sqlglot/trie.py,sha256=v27uXMrHfqrXlJ6GmeTSMovsB_3o0ctnlKhdNt7W6fI,2245
21
21
  sqlglot/dialects/__init__.py,sha256=aZTLpe2SwgWqiVrRabmfV8TVLPVHFydGwb_zhcVhRss,3499
22
- sqlglot/dialects/athena.py,sha256=xjy75ej0T3douCUfFKhE1I3kqvPEuQY29x24WG1--Vw,6307
22
+ sqlglot/dialects/athena.py,sha256=gPE9ybRcbd6dVa1mrTFB_eVjsjQG36hErq5EpHyQmXo,6344
23
23
  sqlglot/dialects/bigquery.py,sha256=PIRhlNIj6I5iXPxR2_9q1OWXvy4ovVB_ae5qe8SWV80,52713
24
24
  sqlglot/dialects/clickhouse.py,sha256=0ahX0zjIwN9-RzfNyITBHs9PsgQXjL0uMRlRgYz9crI,56520
25
25
  sqlglot/dialects/databricks.py,sha256=8PoaiP8PfiBjpheRiua-rO_HzX2TRUXqc3DnlQ8zYrg,4481
26
- sqlglot/dialects/dialect.py,sha256=uuek7l3vUf8OB987UUxzNqdsZdrSj1TtmImVyxbI7Go,68463
26
+ sqlglot/dialects/dialect.py,sha256=uiRHCJ2pjIea3EnRXhizNni1o-d31X02CRBuvXXne7U,68529
27
27
  sqlglot/dialects/doris.py,sha256=eC7Ct-iz7p4Usz659NkelUFhm-GmVolIZy5uaBvgjaA,14397
28
28
  sqlglot/dialects/drill.py,sha256=FOh7_KjPx_77pv0DiHKZog0CcmzqeF9_PEmGnJ1ESSM,5825
29
29
  sqlglot/dialects/druid.py,sha256=kh3snZtneehNOWqs3XcPjsrhNaRbkCQ8E4hHbWJ1fHM,690
30
- sqlglot/dialects/duckdb.py,sha256=alEYXBW5uUApRC8IRYnsapeiJq7JJwUmrK18C56RYsg,47780
30
+ sqlglot/dialects/duckdb.py,sha256=rARz845jDTzx8WUncAYHZeoBcVi7WvIJlGbjnNHaxZM,47965
31
31
  sqlglot/dialects/dune.py,sha256=gALut-fFfN2qMsr8LvZ1NQK3F3W9z2f4PwMvTMXVVVg,375
32
32
  sqlglot/dialects/hive.py,sha256=PO6DLT1kHL-U2kFfV1CsNgQFT7A32LuGN71gnTXEOfY,31728
33
33
  sqlglot/dialects/materialize.py,sha256=_DPLPt8YrdQIIXNrGJw1IMcGOoAEJ9NO9X9pDfy4hxs,3494
@@ -38,7 +38,7 @@ sqlglot/dialects/presto.py,sha256=ltKbQ44efeq1HM0T8Qq0rsBSx6B6bF9RoKtUBVeoz70,33
38
38
  sqlglot/dialects/prql.py,sha256=OF2LfDb4uzKIF7kpCfpL5G7VP1pnzLbjfW5QFUnuPvo,7803
39
39
  sqlglot/dialects/redshift.py,sha256=H8H8lGizHIAd4qLoPeFchyiGZKO1I8U_B058woukuGw,15366
40
40
  sqlglot/dialects/risingwave.py,sha256=hwEOPjMw0ZM_3fjQcBUE00oy6I8V6mzYOOYmcwwS8mw,2898
41
- sqlglot/dialects/snowflake.py,sha256=m4Gekw4NhoD3q4WF1TJhetRmmwkh8XG9Rqq8mL3P31E,61761
41
+ sqlglot/dialects/snowflake.py,sha256=dP5o1sH0q5UDMxPoI5vYp1_2FQyBU7VbeYdxF1HVyEs,63398
42
42
  sqlglot/dialects/spark.py,sha256=fbmiTKAQiKqG9yE_HAxYGgQiOjdxB9tJyjOtgdqF100,7645
43
43
  sqlglot/dialects/spark2.py,sha256=8er7nHDm5Wc57m9AOxKN0sd_DVzbhAL44H_udlFh9O8,14258
44
44
  sqlglot/dialects/sqlite.py,sha256=UzJwIdY1PsLArMxNt5lKvk8COHvXeo4FoqW41LqVmM8,12440
@@ -46,14 +46,14 @@ sqlglot/dialects/starrocks.py,sha256=fHNgvq5Nz7dI4QUWCTOO5VDOYjasBxRRlcg9TbY0UZE
46
46
  sqlglot/dialects/tableau.py,sha256=oIawDzUITxGCWaEMB8OaNMPWhbC3U-2y09pYPm4eazc,2190
47
47
  sqlglot/dialects/teradata.py,sha256=xWa-9kSTsT-eM1NePi_oIM1dPHmXW89GLU5Uda3_6Ao,14036
48
48
  sqlglot/dialects/trino.py,sha256=wgLsiX1NQvjGny_rgrU1e2r6kK1LD0KgaSdIDrYmjD0,4285
49
- sqlglot/dialects/tsql.py,sha256=Kpakg5NXC2Gwzr8Su2Uotmi2Bmc2dUe8T2u7aeNe-us,54376
49
+ sqlglot/dialects/tsql.py,sha256=kMa8hYAXp3D2-g4HzkuzHDsWeXU1WgbyZm2sNl2a8rE,54397
50
50
  sqlglot/executor/__init__.py,sha256=FslewzYQtQdDNg_0Ju2UaiP4vo4IMUgkfkmFsYUhcN0,2958
51
51
  sqlglot/executor/context.py,sha256=WJHJdYQCOeVXwLw0uSSrWSc25eBMn5Ix108RCvdsKRQ,3386
52
52
  sqlglot/executor/env.py,sha256=tQhU5PpTBMcxgZIFddFqxWMNPtHN0vOOz72voncY3KY,8276
53
53
  sqlglot/executor/python.py,sha256=09GYRzrPn3lZGfDJY9pbONOvmYxsRyeSWjUiqkSRHGo,16661
54
54
  sqlglot/executor/table.py,sha256=xkuJlgLVNYUXsSUaX0zTcnFekldXLLU8LqDyjR5K9wY,4419
55
55
  sqlglot/optimizer/__init__.py,sha256=FdAvVz6rQLLkiiH21-SD4RxB5zS3WDeU-s03PZkJ-F4,343
56
- sqlglot/optimizer/annotate_types.py,sha256=RxplZctzmrtTbAX2YoU6T-rVHqbyl4clRRisIb0iwOQ,22278
56
+ sqlglot/optimizer/annotate_types.py,sha256=-JkNgc5R1jYh130D8lGv5nYSmPddv4Naf3BZiD5ZuTs,24137
57
57
  sqlglot/optimizer/canonicalize.py,sha256=RJpUbWDudjknRMtO_Kf8MGZ5Hv1twpPWac2u5kpV4Vw,7719
58
58
  sqlglot/optimizer/eliminate_ctes.py,sha256=fUBM0RUnPrm2sYptEWBux98B7fcx7W-BM1zVqfgDz9c,1448
59
59
  sqlglot/optimizer/eliminate_joins.py,sha256=5Whliegc7U8BnS6tlrl9wkeAgyP1NpgCCAPxChHzFfw,5874
@@ -67,13 +67,13 @@ sqlglot/optimizer/optimizer.py,sha256=vXEXDWHvbO-vJmSI7UqJuydM2WrD1xko7rETq2EtVJ
67
67
  sqlglot/optimizer/pushdown_predicates.py,sha256=H4lFc9Dsds8W7FOsE4wbK6PHJBu6SjgQU7mVtl4laps,8357
68
68
  sqlglot/optimizer/pushdown_projections.py,sha256=7NoK5NAUVYVhs0YnYyo6WuXfaO-BShSwS6lA8Y-ATQ4,6668
69
69
  sqlglot/optimizer/qualify.py,sha256=oAPfwub7dEkrlCrsptcJWpLya4BgKhN6M5SwIs_86LY,4002
70
- sqlglot/optimizer/qualify_columns.py,sha256=X2Iydssan_Fw84cd-mrzqxG3eRfRdpP6HVRofSbfHlg,40515
70
+ sqlglot/optimizer/qualify_columns.py,sha256=77aScPakXYaiagnoCWk2qwMxlKuRGsFTAK9sOQuR2vY,40872
71
71
  sqlglot/optimizer/qualify_tables.py,sha256=5f5enBAh-bpNB9ewF97W9fx9h1TGXj1Ih5fncvH42sY,6486
72
- sqlglot/optimizer/scope.py,sha256=lZWJsR1k-vx1VdxOn0yvbF_LcviXbK357WlrgOLXGEs,30123
72
+ sqlglot/optimizer/scope.py,sha256=r-2PaO7-woaIWaWrKC88J9eTgdQardNYQ1rIXXaPr1w,30501
73
73
  sqlglot/optimizer/simplify.py,sha256=S0Blqg5Mq2KRRWhWz-Eivch9sBjBhg9fRJA6EdBzj2g,50704
74
74
  sqlglot/optimizer/unnest_subqueries.py,sha256=kzWUVDlxs8z9nmRx-8U-pHXPtVZhEIwkKqmKhr2QLvc,10908
75
- sqlglot-26.28.1.dist-info/licenses/LICENSE,sha256=AI3__mHZfOtzY3EluR_pIYBm3_pE7TbVx7qaHxoZ114,1065
76
- sqlglot-26.28.1.dist-info/METADATA,sha256=ElrNZkPPdEmAmU1gVJgndWkFCWlhnYqLLkGB4562Bd4,20732
77
- sqlglot-26.28.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
78
- sqlglot-26.28.1.dist-info/top_level.txt,sha256=5kRskCGA_gVADF9rSfSzPdLHXqvfMusDYeHePfNY2nQ,8
79
- sqlglot-26.28.1.dist-info/RECORD,,
75
+ sqlglot-26.29.0.dist-info/licenses/LICENSE,sha256=AI3__mHZfOtzY3EluR_pIYBm3_pE7TbVx7qaHxoZ114,1065
76
+ sqlglot-26.29.0.dist-info/METADATA,sha256=rc1ouFaDp1lgiQ2W3jRFM2VNj7RUrf2drwij1_rajpg,20732
77
+ sqlglot-26.29.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
78
+ sqlglot-26.29.0.dist-info/top_level.txt,sha256=5kRskCGA_gVADF9rSfSzPdLHXqvfMusDYeHePfNY2nQ,8
79
+ sqlglot-26.29.0.dist-info/RECORD,,