sqlglot 26.29.0__py3-none-any.whl → 26.31.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions exactly as they appear in the public registry.
sqlglot/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
  __version_tuple__: VERSION_TUPLE
  version_tuple: VERSION_TUPLE

- __version__ = version = '26.29.0'
- __version_tuple__ = version_tuple = (26, 29, 0)
+ __version__ = version = '26.31.0'
+ __version_tuple__ = version_tuple = (26, 31, 0)
sqlglot/dialects/__init__.py CHANGED
@@ -74,6 +74,7 @@ DIALECTS = [
  "Druid",
  "DuckDB",
  "Dune",
+ "Fabric",
  "Hive",
  "Materialize",
  "MySQL",
@@ -92,6 +93,7 @@ DIALECTS = [
  "Teradata",
  "Trino",
  "TSQL",
+ "Exasol",
  ]

  MODULE_BY_DIALECT = {name: name.lower() for name in DIALECTS}
sqlglot/dialects/bigquery.py CHANGED
@@ -524,6 +524,7 @@ class BigQuery(Dialect):
  PREFIXED_PIVOT_COLUMNS = True
  LOG_DEFAULTS_TO_LN = True
  SUPPORTS_IMPLICIT_UNNEST = True
+ JOINS_HAVE_EQUAL_PRECEDENCE = True

  # BigQuery does not allow ASC/DESC to be used as an identifier
  ID_VAR_TOKENS = parser.Parser.ID_VAR_TOKENS - {TokenType.ASC, TokenType.DESC}
@@ -542,7 +543,7 @@ class BigQuery(Dialect):
  "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
  "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
  "DATE_TRUNC": lambda args: exp.DateTrunc(
- unit=exp.Literal.string(str(seq_get(args, 1))),
+ unit=seq_get(args, 1),
  this=seq_get(args, 0),
  zone=seq_get(args, 2),
  ),
@@ -962,9 +963,6 @@ class BigQuery(Dialect):
  exp.DateSub: date_add_interval_sql("DATE", "SUB"),
  exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
  exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
- exp.DateTrunc: lambda self, e: self.func(
- "DATE_TRUNC", e.this, e.text("unit"), e.args.get("zone")
- ),
  exp.FromTimeZone: lambda self, e: self.func(
  "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
  ),
@@ -1194,6 +1192,11 @@ class BigQuery(Dialect):
  "within",
  }

+ def datetrunc_sql(self, expression: exp.DateTrunc) -> str:
+ unit = expression.unit
+ unit_sql = unit.name if unit.is_string else self.sql(unit)
+ return self.func("DATE_TRUNC", expression.this, unit_sql, expression.args.get("zone"))
+
  def mod_sql(self, expression: exp.Mod) -> str:
  this = expression.this
  expr = expression.expression
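The BigQuery change above stops stringifying DATE_TRUNC's unit at parse time and instead resolves it in the new datetrunc_sql method. A minimal sketch of the round-trip this preserves, using sqlglot's public API (expected output shown in comments is indicative, not guaranteed):

```python
import sqlglot

# The unit now survives as a parsed expression rather than a flattened string.
print(sqlglot.transpile("SELECT DATE_TRUNC(d, MONTH)", read="bigquery", write="bigquery")[0])
# e.g. SELECT DATE_TRUNC(d, MONTH)
```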
sqlglot/dialects/clickhouse.py CHANGED
@@ -297,6 +297,7 @@ class ClickHouse(Dialect):
  MODIFIERS_ATTACHED_TO_SET_OP = False
  INTERVAL_SPANS = False
  OPTIONAL_ALIAS_TOKEN_CTE = False
+ JOINS_HAVE_EQUAL_PRECEDENCE = True

  FUNCTIONS = {
  **parser.Parser.FUNCTIONS,
@@ -691,6 +692,7 @@ class ClickHouse(Dialect):
  parse_bracket: bool = False,
  is_db_reference: bool = False,
  parse_partition: bool = False,
+ consume_pipe: bool = False,
  ) -> t.Optional[exp.Expression]:
  this = super()._parse_table(
  schema=schema,
sqlglot/dialects/databricks.py CHANGED
@@ -9,6 +9,7 @@ from sqlglot.dialects.dialect import (
  build_date_delta,
  timestamptrunc_sql,
  build_formatted_time,
+ groupconcat_sql,
  )
  from sqlglot.dialects.spark import Spark
  from sqlglot.tokens import TokenType
@@ -87,6 +88,7 @@ class Databricks(Spark):
  e.this,
  ),
  exp.DatetimeTrunc: timestamptrunc_sql(),
+ exp.GroupConcat: groupconcat_sql,
  exp.Select: transforms.preprocess(
  [
  transforms.eliminate_distinct_on,
sqlglot/dialects/dialect.py CHANGED
@@ -77,6 +77,7 @@ class Dialects(str, Enum):
  DRUID = "druid"
  DUCKDB = "duckdb"
  DUNE = "dune"
+ FABRIC = "fabric"
  HIVE = "hive"
  MATERIALIZE = "materialize"
  MYSQL = "mysql"
@@ -95,6 +96,7 @@ class Dialects(str, Enum):
  TERADATA = "teradata"
  TRINO = "trino"
  TSQL = "tsql"
+ EXASOL = "exasol"


  class NormalizationStrategy(str, AutoName):
@@ -699,6 +701,9 @@ class Dialect(metaclass=_Dialect):
  exp.TimeAdd,
  exp.TimeSub,
  },
+ exp.DataType.Type.TIMESTAMPTZ: {
+ exp.CurrentTimestampLTZ,
+ },
  exp.DataType.Type.TIMESTAMP: {
  exp.CurrentTimestamp,
  exp.StrToTime,
@@ -1905,14 +1910,23 @@ def groupconcat_sql(


  def build_timetostr_or_tochar(args: t.List, dialect: Dialect) -> exp.TimeToStr | exp.ToChar:
- this = seq_get(args, 0)
+ if len(args) == 2:
+ this = args[0]
+ if not this.type:
+ from sqlglot.optimizer.annotate_types import annotate_types

- if this and not this.type:
- from sqlglot.optimizer.annotate_types import annotate_types
+ annotate_types(this, dialect=dialect)

- annotate_types(this, dialect=dialect)
  if this.is_type(*exp.DataType.TEMPORAL_TYPES):
  dialect_name = dialect.__class__.__name__.lower()
  return build_formatted_time(exp.TimeToStr, dialect_name, default=True)(args)

  return exp.ToChar.from_arg_list(args)
+
+
+ def build_replace_with_optional_replacement(args: t.List) -> exp.Replace:
+ return exp.Replace(
+ this=seq_get(args, 0),
+ expression=seq_get(args, 1),
+ replacement=seq_get(args, 2) or exp.Literal.string(""),
+ )
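The new build_replace_with_optional_replacement helper backs the two-argument REPLACE support wired into the Presto and Snowflake parsers further down: when the third argument is missing, the replacement defaults to an empty string. A small sketch of the resulting expression (expected output in the comment is indicative):

```python
from sqlglot import exp
from sqlglot.dialects.dialect import build_replace_with_optional_replacement

# Two args in, three-arg exp.Replace out: the replacement defaults to ''.
replace = build_replace_with_optional_replacement(
    [exp.column("name"), exp.Literal.string("a")]
)
print(replace.sql())  # e.g. REPLACE(name, 'a', '')
```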
sqlglot/dialects/duckdb.py CHANGED
@@ -508,6 +508,7 @@ class DuckDB(Dialect):
  parse_bracket: bool = False,
  is_db_reference: bool = False,
  parse_partition: bool = False,
+ consume_pipe: bool = False,
  ) -> t.Optional[exp.Expression]:
  # DuckDB supports prefix aliases, e.g. FROM foo: bar
  if self._next and self._next.token_type == TokenType.COLON:
sqlglot/dialects/exasol.py ADDED
@@ -0,0 +1,46 @@
+ from __future__ import annotations
+ from sqlglot import exp, generator
+ from sqlglot.dialects.dialect import Dialect, rename_func
+
+
+ class Exasol(Dialect):
+ class Generator(generator.Generator):
+ # https://docs.exasol.com/db/latest/sql_references/data_types/datatypedetails.htm#StringDataType
+ STRING_TYPE_MAPPING = {
+ exp.DataType.Type.BLOB: "VARCHAR",
+ exp.DataType.Type.LONGBLOB: "VARCHAR",
+ exp.DataType.Type.LONGTEXT: "VARCHAR",
+ exp.DataType.Type.MEDIUMBLOB: "VARCHAR",
+ exp.DataType.Type.MEDIUMTEXT: "VARCHAR",
+ exp.DataType.Type.TINYBLOB: "VARCHAR",
+ exp.DataType.Type.TINYTEXT: "VARCHAR",
+ exp.DataType.Type.TEXT: "VARCHAR",
+ exp.DataType.Type.VARBINARY: "VARCHAR",
+ }
+
+ # https://docs.exasol.com/db/latest/sql_references/data_types/datatypealiases.htm
+ TYPE_MAPPING = {
+ **generator.Generator.TYPE_MAPPING,
+ **STRING_TYPE_MAPPING,
+ exp.DataType.Type.TINYINT: "SMALLINT",
+ exp.DataType.Type.MEDIUMINT: "INT",
+ exp.DataType.Type.DECIMAL32: "DECIMAL",
+ exp.DataType.Type.DECIMAL64: "DECIMAL",
+ exp.DataType.Type.DECIMAL128: "DECIMAL",
+ exp.DataType.Type.DECIMAL256: "DECIMAL",
+ exp.DataType.Type.DATETIME: "TIMESTAMP",
+ }
+
+ def datatype_sql(self, expression: exp.DataType) -> str:
+ # Exasol supports a fixed default precision of 3 for TIMESTAMP WITH LOCAL TIME ZONE
+ # and does not allow specifying a different custom precision
+ if expression.is_type(exp.DataType.Type.TIMESTAMPLTZ):
+ return "TIMESTAMP WITH LOCAL TIME ZONE"
+
+ return super().datatype_sql(expression)
+
+ TRANSFORMS = {
+ **generator.Generator.TRANSFORMS,
+ # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/mod.htm
+ exp.Mod: rename_func("MOD"),
+ }
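As a quick sanity check of the new dialect's type mappings through the public API (a sketch; the outputs in comments are expected, not guaranteed):

```python
import sqlglot

# TEXT-family and blob types collapse to VARCHAR, DATETIME to TIMESTAMP.
print(sqlglot.transpile("CAST(x AS TEXT)", read="mysql", write="exasol")[0])
# e.g. CAST(x AS VARCHAR)
print(sqlglot.transpile("CAST(x AS DATETIME)", read="mysql", write="exasol")[0])
# e.g. CAST(x AS TIMESTAMP)
```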
sqlglot/dialects/fabric.py ADDED
@@ -0,0 +1,115 @@
+ from __future__ import annotations
+
+ from sqlglot import exp
+ from sqlglot.dialects.dialect import NormalizationStrategy
+ from sqlglot.dialects.tsql import TSQL
+ from sqlglot.tokens import TokenType
+
+
+ class Fabric(TSQL):
+ """
+ Microsoft Fabric Data Warehouse dialect that inherits from T-SQL.
+
+ Microsoft Fabric is a cloud-based analytics platform that provides a unified
+ data warehouse experience. While it shares much of T-SQL's syntax, it has
+ specific differences and limitations that this dialect addresses.
+
+ Key differences from T-SQL:
+ - Case-sensitive identifiers (unlike T-SQL which is case-insensitive)
+ - Limited data type support with mappings to supported alternatives
+ - Temporal types (DATETIME2, DATETIMEOFFSET, TIME) limited to 6 digits precision
+ - Certain legacy types (MONEY, SMALLMONEY, etc.) are not supported
+ - Unicode types (NCHAR, NVARCHAR) are mapped to non-unicode equivalents
+
+ References:
+ - Data Types: https://learn.microsoft.com/en-us/fabric/data-warehouse/data-types
+ - T-SQL Surface Area: https://learn.microsoft.com/en-us/fabric/data-warehouse/tsql-surface-area
+ """
+
+ # Fabric is case-sensitive unlike T-SQL which is case-insensitive
+ NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE
+
+ class Tokenizer(TSQL.Tokenizer):
+ # Override T-SQL tokenizer to handle TIMESTAMP differently
+ # In T-SQL, TIMESTAMP is a synonym for ROWVERSION, but in Fabric we want it to be a datetime type
+ # Also add UTINYINT keyword mapping since T-SQL doesn't have it
+ KEYWORDS = {
+ **TSQL.Tokenizer.KEYWORDS,
+ "TIMESTAMP": TokenType.TIMESTAMP,
+ "UTINYINT": TokenType.UTINYINT,
+ }
+
+ class Generator(TSQL.Generator):
+ # Fabric-specific type mappings - override T-SQL types that aren't supported
+ # Reference: https://learn.microsoft.com/en-us/fabric/data-warehouse/data-types
+ TYPE_MAPPING = {
+ **TSQL.Generator.TYPE_MAPPING,
+ exp.DataType.Type.DATETIME: "DATETIME2",
+ exp.DataType.Type.DECIMAL: "DECIMAL",
+ exp.DataType.Type.IMAGE: "VARBINARY",
+ exp.DataType.Type.INT: "INT",
+ exp.DataType.Type.JSON: "VARCHAR",
+ exp.DataType.Type.MONEY: "DECIMAL",
+ exp.DataType.Type.NCHAR: "CHAR",
+ exp.DataType.Type.NVARCHAR: "VARCHAR",
+ exp.DataType.Type.ROWVERSION: "ROWVERSION",
+ exp.DataType.Type.SMALLDATETIME: "DATETIME2",
+ exp.DataType.Type.SMALLMONEY: "DECIMAL",
+ exp.DataType.Type.TIMESTAMP: "DATETIME2",
+ exp.DataType.Type.TIMESTAMPNTZ: "DATETIME2",
+ exp.DataType.Type.TIMESTAMPTZ: "DATETIMEOFFSET",
+ exp.DataType.Type.TINYINT: "SMALLINT",
+ exp.DataType.Type.UTINYINT: "SMALLINT",
+ exp.DataType.Type.UUID: "VARBINARY(MAX)",
+ exp.DataType.Type.XML: "VARCHAR",
+ }
+
+ def datatype_sql(self, expression: exp.DataType) -> str:
+ # Check if this is a temporal type that needs precision handling. Fabric limits temporal
+ # types to max 6 digits precision. When no precision is specified, we default to 6 digits.
+ if (
+ expression.is_type(*exp.DataType.TEMPORAL_TYPES)
+ and expression.this != exp.DataType.Type.DATE
+ ):
+ # Get the current precision (first expression if it exists)
+ precision_param = expression.find(exp.DataTypeParam)
+ target_precision = 6
+
+ if precision_param and precision_param.this.is_int:
+ # Cap precision at 6
+ current_precision = precision_param.this.to_py()
+ target_precision = min(current_precision, 6)
+ else:
+ # If precision exists but is not an integer, default to 6
+ target_precision = 6
+
+ # Create a new expression with the target precision
+ expression = exp.DataType(
+ this=expression.this,
+ expressions=[exp.DataTypeParam(this=exp.Literal.number(target_precision))],
+ )
+
+ return super().datatype_sql(expression)
+
+ def unixtotime_sql(self, expression: exp.UnixToTime) -> str:
+ scale = expression.args.get("scale")
+ timestamp = expression.this
+
+ if scale not in (None, exp.UnixToTime.SECONDS):
+ self.unsupported(f"UnixToTime scale {scale} is not supported by Fabric")
+ return ""
+
+ # Convert unix timestamp (seconds) to microseconds and round to avoid decimals
+ microseconds = timestamp * exp.Literal.number("1e6")
+ rounded = exp.func("round", microseconds, 0)
+ rounded_ms_as_bigint = exp.cast(rounded, exp.DataType.Type.BIGINT)
+
+ # Create the base datetime as '1970-01-01' cast to DATETIME2(6)
+ epoch_start = exp.cast("'1970-01-01'", "datetime2(6)", dialect="fabric")
+
+ dateadd = exp.DateAdd(
+ this=epoch_start,
+ expression=rounded_ms_as_bigint,
+ unit=exp.Literal.string("MICROSECONDS"),
+ )
+ return self.sql(dateadd)
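The precision-capping logic in datatype_sql above can be exercised through the public API. A hedged sketch (the output comment is the expected behavior given the code, not a guaranteed result):

```python
import sqlglot

# Fabric caps temporal precision at 6 digits, so DATETIME2(7) is tightened
# to DATETIME2(6); an unspecified precision also defaults to 6.
print(sqlglot.transpile("CAST(x AS DATETIME2(7))", read="tsql", write="fabric")[0])
# e.g. CAST(x AS DATETIME2(6))
```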
sqlglot/dialects/hive.py CHANGED
@@ -305,6 +305,7 @@ class Hive(Dialect):
  LOG_DEFAULTS_TO_LN = True
  STRICT_CAST = False
  VALUES_FOLLOWED_BY_PAREN = False
+ JOINS_HAVE_EQUAL_PRECEDENCE = True

  FUNCTIONS = {
  **parser.Parser.FUNCTIONS,
sqlglot/dialects/oracle.py CHANGED
@@ -128,6 +128,7 @@ class Oracle(Dialect):
  "NEXT": lambda self: self._parse_next_value_for(),
  "PRIOR": lambda self: self.expression(exp.Prior, this=self._parse_bitwise()),
  "SYSDATE": lambda self: self.expression(exp.CurrentTimestamp, sysdate=True),
+ "DBMS_RANDOM": lambda self: self._parse_dbms_random(),
  }

  FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
@@ -177,6 +178,19 @@ class Oracle(Dialect):
  ),
  }

+ def _parse_dbms_random(self) -> t.Optional[exp.Expression]:
+ if self._match_text_seq(".", "VALUE"):
+ lower, upper = None, None
+ if self._match(TokenType.L_PAREN, advance=False):
+ lower_upper = self._parse_wrapped_csv(self._parse_bitwise)
+ if len(lower_upper) == 2:
+ lower, upper = lower_upper
+
+ return exp.Rand(lower=lower, upper=upper)
+
+ self._retreat(self._index - 1)
+ return None
+
  def _parse_json_array(self, expr_type: t.Type[E], **kwargs) -> E:
  return self.expression(
  expr_type,
@@ -299,6 +313,7 @@ class Oracle(Dialect):
  exp.LogicalOr: rename_func("MAX"),
  exp.LogicalAnd: rename_func("MIN"),
  exp.Mod: rename_func("MOD"),
+ exp.Rand: rename_func("DBMS_RANDOM.VALUE"),
  exp.Select: transforms.preprocess(
  [
  transforms.eliminate_distinct_on,
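A hedged sketch of the new DBMS_RANDOM.VALUE round-trip (output comments are expected behavior, not guaranteed):

```python
import sqlglot
from sqlglot import exp

# DBMS_RANDOM.VALUE(lower, upper) now parses into exp.Rand and is generated back.
ast = sqlglot.parse_one("SELECT DBMS_RANDOM.VALUE(1, 10) FROM DUAL", read="oracle")
print(ast.find(exp.Rand) is not None)   # True
print(ast.sql(dialect="oracle"))        # e.g. SELECT DBMS_RANDOM.VALUE(1, 10) FROM DUAL
```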
sqlglot/dialects/presto.py CHANGED
@@ -8,6 +8,7 @@ from sqlglot.dialects.dialect import (
  NormalizationStrategy,
  binary_from_function,
  bool_xor_sql,
+ build_replace_with_optional_replacement,
  date_trunc_to_time,
  datestrtodate_sql,
  encode_decode_sql,
@@ -315,6 +316,7 @@ class Presto(Dialect):

  class Parser(parser.Parser):
  VALUES_FOLLOWED_BY_PAREN = False
+ ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = True

  FUNCTIONS = {
  **parser.Parser.FUNCTIONS,
@@ -359,6 +361,7 @@ class Presto(Dialect):
  expression=seq_get(args, 1),
  replacement=seq_get(args, 2) or exp.Literal.string(""),
  ),
+ "REPLACE": build_replace_with_optional_replacement,
  "ROW": exp.Struct.from_arg_list,
  "SEQUENCE": exp.GenerateSeries.from_arg_list,
  "SET_AGG": exp.ArrayUniqueAgg.from_arg_list,
sqlglot/dialects/prql.py CHANGED
@@ -189,11 +189,15 @@ class PRQL(Dialect):
  parse_bracket: bool = False,
  is_db_reference: bool = False,
  parse_partition: bool = False,
+ consume_pipe: bool = False,
  ) -> t.Optional[exp.Expression]:
  return self._parse_table_parts()

  def _parse_from(
- self, joins: bool = False, skip_from_token: bool = False
+ self,
+ joins: bool = False,
+ skip_from_token: bool = False,
+ consume_pipe: bool = False,
  ) -> t.Optional[exp.From]:
  if not skip_from_token and not self._match(TokenType.FROM):
  return None
sqlglot/dialects/redshift.py CHANGED
@@ -90,6 +90,7 @@ class Redshift(Postgres):
  parse_bracket: bool = False,
  is_db_reference: bool = False,
  parse_partition: bool = False,
+ consume_pipe: bool = False,
  ) -> t.Optional[exp.Expression]:
  # Redshift supports UNPIVOTing SUPER objects, e.g. `UNPIVOT foo.obj[0] AS val AT attr`
  unpivot = self._match(TokenType.UNPIVOT)
@@ -212,8 +213,7 @@ class Redshift(Postgres):
  exp.TableSample: no_tablesample_sql,
  exp.TsOrDsAdd: date_delta_sql("DATEADD"),
  exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
- exp.UnixToTime: lambda self,
- e: f"(TIMESTAMP 'epoch' + {self.sql(e.this)} * INTERVAL '1 SECOND')",
+ exp.UnixToTime: lambda self, e: self._unix_to_time_sql(e),
  }

  # Postgres maps exp.Pivot to no_pivot_sql, but Redshift support pivots
@@ -446,3 +446,12 @@ class Redshift(Postgres):
  def explode_sql(self, expression: exp.Explode) -> str:
  self.unsupported("Unsupported EXPLODE() function")
  return ""
+
+ def _unix_to_time_sql(self, expression: exp.UnixToTime) -> str:
+ scale = expression.args.get("scale")
+ this = self.sql(expression.this)
+
+ if scale is not None and scale != exp.UnixToTime.SECONDS and scale.is_int:
+ this = f"({this} / POWER(10, {scale.to_py()}))"
+
+ return f"(TIMESTAMP 'epoch' + {this} * INTERVAL '1 SECOND')"
sqlglot/dialects/snowflake.py CHANGED
@@ -9,6 +9,7 @@ from sqlglot.dialects.dialect import (
  build_timetostr_or_tochar,
  binary_from_function,
  build_default_decimal_type,
+ build_replace_with_optional_replacement,
  build_timestamp_from_parts,
  date_delta_sql,
  date_trunc_to_time,
@@ -484,6 +485,7 @@ class Snowflake(Dialect):
  "REGEXP_REPLACE": _build_regexp_replace,
  "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract),
  "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll),
+ "REPLACE": build_replace_with_optional_replacement,
  "RLIKE": exp.RegexpLike.from_arg_list,
  "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
  "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)),
@@ -799,6 +801,7 @@ class Snowflake(Dialect):
  parse_bracket: bool = False,
  is_db_reference: bool = False,
  parse_partition: bool = False,
+ consume_pipe: bool = False,
  ) -> t.Optional[exp.Expression]:
  table = super()._parse_table(
  schema=schema,
@@ -1415,7 +1418,7 @@ class Snowflake(Dialect):

  def timetostr_sql(self, expression: exp.TimeToStr) -> str:
  this = expression.this
- if not isinstance(this, exp.TsOrDsToTimestamp):
+ if this.is_string:
  this = exp.cast(this, exp.DataType.Type.TIMESTAMP)

  return self.func("TO_CHAR", this, self.format_time(expression))
sqlglot/dialects/spark.py CHANGED
@@ -7,6 +7,7 @@ from sqlglot.dialects.dialect import rename_func, unit_to_var, timestampdiff_sql
  from sqlglot.dialects.hive import _build_with_ignore_nulls
  from sqlglot.dialects.spark2 import Spark2, temporary_storage_provider, _build_as_cast
  from sqlglot.helper import ensure_list, seq_get
+ from sqlglot.tokens import TokenType
  from sqlglot.transforms import (
  ctas_with_tmp_tables_to_create_tmp_view,
  remove_unique_constraints,
@@ -121,6 +122,16 @@ class Spark(Spark2):
  ),
  }

+ PLACEHOLDER_PARSERS = {
+ **Spark2.Parser.PLACEHOLDER_PARSERS,
+ TokenType.L_BRACE: lambda self: self._parse_query_parameter(),
+ }
+
+ def _parse_query_parameter(self) -> t.Optional[exp.Expression]:
+ this = self._parse_id_var()
+ self._match(TokenType.R_BRACE)
+ return self.expression(exp.Placeholder, this=this, widget=True)
+
  def _parse_generated_as_identity(
  self,
  ) -> (
@@ -200,3 +211,9 @@ class Spark(Spark2):
  return self.func("DATEDIFF", unit_to_var(expression), start, end)

  return self.func("DATEDIFF", end, start)
+
+ def placeholder_sql(self, expression: exp.Placeholder) -> str:
+ if not expression.args.get("widget"):
+ return super().placeholder_sql(expression)
+
+ return f"{{{expression.name}}}"
sqlglot/dialects/sqlite.py CHANGED
@@ -102,6 +102,10 @@ class SQLite(Dialect):
  COMMANDS = {*tokens.Tokenizer.COMMANDS, TokenType.REPLACE}

  class Parser(parser.Parser):
+ STRING_ALIASES = True
+ ALTER_RENAME_REQUIRES_COLUMN = False
+ JOINS_HAVE_EQUAL_PRECEDENCE = True
+
  FUNCTIONS = {
  **parser.Parser.FUNCTIONS,
  "EDITDIST3": exp.Levenshtein.from_arg_list,
@@ -110,9 +114,6 @@ class SQLite(Dialect):
  "TIME": lambda args: exp.Anonymous(this="TIME", expressions=args),
  }

- STRING_ALIASES = True
- ALTER_RENAME_REQUIRES_COLUMN = False
-
  def _parse_unique(self) -> exp.UniqueColumnConstraint:
  # Do not consume more tokens if UNIQUE is used as a standalone constraint, e.g:
  # CREATE TABLE foo (bar TEXT UNIQUE REFERENCES baz ...)
sqlglot/dialects/tsql.py CHANGED
@@ -612,6 +612,7 @@ class TSQL(Dialect):
  "SYSDATETIME": exp.CurrentTimestamp.from_arg_list,
  "SUSER_NAME": exp.CurrentUser.from_arg_list,
  "SUSER_SNAME": exp.CurrentUser.from_arg_list,
+ "SYSDATETIMEOFFSET": exp.CurrentTimestampLTZ.from_arg_list,
  "SYSTEM_USER": exp.CurrentUser.from_arg_list,
  "TIMEFROMPARTS": _build_timefromparts,
  "DATETRUNC": _build_datetrunc,
@@ -1020,6 +1021,7 @@ class TSQL(Dialect):
  exp.CTE: transforms.preprocess([qualify_derived_table_outputs]),
  exp.CurrentDate: rename_func("GETDATE"),
  exp.CurrentTimestamp: rename_func("GETDATE"),
+ exp.CurrentTimestampLTZ: rename_func("SYSDATETIMEOFFSET"),
  exp.DateStrToDate: datestrtodate_sql,
  exp.Extract: rename_func("DATEPART"),
  exp.GeneratedAsIdentityColumnConstraint: generatedasidentitycolumnconstraint_sql,
@@ -1249,15 +1251,15 @@ class TSQL(Dialect):
  sql_with_ctes = self.prepend_ctes(expression, sql)
  sql_literal = self.sql(exp.Literal.string(sql_with_ctes))
  if kind == "SCHEMA":
- return f"""IF NOT EXISTS (SELECT * FROM information_schema.schemata WHERE schema_name = {identifier}) EXEC({sql_literal})"""
+ return f"""IF NOT EXISTS (SELECT * FROM INFORMATION_SCHEMA.SCHEMATA WHERE SCHEMA_NAME = {identifier}) EXEC({sql_literal})"""
  elif kind == "TABLE":
  assert table
  where = exp.and_(
- exp.column("table_name").eq(table.name),
- exp.column("table_schema").eq(table.db) if table.db else None,
- exp.column("table_catalog").eq(table.catalog) if table.catalog else None,
+ exp.column("TABLE_NAME").eq(table.name),
+ exp.column("TABLE_SCHEMA").eq(table.db) if table.db else None,
+ exp.column("TABLE_CATALOG").eq(table.catalog) if table.catalog else None,
  )
- return f"""IF NOT EXISTS (SELECT * FROM information_schema.tables WHERE {where}) EXEC({sql_literal})"""
+ return f"""IF NOT EXISTS (SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE {where}) EXEC({sql_literal})"""
  elif kind == "INDEX":
  index = self.sql(exp.Literal.string(expression.this.text("this")))
  return f"""IF NOT EXISTS (SELECT * FROM sys.indexes WHERE object_id = object_id({identifier}) AND name = {index}) EXEC({sql_literal})"""
sqlglot/expressions.py CHANGED
@@ -4453,8 +4453,9 @@ class SessionParameter(Condition):
  arg_types = {"this": True, "kind": False}


+ # https://www.databricks.com/blog/parameterized-queries-pyspark
  class Placeholder(Condition):
- arg_types = {"this": False, "kind": False}
+ arg_types = {"this": False, "kind": False, "widget": False}

  @property
  def name(self) -> str:
@@ -5805,6 +5806,10 @@ class CurrentTimestamp(Func):
  arg_types = {"this": False, "sysdate": False}


+ class CurrentTimestampLTZ(Func):
+ arg_types = {}
+
+
  class CurrentSchema(Func):
  arg_types = {"this": False}

@@ -5845,8 +5850,6 @@ class DateTrunc(Func):
  unit_name = TimeUnit.UNABBREVIATED_UNIT_NAME[unit_name]

  args["unit"] = Literal.string(unit_name)
- elif isinstance(unit, Week):
- unit.set("this", Literal.string(unit.this.name.upper()))

  super().__init__(**args)

@@ -6668,6 +6671,11 @@ class Repeat(Func):
  arg_types = {"this": True, "times": True}


+ # Some dialects like Snowflake support two argument replace
+ class Replace(Func):
+ arg_types = {"this": True, "expression": True, "replacement": False}
+
+
  # https://learn.microsoft.com/en-us/sql/t-sql/functions/round-transact-sql?view=sql-server-ver16
  # tsql third argument function == trunctaion if not 0
  class Round(Func):
sqlglot/generator.py CHANGED
@@ -3480,7 +3480,7 @@ class Generator(metaclass=_Generator):

  actions_list.append(action_sql)

- actions_sql = self.format_args(*actions_list)
+ actions_sql = self.format_args(*actions_list).lstrip("\n")

  exists = " IF EXISTS" if expression.args.get("exists") else ""
  on_cluster = self.sql(expression, "cluster")
@@ -3491,7 +3491,7 @@ class Generator(metaclass=_Generator):
  kind = self.sql(expression, "kind")
  not_valid = " NOT VALID" if expression.args.get("not_valid") else ""

- return f"ALTER {kind}{exists}{only} {self.sql(expression, 'this')}{on_cluster} {actions_sql}{not_valid}{options}"
+ return f"ALTER {kind}{exists}{only} {self.sql(expression, 'this')}{on_cluster}{self.sep()}{actions_sql}{not_valid}{options}"

  def add_column_sql(self, expression: exp.Expression) -> str:
  sql = self.sql(expression)
@@ -3510,7 +3510,7 @@ class Generator(metaclass=_Generator):
  return f"DROP{exists}{expressions}"

  def addconstraint_sql(self, expression: exp.AddConstraint) -> str:
- return f"ADD {self.expressions(expression)}"
+ return f"ADD {self.expressions(expression, indent=False)}"

  def addpartition_sql(self, expression: exp.AddPartition) -> str:
  exists = "IF NOT EXISTS " if expression.args.get("exists") else ""
sqlglot/optimizer/scope.py CHANGED
@@ -358,7 +358,7 @@ class Scope:
  for expression in itertools.chain(self.derived_tables, self.udtfs):
  self._references.append(
  (
- expression.alias,
+ _get_source_alias(expression),
  expression if expression.args.get("pivots") else expression.unnest(),
  )
  )
@@ -785,7 +785,7 @@ def _traverse_tables(scope):
  # This shouldn't be a problem once qualify_columns runs, as it adds aliases on everything.
  # Until then, this means that only a single, unaliased derived table is allowed (rather,
  # the latest one wins.
- sources[expression.alias] = child_scope
+ sources[_get_source_alias(expression)] = child_scope

  # append the final child_scope yielded
  if child_scope:
@@ -825,7 +825,7 @@ def _traverse_udtfs(scope):
  ):
  yield child_scope
  top = child_scope
- sources[expression.alias] = child_scope
+ sources[_get_source_alias(expression)] = child_scope

  scope.subquery_scopes.append(top)

@@ -915,3 +915,13 @@ def find_in_scope(expression, expression_types, bfs=True):
  the criteria was found.
  """
  return next(find_all_in_scope(expression, expression_types, bfs=bfs), None)
+
+
+ def _get_source_alias(expression):
+ alias_arg = expression.args.get("alias")
+ alias_name = expression.alias
+
+ if not alias_name and isinstance(alias_arg, exp.TableAlias) and len(alias_arg.columns) == 1:
+ alias_name = alias_arg.columns[0].name
+
+ return alias_name
sqlglot/parser.py CHANGED
@@ -1,6 +1,7 @@
  from __future__ import annotations

  import logging
+ import re
  import typing as t
  import itertools
  from collections import defaultdict
@@ -23,6 +24,9 @@ logger = logging.getLogger("sqlglot")

  OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]

+ # Used to detect alphabetical characters and +/- in timestamp literals
+ TIME_ZONE_RE: t.Pattern[str] = re.compile(r":.*?[a-zA-Z\+\-]")
+

  def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
  if len(args) == 1 and args[0].is_star:
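The TIME_ZONE_RE heuristic looks for a ":" eventually followed by a letter or a +/- offset, which signals a zone-aware timestamp literal. Replicated here standalone for illustration:

```python
import re

TIME_ZONE_RE = re.compile(r":.*?[a-zA-Z\+\-]")
assert TIME_ZONE_RE.search("2012-10-31 01:00 UTC")        # named zone
assert TIME_ZONE_RE.search("2012-10-31 01:00:00+02:00")   # numeric offset
assert not TIME_ZONE_RE.search("2012-10-31 01:00:00")     # no zone info
```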
@@ -935,7 +939,6 @@ class Parser(metaclass=_Parser):
  "AS": lambda self, query: self._build_pipe_cte(
  query, [exp.Star()], self._parse_table_alias()
  ),
- "DROP": lambda self, query: self._parse_pipe_syntax_drop(query),
  "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query),
  "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query),
  "ORDER BY": lambda self, query: query.order_by(
@@ -943,7 +946,6 @@ class Parser(metaclass=_Parser):
  ),
  "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
  "SELECT": lambda self, query: self._parse_pipe_syntax_select(query),
- "SET": lambda self, query: self._parse_pipe_syntax_set(query),
  "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query),
  "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
  "WHERE": lambda self, query: query.where(self._parse_where(), copy=False),
@@ -1518,6 +1520,15 @@ class Parser(metaclass=_Parser):
  # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
  ALTER_RENAME_REQUIRES_COLUMN = True

+ # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree.
+ # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is
+ # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such
+ # as BigQuery, where all joins have the same precedence.
+ JOINS_HAVE_EQUAL_PRECEDENCE = False
+
+ # Whether TIMESTAMP <literal> can produce a zone-aware timestamp
+ ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False
+
  __slots__ = (
  "error_level",
  "error_message_context",
@@ -3142,7 +3153,7 @@ class Parser(metaclass=_Parser):
  is_unpivot=self._prev.token_type == TokenType.UNPIVOT
  )
  elif self._match(TokenType.FROM):
- from_ = self._parse_from(skip_from_token=True)
+ from_ = self._parse_from(skip_from_token=True, consume_pipe=True)
  # Support parentheses for duckdb FROM-first syntax
  select = self._parse_select()
  if select:
@@ -3152,7 +3163,7 @@ class Parser(metaclass=_Parser):
  this = exp.select("*").from_(t.cast(exp.From, from_))
  else:
  this = (
- self._parse_table()
+ self._parse_table(consume_pipe=True)
  if table
  else self._parse_select(nested=True, parse_set_operation=False)
  )
@@ -3173,6 +3184,31 @@
  table: bool = False,
  parse_subquery_alias: bool = True,
  parse_set_operation: bool = True,
+ consume_pipe: bool = True,
+ ) -> t.Optional[exp.Expression]:
+ query = self._parse_select_query(
+ nested=nested,
+ table=table,
+ parse_subquery_alias=parse_subquery_alias,
+ parse_set_operation=parse_set_operation,
+ )
+
+ if (
+ consume_pipe
+ and self._match(TokenType.PIPE_GT, advance=False)
+ and isinstance(query, exp.Query)
+ ):
+ query = self._parse_pipe_syntax_query(query)
+ query = query.subquery(copy=False) if query and table else query
+
+ return query
+
+ def _parse_select_query(
+ self,
+ nested: bool = False,
+ table: bool = False,
+ parse_subquery_alias: bool = True,
+ parse_set_operation: bool = True,
  ) -> t.Optional[exp.Expression]:
  cte = self._parse_with()
@@ -3192,7 +3228,11 @@
  return this

  # duckdb supports leading with FROM x
- from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None
+ from_ = (
+ self._parse_from(consume_pipe=True)
+ if self._match(TokenType.FROM, advance=False)
+ else None
+ )

  if self._match(TokenType.SELECT):
  comments = self._prev_comments
@@ -3260,8 +3300,6 @@
  this = self._parse_derived_table_values()
  elif from_:
  this = exp.select("*").from_(from_.this, copy=False)
- if self._match(TokenType.PIPE_GT, advance=False):
- return self._parse_pipe_syntax_query(this)
  elif self._match(TokenType.SUMMARIZE):
  table = self._match(TokenType.TABLE)
  this = self._parse_select() or self._parse_string() or self._parse_table()
@@ -3521,13 +3559,18 @@
  )

  def _parse_from(
- self, joins: bool = False, skip_from_token: bool = False
+ self,
+ joins: bool = False,
+ skip_from_token: bool = False,
+ consume_pipe: bool = False,
  ) -> t.Optional[exp.From]:
  if not skip_from_token and not self._match(TokenType.FROM):
  return None

  return self.expression(
- exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
+ exp.From,
+ comments=self._prev_comments,
+ this=self._parse_table(joins=joins, consume_pipe=consume_pipe),
  )

  def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
@@ -3702,9 +3745,12 @@
  ) -> t.Optional[exp.Join]:
  if self._match(TokenType.COMMA):
  table = self._try_parse(self._parse_table)
- if table:
- return self.expression(exp.Join, this=table)
- return None
+ cross_join = self.expression(exp.Join, this=table) if table else None
+
+ if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE:
+ cross_join.set("kind", "CROSS")
+
+ return cross_join

  index = self._index
  method, side, kind = self._parse_join_parts()
@@ -3953,6 +3999,7 @@
  parse_bracket: bool = False,
  is_db_reference: bool = False,
  parse_partition: bool = False,
+ consume_pipe: bool = False,
  ) -> t.Optional[exp.Expression]:
  lateral = self._parse_lateral()
  if lateral:
@@ -3966,7 +4013,7 @@
  if values:
  return values

- subquery = self._parse_select(table=True)
+ subquery = self._parse_select(table=True, consume_pipe=consume_pipe)
  if subquery:
  if not subquery.args.get("pivots"):
  subquery.set("pivots", self._parse_pivots())
@@ -4708,7 +4755,9 @@

  return locks

- def parse_set_operation(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
+ def parse_set_operation(
+ self, this: t.Optional[exp.Expression], consume_pipe: bool = False
+ ) -> t.Optional[exp.Expression]:
  start = self._index
  _, side_token, kind_token = self._parse_join_parts()
@@ -4751,7 +4800,9 @@
  if by_name and self._match_texts(("ON", "BY")):
  on_column_list = self._parse_wrapped_csv(self._parse_column)

- expression = self._parse_select(nested=True, parse_set_operation=False)
+ expression = self._parse_select(
+ nested=True, parse_set_operation=False, consume_pipe=consume_pipe
+ )

  return self.expression(
  operation,
@@ -5082,12 +5133,20 @@
  this = self._parse_primary()

  if isinstance(this, exp.Literal):
+ literal = this.name
  this = self._parse_column_ops(this)

  parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
  if parser:
  return parser(self, this, data_type)

+ if (
+ self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
+ and data_type.is_type(exp.DataType.Type.TIMESTAMP)
+ and TIME_ZONE_RE.search(literal)
+ ):
+ data_type = exp.DataType.build("TIMESTAMPTZ")
+
  return self.expression(exp.Cast, this=this, to=data_type)

  # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
@@ -7303,8 +7362,9 @@

  return None

- if not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN or self._match_text_seq(
- "COLUMNS"
+ if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and (
+ not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
+ or self._match_text_seq("COLUMNS")
  ):
  schema = self._parse_schema()
@@ -8361,34 +8421,14 @@

  return new_select.with_(new_cte, as_=query, copy=False)

- def _build_pipe_ctes(
- self,
- query: exp.Select,
- expressions: t.List[exp.Expression],
- alias_cte: t.Optional[exp.TableAlias] = None,
- ) -> exp.Select:
- select = query.selects[0].assert_is(exp.Star)
- if select.args.get("except") or select.args.get("replace"):
- query = self._build_pipe_cte(
- query=query.select(
- *[expr for expr in expressions if not expr.is_star and expr.args.get("alias")],
- copy=False,
- ),
- expressions=[
- projection.args.get("alias", projection) for projection in expressions
- ],
- )
- else:
- query.select(*expressions, append=False, copy=False)
-
- return self._build_pipe_cte(query=query, expressions=[exp.Star()], alias_cte=alias_cte)
-
  def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select:
- select = self._parse_select()
+ select = self._parse_select(consume_pipe=False)
  if not select:
  return query

- return self._build_pipe_ctes(query=query, expressions=select.expressions)
+ return self._build_pipe_cte(
+ query=query.select(*select.expressions, append=False), expressions=[exp.Star()]
+ )

  def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select:
  limit = self._parse_limit()
@@ -8437,7 +8477,7 @@
  copy=False,
  )
  else:
- query.select(*aggregates_or_groups, copy=False)
+ query.select(*aggregates_or_groups, append=False, copy=False)

  if orders:
  return query.order_by(*orders, append=False, copy=False)
@@ -8453,11 +8493,9 @@
  ):
  query = self._parse_pipe_syntax_aggregate_group_order_by(query)

- return self._build_pipe_ctes(
- query=query, expressions=[expr for expr in query.selects if not expr.is_star]
- )
+ return self._build_pipe_cte(query=query, expressions=[exp.Star()])

- def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Select]:
+ def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]:
  first_setop = self.parse_set_operation(this=query)
  if not first_setop:
  return None
@@ -8488,12 +8526,15 @@

  return self._build_pipe_cte(query=query, expressions=[exp.Star()])

- def _parse_pipe_syntax_join(self, query: exp.Select) -> t.Optional[exp.Select]:
+ def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]:
  join = self._parse_join()
  if not join:
  return None

- return query.join(join, copy=False)
+ if isinstance(query, exp.Select):
+ return query.join(join, copy=False)
+
+ return query

  def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select:
  pivots = self._parse_pivots()
@@ -8504,37 +8545,12 @@
  if from_:
  from_.this.set("pivots", pivots)

- return self._build_pipe_ctes(query=query, expressions=[exp.Star()])
+ return self._build_pipe_cte(query=query, expressions=[exp.Star()])

  def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select:
  self._match_text_seq("EXTEND")
- return self._build_pipe_ctes(
- query=query,
- expressions=[query.selects[0].assert_is(exp.Star), *self._parse_expressions()],
- )
-
- def _parse_pipe_syntax_drop(self, query: exp.Select) -> exp.Select:
- self._match_text_seq("DROP")
- dropped_columns = self._parse_csv(self._parse_assignment)
-
- select = query.selects[0].assert_is(exp.Star)
- except_ = select.args.get("except") or []
- select.set("except", [*except_, *dropped_columns])
-
- return query
-
- def _parse_pipe_syntax_set(self, query: exp.Select) -> exp.Select:
- self._match_text_seq("SET")
- replaced_columns = [
- self.expression(exp.Alias, this=expr.expression, alias=expr.this)
- for expr in self._parse_csv(self._parse_assignment)
- ]
-
- select = query.selects[0].assert_is(exp.Star)
- replace_ = select.args.get("replace") or []
- select.set("replace", [*replace_, *replaced_columns])
-
- return query
+ query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False)
+ return self._build_pipe_cte(query=query, expressions=[exp.Star()])

  def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select:
  sample = self._parse_table_sample()
@@ -8547,7 +8563,13 @@

  return query

- def _parse_pipe_syntax_query(self, query: exp.Select) -> t.Optional[exp.Select]:
+ def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]:
+ if isinstance(query, exp.Subquery):
+ query = exp.select("*").from_(query, copy=False)
+
+ if not query.args.get("from"):
+ query = exp.select("*").from_(query.subquery(copy=False), copy=False)
+
  while self._match(TokenType.PIPE_GT):
  start = self._curr
  parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper())
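With _parse_pipe_syntax_query now wrapping bare tables and subqueries, pipe syntax can start directly from a table reference. A hedged sketch, assuming a dialect that tokenizes |> in this release (e.g. BigQuery); the output is a generated CTE chain whose exact shape may vary:

```python
import sqlglot

# The parser wraps the bare table in SELECT * FROM ... before applying |> operators.
sql = "FROM produce |> WHERE sales > 0 |> SELECT item, sales"
print(sqlglot.transpile(sql, read="bigquery")[0])
```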
sqlglot/transforms.py CHANGED
@@ -352,13 +352,20 @@ def unnest_to_explode(
  has_multi_expr = len(exprs) > 1
  this, *expressions = _unnest_zip_exprs(unnest, exprs, has_multi_expr)

+ columns = alias.columns if alias else []
+ offset = unnest.args.get("offset")
+ if offset:
+ columns.insert(
+ 0, offset if isinstance(offset, exp.Identifier) else exp.to_identifier("pos")
+ )
+
  unnest.replace(
  exp.Table(
  this=_udtf_type(unnest, has_multi_expr)(
  this=this,
  expressions=expressions,
  ),
- alias=exp.TableAlias(this=alias.this, columns=alias.columns) if alias else None,
+ alias=exp.TableAlias(this=alias.this, columns=columns) if alias else None,
  )
  )

@@ -393,6 +400,13 @@ def unnest_to_explode(
  "CROSS JOIN UNNEST to LATERAL VIEW EXPLODE transformation requires explicit column aliases"
  )

+ offset = unnest.args.get("offset")
+ if offset:
+ alias_cols.insert(
+ 0,
+ offset if isinstance(offset, exp.Identifier) else exp.to_identifier("pos"),
+ )
+
  for e, column in zip(exprs, alias_cols):
  expression.append(
  "laterals",
sqlglot-26.29.0.dist-info/METADATA → sqlglot-26.31.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: sqlglot
- Version: 26.29.0
+ Version: 26.31.0
  Summary: An easily customizable SQL parser and transpiler
  Author-email: Toby Mao <toby.mao@gmail.com>
  License: MIT License
@@ -61,7 +61,7 @@ Dynamic: provides-extra

  ![SQLGlot logo](sqlglot.png)

- SQLGlot is a no-dependency SQL parser, transpiler, optimizer, and engine. It can be used to format SQL or translate between [27 different dialects](https://github.com/tobymao/sqlglot/blob/main/sqlglot/dialects/__init__.py) like [DuckDB](https://duckdb.org/), [Presto](https://prestodb.io/) / [Trino](https://trino.io/), [Spark](https://spark.apache.org/) / [Databricks](https://www.databricks.com/), [Snowflake](https://www.snowflake.com/en/), and [BigQuery](https://cloud.google.com/bigquery/). It aims to read a wide variety of SQL inputs and output syntactically and semantically correct SQL in the targeted dialects.
+ SQLGlot is a no-dependency SQL parser, transpiler, optimizer, and engine. It can be used to format SQL or translate between [29 different dialects](https://github.com/tobymao/sqlglot/blob/main/sqlglot/dialects/__init__.py) like [DuckDB](https://duckdb.org/), [Presto](https://prestodb.io/) / [Trino](https://trino.io/), [Spark](https://spark.apache.org/) / [Databricks](https://www.databricks.com/), [Snowflake](https://www.snowflake.com/en/), and [BigQuery](https://cloud.google.com/bigquery/). It aims to read a wide variety of SQL inputs and output syntactically and semantically correct SQL in the targeted dialects.

  It is a very comprehensive generic SQL parser with a robust [test suite](https://github.com/tobymao/sqlglot/blob/main/tests/). It is also quite [performant](#benchmarks), while being written purely in Python.

sqlglot-26.29.0.dist-info/RECORD → sqlglot-26.31.0.dist-info/RECORD CHANGED
@@ -1,52 +1,54 @@
  sqlglot/__init__.py,sha256=za08rtdPh2v7dOpGdNomttlIVGgTrKja7rPd6sQwaTg,5391
  sqlglot/__main__.py,sha256=022c173KqxsiABWTEpUIq_tJUxuNiW7a7ABsxBXqvu8,2069
  sqlglot/_typing.py,sha256=-1HPyr3w5COlSJWqlgt8jhFk2dyMvBuvVBqIX1wyVCM,642
- sqlglot/_version.py,sha256=gOnetX1YzVEd7bBCS3U4KJPt8DHhhNA_iqdIiN8DYk4,515
+ sqlglot/_version.py,sha256=X5X34o5ymsD4ydxIloUOjJGcZ-0Zi6rgP-736DtnUZ8,515
  sqlglot/diff.py,sha256=PtOllQMQa1Sw1-V2Y8eypmDqGujXYPaTOp_WLsWkAWk,17314
  sqlglot/errors.py,sha256=QNKMr-pzLUDR-tuMmn_GK6iMHUIVdb_YSJ_BhGEvuso,2126
- sqlglot/expressions.py,sha256=r3WkNufDInSqIoMasryY4W_XUV7DyIFU2G29jglFPqQ,243249
- sqlglot/generator.py,sha256=E1LjyN49nX9XfK-hysHWvpw7-qtws4xeb85sZi5x3M0,213345
+ sqlglot/expressions.py,sha256=rYPkorYfWlBzPxyaodGqIkW-x6RG1gSkVjBkOfkdZiI,243434
+ sqlglot/generator.py,sha256=Od0aBsKJph1wG_YhrknJAcAcVvuVIN823iyxA3KPi0Y,213383
  sqlglot/helper.py,sha256=9nZjFVRBtMKFC3EdzpDQ6jkazFO19po6BF8xHiNGZIo,15111
  sqlglot/jsonpath.py,sha256=dKdI3PNINNGimmSse2IIv-GbPN_3lXncXh_70QH7Lss,7664
  sqlglot/lineage.py,sha256=kXBDSErmZZluZx_kkrMj4MPEOAbkvcbX1tbOW7Bpl-U,15303
- sqlglot/parser.py,sha256=IXOPic_GfVXDaNRna9JbxmG-l2FjxZTIlV0wtWvWnqM,323926
+ sqlglot/parser.py,sha256=Mqm77jhuF0b3hyuFPgYtLAMPkuslF64Y8iHIOPw3ZWA,324610
  sqlglot/planner.py,sha256=ql7Li-bWJRcyXzNaZy_n6bQ6B2ZfunEIB8Ztv2xaxq4,14634
  sqlglot/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  sqlglot/schema.py,sha256=13H2qKQs27EKdTpDLOvcNnSTDAUbYNKjWtJs4aQCSOA,20509
  sqlglot/serde.py,sha256=DQVJ95WrIvhYfe02Ytb4NQug2aMwDCEwpMBW1LKDqzE,2031
  sqlglot/time.py,sha256=Q62gv6kL40OiRBF6BMESxKJcMVn7ZLNw7sv8H34z5FI,18400
  sqlglot/tokens.py,sha256=R0B8GQSbQ9GoDc0NlaT5Tc8RjgEOx2IYIkYU5rY8Rg8,48742
- sqlglot/transforms.py,sha256=3jpbHeVTLK9hmQi5f3_vmK-5jZB32_ittCkO7poxCs4,40631
+ sqlglot/transforms.py,sha256=s96QMtR7rJbcLAU1I_IF1xLNxno6yvEbhERgbS5xmJ4,41164
  sqlglot/trie.py,sha256=v27uXMrHfqrXlJ6GmeTSMovsB_3o0ctnlKhdNt7W6fI,2245
- sqlglot/dialects/__init__.py,sha256=aZTLpe2SwgWqiVrRabmfV8TVLPVHFydGwb_zhcVhRss,3499
+ sqlglot/dialects/__init__.py,sha256=G-YO1_zIcONWb9LjTjHX_HGzGl9Rm0sA9MX4ok6tpns,3527
  sqlglot/dialects/athena.py,sha256=gPE9ybRcbd6dVa1mrTFB_eVjsjQG36hErq5EpHyQmXo,6344
- sqlglot/dialects/bigquery.py,sha256=PIRhlNIj6I5iXPxR2_9q1OWXvy4ovVB_ae5qe8SWV80,52713
- sqlglot/dialects/clickhouse.py,sha256=0ahX0zjIwN9-RzfNyITBHs9PsgQXjL0uMRlRgYz9crI,56520
- sqlglot/dialects/databricks.py,sha256=8PoaiP8PfiBjpheRiua-rO_HzX2TRUXqc3DnlQ8zYrg,4481
- sqlglot/dialects/dialect.py,sha256=uiRHCJ2pjIea3EnRXhizNni1o-d31X02CRBuvXXne7U,68529
+ sqlglot/dialects/bigquery.py,sha256=5s4hSe-PXbjeIlKhAZon-rGq4ZIywYZj1kxx213V748,52862
+ sqlglot/dialects/clickhouse.py,sha256=Dc0aXwEgN8b6coXKM6P8zh3IsyrXjBajNGB-cVhnu1Y,56603
+ sqlglot/dialects/databricks.py,sha256=mJN2lFpqgH95x3mtry3qWbuRf4q7NV5jbRAOspqclzY,4548
+ sqlglot/dialects/dialect.py,sha256=qcpaE4cYO3v2R1cQVonpbrJOybYspnEdXSkXWxDW6d4,68921
  sqlglot/dialects/doris.py,sha256=eC7Ct-iz7p4Usz659NkelUFhm-GmVolIZy5uaBvgjaA,14397
  sqlglot/dialects/drill.py,sha256=FOh7_KjPx_77pv0DiHKZog0CcmzqeF9_PEmGnJ1ESSM,5825
  sqlglot/dialects/druid.py,sha256=kh3snZtneehNOWqs3XcPjsrhNaRbkCQ8E4hHbWJ1fHM,690
- sqlglot/dialects/duckdb.py,sha256=rARz845jDTzx8WUncAYHZeoBcVi7WvIJlGbjnNHaxZM,47965
+ sqlglot/dialects/duckdb.py,sha256=oGCgK0KjwJcCKy-YOZeiQnEo4v7Zc1r5AK0tCXO2VIc,48005
  sqlglot/dialects/dune.py,sha256=gALut-fFfN2qMsr8LvZ1NQK3F3W9z2f4PwMvTMXVVVg,375
- sqlglot/dialects/hive.py,sha256=PO6DLT1kHL-U2kFfV1CsNgQFT7A32LuGN71gnTXEOfY,31728
+ sqlglot/dialects/exasol.py,sha256=r2fO9FHfMV1_1M62wBGlNcQ6fHWikO4SBr8eCzxEYEY,2008
+ sqlglot/dialects/fabric.py,sha256=IU7aMh2yEuG8eVBAYzXO5pObZBZ4rZSd5UgvkwbCI-E,5277
+ sqlglot/dialects/hive.py,sha256=yKCsVN4R8pIB2Lmx1YGiSR9b8Me3li6rsGuZrKjHTo4,31771
  sqlglot/dialects/materialize.py,sha256=_DPLPt8YrdQIIXNrGJw1IMcGOoAEJ9NO9X9pDfy4hxs,3494
  sqlglot/dialects/mysql.py,sha256=prZecn3zeoifZX7l54UuLG64ar7I-or_z9lF-rT8bds,49233
- sqlglot/dialects/oracle.py,sha256=llxu2LzndrsGyceTod-Leh03vuPWEUKzVHB5gQY-tY8,15313
+ sqlglot/dialects/oracle.py,sha256=o6On1cYWFt6TpQYKuzo4kCz5vKb8jQr8WSwc619h3Lg,15967
  sqlglot/dialects/postgres.py,sha256=KUyMoLkm1_sZKUbdjn6bjXx9xz7sbEMKa-fl5Mzfrsk,31025
- sqlglot/dialects/presto.py,sha256=ltKbQ44efeq1HM0T8Qq0rsBSx6B6bF9RoKtUBVeoz70,33155
- sqlglot/dialects/prql.py,sha256=OF2LfDb4uzKIF7kpCfpL5G7VP1pnzLbjfW5QFUnuPvo,7803
- sqlglot/dialects/redshift.py,sha256=H8H8lGizHIAd4qLoPeFchyiGZKO1I8U_B058woukuGw,15366
+ sqlglot/dialects/presto.py,sha256=dHdPv6tUO-7SAYUWnx5ftKzv6FcRvzBfiYDTlQvL2Cs,33312
+ sqlglot/dialects/prql.py,sha256=fwN-SPEGx-drwf1K0U2MByN-PkW3C_rOgQ3xeJeychg,7908
+ sqlglot/dialects/redshift.py,sha256=sHhibn2g6_hVRd1XEe8HSQd_ofWkEpzld0odsNQ6X2g,15747
  sqlglot/dialects/risingwave.py,sha256=hwEOPjMw0ZM_3fjQcBUE00oy6I8V6mzYOOYmcwwS8mw,2898
- sqlglot/dialects/snowflake.py,sha256=dP5o1sH0q5UDMxPoI5vYp1_2FQyBU7VbeYdxF1HVyEs,63398
- sqlglot/dialects/spark.py,sha256=fbmiTKAQiKqG9yE_HAxYGgQiOjdxB9tJyjOtgdqF100,7645
+ sqlglot/dialects/snowflake.py,sha256=68I7OjdWXSVnDxJ-ItmXnJd-A1nlND1T6aKNv0nkJlQ,63518
+ sqlglot/dialects/spark.py,sha256=bOUSXUoWtLfWaQ9fIjWaw4zLBJY6N7vxajdMbAxLdOk,8307
  sqlglot/dialects/spark2.py,sha256=8er7nHDm5Wc57m9AOxKN0sd_DVzbhAL44H_udlFh9O8,14258
- sqlglot/dialects/sqlite.py,sha256=UzJwIdY1PsLArMxNt5lKvk8COHvXeo4FoqW41LqVmM8,12440
+ sqlglot/dialects/sqlite.py,sha256=fwqmopeuoupD_2dh2q6rT3UFxWtFHkskZ1OXAYnPT9Q,12483
  sqlglot/dialects/starrocks.py,sha256=fHNgvq5Nz7dI4QUWCTOO5VDOYjasBxRRlcg9TbY0UZE,11235
  sqlglot/dialects/tableau.py,sha256=oIawDzUITxGCWaEMB8OaNMPWhbC3U-2y09pYPm4eazc,2190
  sqlglot/dialects/teradata.py,sha256=xWa-9kSTsT-eM1NePi_oIM1dPHmXW89GLU5Uda3_6Ao,14036
  sqlglot/dialects/trino.py,sha256=wgLsiX1NQvjGny_rgrU1e2r6kK1LD0KgaSdIDrYmjD0,4285
- sqlglot/dialects/tsql.py,sha256=kMa8hYAXp3D2-g4HzkuzHDsWeXU1WgbyZm2sNl2a8rE,54397
+ sqlglot/dialects/tsql.py,sha256=dKlGmOmRFDx2MO5YebAAIK3FHorLZfzR0iqtK6xiiX4,54540
  sqlglot/executor/__init__.py,sha256=FslewzYQtQdDNg_0Ju2UaiP4vo4IMUgkfkmFsYUhcN0,2958
  sqlglot/executor/context.py,sha256=WJHJdYQCOeVXwLw0uSSrWSc25eBMn5Ix108RCvdsKRQ,3386
  sqlglot/executor/env.py,sha256=tQhU5PpTBMcxgZIFddFqxWMNPtHN0vOOz72voncY3KY,8276
@@ -69,11 +71,11 @@ sqlglot/optimizer/pushdown_projections.py,sha256=7NoK5NAUVYVhs0YnYyo6WuXfaO-BShS
  sqlglot/optimizer/qualify.py,sha256=oAPfwub7dEkrlCrsptcJWpLya4BgKhN6M5SwIs_86LY,4002
  sqlglot/optimizer/qualify_columns.py,sha256=77aScPakXYaiagnoCWk2qwMxlKuRGsFTAK9sOQuR2vY,40872
  sqlglot/optimizer/qualify_tables.py,sha256=5f5enBAh-bpNB9ewF97W9fx9h1TGXj1Ih5fncvH42sY,6486
- sqlglot/optimizer/scope.py,sha256=r-2PaO7-woaIWaWrKC88J9eTgdQardNYQ1rIXXaPr1w,30501
+ sqlglot/optimizer/scope.py,sha256=HI3TZ4VWTgM6_x8k5ClA0lA0xidaKv4xgn8iGERJRjk,30824
  sqlglot/optimizer/simplify.py,sha256=S0Blqg5Mq2KRRWhWz-Eivch9sBjBhg9fRJA6EdBzj2g,50704
  sqlglot/optimizer/unnest_subqueries.py,sha256=kzWUVDlxs8z9nmRx-8U-pHXPtVZhEIwkKqmKhr2QLvc,10908
- sqlglot-26.29.0.dist-info/licenses/LICENSE,sha256=AI3__mHZfOtzY3EluR_pIYBm3_pE7TbVx7qaHxoZ114,1065
- sqlglot-26.29.0.dist-info/METADATA,sha256=rc1ouFaDp1lgiQ2W3jRFM2VNj7RUrf2drwij1_rajpg,20732
- sqlglot-26.29.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- sqlglot-26.29.0.dist-info/top_level.txt,sha256=5kRskCGA_gVADF9rSfSzPdLHXqvfMusDYeHePfNY2nQ,8
- sqlglot-26.29.0.dist-info/RECORD,,
+ sqlglot-26.31.0.dist-info/licenses/LICENSE,sha256=AI3__mHZfOtzY3EluR_pIYBm3_pE7TbVx7qaHxoZ114,1065
+ sqlglot-26.31.0.dist-info/METADATA,sha256=OAEEcPh5a0gV2C4sacAbhuXg4cpNWPUXeGS0H6iAGgs,20732
+ sqlglot-26.31.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ sqlglot-26.31.0.dist-info/top_level.txt,sha256=5kRskCGA_gVADF9rSfSzPdLHXqvfMusDYeHePfNY2nQ,8
+ sqlglot-26.31.0.dist-info/RECORD,,