sqlglot 26.25.3__py3-none-any.whl → 26.27.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sqlglot/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '26.25.3'
21
- __version_tuple__ = version_tuple = (26, 25, 3)
20
+ __version__ = version = '26.27.1'
21
+ __version_tuple__ = version_tuple = (26, 27, 1)
sqlglot/dialects/bigquery.py CHANGED
@@ -525,6 +525,16 @@ class BigQuery(Dialect):
525
525
  LOG_DEFAULTS_TO_LN = True
526
526
  SUPPORTS_IMPLICIT_UNNEST = True
527
527
 
528
+ # BigQuery does not allow ASC/DESC to be used as an identifier
529
+ ID_VAR_TOKENS = parser.Parser.ID_VAR_TOKENS - {TokenType.ASC, TokenType.DESC}
530
+ ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC}
531
+ TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC}
532
+ COMMENT_TABLE_ALIAS_TOKENS = parser.Parser.COMMENT_TABLE_ALIAS_TOKENS - {
533
+ TokenType.ASC,
534
+ TokenType.DESC,
535
+ }
536
+ UPDATE_ALIAS_TOKENS = parser.Parser.UPDATE_ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC}
537
+
528
538
  FUNCTIONS = {
529
539
  **parser.Parser.FUNCTIONS,
530
540
  "CONTAINS_SUBSTR": _build_contains_substring,
sqlglot/dialects/clickhouse.py CHANGED
@@ -1096,6 +1096,7 @@ class ClickHouse(Dialect):
1096
1096
  exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression),
1097
1097
  exp.Rand: rename_func("randCanonical"),
1098
1098
  exp.StartsWith: rename_func("startsWith"),
1099
+ exp.EndsWith: rename_func("endsWith"),
1099
1100
  exp.StrPosition: lambda self, e: strposition_sql(
1100
1101
  self,
1101
1102
  e,
sqlglot/dialects/hive.py CHANGED
@@ -557,6 +557,7 @@ class Hive(Dialect):
557
557
  exp.GenerateDateArray: sequence_sql,
558
558
  exp.If: if_sql(),
559
559
  exp.ILike: no_ilike_sql,
560
+ exp.IntDiv: lambda self, e: self.binary(e, "DIV"),
560
561
  exp.IsNan: rename_func("ISNAN"),
561
562
  exp.JSONExtract: lambda self, e: self.func("GET_JSON_OBJECT", e.this, e.expression),
562
563
  exp.JSONExtractScalar: lambda self, e: self.func(
sqlglot/dialects/mysql.py CHANGED
@@ -489,6 +489,27 @@ class MySQL(Dialect):
489
489
  VALUES_FOLLOWED_BY_PAREN = False
490
490
  SUPPORTS_PARTITION_SELECTION = True
491
491
 
492
+ def _parse_generated_as_identity(
493
+ self,
494
+ ) -> (
495
+ exp.GeneratedAsIdentityColumnConstraint
496
+ | exp.ComputedColumnConstraint
497
+ | exp.GeneratedAsRowColumnConstraint
498
+ ):
499
+ this = super()._parse_generated_as_identity()
500
+
501
+ if self._match_texts(("STORED", "VIRTUAL")):
502
+ persisted = self._prev.text.upper() == "STORED"
503
+
504
+ if isinstance(this, exp.ComputedColumnConstraint):
505
+ this.set("persisted", persisted)
506
+ elif isinstance(this, exp.GeneratedAsIdentityColumnConstraint):
507
+ this = self.expression(
508
+ exp.ComputedColumnConstraint, this=this.expression, persisted=persisted
509
+ )
510
+
511
+ return this
512
+
492
513
  def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
493
514
  this = self._parse_id_var()
494
515
  if not self._match(TokenType.L_PAREN):
@@ -1154,6 +1175,10 @@ class MySQL(Dialect):
1154
1175
  "zerofill",
1155
1176
  }
1156
1177
 
1178
+ def computedcolumnconstraint_sql(self, expression: exp.ComputedColumnConstraint) -> str:
1179
+ persisted = "STORED" if expression.args.get("persisted") else "VIRTUAL"
1180
+ return f"GENERATED ALWAYS AS ({self.sql(expression.this.unnest())}) {persisted}"
1181
+
1157
1182
  def array_sql(self, expression: exp.Array) -> str:
1158
1183
  self.unsupported("Arrays are not supported by MySQL")
1159
1184
  return self.function_fallback_sql(expression)
sqlglot/dialects/postgres.py CHANGED
@@ -36,6 +36,7 @@ from sqlglot.dialects.dialect import (
36
36
  strposition_sql,
37
37
  count_if_to_sum,
38
38
  groupconcat_sql,
39
+ Version,
39
40
  )
40
41
  from sqlglot.generator import unsupported_args
41
42
  from sqlglot.helper import is_int, seq_get
@@ -255,6 +256,15 @@ def _levenshtein_sql(self: Postgres.Generator, expression: exp.Levenshtein) -> s
255
256
  return rename_func(name)(self, expression)
256
257
 
257
258
 
259
+ def _versioned_anyvalue_sql(self: Postgres.Generator, expression: exp.AnyValue) -> str:
260
+ # https://www.postgresql.org/docs/16/functions-aggregate.html
261
+ # https://www.postgresql.org/about/featurematrix/
262
+ if self.dialect.version < Version("16.0"):
263
+ return any_value_to_max_sql(self, expression)
264
+
265
+ return rename_func("ANY_VALUE")(self, expression)
266
+
267
+
258
268
  class Postgres(Dialect):
259
269
  INDEX_OFFSET = 1
260
270
  TYPED_DIVISION = True
@@ -502,6 +512,18 @@ class Postgres(Dialect):
502
512
 
503
513
  return this
504
514
 
515
+ def _parse_user_defined_type(
516
+ self, identifier: exp.Identifier
517
+ ) -> t.Optional[exp.Expression]:
518
+ udt_type: exp.Identifier | exp.Dot = identifier
519
+
520
+ while self._match(TokenType.DOT):
521
+ part = self._parse_id_var()
522
+ if part:
523
+ udt_type = exp.Dot(this=udt_type, expression=part)
524
+
525
+ return exp.DataType.build(udt_type, udt=True)
526
+
505
527
  class Generator(generator.Generator):
506
528
  SINGLE_STRING_INTERVAL = True
507
529
  RENAME_TABLE_WITH_DB = False
@@ -546,7 +568,7 @@ class Postgres(Dialect):
546
568
 
547
569
  TRANSFORMS = {
548
570
  **generator.Generator.TRANSFORMS,
549
- exp.AnyValue: any_value_to_max_sql,
571
+ exp.AnyValue: _versioned_anyvalue_sql,
550
572
  exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"),
551
573
  exp.ArrayFilter: filter_array_using_unnest,
552
574
  exp.BitwiseXor: lambda self, e: self.binary(e, "#"),
sqlglot/dialects/risingwave.py CHANGED
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
  from sqlglot.dialects.postgres import Postgres
3
+ from sqlglot.generator import Generator
3
4
  from sqlglot.tokens import TokenType
4
5
  import typing as t
5
6
 
@@ -72,3 +73,6 @@ class RisingWave(Postgres):
72
73
  }
73
74
 
74
75
  EXPRESSION_PRECEDES_PROPERTIES_CREATABLES = {"SINK"}
76
+
77
+ def computedcolumnconstraint_sql(self, expression: exp.ComputedColumnConstraint) -> str:
78
+ return Generator.computedcolumnconstraint_sql(self, expression)
sqlglot/dialects/snowflake.py CHANGED
@@ -863,8 +863,14 @@ class Snowflake(Dialect):
863
863
  properties=self._parse_properties(),
864
864
  )
865
865
 
866
- def _parse_get(self) -> exp.Get | exp.Command:
866
+ def _parse_get(self) -> t.Optional[exp.Expression]:
867
867
  start = self._prev
868
+
869
+ # If we detect GET( then we need to parse a function, not a statement
870
+ if self._match(TokenType.L_PAREN):
871
+ self._retreat(self._index - 2)
872
+ return self._parse_expression()
873
+
868
874
  target = self._parse_location_path()
869
875
 
870
876
  # Parse as command if unquoted file path
@@ -1019,6 +1025,7 @@ class Snowflake(Dialect):
1019
1025
  exp.ArgMin: rename_func("MIN_BY"),
1020
1026
  exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"),
1021
1027
  exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
1028
+ exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"),
1022
1029
  exp.AtTimeZone: lambda self, e: self.func(
1023
1030
  "CONVERT_TIMEZONE", e.args.get("zone"), e.this
1024
1031
  ),
@@ -1094,11 +1101,14 @@ class Snowflake(Dialect):
1094
1101
  exp.SHA: rename_func("SHA1"),
1095
1102
  exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
1096
1103
  exp.StartsWith: rename_func("STARTSWITH"),
1104
+ exp.EndsWith: rename_func("ENDSWITH"),
1097
1105
  exp.StrPosition: lambda self, e: strposition_sql(
1098
1106
  self, e, func_name="CHARINDEX", supports_position=True
1099
1107
  ),
1100
1108
  exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)),
1109
+ exp.StringToArray: rename_func("STRTOK_TO_ARRAY"),
1101
1110
  exp.Stuff: rename_func("INSERT"),
1111
+ exp.StPoint: rename_func("ST_MAKEPOINT"),
1102
1112
  exp.TimeAdd: date_delta_sql("TIMEADD"),
1103
1113
  exp.Timestamp: no_timestamp_sql,
1104
1114
  exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"),
sqlglot/dialects/spark.py CHANGED
@@ -163,6 +163,7 @@ class Spark(Spark2):
163
163
  move_partitioned_by_to_schema_columns,
164
164
  ]
165
165
  ),
166
+ exp.EndsWith: rename_func("ENDSWITH"),
166
167
  exp.PartitionedByProperty: lambda self,
167
168
  e: f"PARTITIONED BY {self.wrap(self.expressions(sqls=[_normalize_partition(e) for e in e.this.expressions], skip_first=True))}",
168
169
  exp.StartsWith: rename_func("STARTSWITH"),
sqlglot/dialects/sqlite.py CHANGED
@@ -99,6 +99,8 @@ class SQLite(Dialect):
99
99
  KEYWORDS = tokens.Tokenizer.KEYWORDS.copy()
100
100
  KEYWORDS.pop("/*+")
101
101
 
102
+ COMMANDS = {*tokens.Tokenizer.COMMANDS, TokenType.REPLACE}
103
+
102
104
  class Parser(parser.Parser):
103
105
  FUNCTIONS = {
104
106
  **parser.Parser.FUNCTIONS,
@@ -107,7 +109,9 @@ class SQLite(Dialect):
107
109
  "DATETIME": lambda args: exp.Anonymous(this="DATETIME", expressions=args),
108
110
  "TIME": lambda args: exp.Anonymous(this="TIME", expressions=args),
109
111
  }
112
+
110
113
  STRING_ALIASES = True
114
+ ALTER_RENAME_REQUIRES_COLUMN = False
111
115
 
112
116
  def _parse_unique(self) -> exp.UniqueColumnConstraint:
113
117
  # Do not consume more tokens if UNIQUE is used as a standalone constraint, e.g:
@@ -307,3 +311,10 @@ class SQLite(Dialect):
307
311
  @unsupported_args("this")
308
312
  def currentschema_sql(self, expression: exp.CurrentSchema) -> str:
309
313
  return "'main'"
314
+
315
+ def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
316
+ self.unsupported("SQLite does not support IGNORE NULLS.")
317
+ return self.sql(expression.this)
318
+
319
+ def respectnulls_sql(self, expression: exp.RespectNulls) -> str:
320
+ return self.sql(expression.this)
sqlglot/dialects/starrocks.py CHANGED
@@ -17,6 +17,19 @@ from sqlglot.helper import seq_get
17
17
  from sqlglot.tokens import TokenType
18
18
 
19
19
 
20
+ # https://docs.starrocks.io/docs/sql-reference/sql-functions/spatial-functions/st_distance_sphere/
21
+ def st_distance_sphere(self, expression: exp.StDistance) -> str:
22
+ point1 = expression.this
23
+ point2 = expression.expression
24
+
25
+ point1_x = self.func("ST_X", point1)
26
+ point1_y = self.func("ST_Y", point1)
27
+ point2_x = self.func("ST_X", point2)
28
+ point2_y = self.func("ST_Y", point2)
29
+
30
+ return self.func("ST_Distance_Sphere", point1_x, point1_y, point2_x, point2_y)
31
+
32
+
20
33
  class StarRocks(MySQL):
21
34
  STRICT_JSON_PATH_SYNTAX = False
22
35
 
@@ -132,6 +145,8 @@ class StarRocks(MySQL):
132
145
  TRANSFORMS = {
133
146
  **MySQL.Generator.TRANSFORMS,
134
147
  exp.Array: inline_array_sql,
148
+ exp.ArrayAgg: rename_func("ARRAY_AGG"),
149
+ exp.ArrayFilter: rename_func("ARRAY_FILTER"),
135
150
  exp.ArrayToString: rename_func("ARRAY_JOIN"),
136
151
  exp.ApproxDistinct: approx_count_distinct_sql,
137
152
  exp.DateDiff: lambda self, e: self.func(
@@ -141,12 +156,12 @@ class StarRocks(MySQL):
141
156
  exp.JSONExtract: arrow_json_extract_sql,
142
157
  exp.Property: property_sql,
143
158
  exp.RegexpLike: rename_func("REGEXP"),
159
+ exp.StDistance: st_distance_sphere,
144
160
  exp.StrToUnix: lambda self, e: self.func("UNIX_TIMESTAMP", e.this, self.format_time(e)),
145
161
  exp.TimestampTrunc: lambda self, e: self.func("DATE_TRUNC", unit_to_str(e), e.this),
146
162
  exp.TimeStrToDate: rename_func("TO_DATE"),
147
163
  exp.UnixToStr: lambda self, e: self.func("FROM_UNIXTIME", e.this, self.format_time(e)),
148
164
  exp.UnixToTime: rename_func("FROM_UNIXTIME"),
149
- exp.ArrayFilter: rename_func("ARRAY_FILTER"),
150
165
  }
151
166
 
152
167
  TRANSFORMS.pop(exp.DateTrunc)
sqlglot/expressions.py CHANGED
@@ -31,6 +31,7 @@ from sqlglot.helper import (
31
31
  ensure_collection,
32
32
  ensure_list,
33
33
  seq_get,
34
+ split_num_words,
34
35
  subclasses,
35
36
  to_bool,
36
37
  )
@@ -1723,15 +1724,15 @@ class Column(Condition):
1723
1724
  if self.args.get(part)
1724
1725
  ]
1725
1726
 
1726
- def to_dot(self) -> Dot | Identifier:
1727
+ def to_dot(self, include_dots: bool = True) -> Dot | Identifier:
1727
1728
  """Converts the column into a dot expression."""
1728
1729
  parts = self.parts
1729
1730
  parent = self.parent
1730
1731
 
1731
- while parent:
1732
- if isinstance(parent, Dot):
1732
+ if include_dots:
1733
+ while isinstance(parent, Dot):
1733
1734
  parts.append(parent.expression)
1734
- parent = parent.parent
1735
+ parent = parent.parent
1735
1736
 
1736
1737
  return Dot.build(deepcopy(parts)) if len(parts) > 1 else parts[0]
1737
1738
 
@@ -1993,11 +1994,6 @@ class OnUpdateColumnConstraint(ColumnConstraintKind):
1993
1994
  pass
1994
1995
 
1995
1996
 
1996
- # https://docs.snowflake.com/en/sql-reference/sql/create-external-table#optional-parameters
1997
- class TransformColumnConstraint(ColumnConstraintKind):
1998
- pass
1999
-
2000
-
2001
1997
  class PrimaryKeyColumnConstraint(ColumnConstraintKind):
2002
1998
  arg_types = {"desc": False, "options": False}
2003
1999
 
@@ -4757,6 +4753,8 @@ class DataType(Expression):
4757
4753
  if udt:
4758
4754
  return DataType(this=DataType.Type.USERDEFINED, kind=dtype, **kwargs)
4759
4755
  raise
4756
+ elif isinstance(dtype, (Identifier, Dot)) and udt:
4757
+ return DataType(this=DataType.Type.USERDEFINED, kind=dtype, **kwargs)
4760
4758
  elif isinstance(dtype, DataType.Type):
4761
4759
  data_type_exp = DataType(this=dtype)
4762
4760
  elif isinstance(dtype, DataType):
@@ -4798,9 +4796,6 @@ class DataType(Expression):
4798
4796
  return False
4799
4797
 
4800
4798
 
4801
- DATA_TYPE = t.Union[str, DataType, DataType.Type]
4802
-
4803
-
4804
4799
  # https://www.postgresql.org/docs/15/datatype-pseudo.html
4805
4800
  class PseudoType(DataType):
4806
4801
  arg_types = {"this": True}
@@ -4930,6 +4925,10 @@ class AddConstraint(Expression):
4930
4925
  arg_types = {"expressions": True}
4931
4926
 
4932
4927
 
4928
+ class AddPartition(Expression):
4929
+ arg_types = {"this": True, "exists": False}
4930
+
4931
+
4933
4932
  class AttachOption(Expression):
4934
4933
  arg_types = {"this": True, "expression": False}
4935
4934
 
@@ -5030,6 +5029,9 @@ class Dot(Binary):
5030
5029
  return parts
5031
5030
 
5032
5031
 
5032
+ DATA_TYPE = t.Union[str, Identifier, Dot, DataType, DataType.Type]
5033
+
5034
+
5033
5035
  class DPipe(Binary):
5034
5036
  arg_types = {"this": True, "expression": True, "safe": False}
5035
5037
 
@@ -5570,13 +5572,28 @@ class ArrayToString(Func):
5570
5572
  _sql_names = ["ARRAY_TO_STRING", "ARRAY_JOIN"]
5571
5573
 
5572
5574
 
5575
+ class ArrayIntersect(Func):
5576
+ arg_types = {"expressions": True}
5577
+ is_var_len_args = True
5578
+ _sql_names = ["ARRAY_INTERSECT", "ARRAY_INTERSECTION"]
5579
+
5580
+
5581
+ class StPoint(Func):
5582
+ arg_types = {"this": True, "expression": True, "null": False}
5583
+ _sql_names = ["ST_POINT", "ST_MAKEPOINT"]
5584
+
5585
+
5586
+ class StDistance(Func):
5587
+ arg_types = {"this": True, "expression": True, "use_spheroid": False}
5588
+
5589
+
5573
5590
  # https://cloud.google.com/bigquery/docs/reference/standard-sql/timestamp_functions#string
5574
5591
  class String(Func):
5575
5592
  arg_types = {"this": True, "zone": False}
5576
5593
 
5577
5594
 
5578
5595
  class StringToArray(Func):
5579
- arg_types = {"this": True, "expression": True, "null": False}
5596
+ arg_types = {"this": True, "expression": False, "null": False}
5580
5597
  _sql_names = ["STRING_TO_ARRAY", "SPLIT_BY_STRING", "STRTOK_TO_ARRAY"]
5581
5598
 
5582
5599
 
@@ -6706,6 +6723,11 @@ class StartsWith(Func):
6706
6723
  arg_types = {"this": True, "expression": True}
6707
6724
 
6708
6725
 
6726
+ class EndsWith(Func):
6727
+ _sql_names = ["ENDS_WITH", "ENDSWITH"]
6728
+ arg_types = {"this": True, "expression": True}
6729
+
6730
+
6709
6731
  class StrPosition(Func):
6710
6732
  arg_types = {
6711
6733
  "this": True,
@@ -7366,7 +7388,7 @@ def _apply_set_operation(
7366
7388
  **opts,
7367
7389
  ) -> S:
7368
7390
  return reduce(
7369
- lambda x, y: set_operation(this=x, expression=y, distinct=distinct),
7391
+ lambda x, y: set_operation(this=x, expression=y, distinct=distinct, **opts),
7370
7392
  (maybe_parse(e, dialect=dialect, copy=copy, **opts) for e in expressions),
7371
7393
  )
7372
7394
 
@@ -7962,7 +7984,15 @@ def to_table(
7962
7984
  if isinstance(sql_path, Table):
7963
7985
  return maybe_copy(sql_path, copy=copy)
7964
7986
 
7965
- table = maybe_parse(sql_path, into=Table, dialect=dialect)
7987
+ try:
7988
+ table = maybe_parse(sql_path, into=Table, dialect=dialect)
7989
+ except ParseError:
7990
+ catalog, db, this = split_num_words(sql_path, ".", 3)
7991
+
7992
+ if not this:
7993
+ raise
7994
+
7995
+ table = table_(this, db=db, catalog=catalog)
7966
7996
 
7967
7997
  for k, v in kwargs.items():
7968
7998
  table.set(k, v)
@@ -8110,7 +8140,7 @@ def column(
8110
8140
 
8111
8141
  @t.overload
8112
8142
  def column(
8113
- col: str | Identifier,
8143
+ col: str | Identifier | Star,
8114
8144
  table: t.Optional[str | Identifier] = None,
8115
8145
  db: t.Optional[str | Identifier] = None,
8116
8146
  catalog: t.Optional[str | Identifier] = None,
@@ -8147,8 +8177,11 @@ def column(
8147
8177
  Returns:
8148
8178
  The new Column instance.
8149
8179
  """
8180
+ if not isinstance(col, Star):
8181
+ col = to_identifier(col, quoted=quoted, copy=copy)
8182
+
8150
8183
  this = Column(
8151
- this=to_identifier(col, quoted=quoted, copy=copy),
8184
+ this=col,
8152
8185
  table=to_identifier(table, quoted=quoted, copy=copy),
8153
8186
  db=to_identifier(db, quoted=quoted, copy=copy),
8154
8187
  catalog=to_identifier(catalog, quoted=quoted, copy=copy),
sqlglot/generator.py CHANGED
@@ -1018,6 +1018,7 @@ class Generator(metaclass=_Generator):
1018
1018
  persisted = " PERSISTED"
1019
1019
  else:
1020
1020
  persisted = ""
1021
+
1021
1022
  return f"AS {this}{persisted}"
1022
1023
 
1023
1024
  def autoincrementcolumnconstraint_sql(self, _) -> str:
@@ -1079,9 +1080,6 @@ class Generator(metaclass=_Generator):
1079
1080
  def notnullcolumnconstraint_sql(self, expression: exp.NotNullColumnConstraint) -> str:
1080
1081
  return f"{'' if expression.args.get('allow_null') else 'NOT '}NULL"
1081
1082
 
1082
- def transformcolumnconstraint_sql(self, expression: exp.TransformColumnConstraint) -> str:
1083
- return f"AS {self.sql(expression, 'this')}"
1084
-
1085
1083
  def primarykeycolumnconstraint_sql(self, expression: exp.PrimaryKeyColumnConstraint) -> str:
1086
1084
  desc = expression.args.get("desc")
1087
1085
  if desc is not None:
@@ -3500,6 +3498,10 @@ class Generator(metaclass=_Generator):
3500
3498
  def addconstraint_sql(self, expression: exp.AddConstraint) -> str:
3501
3499
  return f"ADD {self.expressions(expression)}"
3502
3500
 
3501
+ def addpartition_sql(self, expression: exp.AddPartition) -> str:
3502
+ exists = "IF NOT EXISTS " if expression.args.get("exists") else ""
3503
+ return f"ADD {exists}{self.sql(expression.this)}"
3504
+
3503
3505
  def distinct_sql(self, expression: exp.Distinct) -> str:
3504
3506
  this = self.expressions(expression, flat=True)
3505
3507
 
sqlglot/parser.py CHANGED
@@ -933,9 +933,12 @@ class Parser(metaclass=_Parser):
933
933
  PIPE_SYNTAX_TRANSFORM_PARSERS = {
934
934
  "SELECT": lambda self, query: self._parse_pipe_syntax_select(query),
935
935
  "WHERE": lambda self, query: self._parse_pipe_syntax_where(query),
936
- "ORDER BY": lambda self, query: query.order_by(self._parse_order(), copy=False),
936
+ "ORDER BY": lambda self, query: query.order_by(
937
+ self._parse_order(), append=False, copy=False
938
+ ),
937
939
  "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query),
938
940
  "OFFSET": lambda self, query: query.offset(self._parse_offset(), copy=False),
941
+ "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query),
939
942
  }
940
943
 
941
944
  PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
@@ -1124,25 +1127,6 @@ class Parser(metaclass=_Parser):
1124
1127
  "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
1125
1128
  }
1126
1129
 
1127
- def _parse_pipe_syntax_select(self, query: exp.Query) -> exp.Query:
1128
- select = self._parse_select()
1129
- if isinstance(select, exp.Select):
1130
- return select.from_(query.subquery(copy=False), copy=False)
1131
- return query
1132
-
1133
- def _parse_pipe_syntax_where(self, query: exp.Query) -> exp.Query:
1134
- where = self._parse_where()
1135
- return query.where(where, copy=False)
1136
-
1137
- def _parse_pipe_syntax_limit(self, query: exp.Query) -> exp.Query:
1138
- limit = self._parse_limit()
1139
- offset = self._parse_offset()
1140
- if limit:
1141
- query.limit(limit, copy=False)
1142
- if offset:
1143
- query.offset(offset, copy=False)
1144
- return query
1145
-
1146
1130
  def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expression:
1147
1131
  klass = (
1148
1132
  exp.PartitionedByBucket
@@ -1523,6 +1507,9 @@ class Parser(metaclass=_Parser):
1523
1507
  # Whether the 'AS' keyword is optional in the CTE definition syntax
1524
1508
  OPTIONAL_ALIAS_TOKEN_CTE = True
1525
1509
 
1510
+ # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
1511
+ ALTER_RENAME_REQUIRES_COLUMN = True
1512
+
1526
1513
  __slots__ = (
1527
1514
  "error_level",
1528
1515
  "error_message_context",
@@ -1536,6 +1523,7 @@ class Parser(metaclass=_Parser):
1536
1523
  "_next",
1537
1524
  "_prev",
1538
1525
  "_prev_comments",
1526
+ "_pipe_cte_counter",
1539
1527
  )
1540
1528
 
1541
1529
  # Autofilled
@@ -1566,6 +1554,7 @@ class Parser(metaclass=_Parser):
1566
1554
  self._next = None
1567
1555
  self._prev = None
1568
1556
  self._prev_comments = None
1557
+ self._pipe_cte_counter = 0
1569
1558
 
1570
1559
  def parse(
1571
1560
  self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
@@ -3262,9 +3251,11 @@ class Parser(metaclass=_Parser):
3262
3251
  elif self._match(TokenType.VALUES, advance=False):
3263
3252
  this = self._parse_derived_table_values()
3264
3253
  elif from_:
3265
- this = exp.select("*").from_(from_.this, copy=False)
3266
3254
  if self._match(TokenType.PIPE_GT, advance=False):
3267
- return self._parse_pipe_syntax_query(this)
3255
+ return self._parse_pipe_syntax_query(
3256
+ exp.Select().from_(from_.this, append=False, copy=False)
3257
+ )
3258
+ this = exp.select("*").from_(from_.this, copy=False)
3268
3259
  elif self._match(TokenType.SUMMARIZE):
3269
3260
  table = self._match(TokenType.TABLE)
3270
3261
  this = self._parse_select() or self._parse_string() or self._parse_table()
@@ -5130,6 +5121,14 @@ class Parser(metaclass=_Parser):
5130
5121
  exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
5131
5122
  )
5132
5123
 
5124
+ def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]:
5125
+ type_name = identifier.name
5126
+
5127
+ while self._match(TokenType.DOT):
5128
+ type_name = f"{type_name}.{self._advance_any() and self._prev.text}"
5129
+
5130
+ return exp.DataType.build(type_name, udt=True)
5131
+
5133
5132
  def _parse_types(
5134
5133
  self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
5135
5134
  ) -> t.Optional[exp.Expression]:
@@ -5151,12 +5150,7 @@ class Parser(metaclass=_Parser):
5151
5150
  if tokens[0].token_type in self.TYPE_TOKENS:
5152
5151
  self._prev = tokens[0]
5153
5152
  elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
5154
- type_name = identifier.name
5155
-
5156
- while self._match(TokenType.DOT):
5157
- type_name = f"{type_name}.{self._advance_any() and self._prev.text}"
5158
-
5159
- this = exp.DataType.build(type_name, udt=True)
5153
+ this = self._parse_user_defined_type(identifier)
5160
5154
  else:
5161
5155
  self._retreat(self._index - 1)
5162
5156
  return None
@@ -5514,18 +5508,12 @@ class Parser(metaclass=_Parser):
5514
5508
  else:
5515
5509
  field = self._parse_field(any_token=True, anonymous_func=True)
5516
5510
 
5511
+ # Function calls can be qualified, e.g., x.y.FOO()
5512
+ # This converts the final AST to a series of Dots leading to the function call
5513
+ # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
5517
5514
  if isinstance(field, (exp.Func, exp.Window)) and this:
5518
- # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) etc
5519
- # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
5520
- this = exp.replace_tree(
5521
- this,
5522
- lambda n: (
5523
- self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
5524
- if n.table
5525
- else n.this
5526
- )
5527
- if isinstance(n, exp.Column)
5528
- else n,
5515
+ this = this.transform(
5516
+ lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n
5529
5517
  )
5530
5518
 
5531
5519
  if op:
@@ -5900,7 +5888,11 @@ class Parser(metaclass=_Parser):
5900
5888
  constraints.append(
5901
5889
  self.expression(
5902
5890
  exp.ColumnConstraint,
5903
- kind=exp.TransformColumnConstraint(this=self._parse_disjunction()),
5891
+ kind=exp.ComputedColumnConstraint(
5892
+ this=self._parse_disjunction(),
5893
+ persisted=self._match_texts(("STORED", "VIRTUAL"))
5894
+ and self._prev.text.upper() == "STORED",
5895
+ ),
5904
5896
  )
5905
5897
  )
5906
5898
 
@@ -7163,16 +7155,6 @@ class Parser(metaclass=_Parser):
7163
7155
 
7164
7156
  return this
7165
7157
 
7166
- def _parse_pipe_syntax_query(self, query: exp.Select) -> exp.Query:
7167
- while self._match(TokenType.PIPE_GT):
7168
- parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper())
7169
- if not parser:
7170
- self.raise_error(f"Unsupported pipe syntax operator: '{self._curr.text.upper()}'.")
7171
- else:
7172
- query = parser(self, query)
7173
-
7174
- return query
7175
-
7176
7158
  def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
7177
7159
  return self._parse_wrapped_csv(self._parse_id_var, optional=optional)
7178
7160
 
@@ -7251,24 +7233,29 @@ class Parser(metaclass=_Parser):
7251
7233
  self._match(TokenType.TABLE)
7252
7234
  return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())
7253
7235
 
7254
- def _parse_add_column(self) -> t.Optional[exp.Expression]:
7236
+ def _parse_add_column(self) -> t.Optional[exp.ColumnDef]:
7255
7237
  if not self._prev.text.upper() == "ADD":
7256
7238
  return None
7257
7239
 
7240
+ start = self._index
7258
7241
  self._match(TokenType.COLUMN)
7242
+
7259
7243
  exists_column = self._parse_exists(not_=True)
7260
7244
  expression = self._parse_field_def()
7261
7245
 
7262
- if expression:
7263
- expression.set("exists", exists_column)
7246
+ if not isinstance(expression, exp.ColumnDef):
7247
+ self._retreat(start)
7248
+ return None
7264
7249
 
7265
- # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
7266
- if self._match_texts(("FIRST", "AFTER")):
7267
- position = self._prev.text
7268
- column_position = self.expression(
7269
- exp.ColumnPosition, this=self._parse_column(), position=position
7270
- )
7271
- expression.set("position", column_position)
7250
+ expression.set("exists", exists_column)
7251
+
7252
+ # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
7253
+ if self._match_texts(("FIRST", "AFTER")):
7254
+ position = self._prev.text
7255
+ column_position = self.expression(
7256
+ exp.ColumnPosition, this=self._parse_column(), position=position
7257
+ )
7258
+ expression.set("position", column_position)
7272
7259
 
7273
7260
  return expression
7274
7261
 
@@ -7285,13 +7272,24 @@ class Parser(metaclass=_Parser):
7285
7272
  )
7286
7273
 
7287
7274
  def _parse_alter_table_add(self) -> t.List[exp.Expression]:
7288
- def _parse_add_column_or_constraint():
7275
+ def _parse_add_alteration() -> t.Optional[exp.Expression]:
7289
7276
  self._match_text_seq("ADD")
7290
7277
  if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
7291
7278
  return self.expression(
7292
7279
  exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
7293
7280
  )
7294
- return self._parse_add_column()
7281
+
7282
+ column_def = self._parse_add_column()
7283
+ if isinstance(column_def, exp.ColumnDef):
7284
+ return column_def
7285
+
7286
+ exists = self._parse_exists(not_=True)
7287
+ if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False):
7288
+ return self.expression(
7289
+ exp.AddPartition, exists=exists, this=self._parse_field(any_token=True)
7290
+ )
7291
+
7292
+ return None
7295
7293
 
7296
7294
  if not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN or self._match_text_seq(
7297
7295
  "COLUMNS"
@@ -7300,7 +7298,7 @@ class Parser(metaclass=_Parser):
7300
7298
 
7301
7299
  return ensure_list(schema) if schema else self._parse_csv(self._parse_field_def)
7302
7300
 
7303
- return self._parse_csv(_parse_add_column_or_constraint)
7301
+ return self._parse_csv(_parse_add_alteration)
7304
7302
 
7305
7303
  def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
7306
7304
  if self._match_texts(self.ALTER_ALTER_PARSERS):
@@ -7378,7 +7376,7 @@ class Parser(metaclass=_Parser):
7378
7376
  return self._parse_csv(self._parse_drop_column)
7379
7377
 
7380
7378
  def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]:
7381
- if self._match(TokenType.COLUMN):
7379
+ if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN:
7382
7380
  exists = self._parse_exists()
7383
7381
  old_column = self._parse_column()
7384
7382
  to = self._match_text_seq("TO")
@@ -8328,3 +8326,146 @@ class Parser(metaclass=_Parser):
8328
8326
  expression = self.expression(exp.Identifier, this=token.text, **kwargs)
8329
8327
  expression.update_positions(token)
8330
8328
  return expression
8329
+
8330
+ def _build_pipe_cte(self, query: exp.Query, expressions: t.List[exp.Expression]) -> exp.Query:
8331
+ if query.selects:
8332
+ self._pipe_cte_counter += 1
8333
+ new_cte = f"__tmp{self._pipe_cte_counter}"
8334
+
8335
+ # For `exp.Select`, generated CTEs are attached to its `with`
8336
+ # For `exp.SetOperation`, generated CTEs are attached to the `with` of its LHS, accessed via `this`
8337
+ with_ = (
8338
+ query.args.get("with")
8339
+ if isinstance(query, exp.Select)
8340
+ else query.this.args.get("with")
8341
+ )
8342
+ ctes = with_.pop() if with_ else None
8343
+
8344
+ new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False)
8345
+ if ctes:
8346
+ new_select.set("with", ctes)
8347
+
8348
+ return new_select.with_(new_cte, as_=query, copy=False)
8349
+
8350
+ return query.select(*expressions, copy=False)
8351
+
8352
+ def _parse_pipe_syntax_select(self, query: exp.Query) -> exp.Query:
8353
+ select = self._parse_select()
8354
+ if isinstance(select, exp.Select):
8355
+ return self._build_pipe_cte(query, select.expressions)
8356
+
8357
+ return query
8358
+
8359
+ def _parse_pipe_syntax_where(self, query: exp.Query) -> exp.Query:
8360
+ where = self._parse_where()
8361
+ return query.where(where, copy=False)
8362
+
8363
+ def _parse_pipe_syntax_limit(self, query: exp.Query) -> exp.Query:
8364
+ limit = self._parse_limit()
8365
+ offset = self._parse_offset()
8366
+ if limit:
8367
+ curr_limit = query.args.get("limit", limit)
8368
+ if curr_limit.expression.to_py() >= limit.expression.to_py():
8369
+ query.limit(limit, copy=False)
8370
+ if offset:
8371
+ curr_offset = query.args.get("offset")
8372
+ curr_offset = curr_offset.expression.to_py() if curr_offset else 0
8373
+ query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False)
8374
+ return query
8375
+
8376
+ def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]:
8377
+ this = self._parse_assignment()
8378
+ if self._match_text_seq("GROUP", "AND", advance=False):
8379
+ return this
8380
+
8381
+ this = self._parse_alias(this)
8382
+
8383
+ if self._match_set((TokenType.ASC, TokenType.DESC), advance=False):
8384
+ return self._parse_ordered(lambda: this)
8385
+
8386
+ return this
8387
+
8388
+ def _parse_pipe_syntax_aggregate_group_order_by(
8389
+ self, query: exp.Query, group_by_exists: bool = True
8390
+ ) -> exp.Query:
8391
+ expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields)
8392
+ aggregates_or_groups, orders = [], []
8393
+ for element in expr:
8394
+ if isinstance(element, exp.Ordered):
8395
+ this = element.this
8396
+ if isinstance(this, exp.Alias):
8397
+ element.set("this", this.args["alias"])
8398
+ orders.append(element)
8399
+ else:
8400
+ this = element
8401
+ aggregates_or_groups.append(this)
8402
+
8403
+ if group_by_exists and isinstance(query, exp.Select):
8404
+ query = query.select(*aggregates_or_groups, copy=False).group_by(
8405
+ *[projection.args.get("alias", projection) for projection in aggregates_or_groups],
8406
+ copy=False,
8407
+ )
8408
+ else:
8409
+ query = query.select(*aggregates_or_groups, append=False, copy=False)
8410
+
8411
+ if orders:
8412
+ return query.order_by(*orders, append=False, copy=False)
8413
+
8414
+ return query
8415
+
8416
+ def _parse_pipe_syntax_aggregate(self, query: exp.Query) -> exp.Query:
8417
+ self._match_text_seq("AGGREGATE")
8418
+ query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False)
8419
+
8420
+ if self._match(TokenType.GROUP_BY) or (
8421
+ self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY)
8422
+ ):
8423
+ return self._parse_pipe_syntax_aggregate_group_order_by(query)
8424
+
8425
+ return query
8426
+
8427
+ def _parse_pipe_syntax_set_operator(
8428
+ self, query: t.Optional[exp.Query]
8429
+ ) -> t.Optional[exp.Query]:
8430
+ first_setop = self.parse_set_operation(this=query)
8431
+
8432
+ if not first_setop or not query:
8433
+ return None
8434
+
8435
+ if not query.selects:
8436
+ query.select("*", copy=False)
8437
+
8438
+ this = first_setop.this.pop()
8439
+ distinct = first_setop.args.pop("distinct")
8440
+ setops = [first_setop.expression.pop(), *self._parse_expressions()]
8441
+
8442
+ if isinstance(first_setop, exp.Union):
8443
+ query = query.union(*setops, distinct=distinct, copy=False, **first_setop.args)
8444
+ elif isinstance(first_setop, exp.Except):
8445
+ query = query.except_(*setops, distinct=distinct, copy=False, **first_setop.args)
8446
+ else:
8447
+ query = query.intersect(*setops, distinct=distinct, copy=False, **first_setop.args)
8448
+
8449
+ return self._build_pipe_cte(
8450
+ query, [projection.args.get("alias", projection) for projection in this.expressions]
8451
+ )
8452
+
8453
+ def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]:
8454
+ while self._match(TokenType.PIPE_GT):
8455
+ start = self._curr
8456
+ parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper())
8457
+ if not parser:
8458
+ set_op_query = self._parse_pipe_syntax_set_operator(query)
8459
+ if not set_op_query:
8460
+ self._retreat(start)
8461
+ self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.")
8462
+ break
8463
+
8464
+ query = set_op_query
8465
+ else:
8466
+ query = parser(self, query)
8467
+
8468
+ if query and not query.selects:
8469
+ return query.select("*", copy=False)
8470
+
8471
+ return query
sqlglot/transforms.py CHANGED
@@ -842,113 +842,122 @@ def struct_kv_to_alias(expression: exp.Expression) -> exp.Expression:
842
842
 
843
843
 
844
844
  def eliminate_join_marks(expression: exp.Expression) -> exp.Expression:
845
- """
846
- Remove join marks from an AST. This rule assumes that all marked columns are qualified.
847
- If this does not hold for a query, consider running `sqlglot.optimizer.qualify` first.
845
+ """https://docs.oracle.com/cd/B19306_01/server.102/b14200/queries006.htm#sthref3178
848
846
 
849
- For example,
850
- SELECT * FROM a, b WHERE a.id = b.id(+) -- ... is converted to
851
- SELECT * FROM a LEFT JOIN b ON a.id = b.id -- this
847
+ 1. You cannot specify the (+) operator in a query block that also contains FROM clause join syntax.
852
848
 
853
- Args:
854
- expression: The AST to remove join marks from.
849
+ 2. The (+) operator can appear only in the WHERE clause or, in the context of left-correlation (that is, when specifying the TABLE clause) in the FROM clause, and can be applied only to a column of a table or view.
855
850
 
856
- Returns:
857
- The AST with join marks removed.
851
+ The (+) operator does not produce an outer join if you specify one table in the outer query and the other table in an inner query.
852
+
853
+ You cannot use the (+) operator to outer-join a table to itself, although self joins are valid.
854
+
855
+ The (+) operator can be applied only to a column, not to an arbitrary expression. However, an arbitrary expression can contain one or more columns marked with the (+) operator.
856
+
857
+ A WHERE condition containing the (+) operator cannot be combined with another condition using the OR logical operator.
858
+
859
+ A WHERE condition cannot use the IN comparison condition to compare a column marked with the (+) operator with an expression.
860
+
861
+ A WHERE condition cannot compare any column marked with the (+) operator with a subquery.
862
+
863
+ -- example with WHERE
864
+ SELECT d.department_name, sum(e.salary) as total_salary
865
+ FROM departments d, employees e
866
+ WHERE e.department_id(+) = d.department_id
867
+ group by department_name
868
+
869
+ -- example of left correlation in select
870
+ SELECT d.department_name, (
871
+ SELECT SUM(e.salary)
872
+ FROM employees e
873
+ WHERE e.department_id(+) = d.department_id) AS total_salary
874
+ FROM departments d;
875
+
876
+ -- example of left correlation in from
877
+ SELECT d.department_name, t.total_salary
878
+ FROM departments d, (
879
+ SELECT SUM(e.salary) AS total_salary
880
+ FROM employees e
881
+ WHERE e.department_id(+) = d.department_id
882
+ ) t
858
883
  """
884
+
859
885
  from sqlglot.optimizer.scope import traverse_scope
886
+ from sqlglot.optimizer.normalize import normalize, normalized
887
+ from collections import defaultdict
860
888
 
861
- for scope in traverse_scope(expression):
889
+ # we go in reverse to check the main query for left correlation
890
+ for scope in reversed(traverse_scope(expression)):
862
891
  query = scope.expression
863
892
 
864
893
  where = query.args.get("where")
865
- joins = query.args.get("joins")
894
+ joins = query.args.get("joins", [])
866
895
 
867
- if not where or not joins:
896
+ # knockout: we do not support left correlation (see point 2)
897
+ assert not scope.is_correlated_subquery, "Correlated queries are not supported"
898
+
899
+ # nothing to do - we check it here after knockout above
900
+ if not where or not any(c.args.get("join_mark") for c in where.find_all(exp.Column)):
868
901
  continue
869
902
 
870
- query_from = query.args["from"]
903
+ # make sure we have AND of ORs to have clear join terms
904
+ where = normalize(where.this)
905
+ assert normalized(where), "Cannot normalize JOIN predicates"
871
906
 
872
- # These keep track of the joins to be replaced
873
- new_joins: t.Dict[str, exp.Join] = {}
874
- old_joins = {join.alias_or_name: join for join in joins}
907
+ joins_ons = defaultdict(list) # dict of {name: list of join AND conditions}
908
+ for cond in [where] if not isinstance(where, exp.And) else where.flatten():
909
+ join_cols = [col for col in cond.find_all(exp.Column) if col.args.get("join_mark")]
875
910
 
876
- for column in scope.columns:
877
- if not column.args.get("join_mark"):
911
+ left_join_table = set(col.table for col in join_cols)
912
+ if not left_join_table:
878
913
  continue
879
914
 
880
- predicate = column.find_ancestor(exp.Predicate, exp.Select)
881
- assert isinstance(
882
- predicate, exp.Binary
883
- ), "Columns can only be marked with (+) when involved in a binary operation"
884
-
885
- predicate_parent = predicate.parent
886
- join_predicate = predicate.pop()
887
-
888
- left_columns = [
889
- c for c in join_predicate.left.find_all(exp.Column) if c.args.get("join_mark")
890
- ]
891
- right_columns = [
892
- c for c in join_predicate.right.find_all(exp.Column) if c.args.get("join_mark")
893
- ]
894
-
895
915
  assert not (
896
- left_columns and right_columns
897
- ), "The (+) marker cannot appear in both sides of a binary predicate"
898
-
899
- marked_column_tables = set()
900
- for col in left_columns or right_columns:
901
- table = col.table
902
- assert table, f"Column {col} needs to be qualified with a table"
916
+ len(left_join_table) > 1
917
+ ), "Cannot combine JOIN predicates from different tables"
903
918
 
919
+ for col in join_cols:
904
920
  col.set("join_mark", False)
905
- marked_column_tables.add(table)
906
921
 
907
- assert (
908
- len(marked_column_tables) == 1
909
- ), "Columns of only a single table can be marked with (+) in a given binary predicate"
910
-
911
- # Add predicate if join already copied, or add join if it is new
912
- join_this = old_joins.get(col.table, query_from).this
913
- existing_join = new_joins.get(join_this.alias_or_name)
914
- if existing_join:
915
- existing_join.set("on", exp.and_(existing_join.args["on"], join_predicate))
916
- else:
917
- new_joins[join_this.alias_or_name] = exp.Join(
918
- this=join_this.copy(), on=join_predicate.copy(), kind="LEFT"
919
- )
922
+ joins_ons[left_join_table.pop()].append(cond)
920
923
 
921
- # If the parent of the target predicate is a binary node, then it now has only one child
922
- if isinstance(predicate_parent, exp.Binary):
923
- if predicate_parent.left is None:
924
- predicate_parent.replace(predicate_parent.right)
925
- else:
926
- predicate_parent.replace(predicate_parent.left)
924
+ old_joins = {join.alias_or_name: join for join in joins}
925
+ new_joins = {}
926
+ query_from = query.args["from"]
927
+
928
+ for table, predicates in joins_ons.items():
929
+ join_what = old_joins.get(table, query_from).this.copy()
930
+ new_joins[join_what.alias_or_name] = exp.Join(
931
+ this=join_what, on=exp.and_(*predicates), kind="LEFT"
932
+ )
927
933
 
928
- only_old_join_sources = old_joins.keys() - new_joins.keys()
934
+ for p in predicates:
935
+ while isinstance(p.parent, exp.Paren):
936
+ p.parent.replace(p)
937
+
938
+ parent = p.parent
939
+ p.pop()
940
+ if isinstance(parent, exp.Binary):
941
+ parent.replace(parent.right if parent.left is None else parent.left)
942
+ elif isinstance(parent, exp.Where):
943
+ parent.pop()
929
944
 
930
945
  if query_from.alias_or_name in new_joins:
946
+ only_old_joins = old_joins.keys() - new_joins.keys()
931
947
  assert (
932
- len(only_old_join_sources) >= 1
948
+ len(only_old_joins) >= 1
933
949
  ), "Cannot determine which table to use in the new FROM clause"
934
950
 
935
- new_from_name = list(only_old_join_sources)[0]
936
- query.set("from", exp.From(this=old_joins.pop(new_from_name).this))
937
- only_old_join_sources.remove(new_from_name)
951
+ new_from_name = list(only_old_joins)[0]
952
+ query.set("from", exp.From(this=old_joins[new_from_name].this))
938
953
 
939
954
  if new_joins:
940
- only_old_join_expressions = []
941
- for old_join_source in only_old_join_sources:
942
- old_join_expression = old_joins[old_join_source]
943
- if not old_join_expression.kind:
944
- old_join_expression.set("kind", "CROSS")
945
-
946
- only_old_join_expressions.append(old_join_expression)
947
-
948
- query.set("joins", list(new_joins.values()) + only_old_join_expressions)
949
-
950
- if not where.this:
951
- where.pop()
955
+ for n, j in old_joins.items(): # preserve any other joins
956
+ if n not in new_joins and n != query.args["from"].name:
957
+ if not j.kind:
958
+ j.set("kind", "CROSS")
959
+ new_joins[n] = j
960
+ query.set("joins", list(new_joins.values()))
952
961
 
953
962
  return expression
954
963
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sqlglot
3
- Version: 26.25.3
3
+ Version: 26.27.1
4
4
  Summary: An easily customizable SQL parser and transpiler
5
5
  Author-email: Toby Mao <toby.mao@gmail.com>
6
6
  License: MIT License
@@ -558,6 +558,7 @@ See also: [Writing a Python SQL engine from scratch](https://github.com/tobymao/
558
558
  * [Dagster](https://github.com/dagster-io/dagster)
559
559
  * [Fugue](https://github.com/fugue-project/fugue)
560
560
  * [Ibis](https://github.com/ibis-project/ibis)
561
+ * [dlt](https://github.com/dlt-hub/dlt)
561
562
  * [mysql-mimic](https://github.com/kelsin/mysql-mimic)
562
563
  * [Querybook](https://github.com/pinterest/querybook)
563
564
  * [Quokka](https://github.com/marsupialtail/quokka)
@@ -1,27 +1,27 @@
1
1
  sqlglot/__init__.py,sha256=za08rtdPh2v7dOpGdNomttlIVGgTrKja7rPd6sQwaTg,5391
2
2
  sqlglot/__main__.py,sha256=022c173KqxsiABWTEpUIq_tJUxuNiW7a7ABsxBXqvu8,2069
3
3
  sqlglot/_typing.py,sha256=-1HPyr3w5COlSJWqlgt8jhFk2dyMvBuvVBqIX1wyVCM,642
4
- sqlglot/_version.py,sha256=igXuzzRuyME6XqNXRLY-jNKxfl1yIclYIKvVbIffUII,515
4
+ sqlglot/_version.py,sha256=tGzmcwxazE8ZaUBuCO5phuigC0f9SB40km_TjYfnb90,515
5
5
  sqlglot/diff.py,sha256=PtOllQMQa1Sw1-V2Y8eypmDqGujXYPaTOp_WLsWkAWk,17314
6
6
  sqlglot/errors.py,sha256=QNKMr-pzLUDR-tuMmn_GK6iMHUIVdb_YSJ_BhGEvuso,2126
7
- sqlglot/expressions.py,sha256=uyRDFl1k--uVrVAhY0zu54tn49a5adE6n7C8RpnahEc,242084
8
- sqlglot/generator.py,sha256=KOX6s9hQ8wP_VqvGeNmq5uBjnb8KDe9G-BI_aFtW3s4,212376
7
+ sqlglot/expressions.py,sha256=HspDzfH5_xnGPUvMPcwtNmIHaLbIj_NGmBWcvm8qIKw,242992
8
+ sqlglot/generator.py,sha256=4iJ0BxkzinmosIhfhb34xjxaFpzw3Zo7fvmknaf5uRs,212432
9
9
  sqlglot/helper.py,sha256=9nZjFVRBtMKFC3EdzpDQ6jkazFO19po6BF8xHiNGZIo,15111
10
10
  sqlglot/jsonpath.py,sha256=dKdI3PNINNGimmSse2IIv-GbPN_3lXncXh_70QH7Lss,7664
11
11
  sqlglot/lineage.py,sha256=kXBDSErmZZluZx_kkrMj4MPEOAbkvcbX1tbOW7Bpl-U,15303
12
- sqlglot/parser.py,sha256=uP_cKZFk1zjNXbLrYWvkciexTOaJm22Xyq005RN1IFs,314600
12
+ sqlglot/parser.py,sha256=Jv02-ikrk8uqmYR4nxKgLUj704lIx1_ugXf-hShkZ8w,320240
13
13
  sqlglot/planner.py,sha256=ql7Li-bWJRcyXzNaZy_n6bQ6B2ZfunEIB8Ztv2xaxq4,14634
14
14
  sqlglot/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
15
  sqlglot/schema.py,sha256=13H2qKQs27EKdTpDLOvcNnSTDAUbYNKjWtJs4aQCSOA,20509
16
16
  sqlglot/serde.py,sha256=DQVJ95WrIvhYfe02Ytb4NQug2aMwDCEwpMBW1LKDqzE,2031
17
17
  sqlglot/time.py,sha256=Q62gv6kL40OiRBF6BMESxKJcMVn7ZLNw7sv8H34z5FI,18400
18
18
  sqlglot/tokens.py,sha256=R0B8GQSbQ9GoDc0NlaT5Tc8RjgEOx2IYIkYU5rY8Rg8,48742
19
- sqlglot/transforms.py,sha256=iTwRPMHTyRx_RG25ItSOnigw_v2tnG9cgwMq0Nwcy2U,39778
19
+ sqlglot/transforms.py,sha256=3jpbHeVTLK9hmQi5f3_vmK-5jZB32_ittCkO7poxCs4,40631
20
20
  sqlglot/trie.py,sha256=v27uXMrHfqrXlJ6GmeTSMovsB_3o0ctnlKhdNt7W6fI,2245
21
21
  sqlglot/dialects/__init__.py,sha256=aZTLpe2SwgWqiVrRabmfV8TVLPVHFydGwb_zhcVhRss,3499
22
22
  sqlglot/dialects/athena.py,sha256=xjy75ej0T3douCUfFKhE1I3kqvPEuQY29x24WG1--Vw,6307
23
- sqlglot/dialects/bigquery.py,sha256=HdON3UjuyNb6Zk_UxQFnd3UTAhPBV0P4sFKM22XxWzo,52130
24
- sqlglot/dialects/clickhouse.py,sha256=TXZKzjE7Xx65P4h1iaqxHCRU4zIHT8GUxgma5Qo_e-Q,56469
23
+ sqlglot/dialects/bigquery.py,sha256=PIRhlNIj6I5iXPxR2_9q1OWXvy4ovVB_ae5qe8SWV80,52713
24
+ sqlglot/dialects/clickhouse.py,sha256=0ahX0zjIwN9-RzfNyITBHs9PsgQXjL0uMRlRgYz9crI,56520
25
25
  sqlglot/dialects/databricks.py,sha256=8PoaiP8PfiBjpheRiua-rO_HzX2TRUXqc3DnlQ8zYrg,4481
26
26
  sqlglot/dialects/dialect.py,sha256=uuek7l3vUf8OB987UUxzNqdsZdrSj1TtmImVyxbI7Go,68463
27
27
  sqlglot/dialects/doris.py,sha256=eC7Ct-iz7p4Usz659NkelUFhm-GmVolIZy5uaBvgjaA,14397
@@ -29,20 +29,20 @@ sqlglot/dialects/drill.py,sha256=FOh7_KjPx_77pv0DiHKZog0CcmzqeF9_PEmGnJ1ESSM,582
29
29
  sqlglot/dialects/druid.py,sha256=kh3snZtneehNOWqs3XcPjsrhNaRbkCQ8E4hHbWJ1fHM,690
30
30
  sqlglot/dialects/duckdb.py,sha256=alEYXBW5uUApRC8IRYnsapeiJq7JJwUmrK18C56RYsg,47780
31
31
  sqlglot/dialects/dune.py,sha256=gALut-fFfN2qMsr8LvZ1NQK3F3W9z2f4PwMvTMXVVVg,375
32
- sqlglot/dialects/hive.py,sha256=IKAM2elf_n3LgRcPK_4-JuE1j6shd6FhE1QJvBaP55U,31665
32
+ sqlglot/dialects/hive.py,sha256=PO6DLT1kHL-U2kFfV1CsNgQFT7A32LuGN71gnTXEOfY,31728
33
33
  sqlglot/dialects/materialize.py,sha256=_DPLPt8YrdQIIXNrGJw1IMcGOoAEJ9NO9X9pDfy4hxs,3494
34
- sqlglot/dialects/mysql.py,sha256=PnhqX2B15J71WUROefPTc7ZOP0vybbkZGWIDrxYN5Dc,48159
34
+ sqlglot/dialects/mysql.py,sha256=prZecn3zeoifZX7l54UuLG64ar7I-or_z9lF-rT8bds,49233
35
35
  sqlglot/dialects/oracle.py,sha256=llxu2LzndrsGyceTod-Leh03vuPWEUKzVHB5gQY-tY8,15313
36
- sqlglot/dialects/postgres.py,sha256=ysZCX-_gd3wVVUzT05D-zgXlPy19jAdorVsVr6NEVtA,30212
36
+ sqlglot/dialects/postgres.py,sha256=KUyMoLkm1_sZKUbdjn6bjXx9xz7sbEMKa-fl5Mzfrsk,31025
37
37
  sqlglot/dialects/presto.py,sha256=ltKbQ44efeq1HM0T8Qq0rsBSx6B6bF9RoKtUBVeoz70,33155
38
38
  sqlglot/dialects/prql.py,sha256=OF2LfDb4uzKIF7kpCfpL5G7VP1pnzLbjfW5QFUnuPvo,7803
39
39
  sqlglot/dialects/redshift.py,sha256=H8H8lGizHIAd4qLoPeFchyiGZKO1I8U_B058woukuGw,15366
40
- sqlglot/dialects/risingwave.py,sha256=A7XAhtuipA5ummnDNfKBJ8OLD9Jis2mOK9hXKSW5Jf0,2684
41
- sqlglot/dialects/snowflake.py,sha256=KKxy98xvdTNj7Uhwtpi-5EJo0teyDbGYGGEq7rIIwrA,61295
42
- sqlglot/dialects/spark.py,sha256=fBj6MpL0CR5RGhdyd6RuO5OV_IuKYFsK7cPoktW74Ws,7594
40
+ sqlglot/dialects/risingwave.py,sha256=hwEOPjMw0ZM_3fjQcBUE00oy6I8V6mzYOOYmcwwS8mw,2898
41
+ sqlglot/dialects/snowflake.py,sha256=m4Gekw4NhoD3q4WF1TJhetRmmwkh8XG9Rqq8mL3P31E,61761
42
+ sqlglot/dialects/spark.py,sha256=fbmiTKAQiKqG9yE_HAxYGgQiOjdxB9tJyjOtgdqF100,7645
43
43
  sqlglot/dialects/spark2.py,sha256=8er7nHDm5Wc57m9AOxKN0sd_DVzbhAL44H_udlFh9O8,14258
44
- sqlglot/dialects/sqlite.py,sha256=fB3F90qfWwTX1oRuvnQp6y_VoqjlPer7LNhYBhjVl8E,12020
45
- sqlglot/dialects/starrocks.py,sha256=_NjJukTInnV96P8VfIvQkL64kyMmeShArkyFmbd7bec,10656
44
+ sqlglot/dialects/sqlite.py,sha256=UzJwIdY1PsLArMxNt5lKvk8COHvXeo4FoqW41LqVmM8,12440
45
+ sqlglot/dialects/starrocks.py,sha256=fHNgvq5Nz7dI4QUWCTOO5VDOYjasBxRRlcg9TbY0UZE,11235
46
46
  sqlglot/dialects/tableau.py,sha256=oIawDzUITxGCWaEMB8OaNMPWhbC3U-2y09pYPm4eazc,2190
47
47
  sqlglot/dialects/teradata.py,sha256=xWa-9kSTsT-eM1NePi_oIM1dPHmXW89GLU5Uda3_6Ao,14036
48
48
  sqlglot/dialects/trino.py,sha256=wgLsiX1NQvjGny_rgrU1e2r6kK1LD0KgaSdIDrYmjD0,4285
@@ -72,8 +72,8 @@ sqlglot/optimizer/qualify_tables.py,sha256=5f5enBAh-bpNB9ewF97W9fx9h1TGXj1Ih5fnc
72
72
  sqlglot/optimizer/scope.py,sha256=Fqz9GpBqO1GWzRAnqdflXXNz44ot_1JqVBC-DnYAU_E,30063
73
73
  sqlglot/optimizer/simplify.py,sha256=S0Blqg5Mq2KRRWhWz-Eivch9sBjBhg9fRJA6EdBzj2g,50704
74
74
  sqlglot/optimizer/unnest_subqueries.py,sha256=kzWUVDlxs8z9nmRx-8U-pHXPtVZhEIwkKqmKhr2QLvc,10908
75
- sqlglot-26.25.3.dist-info/licenses/LICENSE,sha256=AI3__mHZfOtzY3EluR_pIYBm3_pE7TbVx7qaHxoZ114,1065
76
- sqlglot-26.25.3.dist-info/METADATA,sha256=gJEaU_bTc08J57XhQg9zwTLIG1qidHiPVTlU_2egG_4,20692
77
- sqlglot-26.25.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
78
- sqlglot-26.25.3.dist-info/top_level.txt,sha256=5kRskCGA_gVADF9rSfSzPdLHXqvfMusDYeHePfNY2nQ,8
79
- sqlglot-26.25.3.dist-info/RECORD,,
75
+ sqlglot-26.27.1.dist-info/licenses/LICENSE,sha256=AI3__mHZfOtzY3EluR_pIYBm3_pE7TbVx7qaHxoZ114,1065
76
+ sqlglot-26.27.1.dist-info/METADATA,sha256=R_0FXBifra90Z576tjTOsln1IrNAgj-vTzJL1-izN_4,20732
77
+ sqlglot-26.27.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
78
+ sqlglot-26.27.1.dist-info/top_level.txt,sha256=5kRskCGA_gVADF9rSfSzPdLHXqvfMusDYeHePfNY2nQ,8
79
+ sqlglot-26.27.1.dist-info/RECORD,,