sqlglot 26.25.3__py3-none-any.whl → 26.27.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlglot/_version.py +2 -2
- sqlglot/dialects/bigquery.py +10 -0
- sqlglot/dialects/clickhouse.py +1 -0
- sqlglot/dialects/hive.py +1 -0
- sqlglot/dialects/mysql.py +25 -0
- sqlglot/dialects/postgres.py +23 -1
- sqlglot/dialects/risingwave.py +4 -0
- sqlglot/dialects/snowflake.py +11 -1
- sqlglot/dialects/spark.py +1 -0
- sqlglot/dialects/sqlite.py +11 -0
- sqlglot/dialects/starrocks.py +16 -1
- sqlglot/expressions.py +50 -17
- sqlglot/generator.py +5 -3
- sqlglot/parser.py +205 -64
- sqlglot/transforms.py +87 -78
- {sqlglot-26.25.3.dist-info → sqlglot-26.27.1.dist-info}/METADATA +2 -1
- {sqlglot-26.25.3.dist-info → sqlglot-26.27.1.dist-info}/RECORD +20 -20
- {sqlglot-26.25.3.dist-info → sqlglot-26.27.1.dist-info}/WHEEL +0 -0
- {sqlglot-26.25.3.dist-info → sqlglot-26.27.1.dist-info}/licenses/LICENSE +0 -0
- {sqlglot-26.25.3.dist-info → sqlglot-26.27.1.dist-info}/top_level.txt +0 -0
sqlglot/_version.py
CHANGED
sqlglot/dialects/bigquery.py
CHANGED
@@ -525,6 +525,16 @@ class BigQuery(Dialect):
         LOG_DEFAULTS_TO_LN = True
         SUPPORTS_IMPLICIT_UNNEST = True
 
+        # BigQuery does not allow ASC/DESC to be used as an identifier
+        ID_VAR_TOKENS = parser.Parser.ID_VAR_TOKENS - {TokenType.ASC, TokenType.DESC}
+        ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC}
+        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC}
+        COMMENT_TABLE_ALIAS_TOKENS = parser.Parser.COMMENT_TABLE_ALIAS_TOKENS - {
+            TokenType.ASC,
+            TokenType.DESC,
+        }
+        UPDATE_ALIAS_TOKENS = parser.Parser.UPDATE_ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC}
+
         FUNCTIONS = {
             **parser.Parser.FUNCTIONS,
             "CONTAINS_SUBSTR": _build_contains_substring,
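A quick sketch of what this parser change means in practice (not part of the diff; it assumes the public sqlglot.parse_one API):

import sqlglot
from sqlglot.errors import ParseError

# ASC/DESC are no longer accepted as identifiers or aliases in BigQuery,
# so an alias named `asc` should now be rejected instead of silently parsed:
try:
    sqlglot.parse_one("SELECT col AS asc FROM t", read="bigquery")
except ParseError as e:
    print(e)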
sqlglot/dialects/clickhouse.py
CHANGED
@@ -1096,6 +1096,7 @@ class ClickHouse(Dialect):
             exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression),
             exp.Rand: rename_func("randCanonical"),
             exp.StartsWith: rename_func("startsWith"),
+            exp.EndsWith: rename_func("endsWith"),
             exp.StrPosition: lambda self, e: strposition_sql(
                 self,
                 e,
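With this mapping, exp.EndsWith (added to expressions.py in this release) renders as ClickHouse's endsWith function; a minimal sketch of the expected transpilation:

import sqlglot

# ENDS_WITH parses into exp.EndsWith and should now generate endsWith():
print(sqlglot.transpile("SELECT ENDS_WITH(x, 'fix')", write="clickhouse")[0])
# expected output: SELECT endsWith(x, 'fix')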
sqlglot/dialects/hive.py
CHANGED
@@ -557,6 +557,7 @@ class Hive(Dialect):
             exp.GenerateDateArray: sequence_sql,
             exp.If: if_sql(),
             exp.ILike: no_ilike_sql,
+            exp.IntDiv: lambda self, e: self.binary(e, "DIV"),
             exp.IsNan: rename_func("ISNAN"),
             exp.JSONExtract: lambda self, e: self.func("GET_JSON_OBJECT", e.this, e.expression),
             exp.JSONExtractScalar: lambda self, e: self.func(
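Hive now renders exp.IntDiv with the DIV binary operator; a hedged example (MySQL is used as the reading dialect since it parses DIV natively):

import sqlglot

print(sqlglot.transpile("SELECT 7 DIV 2", read="mysql", write="hive")[0])
# expected output: SELECT 7 DIV 2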
sqlglot/dialects/mysql.py
CHANGED
@@ -489,6 +489,27 @@ class MySQL(Dialect):
         VALUES_FOLLOWED_BY_PAREN = False
         SUPPORTS_PARTITION_SELECTION = True
 
+        def _parse_generated_as_identity(
+            self,
+        ) -> (
+            exp.GeneratedAsIdentityColumnConstraint
+            | exp.ComputedColumnConstraint
+            | exp.GeneratedAsRowColumnConstraint
+        ):
+            this = super()._parse_generated_as_identity()
+
+            if self._match_texts(("STORED", "VIRTUAL")):
+                persisted = self._prev.text.upper() == "STORED"
+
+                if isinstance(this, exp.ComputedColumnConstraint):
+                    this.set("persisted", persisted)
+                elif isinstance(this, exp.GeneratedAsIdentityColumnConstraint):
+                    this = self.expression(
+                        exp.ComputedColumnConstraint, this=this.expression, persisted=persisted
+                    )
+
+            return this
+
         def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
             this = self._parse_id_var()
             if not self._match(TokenType.L_PAREN):
@@ -1154,6 +1175,10 @@ class MySQL(Dialect):
             "zerofill",
         }
 
+        def computedcolumnconstraint_sql(self, expression: exp.ComputedColumnConstraint) -> str:
+            persisted = "STORED" if expression.args.get("persisted") else "VIRTUAL"
+            return f"GENERATED ALWAYS AS ({self.sql(expression.this.unnest())}) {persisted}"
+
         def array_sql(self, expression: exp.Array) -> str:
             self.unsupported("Arrays are not supported by MySQL")
             return self.function_fallback_sql(expression)
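Together, the new parser hook and generator method should let MySQL round-trip generated columns with their STORED/VIRTUAL persistence; a sketch:

import sqlglot

sql = "CREATE TABLE t (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED)"
# The STORED/VIRTUAL keyword is now captured on exp.ComputedColumnConstraint
# and re-emitted by computedcolumnconstraint_sql:
print(sqlglot.transpile(sql, read="mysql", write="mysql")[0])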
sqlglot/dialects/postgres.py
CHANGED
@@ -36,6 +36,7 @@ from sqlglot.dialects.dialect import (
     strposition_sql,
     count_if_to_sum,
     groupconcat_sql,
+    Version,
 )
 from sqlglot.generator import unsupported_args
 from sqlglot.helper import is_int, seq_get
@@ -255,6 +256,15 @@ def _levenshtein_sql(self: Postgres.Generator, expression: exp.Levenshtein) -> str:
     return rename_func(name)(self, expression)
 
 
+def _versioned_anyvalue_sql(self: Postgres.Generator, expression: exp.AnyValue) -> str:
+    # https://www.postgresql.org/docs/16/functions-aggregate.html
+    # https://www.postgresql.org/about/featurematrix/
+    if self.dialect.version < Version("16.0"):
+        return any_value_to_max_sql(self, expression)
+
+    return rename_func("ANY_VALUE")(self, expression)
+
+
 class Postgres(Dialect):
     INDEX_OFFSET = 1
     TYPED_DIVISION = True
@@ -502,6 +512,18 @@ class Postgres(Dialect):
 
             return this
 
+        def _parse_user_defined_type(
+            self, identifier: exp.Identifier
+        ) -> t.Optional[exp.Expression]:
+            udt_type: exp.Identifier | exp.Dot = identifier
+
+            while self._match(TokenType.DOT):
+                part = self._parse_id_var()
+                if part:
+                    udt_type = exp.Dot(this=udt_type, expression=part)
+
+            return exp.DataType.build(udt_type, udt=True)
+
     class Generator(generator.Generator):
         SINGLE_STRING_INTERVAL = True
         RENAME_TABLE_WITH_DB = False
@@ -546,7 +568,7 @@ class Postgres(Dialect):
 
         TRANSFORMS = {
             **generator.Generator.TRANSFORMS,
-            exp.AnyValue: any_value_to_max_sql,
+            exp.AnyValue: _versioned_anyvalue_sql,
             exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"),
             exp.ArrayFilter: filter_array_using_unnest,
             exp.BitwiseXor: lambda self, e: self.binary(e, "#"),
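The exp.AnyValue transform now consults the dialect version: pre-16 targets fall back to MAX, while 16+ keep ANY_VALUE. A sketch, assuming the dialect string accepts a version setting (which is what populates self.dialect.version):

import sqlglot

sql = "SELECT ANY_VALUE(x) FROM t"
print(sqlglot.transpile(sql, read="postgres", write="postgres, version=15.4")[0])
# expected: SELECT MAX(x) FROM t
print(sqlglot.transpile(sql, read="postgres", write="postgres, version=16.1")[0])
# expected: SELECT ANY_VALUE(x) FROM t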
sqlglot/dialects/risingwave.py
CHANGED
@@ -1,5 +1,6 @@
 from __future__ import annotations
 from sqlglot.dialects.postgres import Postgres
+from sqlglot.generator import Generator
 from sqlglot.tokens import TokenType
 import typing as t
 
@@ -72,3 +73,6 @@ class RisingWave(Postgres):
         }
 
         EXPRESSION_PRECEDES_PROPERTIES_CREATABLES = {"SINK"}
+
+        def computedcolumnconstraint_sql(self, expression: exp.ComputedColumnConstraint) -> str:
+            return Generator.computedcolumnconstraint_sql(self, expression)
sqlglot/dialects/snowflake.py
CHANGED
@@ -863,8 +863,14 @@ class Snowflake(Dialect):
                 properties=self._parse_properties(),
             )
 
-        def _parse_get(self) ->
+        def _parse_get(self) -> t.Optional[exp.Expression]:
             start = self._prev
+
+            # If we detect GET( then we need to parse a function, not a statement
+            if self._match(TokenType.L_PAREN):
+                self._retreat(self._index - 2)
+                return self._parse_expression()
+
             target = self._parse_location_path()
 
             # Parse as command if unquoted file path
@@ -1019,6 +1025,7 @@ class Snowflake(Dialect):
             exp.ArgMin: rename_func("MIN_BY"),
             exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"),
             exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
+            exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"),
             exp.AtTimeZone: lambda self, e: self.func(
                 "CONVERT_TIMEZONE", e.args.get("zone"), e.this
             ),
@@ -1094,11 +1101,14 @@ class Snowflake(Dialect):
             exp.SHA: rename_func("SHA1"),
             exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
             exp.StartsWith: rename_func("STARTSWITH"),
+            exp.EndsWith: rename_func("ENDSWITH"),
             exp.StrPosition: lambda self, e: strposition_sql(
                 self, e, func_name="CHARINDEX", supports_position=True
             ),
             exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)),
+            exp.StringToArray: rename_func("STRTOK_TO_ARRAY"),
             exp.Stuff: rename_func("INSERT"),
+            exp.StPoint: rename_func("ST_MAKEPOINT"),
             exp.TimeAdd: date_delta_sql("TIMEADD"),
             exp.Timestamp: no_timestamp_sql,
             exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"),
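A few of the new Snowflake mappings in action (a sketch using the public transpile API):

import sqlglot

# exp.StPoint: ST_POINT/ST_MAKEPOINT both parse; Snowflake emits ST_MAKEPOINT:
print(sqlglot.transpile("SELECT ST_POINT(1, 2)", write="snowflake")[0])
# expected: SELECT ST_MAKEPOINT(1, 2)

# exp.ArrayIntersect: renamed to Snowflake's ARRAY_INTERSECTION:
print(sqlglot.transpile("SELECT ARRAY_INTERSECT(a, b)", write="snowflake")[0])
# expected: SELECT ARRAY_INTERSECTION(a, b)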
sqlglot/dialects/spark.py
CHANGED
@@ -163,6 +163,7 @@ class Spark(Spark2):
                     move_partitioned_by_to_schema_columns,
                 ]
             ),
+            exp.EndsWith: rename_func("ENDSWITH"),
             exp.PartitionedByProperty: lambda self,
             e: f"PARTITIONED BY {self.wrap(self.expressions(sqls=[_normalize_partition(e) for e in e.this.expressions], skip_first=True))}",
             exp.StartsWith: rename_func("STARTSWITH"),
sqlglot/dialects/sqlite.py
CHANGED
@@ -99,6 +99,8 @@ class SQLite(Dialect):
         KEYWORDS = tokens.Tokenizer.KEYWORDS.copy()
         KEYWORDS.pop("/*+")
 
+        COMMANDS = {*tokens.Tokenizer.COMMANDS, TokenType.REPLACE}
+
     class Parser(parser.Parser):
         FUNCTIONS = {
             **parser.Parser.FUNCTIONS,
@@ -107,7 +109,9 @@ class SQLite(Dialect):
             "DATETIME": lambda args: exp.Anonymous(this="DATETIME", expressions=args),
             "TIME": lambda args: exp.Anonymous(this="TIME", expressions=args),
         }
+
         STRING_ALIASES = True
+        ALTER_RENAME_REQUIRES_COLUMN = False
 
         def _parse_unique(self) -> exp.UniqueColumnConstraint:
             # Do not consume more tokens if UNIQUE is used as a standalone constraint, e.g:
@@ -307,3 +311,10 @@ class SQLite(Dialect):
         @unsupported_args("this")
         def currentschema_sql(self, expression: exp.CurrentSchema) -> str:
             return "'main'"
+
+        def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
+            self.unsupported("SQLite does not support IGNORE NULLS.")
+            return self.sql(expression.this)
+
+        def respectnulls_sql(self, expression: exp.RespectNulls) -> str:
+            return self.sql(expression.this)
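With ALTER_RENAME_REQUIRES_COLUMN = False, SQLite's ALTER TABLE ... RENAME old TO new (without the COLUMN keyword) should now parse as a column rename; a sketch:

import sqlglot

ast = sqlglot.parse_one("ALTER TABLE t RENAME a TO b", read="sqlite")
print(ast.sql(dialect="sqlite"))
# expected to round-trip as a column rename rather than raising a parse error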
sqlglot/dialects/starrocks.py
CHANGED
@@ -17,6 +17,19 @@ from sqlglot.helper import seq_get
 from sqlglot.tokens import TokenType
 
 
+# https://docs.starrocks.io/docs/sql-reference/sql-functions/spatial-functions/st_distance_sphere/
+def st_distance_sphere(self, expression: exp.StDistance) -> str:
+    point1 = expression.this
+    point2 = expression.expression
+
+    point1_x = self.func("ST_X", point1)
+    point1_y = self.func("ST_Y", point1)
+    point2_x = self.func("ST_X", point2)
+    point2_y = self.func("ST_Y", point2)
+
+    return self.func("ST_Distance_Sphere", point1_x, point1_y, point2_x, point2_y)
+
+
 class StarRocks(MySQL):
     STRICT_JSON_PATH_SYNTAX = False
 
@@ -132,6 +145,8 @@ class StarRocks(MySQL):
         TRANSFORMS = {
             **MySQL.Generator.TRANSFORMS,
             exp.Array: inline_array_sql,
+            exp.ArrayAgg: rename_func("ARRAY_AGG"),
+            exp.ArrayFilter: rename_func("ARRAY_FILTER"),
             exp.ArrayToString: rename_func("ARRAY_JOIN"),
             exp.ApproxDistinct: approx_count_distinct_sql,
             exp.DateDiff: lambda self, e: self.func(
@@ -141,12 +156,12 @@ class StarRocks(MySQL):
             exp.JSONExtract: arrow_json_extract_sql,
             exp.Property: property_sql,
             exp.RegexpLike: rename_func("REGEXP"),
+            exp.StDistance: st_distance_sphere,
             exp.StrToUnix: lambda self, e: self.func("UNIX_TIMESTAMP", e.this, self.format_time(e)),
             exp.TimestampTrunc: lambda self, e: self.func("DATE_TRUNC", unit_to_str(e), e.this),
             exp.TimeStrToDate: rename_func("TO_DATE"),
             exp.UnixToStr: lambda self, e: self.func("FROM_UNIXTIME", e.this, self.format_time(e)),
             exp.UnixToTime: rename_func("FROM_UNIXTIME"),
-            exp.ArrayFilter: rename_func("ARRAY_FILTER"),
         }
 
         TRANSFORMS.pop(exp.DateTrunc)
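exp.StDistance is now rewritten into StarRocks' four-argument ST_Distance_Sphere, extracting the coordinates with ST_X/ST_Y; a sketch:

import sqlglot

print(sqlglot.transpile("SELECT ST_DISTANCE(p1, p2)", write="starrocks")[0])
# expected: SELECT ST_DISTANCE_SPHERE(ST_X(p1), ST_Y(p1), ST_X(p2), ST_Y(p2))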
sqlglot/expressions.py
CHANGED
@@ -31,6 +31,7 @@ from sqlglot.helper import (
     ensure_collection,
     ensure_list,
     seq_get,
+    split_num_words,
     subclasses,
     to_bool,
 )
@@ -1723,15 +1724,15 @@ class Column(Condition):
             if self.args.get(part)
         ]
 
-    def to_dot(self) -> Dot | Identifier:
+    def to_dot(self, include_dots: bool = True) -> Dot | Identifier:
         """Converts the column into a dot expression."""
         parts = self.parts
         parent = self.parent
 
-        while parent:
-            if isinstance(parent, Dot):
+        if include_dots:
+            while isinstance(parent, Dot):
                 parts.append(parent.expression)
-            parent = parent.parent
+                parent = parent.parent
 
         return Dot.build(deepcopy(parts)) if len(parts) > 1 else parts[0]
@@ -1993,11 +1994,6 @@ class OnUpdateColumnConstraint(ColumnConstraintKind):
     pass
 
 
-# https://docs.snowflake.com/en/sql-reference/sql/create-external-table#optional-parameters
-class TransformColumnConstraint(ColumnConstraintKind):
-    pass
-
-
 class PrimaryKeyColumnConstraint(ColumnConstraintKind):
     arg_types = {"desc": False, "options": False}
 
@@ -4757,6 +4753,8 @@ class DataType(Expression):
             if udt:
                 return DataType(this=DataType.Type.USERDEFINED, kind=dtype, **kwargs)
             raise
+        elif isinstance(dtype, (Identifier, Dot)) and udt:
+            return DataType(this=DataType.Type.USERDEFINED, kind=dtype, **kwargs)
         elif isinstance(dtype, DataType.Type):
             data_type_exp = DataType(this=dtype)
         elif isinstance(dtype, DataType):
@@ -4798,9 +4796,6 @@ class DataType(Expression):
         return False
 
 
-DATA_TYPE = t.Union[str, DataType, DataType.Type]
-
-
 # https://www.postgresql.org/docs/15/datatype-pseudo.html
 class PseudoType(DataType):
     arg_types = {"this": True}
@@ -4930,6 +4925,10 @@ class AddConstraint(Expression):
     arg_types = {"expressions": True}
 
 
+class AddPartition(Expression):
+    arg_types = {"this": True, "exists": False}
+
+
 class AttachOption(Expression):
     arg_types = {"this": True, "expression": False}
 
@@ -5030,6 +5029,9 @@ class Dot(Binary):
         return parts
 
 
+DATA_TYPE = t.Union[str, Identifier, Dot, DataType, DataType.Type]
+
+
 class DPipe(Binary):
     arg_types = {"this": True, "expression": True, "safe": False}
 
@@ -5570,13 +5572,28 @@ class ArrayToString(Func):
     _sql_names = ["ARRAY_TO_STRING", "ARRAY_JOIN"]
 
 
+class ArrayIntersect(Func):
+    arg_types = {"expressions": True}
+    is_var_len_args = True
+    _sql_names = ["ARRAY_INTERSECT", "ARRAY_INTERSECTION"]
+
+
+class StPoint(Func):
+    arg_types = {"this": True, "expression": True, "null": False}
+    _sql_names = ["ST_POINT", "ST_MAKEPOINT"]
+
+
+class StDistance(Func):
+    arg_types = {"this": True, "expression": True, "use_spheroid": False}
+
+
 # https://cloud.google.com/bigquery/docs/reference/standard-sql/timestamp_functions#string
 class String(Func):
     arg_types = {"this": True, "zone": False}
 
 
 class StringToArray(Func):
-    arg_types = {"this": True, "expression":
+    arg_types = {"this": True, "expression": False, "null": False}
     _sql_names = ["STRING_TO_ARRAY", "SPLIT_BY_STRING", "STRTOK_TO_ARRAY"]
 
 
@@ -6706,6 +6723,11 @@ class StartsWith(Func):
     arg_types = {"this": True, "expression": True}
 
 
+class EndsWith(Func):
+    _sql_names = ["ENDS_WITH", "ENDSWITH"]
+    arg_types = {"this": True, "expression": True}
+
+
 class StrPosition(Func):
     arg_types = {
         "this": True,
@@ -7366,7 +7388,7 @@ def _apply_set_operation(
     **opts,
 ) -> S:
     return reduce(
-        lambda x, y: set_operation(this=x, expression=y, distinct=distinct),
+        lambda x, y: set_operation(this=x, expression=y, distinct=distinct, **opts),
         (maybe_parse(e, dialect=dialect, copy=copy, **opts) for e in expressions),
     )
 
@@ -7962,7 +7984,15 @@ def to_table(
     if isinstance(sql_path, Table):
         return maybe_copy(sql_path, copy=copy)
 
-    table = maybe_parse(sql_path, into=Table, dialect=dialect)
+    try:
+        table = maybe_parse(sql_path, into=Table, dialect=dialect)
+    except ParseError:
+        catalog, db, this = split_num_words(sql_path, ".", 3)
+
+        if not this:
+            raise
+
+        table = table_(this, db=db, catalog=catalog)
 
     for k, v in kwargs.items():
         table.set(k, v)
@@ -8110,7 +8140,7 @@ def column(
 
 @t.overload
 def column(
-    col: str | Identifier,
+    col: str | Identifier | Star,
     table: t.Optional[str | Identifier] = None,
     db: t.Optional[str | Identifier] = None,
    catalog: t.Optional[str | Identifier] = None,
@@ -8147,8 +8177,11 @@ def column(
     Returns:
         The new Column instance.
     """
+    if not isinstance(col, Star):
+        col = to_identifier(col, quoted=quoted, copy=copy)
+
     this = Column(
-        this=to_identifier(col, quoted=quoted, copy=copy),
+        this=col,
         table=to_identifier(table, quoted=quoted, copy=copy),
         db=to_identifier(db, quoted=quoted, copy=copy),
         catalog=to_identifier(catalog, quoted=quoted, copy=copy),
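The to_table change adds a dot-splitting fallback when a name does not parse; a sketch (the hyphenated name is a hypothetical example of input the default parser would reject unquoted):

from sqlglot import exp

t = exp.to_table("my_catalog.my_db.my-table")
print(t.catalog, t.db, t.name)
# expected: my_catalog my_db my-table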
sqlglot/generator.py
CHANGED
@@ -1018,6 +1018,7 @@ class Generator(metaclass=_Generator):
             persisted = " PERSISTED"
         else:
             persisted = ""
+
         return f"AS {this}{persisted}"
 
     def autoincrementcolumnconstraint_sql(self, _) -> str:
@@ -1079,9 +1080,6 @@ class Generator(metaclass=_Generator):
     def notnullcolumnconstraint_sql(self, expression: exp.NotNullColumnConstraint) -> str:
         return f"{'' if expression.args.get('allow_null') else 'NOT '}NULL"
 
-    def transformcolumnconstraint_sql(self, expression: exp.TransformColumnConstraint) -> str:
-        return f"AS {self.sql(expression, 'this')}"
-
     def primarykeycolumnconstraint_sql(self, expression: exp.PrimaryKeyColumnConstraint) -> str:
         desc = expression.args.get("desc")
         if desc is not None:
@@ -3500,6 +3498,10 @@ class Generator(metaclass=_Generator):
     def addconstraint_sql(self, expression: exp.AddConstraint) -> str:
         return f"ADD {self.expressions(expression)}"
 
+    def addpartition_sql(self, expression: exp.AddPartition) -> str:
+        exists = "IF NOT EXISTS " if expression.args.get("exists") else ""
+        return f"ADD {exists}{self.sql(expression.this)}"
+
     def distinct_sql(self, expression: exp.Distinct) -> str:
         this = self.expressions(expression, flat=True)
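addpartition_sql gives the new exp.AddPartition node a default rendering; a sketch of a round-trip through a dialect with Hive-style ADD PARTITION DDL (assumed here):

import sqlglot

sql = "ALTER TABLE t ADD IF NOT EXISTS PARTITION(ds = '2024-01-01')"
print(sqlglot.transpile(sql, read="hive", write="hive")[0])
# expected to round-trip, with IF NOT EXISTS preserved via the `exists` arg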
sqlglot/parser.py
CHANGED
@@ -933,9 +933,12 @@ class Parser(metaclass=_Parser):
     PIPE_SYNTAX_TRANSFORM_PARSERS = {
         "SELECT": lambda self, query: self._parse_pipe_syntax_select(query),
         "WHERE": lambda self, query: self._parse_pipe_syntax_where(query),
-        "ORDER BY": lambda self, query: query.order_by(
+        "ORDER BY": lambda self, query: query.order_by(
+            self._parse_order(), append=False, copy=False
+        ),
         "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query),
         "OFFSET": lambda self, query: query.offset(self._parse_offset(), copy=False),
+        "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query),
     }
 
     PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
@@ -1124,25 +1127,6 @@ class Parser(metaclass=_Parser):
         "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
     }
 
-    def _parse_pipe_syntax_select(self, query: exp.Query) -> exp.Query:
-        select = self._parse_select()
-        if isinstance(select, exp.Select):
-            return select.from_(query.subquery(copy=False), copy=False)
-        return query
-
-    def _parse_pipe_syntax_where(self, query: exp.Query) -> exp.Query:
-        where = self._parse_where()
-        return query.where(where, copy=False)
-
-    def _parse_pipe_syntax_limit(self, query: exp.Query) -> exp.Query:
-        limit = self._parse_limit()
-        offset = self._parse_offset()
-        if limit:
-            query.limit(limit, copy=False)
-        if offset:
-            query.offset(offset, copy=False)
-        return query
-
     def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expression:
         klass = (
             exp.PartitionedByBucket
@@ -1523,6 +1507,9 @@ class Parser(metaclass=_Parser):
     # Whether the 'AS' keyword is optional in the CTE definition syntax
     OPTIONAL_ALIAS_TOKEN_CTE = True
 
+    # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
+    ALTER_RENAME_REQUIRES_COLUMN = True
+
     __slots__ = (
         "error_level",
         "error_message_context",
@@ -1536,6 +1523,7 @@ class Parser(metaclass=_Parser):
         "_next",
         "_prev",
         "_prev_comments",
+        "_pipe_cte_counter",
     )
 
     # Autofilled
@@ -1566,6 +1554,7 @@ class Parser(metaclass=_Parser):
         self._next = None
         self._prev = None
         self._prev_comments = None
+        self._pipe_cte_counter = 0
 
     def parse(
         self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
@@ -3262,9 +3251,11 @@ class Parser(metaclass=_Parser):
         elif self._match(TokenType.VALUES, advance=False):
             this = self._parse_derived_table_values()
         elif from_:
-            this = exp.select("*").from_(from_.this, copy=False)
             if self._match(TokenType.PIPE_GT, advance=False):
-                return self._parse_pipe_syntax_query(
+                return self._parse_pipe_syntax_query(
+                    exp.Select().from_(from_.this, append=False, copy=False)
+                )
+            this = exp.select("*").from_(from_.this, copy=False)
         elif self._match(TokenType.SUMMARIZE):
             table = self._match(TokenType.TABLE)
             this = self._parse_select() or self._parse_string() or self._parse_table()
@@ -5130,6 +5121,14 @@ class Parser(metaclass=_Parser):
             exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
         )
 
+    def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]:
+        type_name = identifier.name
+
+        while self._match(TokenType.DOT):
+            type_name = f"{type_name}.{self._advance_any() and self._prev.text}"
+
+        return exp.DataType.build(type_name, udt=True)
+
     def _parse_types(
         self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
     ) -> t.Optional[exp.Expression]:
@@ -5151,12 +5150,7 @@ class Parser(metaclass=_Parser):
         if tokens[0].token_type in self.TYPE_TOKENS:
             self._prev = tokens[0]
         elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
-
-
-            while self._match(TokenType.DOT):
-                type_name = f"{type_name}.{self._advance_any() and self._prev.text}"
-
-            this = exp.DataType.build(type_name, udt=True)
+            this = self._parse_user_defined_type(identifier)
         else:
             self._retreat(self._index - 1)
             return None
@@ -5514,18 +5508,12 @@ class Parser(metaclass=_Parser):
         else:
             field = self._parse_field(any_token=True, anonymous_func=True)
 
+        # Function calls can be qualified, e.g., x.y.FOO()
+        # This converts the final AST to a series of Dots leading to the function call
+        # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
         if isinstance(field, (exp.Func, exp.Window)) and this:
-
-
-            this = exp.replace_tree(
-                this,
-                lambda n: (
-                    self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
-                    if n.table
-                    else n.this
-                )
-                if isinstance(n, exp.Column)
-                else n,
+            this = this.transform(
+                lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n
             )
 
         if op:
@@ -5900,7 +5888,11 @@ class Parser(metaclass=_Parser):
             constraints.append(
                 self.expression(
                     exp.ColumnConstraint,
-                    kind=exp.
+                    kind=exp.ComputedColumnConstraint(
+                        this=self._parse_disjunction(),
+                        persisted=self._match_texts(("STORED", "VIRTUAL"))
+                        and self._prev.text.upper() == "STORED",
+                    ),
                 )
             )
 
@@ -7163,16 +7155,6 @@ class Parser(metaclass=_Parser):
 
         return this
 
-    def _parse_pipe_syntax_query(self, query: exp.Select) -> exp.Query:
-        while self._match(TokenType.PIPE_GT):
-            parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper())
-            if not parser:
-                self.raise_error(f"Unsupported pipe syntax operator: '{self._curr.text.upper()}'.")
-            else:
-                query = parser(self, query)
-
-        return query
-
     def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
         return self._parse_wrapped_csv(self._parse_id_var, optional=optional)
 
@@ -7251,24 +7233,29 @@ class Parser(metaclass=_Parser):
         self._match(TokenType.TABLE)
         return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())
 
-    def _parse_add_column(self) -> t.Optional[exp.
+    def _parse_add_column(self) -> t.Optional[exp.ColumnDef]:
         if not self._prev.text.upper() == "ADD":
             return None
 
+        start = self._index
         self._match(TokenType.COLUMN)
+
         exists_column = self._parse_exists(not_=True)
         expression = self._parse_field_def()
 
-        if expression:
-
+        if not isinstance(expression, exp.ColumnDef):
+            self._retreat(start)
+            return None
 
-
-
-
-
-
-
+        expression.set("exists", exists_column)
+
+        # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
+        if self._match_texts(("FIRST", "AFTER")):
+            position = self._prev.text
+            column_position = self.expression(
+                exp.ColumnPosition, this=self._parse_column(), position=position
+            )
+            expression.set("position", column_position)
 
         return expression
 
@@ -7285,13 +7272,24 @@ class Parser(metaclass=_Parser):
         )
 
     def _parse_alter_table_add(self) -> t.List[exp.Expression]:
-        def
+        def _parse_add_alteration() -> t.Optional[exp.Expression]:
             self._match_text_seq("ADD")
             if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
                 return self.expression(
                     exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                 )
-
+
+            column_def = self._parse_add_column()
+            if isinstance(column_def, exp.ColumnDef):
+                return column_def
+
+            exists = self._parse_exists(not_=True)
+            if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False):
+                return self.expression(
+                    exp.AddPartition, exists=exists, this=self._parse_field(any_token=True)
+                )
+
+            return None
 
         if not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN or self._match_text_seq(
             "COLUMNS"
@@ -7300,7 +7298,7 @@ class Parser(metaclass=_Parser):
 
             return ensure_list(schema) if schema else self._parse_csv(self._parse_field_def)
 
-        return self._parse_csv(
+        return self._parse_csv(_parse_add_alteration)
 
     def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
         if self._match_texts(self.ALTER_ALTER_PARSERS):
@@ -7378,7 +7376,7 @@ class Parser(metaclass=_Parser):
         return self._parse_csv(self._parse_drop_column)
 
     def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]:
-        if self._match(TokenType.COLUMN):
+        if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN:
             exists = self._parse_exists()
             old_column = self._parse_column()
             to = self._match_text_seq("TO")
@@ -8328,3 +8326,146 @@ class Parser(metaclass=_Parser):
         expression = self.expression(exp.Identifier, this=token.text, **kwargs)
         expression.update_positions(token)
         return expression
+
+    def _build_pipe_cte(self, query: exp.Query, expressions: t.List[exp.Expression]) -> exp.Query:
+        if query.selects:
+            self._pipe_cte_counter += 1
+            new_cte = f"__tmp{self._pipe_cte_counter}"
+
+            # For `exp.Select`, generated CTEs are attached to its `with`
+            # For `exp.SetOperation`, generated CTEs are attached to the `with` of its LHS, accessed via `this`
+            with_ = (
+                query.args.get("with")
+                if isinstance(query, exp.Select)
+                else query.this.args.get("with")
+            )
+            ctes = with_.pop() if with_ else None
+
+            new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False)
+            if ctes:
+                new_select.set("with", ctes)
+
+            return new_select.with_(new_cte, as_=query, copy=False)
+
+        return query.select(*expressions, copy=False)
+
+    def _parse_pipe_syntax_select(self, query: exp.Query) -> exp.Query:
+        select = self._parse_select()
+        if isinstance(select, exp.Select):
+            return self._build_pipe_cte(query, select.expressions)
+
+        return query
+
+    def _parse_pipe_syntax_where(self, query: exp.Query) -> exp.Query:
+        where = self._parse_where()
+        return query.where(where, copy=False)
+
+    def _parse_pipe_syntax_limit(self, query: exp.Query) -> exp.Query:
+        limit = self._parse_limit()
+        offset = self._parse_offset()
+        if limit:
+            curr_limit = query.args.get("limit", limit)
+            if curr_limit.expression.to_py() >= limit.expression.to_py():
+                query.limit(limit, copy=False)
+        if offset:
+            curr_offset = query.args.get("offset")
+            curr_offset = curr_offset.expression.to_py() if curr_offset else 0
+            query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False)
+        return query
+
+    def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]:
+        this = self._parse_assignment()
+        if self._match_text_seq("GROUP", "AND", advance=False):
+            return this
+
+        this = self._parse_alias(this)
+
+        if self._match_set((TokenType.ASC, TokenType.DESC), advance=False):
+            return self._parse_ordered(lambda: this)
+
+        return this
+
+    def _parse_pipe_syntax_aggregate_group_order_by(
+        self, query: exp.Query, group_by_exists: bool = True
+    ) -> exp.Query:
+        expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields)
+        aggregates_or_groups, orders = [], []
+        for element in expr:
+            if isinstance(element, exp.Ordered):
+                this = element.this
+                if isinstance(this, exp.Alias):
+                    element.set("this", this.args["alias"])
+                orders.append(element)
+            else:
+                this = element
+            aggregates_or_groups.append(this)
+
+        if group_by_exists and isinstance(query, exp.Select):
+            query = query.select(*aggregates_or_groups, copy=False).group_by(
+                *[projection.args.get("alias", projection) for projection in aggregates_or_groups],
+                copy=False,
+            )
+        else:
+            query = query.select(*aggregates_or_groups, append=False, copy=False)
+
+        if orders:
+            return query.order_by(*orders, append=False, copy=False)
+
+        return query
+
+    def _parse_pipe_syntax_aggregate(self, query: exp.Query) -> exp.Query:
+        self._match_text_seq("AGGREGATE")
+        query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False)
+
+        if self._match(TokenType.GROUP_BY) or (
+            self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY)
+        ):
+            return self._parse_pipe_syntax_aggregate_group_order_by(query)
+
+        return query
+
+    def _parse_pipe_syntax_set_operator(
+        self, query: t.Optional[exp.Query]
+    ) -> t.Optional[exp.Query]:
+        first_setop = self.parse_set_operation(this=query)
+
+        if not first_setop or not query:
+            return None
+
+        if not query.selects:
+            query.select("*", copy=False)
+
+        this = first_setop.this.pop()
+        distinct = first_setop.args.pop("distinct")
+        setops = [first_setop.expression.pop(), *self._parse_expressions()]
+
+        if isinstance(first_setop, exp.Union):
+            query = query.union(*setops, distinct=distinct, copy=False, **first_setop.args)
+        elif isinstance(first_setop, exp.Except):
+            query = query.except_(*setops, distinct=distinct, copy=False, **first_setop.args)
+        else:
+            query = query.intersect(*setops, distinct=distinct, copy=False, **first_setop.args)
+
+        return self._build_pipe_cte(
+            query, [projection.args.get("alias", projection) for projection in this.expressions]
+        )
+
+    def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]:
+        while self._match(TokenType.PIPE_GT):
+            start = self._curr
+            parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper())
+            if not parser:
+                set_op_query = self._parse_pipe_syntax_set_operator(query)
+                if not set_op_query:
+                    self._retreat(start)
+                    self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.")
+                    break
+
+                query = set_op_query
+            else:
+                query = parser(self, query)
+
+        if query and not query.selects:
+            return query.select("*", copy=False)
+
+        return query
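The pipe-syntax rework routes each |> operator through CTE-building helpers and adds AGGREGATE and set-operator support; a sketch of the BigQuery pipe syntax this targets:

import sqlglot

sql = "FROM t |> WHERE x > 0 |> AGGREGATE SUM(y) AS total GROUP BY z"
# should desugar into an equivalent standard SELECT ... WHERE ... GROUP BY:
print(sqlglot.parse_one(sql, read="bigquery").sql(dialect="bigquery"))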
sqlglot/transforms.py
CHANGED
@@ -842,113 +842,122 @@ def struct_kv_to_alias(expression: exp.Expression) -> exp.Expression:
 
 
 def eliminate_join_marks(expression: exp.Expression) -> exp.Expression:
-    """
-    Remove join marks from an AST. This rule assumes that all marked columns are qualified.
-    If this does not hold for a query, consider running `sqlglot.optimizer.qualify` first.
+    """https://docs.oracle.com/cd/B19306_01/server.102/b14200/queries006.htm#sthref3178
 
-
-        SELECT * FROM a, b WHERE a.id = b.id(+) -- ... is converted to
-        SELECT * FROM a LEFT JOIN b ON a.id = b.id -- this
+    1. You cannot specify the (+) operator in a query block that also contains FROM clause join syntax.
 
-
-        expression: The AST to remove join marks from.
+    2. The (+) operator can appear only in the WHERE clause or, in the context of left-correlation (that is, when specifying the TABLE clause) in the FROM clause, and can be applied only to a column of a table or view.
 
-
-
+    The (+) operator does not produce an outer join if you specify one table in the outer query and the other table in an inner query.
+
+    You cannot use the (+) operator to outer-join a table to itself, although self joins are valid.
+
+    The (+) operator can be applied only to a column, not to an arbitrary expression. However, an arbitrary expression can contain one or more columns marked with the (+) operator.
+
+    A WHERE condition containing the (+) operator cannot be combined with another condition using the OR logical operator.
+
+    A WHERE condition cannot use the IN comparison condition to compare a column marked with the (+) operator with an expression.
+
+    A WHERE condition cannot compare any column marked with the (+) operator with a subquery.
+
+    -- example with WHERE
+    SELECT d.department_name, sum(e.salary) as total_salary
+    FROM departments d, employees e
+    WHERE e.department_id(+) = d.department_id
+    group by department_name
+
+    -- example of left correlation in select
+    SELECT d.department_name, (
+        SELECT SUM(e.salary)
+        FROM employees e
+        WHERE e.department_id(+) = d.department_id) AS total_salary
+    FROM departments d;
+
+    -- example of left correlation in from
+    SELECT d.department_name, t.total_salary
+    FROM departments d, (
+        SELECT SUM(e.salary) AS total_salary
+        FROM employees e
+        WHERE e.department_id(+) = d.department_id
+    ) t
     """
+
     from sqlglot.optimizer.scope import traverse_scope
+    from sqlglot.optimizer.normalize import normalize, normalized
+    from collections import defaultdict
 
-    for scope in traverse_scope(expression):
+    # we go in reverse to check the main query for left correlation
+    for scope in reversed(traverse_scope(expression)):
         query = scope.expression
 
         where = query.args.get("where")
-        joins = query.args.get("joins")
+        joins = query.args.get("joins", [])
 
-
+        # knockout: we do not support left correlation (see point 2)
+        assert not scope.is_correlated_subquery, "Correlated queries are not supported"
+
+        # nothing to do - we check it here after knockout above
+        if not where or not any(c.args.get("join_mark") for c in where.find_all(exp.Column)):
             continue
 
-
+        # make sure we have AND of ORs to have clear join terms
+        where = normalize(where.this)
+        assert normalized(where), "Cannot normalize JOIN predicates"
 
-
-
-
+        joins_ons = defaultdict(list)  # dict of {name: list of join AND conditions}
+        for cond in [where] if not isinstance(where, exp.And) else where.flatten():
+            join_cols = [col for col in cond.find_all(exp.Column) if col.args.get("join_mark")]
 
-
-            if not
+            left_join_table = set(col.table for col in join_cols)
+            if not left_join_table:
                 continue
 
-            predicate = column.find_ancestor(exp.Predicate, exp.Select)
-            assert isinstance(
-                predicate, exp.Binary
-            ), "Columns can only be marked with (+) when involved in a binary operation"
-
-            predicate_parent = predicate.parent
-            join_predicate = predicate.pop()
-
-            left_columns = [
-                c for c in join_predicate.left.find_all(exp.Column) if c.args.get("join_mark")
-            ]
-            right_columns = [
-                c for c in join_predicate.right.find_all(exp.Column) if c.args.get("join_mark")
-            ]
-
             assert not (
-
-            ), "
-
-            marked_column_tables = set()
-            for col in left_columns or right_columns:
-                table = col.table
-                assert table, f"Column {col} needs to be qualified with a table"
+                len(left_join_table) > 1
+            ), "Cannot combine JOIN predicates from different tables"
 
+            for col in join_cols:
                 col.set("join_mark", False)
-                marked_column_tables.add(table)
 
-            assert (
-                len(marked_column_tables) == 1
-            ), "Columns of only a single table can be marked with (+) in a given binary predicate"
-
-            # Add predicate if join already copied, or add join if it is new
-            join_this = old_joins.get(col.table, query_from).this
-            existing_join = new_joins.get(join_this.alias_or_name)
-            if existing_join:
-                existing_join.set("on", exp.and_(existing_join.args["on"], join_predicate))
-            else:
-                new_joins[join_this.alias_or_name] = exp.Join(
-                    this=join_this.copy(), on=join_predicate.copy(), kind="LEFT"
-                )
+            joins_ons[left_join_table.pop()].append(cond)
 
-
-
-
-
-
-
+        old_joins = {join.alias_or_name: join for join in joins}
+        new_joins = {}
+        query_from = query.args["from"]
+
+        for table, predicates in joins_ons.items():
+            join_what = old_joins.get(table, query_from).this.copy()
+            new_joins[join_what.alias_or_name] = exp.Join(
+                this=join_what, on=exp.and_(*predicates), kind="LEFT"
+            )
 
-
+            for p in predicates:
+                while isinstance(p.parent, exp.Paren):
+                    p.parent.replace(p)
+
+                parent = p.parent
+                p.pop()
+                if isinstance(parent, exp.Binary):
+                    parent.replace(parent.right if parent.left is None else parent.left)
+                elif isinstance(parent, exp.Where):
+                    parent.pop()
 
         if query_from.alias_or_name in new_joins:
+            only_old_joins = old_joins.keys() - new_joins.keys()
             assert (
-                len(
+                len(only_old_joins) >= 1
             ), "Cannot determine which table to use in the new FROM clause"
 
-            new_from_name = list(
-            query.set("from", exp.From(this=old_joins
-            only_old_join_sources.remove(new_from_name)
+            new_from_name = list(only_old_joins)[0]
+            query.set("from", exp.From(this=old_joins[new_from_name].this))
 
         if new_joins:
-
-
-
-
-
-
-            only_old_join_expressions.append(old_join_expression)
-
-            query.set("joins", list(new_joins.values()) + only_old_join_expressions)
-
-            if not where.this:
-                where.pop()
+            for n, j in old_joins.items():  # preserve any other joins
+                if n not in new_joins and n != query.args["from"].name:
+                    if not j.kind:
+                        j.set("kind", "CROSS")
+                    new_joins[n] = j
+            query.set("joins", list(new_joins.values()))
 
     return expression
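The rewritten eliminate_join_marks normalizes the WHERE clause and groups join conditions per table before building LEFT JOINs; a sketch using the example from the old docstring:

import sqlglot
from sqlglot.transforms import eliminate_join_marks

ast = sqlglot.parse_one("SELECT * FROM a, b WHERE a.id = b.id(+)", read="oracle")
print(eliminate_join_marks(ast).sql(dialect="oracle"))
# expected: SELECT * FROM a LEFT JOIN b ON a.id = b.id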
{sqlglot-26.25.3.dist-info → sqlglot-26.27.1.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sqlglot
-Version: 26.25.3
+Version: 26.27.1
 Summary: An easily customizable SQL parser and transpiler
 Author-email: Toby Mao <toby.mao@gmail.com>
 License: MIT License
@@ -558,6 +558,7 @@ See also: [Writing a Python SQL engine from scratch](https://github.com/tobymao/
 * [Dagster](https://github.com/dagster-io/dagster)
 * [Fugue](https://github.com/fugue-project/fugue)
 * [Ibis](https://github.com/ibis-project/ibis)
+* [dlt](https://github.com/dlt-hub/dlt)
 * [mysql-mimic](https://github.com/kelsin/mysql-mimic)
 * [Querybook](https://github.com/pinterest/querybook)
 * [Quokka](https://github.com/marsupialtail/quokka)
{sqlglot-26.25.3.dist-info → sqlglot-26.27.1.dist-info}/RECORD
CHANGED
@@ -1,27 +1,27 @@
 sqlglot/__init__.py,sha256=za08rtdPh2v7dOpGdNomttlIVGgTrKja7rPd6sQwaTg,5391
 sqlglot/__main__.py,sha256=022c173KqxsiABWTEpUIq_tJUxuNiW7a7ABsxBXqvu8,2069
 sqlglot/_typing.py,sha256=-1HPyr3w5COlSJWqlgt8jhFk2dyMvBuvVBqIX1wyVCM,642
-sqlglot/_version.py,sha256=
+sqlglot/_version.py,sha256=tGzmcwxazE8ZaUBuCO5phuigC0f9SB40km_TjYfnb90,515
 sqlglot/diff.py,sha256=PtOllQMQa1Sw1-V2Y8eypmDqGujXYPaTOp_WLsWkAWk,17314
 sqlglot/errors.py,sha256=QNKMr-pzLUDR-tuMmn_GK6iMHUIVdb_YSJ_BhGEvuso,2126
-sqlglot/expressions.py,sha256=
-sqlglot/generator.py,sha256=
+sqlglot/expressions.py,sha256=HspDzfH5_xnGPUvMPcwtNmIHaLbIj_NGmBWcvm8qIKw,242992
+sqlglot/generator.py,sha256=4iJ0BxkzinmosIhfhb34xjxaFpzw3Zo7fvmknaf5uRs,212432
 sqlglot/helper.py,sha256=9nZjFVRBtMKFC3EdzpDQ6jkazFO19po6BF8xHiNGZIo,15111
 sqlglot/jsonpath.py,sha256=dKdI3PNINNGimmSse2IIv-GbPN_3lXncXh_70QH7Lss,7664
 sqlglot/lineage.py,sha256=kXBDSErmZZluZx_kkrMj4MPEOAbkvcbX1tbOW7Bpl-U,15303
-sqlglot/parser.py,sha256=
+sqlglot/parser.py,sha256=Jv02-ikrk8uqmYR4nxKgLUj704lIx1_ugXf-hShkZ8w,320240
 sqlglot/planner.py,sha256=ql7Li-bWJRcyXzNaZy_n6bQ6B2ZfunEIB8Ztv2xaxq4,14634
 sqlglot/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sqlglot/schema.py,sha256=13H2qKQs27EKdTpDLOvcNnSTDAUbYNKjWtJs4aQCSOA,20509
 sqlglot/serde.py,sha256=DQVJ95WrIvhYfe02Ytb4NQug2aMwDCEwpMBW1LKDqzE,2031
 sqlglot/time.py,sha256=Q62gv6kL40OiRBF6BMESxKJcMVn7ZLNw7sv8H34z5FI,18400
 sqlglot/tokens.py,sha256=R0B8GQSbQ9GoDc0NlaT5Tc8RjgEOx2IYIkYU5rY8Rg8,48742
-sqlglot/transforms.py,sha256=
+sqlglot/transforms.py,sha256=3jpbHeVTLK9hmQi5f3_vmK-5jZB32_ittCkO7poxCs4,40631
 sqlglot/trie.py,sha256=v27uXMrHfqrXlJ6GmeTSMovsB_3o0ctnlKhdNt7W6fI,2245
 sqlglot/dialects/__init__.py,sha256=aZTLpe2SwgWqiVrRabmfV8TVLPVHFydGwb_zhcVhRss,3499
 sqlglot/dialects/athena.py,sha256=xjy75ej0T3douCUfFKhE1I3kqvPEuQY29x24WG1--Vw,6307
-sqlglot/dialects/bigquery.py,sha256=
-sqlglot/dialects/clickhouse.py,sha256=
+sqlglot/dialects/bigquery.py,sha256=PIRhlNIj6I5iXPxR2_9q1OWXvy4ovVB_ae5qe8SWV80,52713
+sqlglot/dialects/clickhouse.py,sha256=0ahX0zjIwN9-RzfNyITBHs9PsgQXjL0uMRlRgYz9crI,56520
 sqlglot/dialects/databricks.py,sha256=8PoaiP8PfiBjpheRiua-rO_HzX2TRUXqc3DnlQ8zYrg,4481
 sqlglot/dialects/dialect.py,sha256=uuek7l3vUf8OB987UUxzNqdsZdrSj1TtmImVyxbI7Go,68463
 sqlglot/dialects/doris.py,sha256=eC7Ct-iz7p4Usz659NkelUFhm-GmVolIZy5uaBvgjaA,14397
@@ -29,20 +29,20 @@ sqlglot/dialects/drill.py,sha256=FOh7_KjPx_77pv0DiHKZog0CcmzqeF9_PEmGnJ1ESSM,582
 sqlglot/dialects/druid.py,sha256=kh3snZtneehNOWqs3XcPjsrhNaRbkCQ8E4hHbWJ1fHM,690
 sqlglot/dialects/duckdb.py,sha256=alEYXBW5uUApRC8IRYnsapeiJq7JJwUmrK18C56RYsg,47780
 sqlglot/dialects/dune.py,sha256=gALut-fFfN2qMsr8LvZ1NQK3F3W9z2f4PwMvTMXVVVg,375
-sqlglot/dialects/hive.py,sha256=
+sqlglot/dialects/hive.py,sha256=PO6DLT1kHL-U2kFfV1CsNgQFT7A32LuGN71gnTXEOfY,31728
 sqlglot/dialects/materialize.py,sha256=_DPLPt8YrdQIIXNrGJw1IMcGOoAEJ9NO9X9pDfy4hxs,3494
-sqlglot/dialects/mysql.py,sha256=
+sqlglot/dialects/mysql.py,sha256=prZecn3zeoifZX7l54UuLG64ar7I-or_z9lF-rT8bds,49233
 sqlglot/dialects/oracle.py,sha256=llxu2LzndrsGyceTod-Leh03vuPWEUKzVHB5gQY-tY8,15313
-sqlglot/dialects/postgres.py,sha256=
+sqlglot/dialects/postgres.py,sha256=KUyMoLkm1_sZKUbdjn6bjXx9xz7sbEMKa-fl5Mzfrsk,31025
 sqlglot/dialects/presto.py,sha256=ltKbQ44efeq1HM0T8Qq0rsBSx6B6bF9RoKtUBVeoz70,33155
 sqlglot/dialects/prql.py,sha256=OF2LfDb4uzKIF7kpCfpL5G7VP1pnzLbjfW5QFUnuPvo,7803
 sqlglot/dialects/redshift.py,sha256=H8H8lGizHIAd4qLoPeFchyiGZKO1I8U_B058woukuGw,15366
-sqlglot/dialects/risingwave.py,sha256=
-sqlglot/dialects/snowflake.py,sha256=
-sqlglot/dialects/spark.py,sha256=
+sqlglot/dialects/risingwave.py,sha256=hwEOPjMw0ZM_3fjQcBUE00oy6I8V6mzYOOYmcwwS8mw,2898
+sqlglot/dialects/snowflake.py,sha256=m4Gekw4NhoD3q4WF1TJhetRmmwkh8XG9Rqq8mL3P31E,61761
+sqlglot/dialects/spark.py,sha256=fbmiTKAQiKqG9yE_HAxYGgQiOjdxB9tJyjOtgdqF100,7645
 sqlglot/dialects/spark2.py,sha256=8er7nHDm5Wc57m9AOxKN0sd_DVzbhAL44H_udlFh9O8,14258
-sqlglot/dialects/sqlite.py,sha256=
-sqlglot/dialects/starrocks.py,sha256=
+sqlglot/dialects/sqlite.py,sha256=UzJwIdY1PsLArMxNt5lKvk8COHvXeo4FoqW41LqVmM8,12440
+sqlglot/dialects/starrocks.py,sha256=fHNgvq5Nz7dI4QUWCTOO5VDOYjasBxRRlcg9TbY0UZE,11235
 sqlglot/dialects/tableau.py,sha256=oIawDzUITxGCWaEMB8OaNMPWhbC3U-2y09pYPm4eazc,2190
 sqlglot/dialects/teradata.py,sha256=xWa-9kSTsT-eM1NePi_oIM1dPHmXW89GLU5Uda3_6Ao,14036
 sqlglot/dialects/trino.py,sha256=wgLsiX1NQvjGny_rgrU1e2r6kK1LD0KgaSdIDrYmjD0,4285
@@ -72,8 +72,8 @@ sqlglot/optimizer/qualify_tables.py,sha256=5f5enBAh-bpNB9ewF97W9fx9h1TGXj1Ih5fnc
 sqlglot/optimizer/scope.py,sha256=Fqz9GpBqO1GWzRAnqdflXXNz44ot_1JqVBC-DnYAU_E,30063
 sqlglot/optimizer/simplify.py,sha256=S0Blqg5Mq2KRRWhWz-Eivch9sBjBhg9fRJA6EdBzj2g,50704
 sqlglot/optimizer/unnest_subqueries.py,sha256=kzWUVDlxs8z9nmRx-8U-pHXPtVZhEIwkKqmKhr2QLvc,10908
-sqlglot-26.25.3.dist-info/licenses/LICENSE,sha256=
-sqlglot-26.25.3.dist-info/METADATA,sha256=
-sqlglot-26.25.3.dist-info/WHEEL,sha256=
-sqlglot-26.25.3.dist-info/top_level.txt,sha256=
-sqlglot-26.25.3.dist-info/RECORD,,
+sqlglot-26.27.1.dist-info/licenses/LICENSE,sha256=AI3__mHZfOtzY3EluR_pIYBm3_pE7TbVx7qaHxoZ114,1065
+sqlglot-26.27.1.dist-info/METADATA,sha256=R_0FXBifra90Z576tjTOsln1IrNAgj-vTzJL1-izN_4,20732
+sqlglot-26.27.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+sqlglot-26.27.1.dist-info/top_level.txt,sha256=5kRskCGA_gVADF9rSfSzPdLHXqvfMusDYeHePfNY2nQ,8
+sqlglot-26.27.1.dist-info/RECORD,,
{sqlglot-26.25.3.dist-info → sqlglot-26.27.1.dist-info}/WHEEL
File without changes
{sqlglot-26.25.3.dist-info → sqlglot-26.27.1.dist-info}/licenses/LICENSE
File without changes
{sqlglot-26.25.3.dist-info → sqlglot-26.27.1.dist-info}/top_level.txt
File without changes