sqlglotc 30.0.0__tar.gz → 30.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sqlglotc-30.0.0/sqlglotc.egg-info → sqlglotc-30.0.2}/PKG-INFO +1 -1
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/setup.py +8 -11
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/expressions/array.py +7 -1
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/expressions/core.py +2 -2
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/expressions/string.py +8 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/parser.py +304 -78
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/parsers/bigquery.py +10 -3
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/parsers/materialize.py +2 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/parsers/prql.py +7 -1
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/parsers/snowflake.py +24 -17
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/tokenizer_core.py +15 -3
- {sqlglotc-30.0.0 → sqlglotc-30.0.2/sqlglotc.egg-info}/PKG-INFO +1 -1
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/MANIFEST.in +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/pyproject.toml +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/setup.cfg +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/__init__.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/errors.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/executor/__init__.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/executor/table.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/expressions/__init__.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/expressions/aggregate.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/expressions/builders.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/expressions/constraints.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/expressions/datatypes.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/expressions/ddl.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/expressions/dml.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/expressions/functions.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/expressions/json.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/expressions/math.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/expressions/properties.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/expressions/query.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/expressions/temporal.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/helper.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/optimizer/__init__.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/optimizer/isolate_table_selects.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/optimizer/normalize_identifiers.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/optimizer/qualify.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/optimizer/qualify_columns.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/optimizer/qualify_tables.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/optimizer/resolver.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/optimizer/scope.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/parsers/__init__.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/parsers/athena.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/parsers/base.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/parsers/clickhouse.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/parsers/databricks.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/parsers/doris.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/parsers/dremio.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/parsers/drill.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/parsers/druid.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/parsers/duckdb.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/parsers/dune.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/parsers/exasol.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/parsers/fabric.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/parsers/hive.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/parsers/mysql.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/parsers/oracle.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/parsers/postgres.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/parsers/presto.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/parsers/redshift.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/parsers/risingwave.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/parsers/singlestore.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/parsers/solr.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/parsers/spark.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/parsers/spark2.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/parsers/sqlite.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/parsers/starrocks.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/parsers/tableau.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/parsers/teradata.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/parsers/trino.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/parsers/tsql.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/schema.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/serde.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/time.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/trie.py +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglotc.egg-info/SOURCES.txt +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglotc.egg-info/dependency_links.txt +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglotc.egg-info/requires.txt +0 -0
- {sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglotc.egg-info/top_level.txt +0 -0
{sqlglotc-30.0.0 → sqlglotc-30.0.2}/setup.py

@@ -12,8 +12,11 @@ sqlglot_src = os.path.join(here, "..", "sqlglot")


 def _subpkg_files(subpkg, files=None):
     """List source files from a sqlglot subpackage. Compiles all .py files if `files` is None."""
-    subpkg_dir = os.path.join(sqlglot_src, subpkg)
     if files is None:
+        # Try repo source first, fall back to sdist-bundled copy.
+        subpkg_dir = os.path.join(sqlglot_src, subpkg)
+        if not os.path.isdir(subpkg_dir):
+            subpkg_dir = os.path.join(here, "sqlglot", subpkg)
         files = sorted(
             f for f in os.listdir(subpkg_dir) if f.endswith(".py") and f != "__init__.py"
         )

@@ -58,7 +61,6 @@ def _source_paths():
 class build_ext(_build_ext):
     def copy_extensions_to_source(self):
         """For editable installs, put sqlglot.* .so files in the sqlglot source dir."""
-        build_py = self.get_finalized_command("build_py")
         for ext in self.extensions:
             fullname = self.get_ext_fullname(ext.name)
             filename = self.get_ext_filename(fullname)

@@ -69,14 +71,9 @@ class build_ext(_build_ext):
                 sub_module = ".".join(parts[1:])
                 dst = os.path.join(sqlglot_src, self.get_ext_filename(sub_module))
             else:
-                #
-
-
-                dst = (
-                    os.path.join(package_dir, os.path.basename(filename))
-                    if package_dir
-                    else os.path.basename(filename)
-                )
+                # Place the mypyc runtime helper (e.g., HASH__mypyc) inside sqlglot/.
+                # sqlglot/__init__.py bootstraps it into sys.modules for editable installs.
+                dst = os.path.join(sqlglot_src, os.path.basename(filename))
             self.copy_file(src, dst, level=self.verbose)



@@ -105,6 +102,6 @@ class sdist(_sdist):
 setup(
     name="sqlglotc",
     packages=[],
-    ext_modules=mypycify(_source_paths(), opt_level=os.environ.get("MYPYC_OPT", "
+    ext_modules=mypycify(_source_paths(), opt_level=os.environ.get("MYPYC_OPT", "2")),
     cmdclass={"build_ext": build_ext, "sdist": sdist},
 )
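
The new lookup matters when building from the published sdist, where the sibling ../sqlglot checkout is absent and the copy bundled next to setup.py is used instead. A standalone sketch of that fallback (the function name and paths here are illustrative, not taken from the package):

import os

def find_subpkg_dir(here: str, sqlglot_src: str, subpkg: str) -> str:
    # Prefer the repo checkout next to setup.py; otherwise use the sdist-bundled copy.
    subpkg_dir = os.path.join(sqlglot_src, subpkg)
    if not os.path.isdir(subpkg_dir):
        subpkg_dir = os.path.join(here, "sqlglot", subpkg)
    return subpkg_dir

print(find_subpkg_dir(os.getcwd(), os.path.join(os.getcwd(), "..", "sqlglot"), "parsers"))
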
{sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/expressions/array.py

@@ -174,7 +174,13 @@ class ArraysZip(Expression, Func):


 class ArrayToString(Expression, Func):
-    arg_types = {
+    arg_types = {
+        "this": True,
+        "expression": True,
+        "null": False,
+        "null_is_empty": False,
+        "null_delim_is_null": False,
+    }
     _sql_names = ["ARRAY_TO_STRING", "ARRAY_JOIN"]


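
The two new flags let a dialect record its NULL handling for ARRAY_TO_STRING on the node itself (the Snowflake builder further down sets both). A minimal construction sketch, assuming the compiled package re-exports these classes as sqlglot.exp the way upstream sqlglot does:

from sqlglot import exp

node = exp.ArrayToString(
    this=exp.column("tags"),
    expression=exp.Literal.string(","),
    null_is_empty=True,        # NULL elements render as empty strings
    null_delim_is_null=True,   # a NULL delimiter makes the whole result NULL
)
print(node.args["null_is_empty"])  # True
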
{sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/expressions/core.py

@@ -617,8 +617,8 @@ class Expression(Expr):
         Returns the alias of the expression, or an empty string if it's not aliased.
         """
         alias = self.args.get("alias")
-        if
-            return alias.name
+        if isinstance(alias, Expression):
+            return alias.name
         return self.text("alias")

     @property
{sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/expressions/string.py

@@ -177,6 +177,14 @@ class SplitPart(Expression, Func):
     }


+class Strtok(Expression, Func):
+    arg_types = {
+        "this": True,
+        "delimiter": False,
+        "part_index": False,
+    }
+
+
 class StartsWith(Expression, Func):
     _sql_names = ["STARTS_WITH", "STARTSWITH"]
     arg_types = {"this": True, "expression": True}
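
STRTOK previously had no node of its own and was rewritten into SplitPart at parse time (see the Snowflake hunks below); it now gets a dedicated exp.Strtok. A small sketch of building the node directly, under the same sqlglot.exp re-export assumption as above:

from sqlglot import exp

strtok = exp.Strtok(
    this=exp.Literal.string("a.b.c"),
    delimiter=exp.Literal.string("."),  # optional per arg_types
    part_index=exp.Literal.number(2),   # optional per arg_types
)
print(strtok.args["part_index"].this)  # the part index literal
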
{sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/parser.py

@@ -286,6 +286,24 @@ class Parser:
             Default: 3
     """

+    __slots__ = (
+        "error_level",
+        "error_message_context",
+        "max_errors",
+        "dialect",
+        "sql",
+        "errors",
+        "_tokens",
+        "_index",
+        "_curr",
+        "_next",
+        "_prev",
+        "_prev_comments",
+        "_pipe_cte_counter",
+        "_chunks",
+        "_chunk_index",
+    )
+
     FUNCTIONS: t.ClassVar[t.Dict[str, t.Callable]] = {
         **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
         **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),

@@ -299,6 +317,8 @@ class Parser:
         "ARRAY_APPEND": build_array_append,
         "ARRAY_CAT": build_array_concat,
         "ARRAY_CONCAT": build_array_concat,
+        "ARRAY_INTERSECT": lambda args: exp.ArrayIntersect(expressions=args),
+        "ARRAY_INTERSECTION": lambda args: exp.ArrayIntersect(expressions=args),
         "ARRAY_PREPEND": build_array_prepend,
         "ARRAY_REMOVE": build_array_remove,
         "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),

@@ -559,6 +579,12 @@ class Parser:
         TokenType.SOME: exp.Any,
     }

+    SUBQUERY_TOKENS: t.ClassVar = {
+        TokenType.SELECT,
+        TokenType.WITH,
+        TokenType.FROM,
+    }
+
     RESERVED_TOKENS: t.ClassVar = {
         *Tokenizer.SINGLE_TOKENS.values(),
         TokenType.SELECT,

@@ -691,6 +717,8 @@ class Parser:
         TokenType.SET,
         TokenType.SETTINGS,
         TokenType.SHOW,
+        TokenType.STREAM,
+        TokenType.STREAMLIT,
         TokenType.TEMPORARY,
         TokenType.TOP,
         TokenType.TRUE,

@@ -742,7 +770,7 @@ class Parser:
     TRIM_TYPES: t.ClassVar = {"LEADING", "TRAILING", "BOTH"}

     # Tokens that indicate a simple column reference
-
+    IDENTIFIER_TOKENS: t.ClassVar[t.FrozenSet] = frozenset({TokenType.VAR, TokenType.IDENTIFIER})

     BRACKETS: t.ClassVar[t.FrozenSet] = frozenset({TokenType.L_BRACKET, TokenType.L_BRACE})

@@ -757,6 +785,17 @@ class Parser:
         }
     )

+    TABLE_POSTFIX_TOKENS: t.ClassVar[t.FrozenSet] = frozenset(
+        {
+            TokenType.L_PAREN,
+            TokenType.L_BRACKET,
+            TokenType.L_BRACE,
+            TokenType.PIVOT,
+            TokenType.UNPIVOT,
+            TokenType.TABLE_SAMPLE,
+        }
+    )
+
     FUNC_TOKENS: t.ClassVar = {
         TokenType.COLLATE,
         TokenType.COMMAND,

@@ -898,6 +937,27 @@ class Parser:

     JOIN_HINTS: t.ClassVar[t.Set[str]] = set()

+    # Tokens that unambiguously end a table reference on the fast path
+    TABLE_TERMINATORS: t.ClassVar[t.FrozenSet] = frozenset(
+        {
+            TokenType.COMMA,
+            TokenType.GROUP_BY,
+            TokenType.HAVING,
+            TokenType.JOIN,
+            TokenType.LIMIT,
+            TokenType.ON,
+            TokenType.ORDER_BY,
+            TokenType.R_PAREN,
+            TokenType.SEMICOLON,
+            TokenType.SENTINEL,
+            TokenType.WHERE,
+            *SET_OPERATIONS,
+            *JOIN_KINDS,
+            *JOIN_METHODS,
+            *JOIN_SIDES,
+        }
+    )
+
     LAMBDAS: t.ClassVar = {
         TokenType.ARROW: lambda self, expressions: self.expression(
             exp.Lambda(

@@ -913,6 +973,13 @@ class Parser:
         ),
     }

+    # Whether lambda args include type annotations, e.g. TRANSFORM(arr, x INT -> x + 1) in Snowflake
+    TYPED_LAMBDA_ARGS: t.ClassVar[bool] = False
+
+    LAMBDA_ARG_TERMINATORS: t.ClassVar[t.FrozenSet] = frozenset(
+        {TokenType.COMMA, TokenType.R_PAREN}
+    )
+
     COLUMN_OPERATORS: t.ClassVar = {
         TokenType.DOT: None,
         TokenType.DOTCOLON: lambda self, this, to: self.expression(exp.JSONCast(this=this, to=to)),
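
Declaring __slots__ pins the parser's attribute set and drops the per-instance __dict__ that a plain class carries, which is also the layout mypyc-compiled classes prefer. A generic illustration of what the declaration buys (not code from the package):

import sys

class Plain:
    def __init__(self):
        self.error_level = None
        self.max_errors = 3

class Slotted:
    __slots__ = ("error_level", "max_errors")

    def __init__(self):
        self.error_level = None
        self.max_errors = 3

print(hasattr(Plain(), "__dict__"), hasattr(Slotted(), "__dict__"))  # True False
print(sys.getsizeof(Plain().__dict__))  # per-instance dict overhead that __slots__ avoids
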
@@ -1757,9 +1824,6 @@ class Parser:
         self.error_message_context: int = error_message_context
         self.max_errors: int = max_errors
         self.dialect: t.Any = _resolve_dialect(dialect)
-        self.reset()
-
-    def reset(self) -> None:
         self.sql: str = ""
         self.errors: t.List[ParseError] = []
         self._tokens: t.List[Token] = []

@@ -1772,6 +1836,19 @@ class Parser:
         self._chunks: t.List[t.List[Token]] = []
         self._chunk_index: int = 0

+    def reset(self) -> None:
+        self.sql = ""
+        self.errors = []
+        self._tokens = []
+        self._index = 0
+        self._curr = SENTINEL_NONE
+        self._next = SENTINEL_NONE
+        self._prev = SENTINEL_NONE
+        self._prev_comments = []
+        self._pipe_cte_counter = 0
+        self._chunks = []
+        self._chunk_index = 0
+
     def _advance(self, times: int = 1) -> None:
         index = self._index + times
         self._index = index
@@ -3758,6 +3835,7 @@ class Parser:
             this = self._parse_derived_table_values()
         elif from_:
             this = exp.select("*").from_(from_.this, copy=False)
+            this = self._parse_query_modifiers(this)
         elif self._match(TokenType.SUMMARIZE):
             table = self._match(TokenType.TABLE)
             this = self._parse_select() or self._parse_string() or self._parse_table()

@@ -4206,12 +4284,10 @@ class Parser:

     def _parse_stream(self) -> t.Optional[exp.Stream]:
         index = self._index
-        if self.
-            this
-            if this:
+        if self._match(TokenType.STREAM):
+            if this := self._try_parse(self._parse_table):
                 return self.expression(exp.Stream(this=this))
-
-        self._retreat(index)
+            self._retreat(index)
         return None

     def _parse_join_parts(
@@ -4433,11 +4509,81 @@ class Parser:
             or self._parse_placeholder()
         )

+    def _parse_table_parts_fast(self) -> t.Optional[exp.Table]:
+        index = self._index
+        parts: t.Optional[t.List[exp.Identifier]] = None
+        all_comments: t.Optional[t.List[str]] = None
+
+        while self._match_set(self.IDENTIFIER_TOKENS):
+            token = self._prev
+            comments = self._prev_comments
+
+            has_dot = self._match(TokenType.DOT)
+            curr_tt = self._curr.token_type
+
+            if not has_dot:
+                if curr_tt in self.TABLE_POSTFIX_TOKENS:
+                    self._retreat(index)
+                    return None
+            elif curr_tt not in self.IDENTIFIER_TOKENS:
+                self._retreat(index)
+                return None
+
+            if parts is None:
+                parts = []
+
+            if comments:
+                if all_comments is None:
+                    all_comments = []
+                all_comments.extend(comments)
+                self._prev_comments = []
+
+            parts.append(
+                self.expression(
+                    exp.Identifier(
+                        this=token.text, quoted=token.token_type == TokenType.IDENTIFIER
+                    ),
+                    token,
+                )
+            )
+
+            if not has_dot:
+                break
+
+        if parts is None:
+            return None
+
+        n = len(parts)
+
+        if n == 1:
+            table: exp.Table = exp.Table(this=parts[0])
+        elif n == 2:
+            table = exp.Table(this=parts[1], db=parts[0])
+        elif n >= 3:
+            this: exp.Identifier | exp.Dot = parts[2]
+            for i in range(3, n):
+                this = exp.Dot(this=this, expression=parts[i])
+
+            table = exp.Table(this=this, db=parts[1], catalog=parts[0])
+
+        if table is None:
+            self._retreat(index)
+        elif all_comments:
+            table.add_comments(all_comments)
+        return table
+
     def _parse_table_parts(
-        self,
-
-
-
+        self,
+        schema: bool = False,
+        is_db_reference: bool = False,
+        wildcard: bool = False,
+        fast: bool = False,
+    ) -> t.Optional[exp.Table | exp.Dot]:
+        if fast:
+            return self._parse_table_parts_fast()
+
+        catalog: t.Optional[exp.Expr | str] = None
+        db: t.Optional[exp.Expr | str] = None
         table: t.Optional[exp.Expr | str] = self._parse_table_part(schema=schema)

         while self._match(TokenType.DOT):

@@ -4463,9 +4609,6 @@ class Parser:
         else:
             table = exp.Identifier(this="*")

-        # We bubble up comments from the Identifier to the Table
-        comments = table.pop_comments() if isinstance(table, exp.Expr) else None
-
         if is_db_reference:
             catalog = db
             db = table

@@ -4476,7 +4619,15 @@ class Parser:
         if not db and is_db_reference:
             self.raise_error(f"Expected database name but got {self._curr}")

-        table = self.expression(exp.Table(this=table, db=db, catalog=catalog)
+        table = self.expression(exp.Table(this=table, db=db, catalog=catalog))
+
+        # Bubble up comments from identifier parts to the Table
+        comments = []
+        for part in table.parts:
+            if part_comments := part.pop_comments():
+                comments.extend(part_comments)
+        if comments:
+            table.add_comments(comments)

         changes = self._parse_changes()
         if changes:
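
The fast path only fires for plain dotted names made of VAR/IDENTIFIER tokens, and it is meant to be invisible in the resulting AST. A quick probe, assuming the compiled package installs the usual sqlglot module and parse_one entry point:

import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one("SELECT * FROM some_catalog.some_db.some_tbl WHERE x = 1")
table = ast.find(exp.Table)
print(table.catalog, table.db, table.name)  # some_catalog some_db some_tbl
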
@@ -4502,24 +4653,47 @@ class Parser:
         parse_partition: bool = False,
         consume_pipe: bool = False,
     ) -> t.Optional[exp.Expr]:
-
-
+        if not schema and not is_db_reference and not consume_pipe and not joins:
+            index = self._index
+            table = self._parse_table_parts(fast=True)
+
+            if table is not None:
+                curr_tt = self._curr.token_type
+                next_tt = self._next.token_type
+
+                fast_terminators = self.TABLE_TERMINATORS
+
+                # only return the table if we're sure there are no other operators
+                # MATCH_CONDITION is a special case because it accepts any alias before it like LIMIT
+                if curr_tt in fast_terminators and next_tt != TokenType.MATCH_CONDITION:
+                    return table
+
+                postfix_tokens = self.TABLE_POSTFIX_TOKENS
+
+                if curr_tt not in postfix_tokens and next_tt not in postfix_tokens:
+                    if alias := self._parse_table_alias(
+                        alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
+                    ):
+                        table.set("alias", alias)
+
+                        if self._curr.token_type in fast_terminators:
+                            return table
+
+            self._retreat(index)
+
+        if stream := self._parse_stream():
             return stream

-        lateral
-        if lateral:
+        if lateral := self._parse_lateral():
             return lateral

-        unnest
-        if unnest:
+        if unnest := self._parse_unnest():
             return unnest

-        values
-        if values:
+        if values := self._parse_derived_table_values():
             return values

-        subquery
-        if subquery:
+        if subquery := self._parse_select(table=True, consume_pipe=consume_pipe):
             if not subquery.args.get("pivots"):
                 subquery.set("pivots", self._parse_pivots())
             return subquery

@@ -4551,7 +4725,7 @@ class Parser:
             this.set("only", only)

         # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
-        self.
+        self._match(TokenType.STAR)

         parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
         if parse_partition and self._match(TokenType.PARTITION, advance=False):
@@ -5436,16 +5610,50 @@ class Parser:
         return this

     def _parse_disjunction(self) -> t.Optional[exp.Expr]:
-
+        this = self._parse_conjunction()
+        while self._match_set(self.DISJUNCTION):
+            comments = self._prev_comments
+            this = self.expression(
+                self.DISJUNCTION[self._prev.token_type](
+                    this=this, expression=self._parse_conjunction()
+                ),
+                comments=comments,
+            )
+        return this

     def _parse_conjunction(self) -> t.Optional[exp.Expr]:
-
+        this = self._parse_equality()
+        while self._match_set(self.CONJUNCTION):
+            comments = self._prev_comments
+            this = self.expression(
+                self.CONJUNCTION[self._prev.token_type](
+                    this=this, expression=self._parse_equality()
+                ),
+                comments=comments,
+            )
+        return this

     def _parse_equality(self) -> t.Optional[exp.Expr]:
-
+        this = self._parse_comparison()
+        while self._match_set(self.EQUALITY):
+            comments = self._prev_comments
+            this = self.expression(
+                self.EQUALITY[self._prev.token_type](
+                    this=this, expression=self._parse_comparison()
+                ),
+                comments=comments,
+            )
+        return this

     def _parse_comparison(self) -> t.Optional[exp.Expr]:
-
+        this = self._parse_range()
+        while self._match_set(self.COMPARISON):
+            comments = self._prev_comments
+            this = self.expression(
+                self.COMPARISON[self._prev.token_type](this=this, expression=self._parse_range()),
+                comments=comments,
+            )
+        return this

     def _parse_range(self, this: t.Optional[exp.Expr] = None) -> t.Optional[exp.Expr]:
         this = this or self._parse_bitwise()

@@ -5727,7 +5935,14 @@ class Parser:
         return this

     def _parse_exponent(self) -> t.Optional[exp.Expr]:
-
+        this = self._parse_unary()
+        while self._match_set(self.EXPONENT):
+            comments = self._prev_comments
+            this = self.expression(
+                self.EXPONENT[self._prev.token_type](this=this, expression=self._parse_unary()),
+                comments=comments,
+            )
+        return this

     def _parse_unary(self) -> t.Optional[exp.Expr]:
         if self._match_set(self.UNARY_PARSERS):

@@ -5737,24 +5952,8 @@ class Parser:
     def _parse_type(
         self, parse_interval: bool = True, fallback_to_identifier: bool = False
     ) -> t.Optional[exp.Expr]:
-
-
-
-        # fast path section for simple common cases
-        # for columns, it's basic schema.table.col
-        if not fallback_to_identifier and curr_token_type in self.FAST_COLUMN_TOKENS:
-            return self._parse_column()
-
-        next_token_type = self._next.token_type
-
-        # similar fast path for literals, :: and -> have special behavior, so we use COLUMN_OPERATORS is clean enough
-        if next_token_type not in self.COLUMN_OPERATORS:
-            if curr_token_type == TokenType.STRING and next_token_type != TokenType.STRING:
-                self._advance()
-                return self.expression(exp.Literal(this=curr.text, is_string=True), curr)
-            if curr_token_type == TokenType.NUMBER:
-                self._advance()
-                return self.expression(exp.Literal(this=curr.text, is_string=False), curr)
+        if not fallback_to_identifier and (atom := self._parse_atom()) is not None:
+            return atom

         if interval := parse_interval and self._parse_interval():
             return self._parse_column_ops(interval)
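
These specialized loops inline the shared _parse_tokens helper (removed further down) while keeping the same left-associative, one-precedence-level-at-a-time behaviour. A quick probe of the resulting precedence, again assuming the stock sqlglot.parse_one API:

import sqlglot
from sqlglot import exp

cond = sqlglot.parse_one("SELECT 1 WHERE a = 1 AND b = 2 OR c = 3").args["where"].this
print(isinstance(cond, exp.Or), isinstance(cond.this, exp.And))  # True True -- OR binds looser than AND
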
@@ -6145,8 +6344,33 @@ class Parser:
                 self.expression(exp.AtTimeZone(this=this, zone=self._parse_unary()))
             )

+    def _parse_atom(self) -> t.Optional[exp.Expr]:
+        if (
+            self._curr.token_type in self.IDENTIFIER_TOKENS
+            and (column := self._parse_column()) is not None
+        ):
+            return column
+
+        token = self._curr
+        token_type = token.token_type
+
+        if not (primary_parser := self.PRIMARY_PARSERS.get(token_type)):
+            return None
+
+        next_type = self._next.token_type
+
+        if (
+            next_type in self.COLUMN_OPERATORS
+            or next_type in self.COLUMN_POSTFIX_TOKENS
+            or (token_type == TokenType.STRING and next_type == TokenType.STRING)
+        ):
+            return None
+
+        self._advance()
+        return primary_parser(self, token)
+
     def _parse_column(self) -> t.Optional[exp.Expr]:
-        column: t.Optional[exp.Expr] = self.
+        column: t.Optional[exp.Expr] = self._parse_column_parts_fast()
         if column is None:
             this = self._parse_column_reference()
             if not this:

@@ -6161,7 +6385,7 @@ class Parser:

         return column

-    def
+    def _parse_column_parts_fast(self) -> t.Optional[exp.Column | exp.Dot]:
         """Fast path for simple column and dot references (a, a.b, ...).

         Greedily consumes VAR/IDENTIFIER tokens separated by DOTs, then checks

@@ -6172,7 +6396,7 @@ class Parser:
         parts: t.Optional[t.List[exp.Identifier]] = None
         all_comments: t.Optional[t.List[str]] = None

-        while self._match_set(self.
+        while self._match_set(self.IDENTIFIER_TOKENS):
             token = self._prev
             comments = self._prev_comments

@@ -6187,7 +6411,7 @@ class Parser:
             if curr_tt in self.COLUMN_OPERATORS or curr_tt in self.COLUMN_POSTFIX_TOKENS:
                 self._retreat(index)
                 return None
-            elif curr_tt not in self.
+            elif curr_tt not in self.IDENTIFIER_TOKENS:
                 self._retreat(index)
                 return None

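
_parse_atom now fronts both the type/column path above and the lambda path below, handling bare columns and literals before falling back to the full machinery; the AST should be unchanged. A small probe under the same stock-API assumption:

import sqlglot
from sqlglot import exp

lit = sqlglot.parse_one("SELECT 'abc'").expressions[0]
col = sqlglot.parse_one("SELECT tbl.col").expressions[0]
print(isinstance(lit, exp.Literal), isinstance(col, exp.Column))  # True True
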
@@ -6569,7 +6793,7 @@ class Parser:

         if subquery_predicate:
             expr = None
-            if self._curr.token_type in
+            if self._curr.token_type in self.SUBQUERY_TOKENS:
                 expr = self._parse_select()
                 self._match_r_paren()
             elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE):

@@ -6706,6 +6930,15 @@ class Parser:
         return self._parse_id_var()

     def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expr]:
+        next_token_type = self._next.token_type
+
+        # Fast path: simple atom (column, literal, null, bool) followed by , or )
+        if (
+            next_token_type in self.LAMBDA_ARG_TERMINATORS
+            and (atom := self._parse_atom()) is not None
+        ):
+            return atom
+
         index = self._index

         if self._match(TokenType.L_PAREN):

@@ -6715,13 +6948,17 @@ class Parser:

             if not self._match(TokenType.R_PAREN):
                 self._retreat(index)
-
+            elif self._match_set(self.LAMBDAS):
+                return self.LAMBDAS[self._prev.token_type](self, expressions)
+            else:
+                self._retreat(index)
+        elif self.TYPED_LAMBDA_ARGS or next_token_type in self.LAMBDAS:
             expressions = [self._parse_lambda_arg()]

-
-
+            if self._match_set(self.LAMBDAS):
+                return self.LAMBDAS[self._prev.token_type](self, expressions)

-
+            self._retreat(index)

         this: t.Optional[exp.Expr]

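
Lambda arguments reuse the same atom fast path, while arrow lambdas still go through the LAMBDAS table. A probe with a higher-order array function, assuming upstream parsing behaviour is preserved:

import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one("SELECT FILTER(xs, x -> x > 0)")
print(ast.find(exp.Lambda) is not None)  # True: the arrow form still builds an exp.Lambda
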
@@ -7004,6 +7241,8 @@ class Parser:
         return result

     def _parse_unique_key(self) -> t.Optional[exp.Expr]:
+        if self._curr and self._curr.text.upper() in self.CONSTRAINT_PARSERS:
+            return None
         return self._parse_id_var(any_token=False)

     def _parse_unique(self) -> exp.UniqueColumnConstraint:

@@ -7800,10 +8039,11 @@ class Parser:
         return self._parse_window(self._parse_id_var(), alias=True)

     def _parse_respect_or_ignore_nulls(self, this: t.Optional[exp.Expr]) -> t.Optional[exp.Expr]:
-        if self.
-
-
-
+        if self._curr.token_type == TokenType.VAR:
+            if self._match_text_seq("IGNORE", "NULLS"):
+                return self.expression(exp.IgnoreNulls(this=this))
+            if self._match_text_seq("RESPECT", "NULLS"):
+                return self.expression(exp.RespectNulls(this=this))
         return this

     def _parse_having_max(self, this: t.Optional[exp.Expr]) -> t.Optional[exp.Expr]:

@@ -8105,20 +8345,6 @@ class Parser:

         return items

-    def _parse_tokens(
-        self, parse_method: t.Callable[[], t.Optional[exp.Expr]], expressions: t.Dict
-    ) -> t.Optional[exp.Expr]:
-        this = parse_method()
-
-        while self._match_set(expressions):
-            comments = self._prev_comments
-            this = self.expression(
-                expressions[self._prev.token_type](this=this, expression=parse_method()),
-                comments=comments,
-            )
-
-        return this
-
     def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expr]:
         return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

{sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/parsers/bigquery.py

@@ -378,12 +378,19 @@ class BigQueryParser(parser.Parser):
         return this

     def _parse_table_parts(
-        self,
-
+        self,
+        schema: bool = False,
+        is_db_reference: bool = False,
+        wildcard: bool = False,
+        fast: bool = False,
+    ) -> t.Optional[exp.Table | exp.Dot]:
         table = super()._parse_table_parts(
-            schema=schema, is_db_reference=is_db_reference, wildcard=True
+            schema=schema, is_db_reference=is_db_reference, wildcard=True, fast=fast
         )

+        if not isinstance(table, exp.Table):
+            return table
+
         # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
         if not table.catalog:
             if table.db:
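
The early return guards the dashed-project-id fixup below against the fast path handing back something other than an exp.Table. A probe of the quirk the comment describes, assuming upstream BigQuery behaviour is preserved:

import sqlglot
from sqlglot import exp

t = sqlglot.parse_one("SELECT * FROM proj-1.db.tbl", read="bigquery").find(exp.Table)
print(t.catalog, t.db, t.name)  # the dashed project id is reassembled as the catalog part
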
{sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/parsers/prql.py

@@ -55,7 +55,13 @@ class PRQLParser(parser.Parser):
     }

     def _parse_equality(self) -> t.Optional[exp.Expr]:
-        eq = self.
+        eq = self._parse_comparison()
+        while self._match_set(self.EQUALITY):
+            comments = self._prev_comments
+            eq = self.expression(
+                self.EQUALITY[self._prev.token_type](this=eq, expression=self._parse_comparison()),
+                comments=comments,
+            )
         if not isinstance(eq, (exp.EQ, exp.NEQ)):
             return eq

{sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/parsers/snowflake.py

@@ -23,18 +23,6 @@ if t.TYPE_CHECKING:
     from sqlglot._typing import B, E


-def _build_strtok(args: t.List) -> exp.SplitPart:
-    # Add default delimiter (space) if missing - per Snowflake docs
-    if len(args) == 1:
-        args.append(exp.Literal.string(" "))
-
-    # Add default part_index (1) if missing
-    if len(args) == 2:
-        args.append(exp.Literal.number(1))
-
-    return exp.SplitPart.from_arg_list(args)
-
-
 def _build_approx_top_k(args: t.List) -> exp.ApproxTopK:
     """
     Normalizes APPROX_TOP_K arguments to match Snowflake semantics.

@@ -327,6 +315,7 @@ def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[SnowflakeParser],

 class SnowflakeParser(parser.Parser):
     IDENTIFY_PIVOT_STRINGS = True
+    TYPED_LAMBDA_ARGS = True
     DEFAULT_SAMPLING_METHOD = "BERNOULLI"
     COLON_IS_VARIANT_EXTRACT = True
     JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = True

@@ -433,6 +422,12 @@ class SnowflakeParser(parser.Parser):
         ),
         "ARRAY_SORT": _build_array_sort,
         "ARRAY_FLATTEN": exp.Flatten.from_arg_list,
+        "ARRAY_TO_STRING": lambda args: exp.ArrayToString(
+            this=seq_get(args, 0),
+            expression=seq_get(args, 1),
+            null_is_empty=True,
+            null_delim_is_null=True,
+        ),
         "ARRAYS_OVERLAP": lambda args: exp.ArrayOverlaps(
             this=seq_get(args, 0), expression=seq_get(args, 1), null_safe=True
         ),

@@ -561,7 +556,6 @@ class SnowflakeParser(parser.Parser):
         ),
         "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
         "STDDEV_SAMP": exp.Stddev.from_arg_list,
-        "STRTOK": _build_strtok,
         "SYSDATE": lambda args: exp.CurrentTimestamp(this=seq_get(args, 0), sysdate=True),
         "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)),
         "TIMEADD": lambda args: exp.TimeAdd(

@@ -687,6 +681,11 @@ class SnowflakeParser(parser.Parser):
             part_index_zero_as_one=True,
             empty_delimiter_returns_whole=True,
         ),
+        "STRTOK": lambda args: exp.Strtok(
+            this=seq_get(args, 0),
+            delimiter=seq_get(args, 1) or exp.Literal.string(" "),
+            part_index=seq_get(args, 2) or exp.Literal.number("1"),
+        ),
         "SYSTIMESTAMP": exp.CurrentTimestamp.from_arg_list,
         "WEEKISO": exp.WeekOfYear.from_arg_list,
         "WEEKOFYEAR": exp.Week.from_arg_list,

@@ -853,7 +852,7 @@ class SnowflakeParser(parser.Parser):

     def _parse_directory(self) -> exp.DirectoryStage:
         table = self._parse_table_parts()
-        this
+        this = table.this if isinstance(table, exp.Table) else table
         return self.expression(exp.DirectoryStage(this=this))

     def _parse_describe(self) -> exp.Describe:

@@ -987,8 +986,12 @@ class SnowflakeParser(parser.Parser):
         return lateral

     def _parse_table_parts(
-        self,
-
+        self,
+        schema: bool = False,
+        is_db_reference: bool = False,
+        wildcard: bool = False,
+        fast: bool = False,
+    ) -> t.Optional[exp.Table | exp.Dot]:
         # https://docs.snowflake.com/en/user-guide/querying-stage
         if self._match(TokenType.STRING, advance=False):
             table = self._parse_string()

@@ -1016,7 +1019,11 @@ class SnowflakeParser(parser.Parser):

             table = self.expression(exp.Table(this=table, format=file_format, pattern=pattern))
         else:
-            table = super()._parse_table_parts(
+            table = super()._parse_table_parts(
+                schema=schema,
+                is_db_reference=is_db_reference,
+                fast=fast,
+            )

         return table

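
Together with the new expression classes above, STRTOK now parses to exp.Strtok with Snowflake's documented defaults, and ARRAY_TO_STRING carries Snowflake's NULL semantics on the node. A sketch, assuming the compiled package exposes the standard sqlglot.parse_one API:

import sqlglot
from sqlglot import exp

strtok = sqlglot.parse_one("SELECT STRTOK('a.b.c', '.', 2)", read="snowflake").find(exp.Strtok)
print(strtok.args["part_index"].this)  # the explicit part index, 2

arr = sqlglot.parse_one("SELECT ARRAY_TO_STRING(tags, ',')", read="snowflake").find(exp.ArrayToString)
print(arr.args.get("null_is_empty"))  # True, per the new builder above
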
{sqlglotc-30.0.0 → sqlglotc-30.0.2}/sqlglot/tokenizer_core.py

@@ -99,6 +99,7 @@ class TokenType(IntEnum):
     TABLE = auto()
     WAREHOUSE = auto()
     STAGE = auto()
+    STREAM = auto()
     STREAMLIT = auto()
     VAR = auto()
     BIT_STRING = auto()

@@ -625,9 +626,6 @@ class TokenizerCore:
         self.numbers_can_be_underscore_separated = numbers_can_be_underscore_separated
         self.identifiers_can_start_with_digit = identifiers_can_start_with_digit
         self.unescaped_sequences = unescaped_sequences
-        self.reset()
-
-    def reset(self) -> None:
         self.sql = ""
         self.size = 0
         self.tokens: t.List[Token] = []

@@ -641,6 +639,20 @@ class TokenizerCore:
         self._peek = ""
         self._prev_token_line = -1

+    def reset(self) -> None:
+        self.sql = ""
+        self.size = 0
+        self.tokens = []
+        self._start = 0
+        self._current = 0
+        self._line = 1
+        self._col = 0
+        self._comments = []
+        self._char = ""
+        self._end = False
+        self._peek = ""
+        self._prev_token_line = -1
+
     def tokenize(self, sql: str) -> t.List[Token]:
         """Returns a list of tokens corresponding to the SQL string `sql`."""
         self.reset()
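
The new STREAM token type backs the TokenType.STREAM matches added in the parser hunks above, and the tokenizer mirrors the parser's move of initial state out of reset() into __init__. A tiny probe, assuming the module path shipped in this sdist:

from sqlglot.tokenizer_core import TokenType

print(TokenType.STREAM is not TokenType.STREAMLIT)  # True: STREAM is now a distinct token type
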