sqlglot 26.30.0__py3-none-any.whl → 26.32.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sqlglot/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '26.30.0'
-__version_tuple__ = version_tuple = (26, 30, 0)
+__version__ = version = '26.32.0'
+__version_tuple__ = version_tuple = (26, 32, 0)
sqlglot/dialects/__init__.py CHANGED
@@ -70,6 +70,7 @@ DIALECTS = [
     "ClickHouse",
     "Databricks",
     "Doris",
+    "Dremio",
     "Drill",
     "Druid",
     "DuckDB",
@@ -93,6 +94,7 @@ DIALECTS = [
     "Teradata",
     "Trino",
     "TSQL",
+    "Exasol",
 ]
 
 MODULE_BY_DIALECT = {name: name.lower() for name in DIALECTS}
sqlglot/dialects/athena.py CHANGED
@@ -2,46 +2,218 @@ from __future__ import annotations
 
 import typing as t
 
-from sqlglot import exp
-from sqlglot.dialects.trino import Trino
-from sqlglot.dialects.hive import Hive
-from sqlglot.tokens import TokenType
+from sqlglot import exp, generator, parser, tokens
+from sqlglot.dialects import Dialect, Hive, Trino
+from sqlglot.tokens import TokenType, Token
+
+
+class Athena(Dialect):
+    """
+    Over the years, it looks like AWS has taken various execution engines, bolted on AWS-specific
+    modifications and then built the Athena service around them.
+
+    Thus, Athena is not simply hosted Trino, it's more like a router that routes SQL queries to an
+    execution engine depending on the query type.
+
+    As at 2024-09-10, assuming your Athena workgroup is configured to use "Athena engine version 3",
+    the following engines exist:
+
+    Hive:
+     - Accepts mostly the same syntax as Hadoop / Hive
+     - Uses backticks to quote identifiers
+     - Has a distinctive DDL syntax (around things like setting table properties, storage locations etc)
+       that is different from Trino
+     - Used for *most* DDL, with some exceptions that get routed to the Trino engine instead:
+        - CREATE [EXTERNAL] TABLE (without AS SELECT)
+        - ALTER
+        - DROP
+
+    Trino:
+     - Uses double quotes to quote identifiers
+     - Used for DDL operations that involve SELECT queries, eg:
+        - CREATE VIEW / DROP VIEW
+        - CREATE TABLE... AS SELECT
+     - Used for DML operations
+        - SELECT, INSERT, UPDATE, DELETE, MERGE
+
+    The SQLGlot Athena dialect tries to identify which engine a query would be routed to and then uses the
+    tokenizer / parser / generator for that engine. This is unfortunately necessary, as there are certain
+    incompatibilities between the engines' dialects and thus can't be handled by a single, unifying dialect.
+
+    References:
+    - https://docs.aws.amazon.com/athena/latest/ug/ddl-reference.html
+    - https://docs.aws.amazon.com/athena/latest/ug/dml-queries-functions-operators.html
+    """
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+        self._hive = Hive(**kwargs)
+        self._trino = Trino(**kwargs)
+
+    def tokenize(self, sql: str, **opts) -> t.List[Token]:
+        opts["hive"] = self._hive
+        opts["trino"] = self._trino
+        return super().tokenize(sql, **opts)
+
+    def parse(self, sql: str, **opts) -> t.List[t.Optional[exp.Expression]]:
+        opts["hive"] = self._hive
+        opts["trino"] = self._trino
+        return super().parse(sql, **opts)
+
+    def parse_into(
+        self, expression_type: exp.IntoType, sql: str, **opts
+    ) -> t.List[t.Optional[exp.Expression]]:
+        opts["hive"] = self._hive
+        opts["trino"] = self._trino
+        return super().parse_into(expression_type, sql, **opts)
+
+    def generate(self, expression: exp.Expression, copy: bool = True, **opts) -> str:
+        opts["hive"] = self._hive
+        opts["trino"] = self._trino
+        return super().generate(expression, copy=copy, **opts)
+
+    # This Tokenizer consumes a combination of HiveQL and Trino SQL and then processes the tokens
+    # to disambiguate which dialect needs to be actually used in order to tokenize correctly.
+    class Tokenizer(tokens.Tokenizer):
+        IDENTIFIERS = Trino.Tokenizer.IDENTIFIERS + Hive.Tokenizer.IDENTIFIERS
+        STRING_ESCAPES = Trino.Tokenizer.STRING_ESCAPES + Hive.Tokenizer.STRING_ESCAPES
+        HEX_STRINGS = Trino.Tokenizer.HEX_STRINGS + Hive.Tokenizer.HEX_STRINGS
+        UNICODE_STRINGS = Trino.Tokenizer.UNICODE_STRINGS + Hive.Tokenizer.UNICODE_STRINGS
+
+        NUMERIC_LITERALS = {
+            **Trino.Tokenizer.NUMERIC_LITERALS,
+            **Hive.Tokenizer.NUMERIC_LITERALS,
+        }
+
+        KEYWORDS = {
+            **Hive.Tokenizer.KEYWORDS,
+            **Trino.Tokenizer.KEYWORDS,
+            "UNLOAD": TokenType.COMMAND,
+        }
+
+        def __init__(self, *args: t.Any, **kwargs: t.Any) -> None:
+            hive = kwargs.pop("hive", None) or Hive()
+            trino = kwargs.pop("trino", None) or Trino()
+
+            super().__init__(*args, **kwargs)
+
+            self._hive_tokenizer = hive.tokenizer(*args, **{**kwargs, "dialect": hive})
+            self._trino_tokenizer = _TrinoTokenizer(*args, **{**kwargs, "dialect": trino})
+
+        def tokenize(self, sql: str) -> t.List[Token]:
+            tokens = super().tokenize(sql)
+
+            if _tokenize_as_hive(tokens):
+                return [Token(TokenType.HIVE_TOKEN_STREAM, "")] + self._hive_tokenizer.tokenize(sql)
+
+            return self._trino_tokenizer.tokenize(sql)
+
+    class Parser(parser.Parser):
+        def __init__(self, *args: t.Any, **kwargs: t.Any) -> None:
+            hive = kwargs.pop("hive", None) or Hive()
+            trino = kwargs.pop("trino", None) or Trino()
+
+            super().__init__(*args, **kwargs)
+
+            self._hive_parser = hive.parser(*args, **{**kwargs, "dialect": hive})
+            self._trino_parser = _TrinoParser(*args, **{**kwargs, "dialect": trino})
+
+        def parse(
+            self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
+        ) -> t.List[t.Optional[exp.Expression]]:
+            if raw_tokens and raw_tokens[0].token_type == TokenType.HIVE_TOKEN_STREAM:
+                return self._hive_parser.parse(raw_tokens[1:], sql)
+
+            return self._trino_parser.parse(raw_tokens, sql)
+
+        def parse_into(
+            self,
+            expression_types: exp.IntoType,
+            raw_tokens: t.List[Token],
+            sql: t.Optional[str] = None,
+        ) -> t.List[t.Optional[exp.Expression]]:
+            if raw_tokens and raw_tokens[0].token_type == TokenType.HIVE_TOKEN_STREAM:
+                return self._hive_parser.parse_into(expression_types, raw_tokens[1:], sql)
+
+            return self._trino_parser.parse_into(expression_types, raw_tokens, sql)
+
+    class Generator(generator.Generator):
+        def __init__(self, *args: t.Any, **kwargs: t.Any) -> None:
+            hive = kwargs.pop("hive", None) or Hive()
+            trino = kwargs.pop("trino", None) or Trino()
+
+            super().__init__(*args, **kwargs)
+
+            self._hive_generator = _HiveGenerator(*args, **{**kwargs, "dialect": hive})
+            self._trino_generator = _TrinoGenerator(*args, **{**kwargs, "dialect": trino})
+
+        def generate(self, expression: exp.Expression, copy: bool = True) -> str:
+            if _generate_as_hive(expression):
+                generator = self._hive_generator
+            else:
+                generator = self._trino_generator
+
+            return generator.generate(expression, copy=copy)
+
+
+def _tokenize_as_hive(tokens: t.List[Token]) -> bool:
+    if len(tokens) < 2:
+        return False
+
+    first, second, *rest = tokens
+
+    first_type = first.token_type
+    first_text = first.text.upper()
+    second_type = second.token_type
+    second_text = second.text.upper()
+
+    if first_type in (TokenType.DESCRIBE, TokenType.SHOW) or first_text == "MSCK REPAIR":
+        return True
+
+    if first_type in (TokenType.ALTER, TokenType.CREATE, TokenType.DROP):
+        if second_text in ("DATABASE", "EXTERNAL", "SCHEMA"):
+            return True
+        if second_type == TokenType.VIEW:
+            return False
+
+        return all(t.token_type != TokenType.SELECT for t in rest)
+
+    return False
 
 
 def _generate_as_hive(expression: exp.Expression) -> bool:
     if isinstance(expression, exp.Create):
         if expression.kind == "TABLE":
-            properties: t.Optional[exp.Properties] = expression.args.get("properties")
+            properties = expression.args.get("properties")
+
+            # CREATE EXTERNAL TABLE is Hive
             if properties and properties.find(exp.ExternalProperty):
-                return True  # CREATE EXTERNAL TABLE is Hive
+                return True
 
+            # Any CREATE TABLE other than CREATE TABLE ... AS <query> is Hive
             if not isinstance(expression.expression, exp.Query):
-                return True  # any CREATE TABLE other than CREATE TABLE AS SELECT is Hive
+                return True
         else:
-            return expression.kind != "VIEW"  # CREATE VIEW is never Hive but CREATE SCHEMA etc is
-
-    # https://docs.aws.amazon.com/athena/latest/ug/ddl-reference.html
-    elif isinstance(expression, (exp.Alter, exp.Drop, exp.Describe)):
+            # CREATE VIEW is Trino, but CREATE SCHEMA, CREATE DATABASE, etc, is Hive
+            return expression.kind != "VIEW"
+    elif isinstance(expression, (exp.Alter, exp.Drop, exp.Describe, exp.Show)):
         if isinstance(expression, exp.Drop) and expression.kind == "VIEW":
-            # DROP VIEW is Trino (I guess because CREATE VIEW is)
+            # DROP VIEW is Trino, because CREATE VIEW is as well
             return False
 
-        # Everything else is Hive
+        # Everything else, e.g., ALTER statements, is Hive
        return True
 
     return False
 
 
 def _is_iceberg_table(properties: exp.Properties) -> bool:
-    table_type_property = next(
-        (
-            p
-            for p in properties.expressions
-            if isinstance(p, exp.Property) and p.name == "table_type"
-        ),
-        None,
-    )
-    return bool(table_type_property and table_type_property.text("value").lower() == "iceberg")
+    for p in properties.expressions:
+        if isinstance(p, exp.Property) and p.name == "table_type":
+            return p.text("value").lower() == "iceberg"
+
+    return False
 
 
 def _location_property_sql(self: Athena.Generator, e: exp.LocationProperty):
@@ -64,6 +236,7 @@ def _partitioned_by_property_sql(self: Athena.Generator, e: exp.PartitionedByPro
     # ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
 
     prop_name = "partitioned_by"
+
     if isinstance(e.parent, exp.Properties):
         if _is_iceberg_table(e.parent):
             prop_name = "partitioning"
@@ -71,97 +244,45 @@ def _partitioned_by_property_sql(self: Athena.Generator, e: exp.PartitionedByPro
     return f"{prop_name}={self.sql(e, 'this')}"
 
 
-class Athena(Trino):
-    """
-    Over the years, it looks like AWS has taken various execution engines, bolted on AWS-specific modifications and then
-    built the Athena service around them.
-
-    Thus, Athena is not simply hosted Trino, it's more like a router that routes SQL queries to an execution engine depending
-    on the query type.
-
-    As at 2024-09-10, assuming your Athena workgroup is configured to use "Athena engine version 3", the following engines exist:
-
-    Hive:
-     - Accepts mostly the same syntax as Hadoop / Hive
-     - Uses backticks to quote identifiers
-     - Has a distinctive DDL syntax (around things like setting table properties, storage locations etc) that is different from Trino
-     - Used for *most* DDL, with some exceptions that get routed to the Trino engine instead:
-        - CREATE [EXTERNAL] TABLE (without AS SELECT)
-        - ALTER
-        - DROP
-
-    Trino:
-     - Uses double quotes to quote identifiers
-     - Used for DDL operations that involve SELECT queries, eg:
-        - CREATE VIEW / DROP VIEW
-        - CREATE TABLE... AS SELECT
-     - Used for DML operations
-        - SELECT, INSERT, UPDATE, DELETE, MERGE
-
-    The SQLGlot Athena dialect tries to identify which engine a query would be routed to and then uses the parser / generator for that engine
-    rather than trying to create a universal syntax that can handle both types.
-    """
-
-    class Tokenizer(Trino.Tokenizer):
-        """
-        The Tokenizer is flexible enough to tokenize queries across both the Hive and Trino engines
-        """
-
-        IDENTIFIERS = ['"', "`"]
-        STRING_ESCAPES = ["'", "\\"]
-        KEYWORDS = {
-            **Hive.Tokenizer.KEYWORDS,
-            **Trino.Tokenizer.KEYWORDS,
-            "UNLOAD": TokenType.COMMAND,
-        }
-
-    class Parser(Trino.Parser):
-        """
-        Parse queries for the Athena Trino execution engine
-        """
-
-        STATEMENT_PARSERS = {
-            **Trino.Parser.STATEMENT_PARSERS,
-            TokenType.USING: lambda self: self._parse_as_command(self._prev),
-        }
-
-    class _HiveGenerator(Hive.Generator):
-        def alter_sql(self, expression: exp.Alter) -> str:
-            # package any ALTER TABLE ADD actions into a Schema object
-            # so it gets generated as `ALTER TABLE .. ADD COLUMNS(...)`
-            # instead of `ALTER TABLE ... ADD COLUMN` which is invalid syntax on Athena
-            if isinstance(expression, exp.Alter) and expression.kind == "TABLE":
-                if expression.actions and isinstance(expression.actions[0], exp.ColumnDef):
-                    new_actions = exp.Schema(expressions=expression.actions)
-                    expression.set("actions", [new_actions])
-
-            return super().alter_sql(expression)
-
-    class Generator(Trino.Generator):
-        """
-        Generate queries for the Athena Trino execution engine
-        """
-
-        PROPERTIES_LOCATION = {
-            **Trino.Generator.PROPERTIES_LOCATION,
-            exp.LocationProperty: exp.Properties.Location.POST_WITH,
-        }
-
-        TRANSFORMS = {
-            **Trino.Generator.TRANSFORMS,
-            exp.PartitionedByProperty: _partitioned_by_property_sql,
-            exp.LocationProperty: _location_property_sql,
-        }
-
-        def __init__(self, *args, **kwargs):
-            super().__init__(*args, **kwargs)
-
-            hive_kwargs = {**kwargs, "dialect": "hive"}
-
-            self._hive_generator = Athena._HiveGenerator(*args, **hive_kwargs)
-
-        def generate(self, expression: exp.Expression, copy: bool = True) -> str:
-            if _generate_as_hive(expression):
-                return self._hive_generator.generate(expression, copy)
-
-            return super().generate(expression, copy)
+# Athena extensions to Hive's generator
+class _HiveGenerator(Hive.Generator):
+    def alter_sql(self, expression: exp.Alter) -> str:
+        # Package any ALTER TABLE ADD actions into a Schema object, so it gets generated as
+        # `ALTER TABLE .. ADD COLUMNS(...)`, instead of `ALTER TABLE ... ADD COLUMN`, which
+        # is invalid syntax on Athena
+        if isinstance(expression, exp.Alter) and expression.kind == "TABLE":
+            if expression.actions and isinstance(expression.actions[0], exp.ColumnDef):
+                new_actions = exp.Schema(expressions=expression.actions)
+                expression.set("actions", [new_actions])
+
+        return super().alter_sql(expression)
+
+
+# Athena extensions to Trino's tokenizer
+class _TrinoTokenizer(Trino.Tokenizer):
+    KEYWORDS = {
+        **Trino.Tokenizer.KEYWORDS,
+        "UNLOAD": TokenType.COMMAND,
+    }
+
+
+# Athena extensions to Trino's parser
+class _TrinoParser(Trino.Parser):
+    STATEMENT_PARSERS = {
+        **Trino.Parser.STATEMENT_PARSERS,
+        TokenType.USING: lambda self: self._parse_as_command(self._prev),
+    }
+
+
+# Athena extensions to Trino's generator
+class _TrinoGenerator(Trino.Generator):
+    PROPERTIES_LOCATION = {
+        **Trino.Generator.PROPERTIES_LOCATION,
+        exp.LocationProperty: exp.Properties.Location.POST_WITH,
+    }
+
+    TRANSFORMS = {
+        **Trino.Generator.TRANSFORMS,
+        exp.PartitionedByProperty: _partitioned_by_property_sql,
+        exp.LocationProperty: _location_property_sql,
+    }
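
The engine routing described in the Athena docstring above is visible through the public API. A minimal sketch (not part of the package diff, assuming sqlglot 26.32.0); the statements are illustrative:

    import sqlglot

    # DDL with no SELECT routes to the Hive engine
    print(sqlglot.transpile("ALTER TABLE t ADD COLUMNS (y INT)", read="athena", write="athena")[0])

    # CREATE VIEW routes to the Trino engine
    print(sqlglot.transpile("CREATE VIEW v AS SELECT 1 AS c", read="athena", write="athena")[0])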
sqlglot/dialects/bigquery.py CHANGED
@@ -30,6 +30,7 @@ from sqlglot.dialects.dialect import (
     unit_to_var,
     strposition_sql,
     groupconcat_sql,
+    space_sql,
 )
 from sqlglot.helper import seq_get, split_num_words
 from sqlglot.tokens import TokenType
@@ -444,6 +445,7 @@ class BigQuery(Dialect):
                 exp.Substring,
             )
         },
+        exp.ArrayConcat: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
         exp.Concat: _annotate_concat,
         exp.Sign: lambda self, e: self._annotate_by_args(e, "this"),
         exp.Split: lambda self, e: self._annotate_by_args(e, "this", array=True),
@@ -543,7 +545,7 @@ class BigQuery(Dialect):
             "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
             "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
             "DATE_TRUNC": lambda args: exp.DateTrunc(
-                unit=exp.Literal.string(str(seq_get(args, 1))),
+                unit=seq_get(args, 1),
                 this=seq_get(args, 0),
                 zone=seq_get(args, 2),
             ),
@@ -963,9 +965,6 @@ class BigQuery(Dialect):
             exp.DateSub: date_add_interval_sql("DATE", "SUB"),
             exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
             exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
-            exp.DateTrunc: lambda self, e: self.func(
-                "DATE_TRUNC", e.this, e.text("unit"), e.args.get("zone")
-            ),
             exp.FromTimeZone: lambda self, e: self.func(
                 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
             ),
@@ -1014,6 +1013,7 @@ class BigQuery(Dialect):
             ),
             exp.SHA: rename_func("SHA1"),
             exp.SHA2: sha256_sql,
+            exp.Space: space_sql,
             exp.StabilityProperty: lambda self, e: (
                 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
             ),
@@ -1195,6 +1195,11 @@ class BigQuery(Dialect):
             "within",
         }
 
+        def datetrunc_sql(self, expression: exp.DateTrunc) -> str:
+            unit = expression.unit
+            unit_sql = unit.name if unit.is_string else self.sql(unit)
+            return self.func("DATE_TRUNC", expression.this, unit_sql, expression.args.get("zone"))
+
         def mod_sql(self, expression: exp.Mod) -> str:
             this = expression.this
             expr = expression.expression
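
Taken together, the DATE_TRUNC changes above stop coercing the unit to a string literal at parse time: the parser keeps the raw unit expression, and the new datetrunc_sql emits it back out. A rough round-trip sketch (illustrative, not part of the diff):

    import sqlglot

    # The unit survives as a bare identifier instead of a quoted string
    print(sqlglot.transpile("SELECT DATE_TRUNC(d, MONTH)", read="bigquery", write="bigquery")[0])
    # Expected: SELECT DATE_TRUNC(d, MONTH)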
sqlglot/dialects/clickhouse.py CHANGED
@@ -303,6 +303,8 @@ class ClickHouse(Dialect):
             **parser.Parser.FUNCTIONS,
             "ANY": exp.AnyValue.from_arg_list,
             "ARRAYSUM": exp.ArraySum.from_arg_list,
+            "ARRAYREVERSE": exp.ArrayReverse.from_arg_list,
+            "ARRAYSLICE": exp.ArraySlice.from_arg_list,
             "COUNTIF": _build_count_if,
             "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None),
             "DATEADD": build_date_delta(exp.DateAdd, default_unit=None),
@@ -330,6 +332,7 @@ class ClickHouse(Dialect):
             "MD5": exp.MD5Digest.from_arg_list,
             "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
             "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
+            "SUBSTRINGINDEX": exp.SubstringIndex.from_arg_list,  # alias for camel-case substringIndex
             "EDITDISTANCE": exp.Levenshtein.from_arg_list,
             "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list,
         }
@@ -1065,6 +1068,8 @@ class ClickHouse(Dialect):
             exp.ArrayConcat: rename_func("arrayConcat"),
             exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
             exp.ArrayRemove: remove_from_array_using_filter,
+            exp.ArrayReverse: rename_func("arrayReverse"),
+            exp.ArraySlice: rename_func("arraySlice"),
             exp.ArraySum: rename_func("arraySum"),
             exp.ArgMax: arg_max_or_min_no_count("argMax"),
             exp.ArgMin: arg_max_or_min_no_count("argMin"),
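
The parser and generator entries above give the camelCase ClickHouse array helpers first-class AST nodes (exp.ArrayReverse, exp.ArraySlice) instead of anonymous-function fallbacks. A quick round-trip sketch (illustrative):

    import sqlglot

    print(sqlglot.transpile("SELECT arraySlice(arr, 2, 3)", read="clickhouse", write="clickhouse")[0])
    # Expected: SELECT arraySlice(arr, 2, 3)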
sqlglot/dialects/databricks.py CHANGED
@@ -9,6 +9,7 @@ from sqlglot.dialects.dialect import (
     build_date_delta,
     timestamptrunc_sql,
     build_formatted_time,
+    groupconcat_sql,
 )
 from sqlglot.dialects.spark import Spark
 from sqlglot.tokens import TokenType
@@ -87,6 +88,7 @@ class Databricks(Spark):
                 e.this,
             ),
             exp.DatetimeTrunc: timestamptrunc_sql(),
+            exp.GroupConcat: groupconcat_sql,
             exp.Select: transforms.preprocess(
                 [
                     transforms.eliminate_distinct_on,
sqlglot/dialects/dialect.py CHANGED
@@ -73,6 +73,7 @@ class Dialects(str, Enum):
     CLICKHOUSE = "clickhouse"
     DATABRICKS = "databricks"
     DORIS = "doris"
+    DREMIO = "dremio"
     DRILL = "drill"
     DRUID = "druid"
     DUCKDB = "duckdb"
@@ -96,6 +97,7 @@ class Dialects(str, Enum):
     TERADATA = "teradata"
     TRINO = "trino"
     TSQL = "tsql"
+    EXASOL = "exasol"
 
 
 class NormalizationStrategy(str, AutoName):
@@ -700,6 +702,9 @@ class Dialect(metaclass=_Dialect):
             exp.TimeAdd,
             exp.TimeSub,
         },
+        exp.DataType.Type.TIMESTAMPTZ: {
+            exp.CurrentTimestampLTZ,
+        },
         exp.DataType.Type.TIMESTAMP: {
             exp.CurrentTimestamp,
             exp.StrToTime,
@@ -755,6 +760,12 @@ class Dialect(metaclass=_Dialect):
         exp.Array: lambda self, e: self._annotate_by_args(e, "expressions", array=True),
         exp.ArrayAgg: lambda self, e: self._annotate_by_args(e, "this", array=True),
         exp.ArrayConcat: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
+        exp.ArrayConcatAgg: lambda self, e: self._annotate_by_args(e, "this"),
+        exp.ArrayToString: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TEXT),
+        exp.ArrayFirst: lambda self, e: self._annotate_by_array_element(e),
+        exp.ArrayLast: lambda self, e: self._annotate_by_array_element(e),
+        exp.ArrayReverse: lambda self, e: self._annotate_by_args(e, "this"),
+        exp.ArraySlice: lambda self, e: self._annotate_by_args(e, "this"),
         exp.Bracket: lambda self, e: self._annotate_bracket(e),
         exp.Cast: lambda self, e: self._annotate_with_type(e, e.args["to"]),
         exp.Case: lambda self, e: self._annotate_by_args(e, "default", "ifs"),
@@ -1024,22 +1035,20 @@ class Dialect(metaclass=_Dialect):
             for expression in self.parse(sql)
         ]
 
-    def tokenize(self, sql: str) -> t.List[Token]:
-        return self.tokenizer.tokenize(sql)
+    def tokenize(self, sql: str, **opts) -> t.List[Token]:
+        return self.tokenizer(**opts).tokenize(sql)
 
-    @property
-    def tokenizer(self) -> Tokenizer:
-        return self.tokenizer_class(dialect=self)
+    def tokenizer(self, **opts) -> Tokenizer:
+        return self.tokenizer_class(**{"dialect": self, **opts})
 
-    @property
-    def jsonpath_tokenizer(self) -> JSONPathTokenizer:
-        return self.jsonpath_tokenizer_class(dialect=self)
+    def jsonpath_tokenizer(self, **opts) -> JSONPathTokenizer:
+        return self.jsonpath_tokenizer_class(**{"dialect": self, **opts})
 
     def parser(self, **opts) -> Parser:
-        return self.parser_class(dialect=self, **opts)
+        return self.parser_class(**{"dialect": self, **opts})
 
     def generator(self, **opts) -> Generator:
-        return self.generator_class(dialect=self, **opts)
+        return self.generator_class(**{"dialect": self, **opts})
 
     def generate_values_aliases(self, expression: exp.Values) -> t.List[exp.Identifier]:
         return [
1054
  return [
@@ -1906,21 +1915,32 @@ def groupconcat_sql(
1906
1915
 
1907
1916
 
1908
1917
  def build_timetostr_or_tochar(args: t.List, dialect: Dialect) -> exp.TimeToStr | exp.ToChar:
1909
- this = seq_get(args, 0)
1910
- format = seq_get(args, 1)
1911
-
1912
- if this:
1918
+ if len(args) == 2:
1919
+ this = args[0]
1913
1920
  if not this.type:
1914
1921
  from sqlglot.optimizer.annotate_types import annotate_types
1915
1922
 
1916
1923
  annotate_types(this, dialect=dialect)
1917
1924
 
1918
- from sqlglot.dialects import Snowflake
1919
-
1920
- if this.is_type(*exp.DataType.TEMPORAL_TYPES) or (
1921
- isinstance(format, exp.Literal) and format.name in Snowflake.TIME_MAPPING
1922
- ):
1925
+ if this.is_type(*exp.DataType.TEMPORAL_TYPES):
1923
1926
  dialect_name = dialect.__class__.__name__.lower()
1924
1927
  return build_formatted_time(exp.TimeToStr, dialect_name, default=True)(args)
1925
1928
 
1926
1929
  return exp.ToChar.from_arg_list(args)
1930
+
1931
+
1932
+ def build_replace_with_optional_replacement(args: t.List) -> exp.Replace:
1933
+ return exp.Replace(
1934
+ this=seq_get(args, 0),
1935
+ expression=seq_get(args, 1),
1936
+ replacement=seq_get(args, 2) or exp.Literal.string(""),
1937
+ )
1938
+
1939
+
1940
+ def space_sql(self: Generator, expression: exp.Space) -> str:
1941
+ return self.sql(
1942
+ exp.Repeat(
1943
+ this=exp.Literal.string(" "),
1944
+ times=expression.this,
1945
+ )
1946
+ )
sqlglot/dialects/dremio.py ADDED
@@ -0,0 +1,53 @@
+from sqlglot import expressions as exp
+from sqlglot import parser, generator, tokens
+from sqlglot.dialects.dialect import Dialect
+
+
+class Dremio(Dialect):
+    SUPPORTS_USER_DEFINED_TYPES = False
+    CONCAT_COALESCE = True
+    TYPED_DIVISION = True
+    SUPPORTS_SEMI_ANTI_JOIN = False
+    NULL_ORDERING = "nulls_are_last"
+    SUPPORTS_VALUES_DEFAULT = False
+
+    class Parser(parser.Parser):
+        LOG_DEFAULTS_TO_LN = True
+
+    class Generator(generator.Generator):
+        NVL2_SUPPORTED = False
+        SUPPORTS_CONVERT_TIMEZONE = True
+        INTERVAL_ALLOWS_PLURAL_FORM = False
+        JOIN_HINTS = False
+        LIMIT_ONLY_LITERALS = True
+        MULTI_ARG_DISTINCT = False
+
+        # https://docs.dremio.com/current/reference/sql/data-types/
+        TYPE_MAPPING = {
+            **generator.Generator.TYPE_MAPPING,
+            exp.DataType.Type.SMALLINT: "INT",
+            exp.DataType.Type.TINYINT: "INT",
+            exp.DataType.Type.BINARY: "VARBINARY",
+            exp.DataType.Type.TEXT: "VARCHAR",
+            exp.DataType.Type.NCHAR: "VARCHAR",
+            exp.DataType.Type.CHAR: "VARCHAR",
+            exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
+            exp.DataType.Type.DATETIME: "TIMESTAMP",
+            exp.DataType.Type.ARRAY: "LIST",
+            exp.DataType.Type.BIT: "BOOLEAN",
+        }
+
+        def datatype_sql(self, expression: exp.DataType) -> str:
+            """
+            Reject time-zone–aware TIMESTAMPs, which Dremio does not accept
+            """
+            if expression.is_type(
+                exp.DataType.Type.TIMESTAMPTZ,
+                exp.DataType.Type.TIMESTAMPLTZ,
+            ):
+                self.unsupported("Dremio does not support time-zone-aware TIMESTAMP")
+
+            return super().datatype_sql(expression)
+
+    class Tokenizer(tokens.Tokenizer):
+        COMMENTS = ["--", "//", ("/*", "*/")]
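
Because the module is also registered in DIALECTS above, the new dialect is addressable by name. A quick sketch (illustrative) of the TYPE_MAPPING in action:

    import sqlglot

    print(sqlglot.transpile("CREATE TABLE t (a TEXT, b TINYINT)", write="dremio")[0])
    # Expected: CREATE TABLE t (a VARCHAR, b INT)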