PyPI - sqlglot - Versions diffs - 27.29.0__py3-none-any.whl → 28.4.1__py3-none-any.whl - Mend

sqlglot 27.29.0__py3-none-any.whl → 28.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (63) hide show

sqlglot/__main__.py +6 -4
sqlglot/_version.py +2 -2
sqlglot/dialects/bigquery.py +116 -295
sqlglot/dialects/clickhouse.py +67 -2
sqlglot/dialects/databricks.py +38 -1
sqlglot/dialects/dialect.py +327 -286
sqlglot/dialects/dremio.py +4 -1
sqlglot/dialects/duckdb.py +718 -22
sqlglot/dialects/exasol.py +243 -10
sqlglot/dialects/hive.py +8 -8
sqlglot/dialects/mysql.py +11 -2
sqlglot/dialects/oracle.py +29 -0
sqlglot/dialects/postgres.py +46 -24
sqlglot/dialects/presto.py +47 -16
sqlglot/dialects/redshift.py +16 -0
sqlglot/dialects/risingwave.py +3 -0
sqlglot/dialects/singlestore.py +12 -3
sqlglot/dialects/snowflake.py +199 -271
sqlglot/dialects/spark.py +2 -2
sqlglot/dialects/spark2.py +11 -48
sqlglot/dialects/sqlite.py +9 -0
sqlglot/dialects/teradata.py +5 -8
sqlglot/dialects/trino.py +6 -0
sqlglot/dialects/tsql.py +61 -25
sqlglot/diff.py +4 -2
sqlglot/errors.py +69 -0
sqlglot/expressions.py +484 -84
sqlglot/generator.py +143 -41
sqlglot/helper.py +2 -2
sqlglot/optimizer/annotate_types.py +247 -140
sqlglot/optimizer/canonicalize.py +6 -1
sqlglot/optimizer/eliminate_joins.py +1 -1
sqlglot/optimizer/eliminate_subqueries.py +2 -2
sqlglot/optimizer/merge_subqueries.py +5 -5
sqlglot/optimizer/normalize.py +20 -13
sqlglot/optimizer/normalize_identifiers.py +17 -3
sqlglot/optimizer/optimizer.py +4 -0
sqlglot/optimizer/pushdown_predicates.py +1 -1
sqlglot/optimizer/qualify.py +14 -6
sqlglot/optimizer/qualify_columns.py +113 -352
sqlglot/optimizer/qualify_tables.py +112 -70
sqlglot/optimizer/resolver.py +374 -0
sqlglot/optimizer/scope.py +27 -16
sqlglot/optimizer/simplify.py +1074 -964
sqlglot/optimizer/unnest_subqueries.py +12 -2
sqlglot/parser.py +276 -160
sqlglot/planner.py +2 -2
sqlglot/schema.py +15 -4
sqlglot/tokens.py +42 -7
sqlglot/transforms.py +77 -22
sqlglot/typing/__init__.py +316 -0
sqlglot/typing/bigquery.py +376 -0
sqlglot/typing/hive.py +12 -0
sqlglot/typing/presto.py +24 -0
sqlglot/typing/snowflake.py +505 -0
sqlglot/typing/spark2.py +58 -0
sqlglot/typing/tsql.py +9 -0
{sqlglot-27.29.0.dist-info → sqlglot-28.4.1.dist-info}/METADATA +2 -2
sqlglot-28.4.1.dist-info/RECORD +92 -0
sqlglot-27.29.0.dist-info/RECORD +0 -84
{sqlglot-27.29.0.dist-info → sqlglot-28.4.1.dist-info}/WHEEL +0 -0
{sqlglot-27.29.0.dist-info → sqlglot-28.4.1.dist-info}/licenses/LICENSE +0 -0
{sqlglot-27.29.0.dist-info → sqlglot-28.4.1.dist-info}/top_level.txt +0 -0

sqlglot/dialects/spark.py CHANGED Viewed

@@ -4,7 +4,6 @@ import typing as t
 from sqlglot import exp
 from sqlglot.dialects.dialect import (
-    Version,
     rename_func,
     build_like,
     unit_to_var,
@@ -100,7 +99,7 @@ def _dateadd_sql(self: Spark.Generator, expression: exp.TsOrDsAdd | exp.Timestam
 def _groupconcat_sql(self: Spark.Generator, expression: exp.GroupConcat) -> str:
-    if self.dialect.version < Version("4.0.0"):
+    if self.dialect.version < (4,):
         expr = exp.ArrayToString(
             this=exp.ArrayAgg(this=expression.this),
             expression=expression.args.get("separator") or exp.Literal.string(""),
@@ -112,6 +111,7 @@ def _groupconcat_sql(self: Spark.Generator, expression: exp.GroupConcat) -> str:
 class Spark(Spark2):
     SUPPORTS_ORDER_BY_ALL = True
+    SUPPORTS_NULL_TYPE = True
     class Tokenizer(Spark2.Tokenizer):
         STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS = False

sqlglot/dialects/spark2.py CHANGED Viewed

@@ -13,7 +13,7 @@ from sqlglot.dialects.dialect import (
     unit_to_str,
 )
 from sqlglot.dialects.hive import Hive
-from sqlglot.helper import seq_get, ensure_list
+from sqlglot.helper import seq_get
 from sqlglot.tokens import TokenType
 from sqlglot.transforms import (
     preprocess,
@@ -21,11 +21,7 @@ from sqlglot.transforms import (
     ctas_with_tmp_tables_to_create_tmp_view,
     move_schema_columns_to_partitioned_by,
 )
-if t.TYPE_CHECKING:
-    from sqlglot._typing import E
-    from sqlglot.optimizer.annotate_types import TypeAnnotator
+from sqlglot.typing.spark2 import EXPRESSION_METADATA
 def _map_sql(self: Spark2.Generator, expression: exp.Map) -> str:
@@ -118,51 +114,15 @@ def temporary_storage_provider(expression: exp.Expression) -> exp.Expression:
     return expression
-def _annotate_by_similar_args(
-    self: TypeAnnotator, expression: E, *args: str, target_type: exp.DataType | exp.DataType.Type
-) -> E:
-    """
-    Infers the type of the expression according to the following rules:
-    - If all args are of the same type OR any arg is of target_type, the expr is inferred as such
-    - If any arg is of UNKNOWN type and none of target_type, the expr is inferred as UNKNOWN
-    """
-    self._annotate_args(expression)
-    expressions: t.List[exp.Expression] = []
-    for arg in args:
-        arg_expr = expression.args.get(arg)
-        expressions.extend(expr for expr in ensure_list(arg_expr) if expr)
-    last_datatype = None
-    has_unknown = False
-    for expr in expressions:
-        if expr.is_type(exp.DataType.Type.UNKNOWN):
-            has_unknown = True
-        elif expr.is_type(target_type):
-            has_unknown = False
-            last_datatype = target_type
-            break
-        else:
-            last_datatype = expr.type
-    self._set_type(expression, exp.DataType.Type.UNKNOWN if has_unknown else last_datatype)
-    return expression
 class Spark2(Hive):
     ALTER_TABLE_SUPPORTS_CASCADE = False
-    ANNOTATORS = {
-        **Hive.ANNOTATORS,
-        exp.Substring: lambda self, e: self._annotate_by_args(e, "this"),
-        exp.Concat: lambda self, e: _annotate_by_similar_args(
-            self, e, "expressions", target_type=exp.DataType.Type.TEXT
-        ),
-        exp.Pad: lambda self, e: _annotate_by_similar_args(
-            self, e, "this", "fill_pattern", target_type=exp.DataType.Type.TEXT
-        ),
-    }
+    EXPRESSION_METADATA = EXPRESSION_METADATA.copy()
+    # https://spark.apache.org/docs/latest/api/sql/index.html#initcap
+    # https://docs.databricks.com/aws/en/sql/language-manual/functions/initcap
+    # https://github.com/apache/spark/blob/master/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java#L859-L905
+    INITCAP_DEFAULT_DELIMITER_CHARS = " "
     class Tokenizer(Hive.Tokenizer):
         HEX_STRINGS = [("X'", "'"), ("x'", "'")]
@@ -322,6 +282,9 @@ class Spark2(Hive):
                     transforms.any_to_exists,
                 ]
             ),
+            exp.SHA2Digest: lambda self, e: self.func(
+                "SHA2", e.this, e.args.get("length") or exp.Literal.number(256)
+            ),
             exp.StrToDate: _str_to_date,
             exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)),
             exp.TimestampTrunc: lambda self, e: self.func("DATE_TRUNC", unit_to_str(e), e.this),

sqlglot/dialects/sqlite.py CHANGED Viewed

@@ -18,6 +18,7 @@ from sqlglot.dialects.dialect import (
     strposition_sql,
 )
 from sqlglot.generator import unsupported_args
+from sqlglot.parser import binary_range_parser
 from sqlglot.tokens import TokenType
@@ -101,6 +102,8 @@ class SQLite(Dialect):
             **tokens.Tokenizer.KEYWORDS,
             "ATTACH": TokenType.ATTACH,
             "DETACH": TokenType.DETACH,
+            "INDEXED BY": TokenType.INDEXED_BY,
+            "MATCH": TokenType.MATCH,
         }
         KEYWORDS.pop("/*+")
@@ -127,6 +130,12 @@ class SQLite(Dialect):
             TokenType.DETACH: lambda self: self._parse_attach_detach(is_attach=False),
         }
+        RANGE_PARSERS = {
+            **parser.Parser.RANGE_PARSERS,
+            # https://www.sqlite.org/lang_expr.html
+            TokenType.MATCH: binary_range_parser(exp.Match),
+        }
         def _parse_unique(self) -> exp.UniqueColumnConstraint:
             # Do not consume more tokens if UNIQUE is used as a standalone constraint, e.g:
             # CREATE TABLE foo (bar TEXT UNIQUE REFERENCES baz ...)

sqlglot/dialects/teradata.py CHANGED Viewed

@@ -213,13 +213,10 @@ class Teradata(Dialect):
         def _parse_update(self) -> exp.Update:
             return self.expression(
                 exp.Update,
-                **{  # type: ignore
-                    "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
-                    "from": self._parse_from(joins=True),
-                    "expressions": self._match(TokenType.SET)
-                    and self._parse_csv(self._parse_equality),
-                    "where": self._parse_where(),
-                },
+                this=self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
+                from_=self._parse_from(joins=True),
+                expressions=self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
+                where=self._parse_where(),
             )
         def _parse_rangen(self):
@@ -387,7 +384,7 @@ class Teradata(Dialect):
         # https://docs.teradata.com/r/Enterprise_IntelliFlex_VMware/Teradata-VantageTM-SQL-Data-Manipulation-Language-17.20/Statement-Syntax/UPDATE/UPDATE-Syntax-Basic-Form-FROM-Clause
         def update_sql(self, expression: exp.Update) -> str:
             this = self.sql(expression, "this")
-            from_sql = self.sql(expression, "from")
+            from_sql = self.sql(expression, "from_")
             set_sql = self.expressions(expression, flat=True)
             where_sql = self.sql(expression, "where")
             sql = f"UPDATE {this}{from_sql} SET {set_sql}{where_sql}"

sqlglot/dialects/trino.py CHANGED Viewed

@@ -16,6 +16,12 @@ class Trino(Presto):
     SUPPORTS_USER_DEFINED_TYPES = False
     LOG_BASE_FIRST = True
+    class Tokenizer(Presto.Tokenizer):
+        KEYWORDS = {
+            **Presto.Tokenizer.KEYWORDS,
+            "REFRESH": TokenType.REFRESH,
+        }
     class Parser(Presto.Parser):
         FUNCTION_PARSERS = {
             **Presto.Parser.FUNCTION_PARSERS,

sqlglot/dialects/tsql.py CHANGED Viewed

@@ -20,11 +20,13 @@ from sqlglot.dialects.dialect import (
     strposition_sql,
     timestrtotime_sql,
     trim_sql,
+    map_date_part,
 )
 from sqlglot.helper import seq_get
 from sqlglot.parser import build_coalesce
 from sqlglot.time import format_time
 from sqlglot.tokens import TokenType
+from sqlglot.typing.tsql import EXPRESSION_METADATA
 if t.TYPE_CHECKING:
     from sqlglot._typing import E
@@ -56,6 +58,11 @@ DATE_DELTA_INTERVAL = {
     "d": "day",
 }
+DATE_PART_UNMAPPING = {
+    "WEEKISO": "ISO_WEEK",
+    "DAYOFWEEK": "WEEKDAY",
+    "TIMEZONE_MINUTE": "TZOFFSET",
+}
 DATE_FMT_RE = re.compile("([dD]{1,2})|([mM]{1,2})|([yY]{1,4})|([hH]{1,2})|([sS]{1,2})")
@@ -200,20 +207,12 @@ def _build_hashbytes(args: t.List) -> exp.Expression:
     return exp.func("HASHBYTES", *args)
-DATEPART_ONLY_FORMATS = {"DW", "WK", "HOUR", "QUARTER", "ISO_WEEK"}
 def _format_sql(self: TSQL.Generator, expression: exp.NumberToStr | exp.TimeToStr) -> str:
     fmt = expression.args["format"]
     if not isinstance(expression, exp.NumberToStr):
         if fmt.is_string:
             mapped_fmt = format_time(fmt.name, TSQL.INVERSE_TIME_MAPPING)
-            name = (mapped_fmt or "").upper()
-            if name in DATEPART_ONLY_FORMATS:
-                return self.func("DATEPART", name, expression.this)
             fmt_sql = self.sql(exp.Literal.string(mapped_fmt))
         else:
             fmt_sql = self.format_time(expression) or self.sql(fmt)
@@ -243,7 +242,7 @@ def _string_agg_sql(self: TSQL.Generator, expression: exp.GroupConcat) -> str:
 def _build_date_delta(
-    exp_class: t.Type[E], unit_mapping: t.Optional[t.Dict[str, str]] = None
+    exp_class: t.Type[E], unit_mapping: t.Optional[t.Dict[str, str]] = None, big_int: bool = False
 ) -> t.Callable[[t.List], E]:
     def _builder(args: t.List) -> E:
         unit = seq_get(args, 0)
@@ -259,12 +258,15 @@ def _build_date_delta(
             else:
                 # We currently don't handle float values, i.e. they're not converted to equivalent DATETIMEs.
                 # This is not a problem when generating T-SQL code, it is when transpiling to other dialects.
-                return exp_class(this=seq_get(args, 2), expression=start_date, unit=unit)
+                return exp_class(
+                    this=seq_get(args, 2), expression=start_date, unit=unit, big_int=big_int
+                )
         return exp_class(
             this=exp.TimeStrToTime(this=seq_get(args, 2)),
             expression=exp.TimeStrToTime(this=start_date),
             unit=unit,
+            big_int=big_int,
         )
     return _builder
@@ -412,9 +414,22 @@ class TSQL(Dialect):
     TIME_FORMAT = "'yyyy-mm-dd hh:mm:ss'"
-    ANNOTATORS = {
-        **Dialect.ANNOTATORS,
-        exp.Radians: lambda self, e: self._annotate_by_args(e, "this"),
+    EXPRESSION_METADATA = EXPRESSION_METADATA.copy()
+    DATE_PART_MAPPING = {
+        **Dialect.DATE_PART_MAPPING,
+        "QQ": "QUARTER",
+        "M": "MONTH",
+        "Y": "DAYOFYEAR",
+        "WW": "WEEK",
+        "N": "MINUTE",
+        "SS": "SECOND",
+        "MCS": "MICROSECOND",
+        "TZOFFSET": "TIMEZONE_MINUTE",
+        "TZ": "TIMEZONE_MINUTE",
+        "ISO_WEEK": "WEEKISO",
+        "ISOWK": "WEEKISO",
+        "ISOWW": "WEEKISO",
     }
     TIME_MAPPING = {
@@ -426,9 +441,9 @@ class TSQL(Dialect):
         "week": "%W",
         "ww": "%W",
         "wk": "%W",
-        "isowk": "%IW",
-        "isoww": "%IW",
-        "iso_week": "%IW",
+        "isowk": "%V",
+        "isoww": "%V",
+        "iso_week": "%V",
         "hour": "%h",
         "hh": "%I",
         "minute": "%M",
@@ -574,7 +589,7 @@ class TSQL(Dialect):
         QUERY_MODIFIER_PARSERS = {
             **parser.Parser.QUERY_MODIFIER_PARSERS,
             TokenType.OPTION: lambda self: ("options", self._parse_options()),
-            TokenType.FOR: lambda self: ("for", self._parse_for()),
+            TokenType.FOR: lambda self: ("for_", self._parse_for()),
         }
         # T-SQL does not allow BEGIN to be used as an identifier
@@ -599,8 +614,10 @@ class TSQL(Dialect):
             ),
             "DATEADD": build_date_delta(exp.DateAdd, unit_mapping=DATE_DELTA_INTERVAL),
             "DATEDIFF": _build_date_delta(exp.DateDiff, unit_mapping=DATE_DELTA_INTERVAL),
+            "DATEDIFF_BIG": _build_date_delta(
+                exp.DateDiff, unit_mapping=DATE_DELTA_INTERVAL, big_int=True
+            ),
             "DATENAME": _build_formatted_time(exp.TimeToStr, full_format_mapping=True),
-            "DATEPART": _build_formatted_time(exp.TimeToStr),
             "DATETIMEFROMPARTS": _build_datetimefromparts,
             "EOMONTH": _build_eomonth,
             "FORMAT": _build_format,
@@ -666,6 +683,7 @@ class TSQL(Dialect):
                 order=self._parse_order(),
                 null_handling=self._parse_on_handling("NULL", "NULL", "ABSENT"),
             ),
+            "DATEPART": lambda self: self._parse_datepart(),
         }
         # The DCOLON (::) operator serves as a scope resolution (exp.ScopeResolution) operator in T-SQL
@@ -684,6 +702,13 @@ class TSQL(Dialect):
             "ts": exp.Timestamp,
         }
+        def _parse_datepart(self) -> exp.Extract:
+            this = self._parse_var()
+            expression = self._match(TokenType.COMMA) and self._parse_bitwise()
+            name = map_date_part(this, self.dialect)
+            return self.expression(exp.Extract, this=name, expression=expression)
         def _parse_alter_table_set(self) -> exp.AlterSet:
             return self._parse_wrapped(super()._parse_alter_table_set)
@@ -821,7 +846,6 @@ class TSQL(Dialect):
             args = [this, *self._parse_csv(self._parse_assignment)]
             convert = exp.Convert.from_arg_list(args)
             convert.set("safe", safe)
-            convert.set("strict", strict)
             return convert
         def _parse_column_def(
@@ -878,7 +902,7 @@ class TSQL(Dialect):
             this = super()._parse_id_var(any_token=any_token, tokens=tokens)
             if this:
                 if is_global:
-                    this.set("global", True)
+                    this.set("global_", True)
                 elif is_temporary:
                     this.set("temporary", True)
@@ -1033,15 +1057,14 @@ class TSQL(Dialect):
             exp.AnyValue: any_value_to_max_sql,
             exp.ArrayToString: rename_func("STRING_AGG"),
             exp.AutoIncrementColumnConstraint: lambda *_: "IDENTITY",
+            exp.Ceil: rename_func("CEILING"),
             exp.Chr: rename_func("CHAR"),
             exp.DateAdd: date_delta_sql("DATEADD"),
-            exp.DateDiff: date_delta_sql("DATEDIFF"),
             exp.CTE: transforms.preprocess([qualify_derived_table_outputs]),
             exp.CurrentDate: rename_func("GETDATE"),
             exp.CurrentTimestamp: rename_func("GETDATE"),
             exp.CurrentTimestampLTZ: rename_func("SYSDATETIMEOFFSET"),
             exp.DateStrToDate: datestrtodate_sql,
-            exp.Extract: rename_func("DATEPART"),
             exp.GeneratedAsIdentityColumnConstraint: generatedasidentitycolumnconstraint_sql,
             exp.GroupConcat: _string_agg_sql,
             exp.If: rename_func("IIF"),
@@ -1069,6 +1092,9 @@ class TSQL(Dialect):
             ),
             exp.Subquery: transforms.preprocess([qualify_derived_table_outputs]),
             exp.SHA: lambda self, e: self.func("HASHBYTES", exp.Literal.string("SHA1"), e.this),
+            exp.SHA1Digest: lambda self, e: self.func(
+                "HASHBYTES", exp.Literal.string("SHA1"), e.this
+            ),
             exp.SHA2: lambda self, e: self.func(
                 "HASHBYTES", exp.Literal.string(f"SHA2_{e.args.get('length', 256)}"), e.this
             ),
@@ -1163,6 +1189,12 @@ class TSQL(Dialect):
                 "PARSENAME", this, exp.Literal.number(split_count + 1 - part_index.to_py())
             )
+        def extract_sql(self, expression: exp.Extract) -> str:
+            part = expression.this
+            name = DATE_PART_UNMAPPING.get(part.name.upper()) or part
+            return self.func("DATEPART", name, expression.expression)
         def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
             nano = expression.args.get("nano")
             if nano is not None:
@@ -1238,12 +1270,12 @@ class TSQL(Dialect):
             if kind == "VIEW":
                 expression.this.set("catalog", None)
-                with_ = expression.args.get("with")
+                with_ = expression.args.get("with_")
                 if ctas_expression and with_:
                     # We've already preprocessed the Create expression to bubble up any nested CTEs,
                     # but CREATE VIEW actually requires the WITH clause to come after it so we need
                     # to amend the AST by moving the CTEs to the CREATE VIEW statement's query.
-                    ctas_expression.set("with", with_.pop())
+                    ctas_expression.set("with_", with_.pop())
             table = expression.find(exp.Table)
@@ -1301,6 +1333,10 @@ class TSQL(Dialect):
             func_name = "COUNT_BIG" if expression.args.get("big_int") else "COUNT"
             return rename_func(func_name)(self, expression)
+        def datediff_sql(self, expression: exp.DateDiff) -> str:
+            func_name = "DATEDIFF_BIG" if expression.args.get("big_int") else "DATEDIFF"
+            return date_delta_sql(func_name)(self, expression)
         def offset_sql(self, expression: exp.Offset) -> str:
             return f"{super().offset_sql(expression)} ROWS"
@@ -1355,7 +1391,7 @@ class TSQL(Dialect):
         def identifier_sql(self, expression: exp.Identifier) -> str:
             identifier = super().identifier_sql(expression)
-            if expression.args.get("global"):
+            if expression.args.get("global_"):
                 identifier = f"##{identifier}"
             elif expression.args.get("temporary"):
                 identifier = f"#{identifier}"

sqlglot/diff.py CHANGED Viewed

@@ -393,8 +393,10 @@ def _get_expression_leaves(expression: exp.Expression) -> t.Iterator[exp.Express
 def _get_non_expression_leaves(expression: exp.Expression) -> t.Iterator[t.Tuple[str, t.Any]]:
     for arg, value in expression.args.items():
-        if isinstance(value, exp.Expression) or (
-            isinstance(value, list) and isinstance(seq_get(value, 0), exp.Expression)
+        if (
+            value is None
+            or isinstance(value, exp.Expression)
+            or (isinstance(value, list) and isinstance(seq_get(value, 0), exp.Expression))
         ):
             continue

sqlglot/errors.py CHANGED Viewed

@@ -6,6 +6,12 @@ from enum import auto
 from sqlglot.helper import AutoName
+# ANSI escape codes for error formatting
+ANSI_UNDERLINE = "\033[4m"
+ANSI_RESET = "\033[0m"
+ERROR_MESSAGE_CONTEXT_DEFAULT = 100
 class ErrorLevel(AutoName):
     IGNORE = auto()
     """Ignore all errors."""
@@ -81,6 +87,69 @@ class ExecuteError(SqlglotError):
     pass
+def highlight_sql(
+    sql: str,
+    positions: t.List[t.Tuple[int, int]],
+    context_length: int = ERROR_MESSAGE_CONTEXT_DEFAULT,
+) -> t.Tuple[str, str, str, str]:
+    """
+    Highlight a SQL string using ANSI codes at the given positions.
+    Args:
+        sql: The complete SQL string.
+        positions: List of (start, end) tuples where both start and end are inclusive 0-based
+            indexes. For example, to highlight "foo" in "SELECT foo", use (7, 9).
+            The positions will be sorted and de-duplicated if they overlap.
+        context_length: Number of characters to show before the first highlight and after
+            the last highlight.
+    Returns:
+        A tuple of (formatted_sql, start_context, highlight, end_context) where:
+        - formatted_sql: The SQL with ANSI underline codes applied to highlighted sections
+        - start_context: Plain text before the first highlight
+        - highlight: Plain text from the first highlight start to the last highlight end,
+            including any non-highlighted text in between (no ANSI)
+        - end_context: Plain text after the last highlight
+    Note:
+        If positions is empty, raises a ValueError.
+    """
+    if not positions:
+        raise ValueError("positions must contain at least one (start, end) tuple")
+    start_context = ""
+    end_context = ""
+    first_highlight_start = 0
+    formatted_parts = []
+    previous_part_end = 0
+    sorted_positions = sorted(positions, key=lambda pos: pos[0])
+    if sorted_positions[0][0] > 0:
+        first_highlight_start = sorted_positions[0][0]
+        start_context = sql[max(0, first_highlight_start - context_length) : first_highlight_start]
+        formatted_parts.append(start_context)
+        previous_part_end = first_highlight_start
+    for start, end in sorted_positions:
+        highlight_start = max(start, previous_part_end)
+        highlight_end = end + 1
+        if highlight_start >= highlight_end:
+            continue  # Skip invalid or overlapping highlights
+        if highlight_start > previous_part_end:
+            formatted_parts.append(sql[previous_part_end:highlight_start])
+        formatted_parts.append(f"{ANSI_UNDERLINE}{sql[highlight_start:highlight_end]}{ANSI_RESET}")
+        previous_part_end = highlight_end
+    if previous_part_end < len(sql):
+        end_context = sql[previous_part_end : previous_part_end + context_length]
+        formatted_parts.append(end_context)
+    formatted_sql = "".join(formatted_parts)
+    highlight = sql[first_highlight_start:previous_part_end]
+    return formatted_sql, start_context, highlight, end_context
 def concat_messages(errors: t.Sequence[t.Any], maximum: int) -> str:
     msg = [str(e) for e in errors[:maximum]]
     remaining = len(errors) - maximum

sqlglot 27.29.0__py3-none-any.whl → 28.4.1__py3-none-any.whl

sqlglot 27.29.0__py3-none-any.whl → 28.4.1__py3-none-any.whl