PyPI - sqlglotc - Versions diffs - 28.10.1.dev130__tar.gz → 29.0.1__tar.gz - Mend

sqlglotc 28.10.1.dev130__tar.gz → 29.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20)

{sqlglotc-28.10.1.dev130/sqlglotc.egg-info → sqlglotc-29.0.1}/PKG-INFO RENAMED Viewed

@@ -1,9 +1,9 @@
 Metadata-Version: 2.4
 Name: sqlglotc
-Version: 28.10.1.dev130
+Version: 29.0.1
 Summary: mypyc-compiled extensions for sqlglot
 Author-email: Toby Mao <toby.mao@gmail.com>
-License: MIT
+License-Expression: MIT
 Project-URL: Homepage, https://sqlglot.com/
 Project-URL: Repository, https://github.com/tobymao/sqlglot
 Requires-Python: >=3.9

{sqlglotc-28.10.1.dev130 → sqlglotc-29.0.1}/pyproject.toml RENAMED Viewed

@@ -3,7 +3,7 @@ name = "sqlglotc"
 dynamic = ["version"]
 description = "mypyc-compiled extensions for sqlglot"
 authors = [{ name = "Toby Mao", email = "toby.mao@gmail.com" }]
-license = {text = "MIT"}
+license = "MIT"
 requires-python = ">= 3.9"
 [project.urls]
@@ -25,3 +25,7 @@ local_scheme = "no-local-version"
 [tool.mypy]
 # Allow mypyc to resolve sqlglot.* from the repo root (../sqlglot/) or sdist root (./sqlglot/).
 mypy_path = [".", ".."]
+[[tool.mypy.overrides]]
+module = "sqlglot._version"
+ignore_missing_imports = true

{sqlglotc-28.10.1.dev130 → sqlglotc-29.0.1}/setup.py RENAMED Viewed

@@ -10,7 +10,17 @@ sqlglot_src = os.path.join(here, "..", "sqlglot")
 from mypyc.build import mypycify
-SOURCE_FILES = ["expression_core.py", "helper.py", "trie.py", "tokenizer_core.py"]
+SOURCE_FILES = [
+    "errors.py",
+    "expression_core.py",
+    "helper.py",
+    "parser_core.py",
+    "schema.py",
+    "serde.py",
+    "time.py",
+    "tokenizer_core.py",
+    "trie.py",
+]
 def _source_paths():

sqlglotc-29.0.1/sqlglot/errors.py ADDED Viewed

@@ -0,0 +1,162 @@
+from __future__ import annotations
+import typing as t
+from enum import auto
+from sqlglot.helper import AutoName
+# ANSI escape codes for error formatting
+ANSI_UNDERLINE = "\033[4m"
+ANSI_RESET = "\033[0m"
+ERROR_MESSAGE_CONTEXT_DEFAULT = 100
+class ErrorLevel(AutoName):
+    IGNORE = auto()
+    """Ignore all errors."""
+    WARN = auto()
+    """Log all errors."""
+    RAISE = auto()
+    """Collect all errors and raise a single exception."""
+    IMMEDIATE = auto()
+    """Immediately raise an exception on the first error found."""
+class SqlglotError(Exception):
+    pass
+class UnsupportedError(SqlglotError):
+    pass
+class ParseError(SqlglotError):
+    def __init__(
+        self,
+        message: str,
+        errors: t.Optional[t.List[t.Dict[str, t.Any]]] = None,
+    ):
+        super().__init__(message)
+        self.errors = errors or []
+    @classmethod
+    def new(
+        cls,
+        message: str,
+        description: t.Optional[str] = None,
+        line: t.Optional[int] = None,
+        col: t.Optional[int] = None,
+        start_context: t.Optional[str] = None,
+        highlight: t.Optional[str] = None,
+        end_context: t.Optional[str] = None,
+        into_expression: t.Optional[str] = None,
+    ) -> ParseError:
+        return cls(
+            message,
+            [
+                {
+                    "description": description,
+                    "line": line,
+                    "col": col,
+                    "start_context": start_context,
+                    "highlight": highlight,
+                    "end_context": end_context,
+                    "into_expression": into_expression,
+                }
+            ],
+        )
+class TokenError(SqlglotError):
+    pass
+class OptimizeError(SqlglotError):
+    pass
+class SchemaError(SqlglotError):
+    pass
+class ExecuteError(SqlglotError):
+    pass
+def highlight_sql(
+    sql: str,
+    positions: t.List[t.Tuple[int, int]],
+    context_length: int = ERROR_MESSAGE_CONTEXT_DEFAULT,
+) -> t.Tuple[str, str, str, str]:
+    """
+    Highlight a SQL string using ANSI codes at the given positions.
+    Args:
+        sql: The complete SQL string.
+        positions: List of (start, end) tuples where both start and end are inclusive 0-based
+            indexes. For example, to highlight "foo" in "SELECT foo", use (7, 9).
+            The positions will be sorted and de-duplicated if they overlap.
+        context_length: Number of characters to show before the first highlight and after
+            the last highlight.
+    Returns:
+        A tuple of (formatted_sql, start_context, highlight, end_context) where:
+        - formatted_sql: The SQL with ANSI underline codes applied to highlighted sections
+        - start_context: Plain text before the first highlight
+        - highlight: Plain text from the first highlight start to the last highlight end,
+            including any non-highlighted text in between (no ANSI)
+        - end_context: Plain text after the last highlight
+    Note:
+        If positions is empty, raises a ValueError.
+    """
+    if not positions:
+        raise ValueError("positions must contain at least one (start, end) tuple")
+    start_context = ""
+    end_context = ""
+    first_highlight_start = 0
+    formatted_parts = []
+    previous_part_end = 0
+    sorted_positions = sorted(positions, key=lambda pos: pos[0])
+    if sorted_positions[0][0] > 0:
+        first_highlight_start = sorted_positions[0][0]
+        start_context = sql[max(0, first_highlight_start - context_length) : first_highlight_start]
+        formatted_parts.append(start_context)
+        previous_part_end = first_highlight_start
+    for start, end in sorted_positions:
+        highlight_start = max(start, previous_part_end)
+        highlight_end = end + 1
+        if highlight_start >= highlight_end:
+            continue  # Skip invalid or overlapping highlights
+        if highlight_start > previous_part_end:
+            formatted_parts.append(sql[previous_part_end:highlight_start])
+        formatted_parts.append(f"{ANSI_UNDERLINE}{sql[highlight_start:highlight_end]}{ANSI_RESET}")
+        previous_part_end = highlight_end
+    if previous_part_end < len(sql):
+        end_context = sql[previous_part_end : previous_part_end + context_length]
+        formatted_parts.append(end_context)
+    formatted_sql = "".join(formatted_parts)
+    highlight = sql[first_highlight_start:previous_part_end]
+    return formatted_sql, start_context, highlight, end_context
+def concat_messages(errors: t.Sequence[t.Any], maximum: int) -> str:
+    msg = [str(e) for e in errors[:maximum]]
+    remaining = len(errors) - maximum
+    if remaining > 0:
+        msg.append(f"... and {remaining} more")
+    return "\n\n".join(msg)
+def merge_errors(errors: t.Sequence[ParseError]) -> t.List[t.Dict[str, t.Any]]:
+    return [e_dict for error in errors for e_dict in error.errors]

{sqlglotc-28.10.1.dev130 → sqlglotc-29.0.1}/sqlglot/expression_core.py RENAMED Viewed

@@ -5,15 +5,8 @@ import typing as t
 from collections import deque
 from copy import deepcopy
-try:
-    from mypy_extensions import mypyc_attr
-except ImportError:
-    def mypyc_attr(*attrs: str, **kwattrs: object) -> t.Callable[[t.Any], t.Any]:  # type: ignore[misc]
-        return lambda f: f
-from sqlglot.helper import to_bool
+from sqlglot.helper import mypyc_attr, to_bool
+from sqlglot.tokenizer_core import Token
 EC = t.TypeVar("EC", bound="ExpressionCore")
@@ -44,20 +37,20 @@ class ExpressionCore:
     is_func: t.ClassVar[bool] = False
     _hash_raw_args: t.ClassVar[bool] = False
-    def __init__(self, **args: t.Any) -> None:
+    def __init__(self, **args: object) -> None:
         self.args: t.Dict[str, t.Any] = args
         self.parent: t.Optional[ExpressionCore] = None
         self.arg_key: t.Optional[str] = None
         self.index: t.Optional[int] = None
         self.comments: t.Optional[t.List[str]] = None
-        self._type: t.Optional[t.Any] = None
+        self._type: t.Optional[ExpressionCore] = None
         self._meta: t.Optional[t.Dict[str, t.Any]] = None
         self._hash: t.Optional[int] = None
         for arg_key, value in self.args.items():
             self._set_parent(arg_key, value)
-    def _set_parent(self, arg_key: str, value: t.Any, index: t.Optional[int] = None) -> None:
+    def _set_parent(self, arg_key: str, value: object, index: t.Optional[int] = None) -> None:
         if isinstance(value, ExpressionCore):
             value.parent = self
             value.arg_key = arg_key
@@ -137,11 +130,11 @@ class ExpressionCore:
             return self.parent.depth + 1
         return 0
-    def find_ancestor(self, *expression_types: t.Any) -> t.Optional[t.Any]:
+    def find_ancestor(self, *expression_types: t.Type[EC]) -> t.Optional[EC]:
         ancestor = self.parent
         while ancestor and not isinstance(ancestor, expression_types):
             ancestor = ancestor.parent
-        return ancestor
+        return ancestor  # type: ignore[return-value]
     @property
     def same_parent(self) -> bool:
@@ -217,7 +210,7 @@ class ExpressionCore:
     def update_positions(
         self: EC,
-        other: t.Optional[t.Any] = None,
+        other: t.Optional[ExpressionCore | Token] = None,
         line: t.Optional[int] = None,
         col: t.Optional[int] = None,
         start: t.Optional[int] = None,
@@ -297,14 +290,14 @@ class ExpressionCore:
                 copy._hash = node._hash
             for k, vs in node.args.items():
-                if hasattr(vs, "parent"):
+                if isinstance(vs, ExpressionCore):
                     stack.append((vs, vs.__class__()))
                     copy.set(k, stack[-1][-1])
                 elif type(vs) is list:
                     copy.args[k] = []
                     for v in vs:
-                        if hasattr(v, "parent"):
+                        if isinstance(v, ExpressionCore):
                             stack.append((v, v.__class__()))
                             copy.append(k, stack[-1][-1])
                         else:
@@ -327,8 +320,7 @@ class ExpressionCore:
                 if meta:
                     for kv in "".join(meta).split(","):
                         k, *v = kv.split("=")
-                        value: t.Any = v[0].strip() if v else True
-                        self.meta[k.strip()] = to_bool(value)
+                        self.meta[k.strip()] = to_bool(v[0].strip() if v else True)
                 if not prepend:
                     self.comments.append(comment)
@@ -339,7 +331,7 @@ class ExpressionCore:
     def set(
         self,
         arg_key: str,
-        value: t.Any,
+        value: object,
         index: t.Optional[int] = None,
         overwrite: bool = True,
     ) -> None:
@@ -380,10 +372,10 @@ class ExpressionCore:
         self.args[arg_key] = value
         self._set_parent(arg_key, value, index)
-    def find(self, *expression_types: t.Any, bfs: bool = True) -> t.Optional[t.Any]:
+    def find(self, *expression_types: t.Type[EC], bfs: bool = True) -> t.Optional[EC]:
         return next(self.find_all(*expression_types, bfs=bfs), None)
-    def find_all(self, *expression_types: t.Any, bfs: bool = True) -> t.Iterator[t.Any]:
+    def find_all(self, *expression_types: t.Type[EC], bfs: bool = True) -> t.Iterator[EC]:
         for expression in self.walk(bfs=bfs):
             if isinstance(expression, expression_types):
                 yield expression
@@ -427,14 +419,16 @@ class ExpressionCore:
         self.replace(None)
         return self
-    def assert_is(self, type_: t.Any) -> t.Any:
+    def assert_is(self, type_: t.Type[EC]) -> EC:
         if not isinstance(self, type_):
             raise AssertionError(f"{self} is not {type_}.")
         return self
-    def transform(self, fun: t.Callable, *args: t.Any, copy: bool = True, **kwargs: t.Any) -> t.Any:
-        root: t.Optional[t.Any] = None
-        new_node: t.Optional[t.Any] = None
+    def transform(
+        self, fun: t.Callable, *args: object, copy: bool = True, **kwargs: object
+    ) -> t.Any:
+        root: t.Any = None
+        new_node: t.Any = None
         for node in (self.copy() if copy else self).dfs(prune=lambda n: n is not new_node):
             parent, arg_key, index = node.parent, node.arg_key, node.index

{sqlglotc-28.10.1.dev130 → sqlglotc-29.0.1}/sqlglot/helper.py RENAMED Viewed

@@ -12,6 +12,17 @@ from difflib import get_close_matches
 from enum import Enum
 from itertools import count
+try:
+    from mypy_extensions import mypyc_attr, trait
+except ImportError:
+    def mypyc_attr(*attrs: str, **kwattrs: object) -> t.Callable[[t.Any], t.Any]:  # type: ignore[misc]
+        return lambda f: f
+    def trait(f: t.Any) -> t.Any:  # type: ignore[misc]
+        return f
 T = t.TypeVar("T")
 E = t.TypeVar("E")

sqlglotc-29.0.1/sqlglot/parser_core.py ADDED Viewed

@@ -0,0 +1,190 @@
+from __future__ import annotations
+import typing as t
+from sqlglot.errors import ErrorLevel, ParseError, highlight_sql
+from sqlglot.tokenizer_core import Token, TokenType
+class ParserCore:
+    __slots__ = (
+        "error_level",
+        "error_message_context",
+        "max_errors",
+        "dialect",
+        "sql",
+        "errors",
+        "_tokens",
+        "_index",
+        "_curr",
+        "_next",
+        "_prev",
+        "_prev_comments",
+        "_pipe_cte_counter",
+        "_chunks",
+        "_chunk_index",
+    )
+    def __init__(
+        self,
+        error_level: ErrorLevel,
+        error_message_context: int,
+        max_errors: int,
+        dialect: t.Any,
+    ) -> None:
+        self.error_level: ErrorLevel = error_level
+        self.error_message_context = error_message_context
+        self.max_errors = max_errors
+        self.dialect: t.Any = dialect
+        self.reset()
+    def reset(self) -> None:
+        self.sql: str = ""
+        self.errors: t.List[ParseError] = []
+        self._tokens: t.List[Token] = []
+        self._index: int = 0
+        self._curr: t.Optional[Token] = None
+        self._next: t.Optional[Token] = None
+        self._prev: t.Optional[Token] = None
+        self._prev_comments: t.Optional[t.List[str]] = None
+        self._pipe_cte_counter: int = 0
+        self._chunks: t.List[t.List[Token]] = []
+        self._chunk_index: int = 0
+    def _advance(self, times: int = 1) -> None:
+        index = self._index + times
+        self._index = index
+        tokens = self._tokens
+        size = len(tokens)
+        self._curr = tokens[index] if index < size else None
+        self._next = tokens[index + 1] if index + 1 < size else None
+        if index > 0:
+            prev = tokens[index - 1]
+            self._prev = prev
+            self._prev_comments = prev.comments
+        else:
+            self._prev = None
+            self._prev_comments = None
+    def _advance_chunk(self) -> None:
+        self._index = -1
+        self._tokens = self._chunks[self._chunk_index]
+        self._chunk_index += 1
+        self._advance()
+    def _retreat(self, index: int) -> None:
+        if index != self._index:
+            self._advance(index - self._index)
+    def _add_comments(self, expression: t.Any) -> None:
+        if expression and self._prev_comments:
+            expression.add_comments(self._prev_comments)
+            self._prev_comments = None
+    def _match(self, token_type: TokenType, advance: bool = True, expression: t.Any = None) -> bool:
+        curr = self._curr
+        if curr and curr.token_type == token_type:
+            if advance:
+                self._advance()
+            self._add_comments(expression)
+            return True
+        return False
+    def _match_set(self, types: t.Any, advance: bool = True) -> bool:
+        curr = self._curr
+        if curr and curr.token_type in types:
+            if advance:
+                self._advance()
+            return True
+        return False
+    def _match_pair(
+        self, token_type_a: TokenType, token_type_b: TokenType, advance: bool = True
+    ) -> bool:
+        curr = self._curr
+        next_ = self._next
+        if curr and next_ and curr.token_type == token_type_a and next_.token_type == token_type_b:
+            if advance:
+                self._advance(2)
+            return True
+        return False
+    def _match_texts(self, texts: t.Any, advance: bool = True) -> bool:
+        curr = self._curr
+        if curr and curr.token_type != TokenType.STRING and curr.text.upper() in texts:
+            if advance:
+                self._advance()
+            return True
+        return False
+    def _match_text_seq(self, *texts: str, advance: bool = True) -> bool:
+        index = self._index
+        string_type = TokenType.STRING
+        for text in texts:
+            curr = self._curr
+            if curr and curr.token_type != string_type and curr.text.upper() == text:
+                self._advance()
+            else:
+                self._retreat(index)
+                return False
+        if not advance:
+            self._retreat(index)
+        return True
+    def _is_connected(self) -> bool:
+        prev = self._prev
+        curr = self._curr
+        return bool(prev and curr and prev.end + 1 == curr.start)
+    def _find_sql(self, start: Token, end: Token) -> str:
+        return self.sql[start.start : end.end + 1]
+    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
+        token = token or self._curr or self._prev or Token.string("")
+        formatted_sql, start_context, highlight, end_context = highlight_sql(
+            sql=self.sql,
+            positions=[(token.start, token.end)],
+            context_length=self.error_message_context,
+        )
+        formatted_message = f"{message}. Line {token.line}, Col: {token.col}.\n  {formatted_sql}"
+        error = ParseError.new(
+            formatted_message,
+            description=message,
+            line=token.line,
+            col=token.col,
+            start_context=start_context,
+            highlight=highlight,
+            end_context=end_context,
+        )
+        if self.error_level == ErrorLevel.IMMEDIATE:
+            raise error
+        self.errors.append(error)
+    def validate_expression(self, expression: t.Any, args: t.Optional[t.List] = None) -> t.Any:
+        if self.error_level != ErrorLevel.IGNORE:
+            for error_message in expression.error_messages(args):
+                self.raise_error(error_message)
+        return expression
+    def _try_parse(self, parse_method: t.Callable, retreat: bool = False) -> t.Optional[t.Any]:
+        index = self._index
+        error_level = self.error_level
+        this: t.Optional[t.Any] = None
+        self.error_level = ErrorLevel.IMMEDIATE
+        try:
+            this = parse_method()
+        except ParseError:
+            this = None
+        finally:
+            if not this or retreat:
+                self._retreat(index)
+            self.error_level = error_level
+        return this

sqlglotc 28.10.1.dev130__tar.gz → 29.0.1__tar.gz

sqlglotc 28.10.1.dev130__tar.gz → 29.0.1__tar.gz