syncraft 0.2.5__py3-none-any.whl → 0.2.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of syncraft might be problematic.
- syncraft/__init__.py +30 -9
- syncraft/algebra.py +143 -214
- syncraft/ast.py +62 -7
- syncraft/cache.py +113 -0
- syncraft/constraint.py +184 -134
- syncraft/dev.py +9 -0
- syncraft/finder.py +17 -12
- syncraft/generator.py +80 -78
- syncraft/lexer.py +131 -0
- syncraft/parser.py +75 -224
- syncraft/syntax.py +187 -100
- syncraft/utils.py +214 -0
- syncraft/walker.py +147 -0
- syncraft-0.2.6.dist-info/METADATA +56 -0
- syncraft-0.2.6.dist-info/RECORD +20 -0
- syncraft/diagnostic.py +0 -70
- syncraft-0.2.5.dist-info/METADATA +0 -113
- syncraft-0.2.5.dist-info/RECORD +0 -16
- {syncraft-0.2.5.dist-info → syncraft-0.2.6.dist-info}/WHEEL +0 -0
- {syncraft-0.2.5.dist-info → syncraft-0.2.6.dist-info}/licenses/LICENSE +0 -0
- {syncraft-0.2.5.dist-info → syncraft-0.2.6.dist-info}/top_level.txt +0 -0
syncraft/parser.py
CHANGED
@@ -3,22 +3,25 @@ import re
 from sqlglot import tokenize, TokenType, Parser as GlotParser, exp
 from typing import (
     Optional, List, Any, Tuple, TypeVar,
-    Generic
+    Generic, Generator
 )
+from syncraft.cache import Cache
 from syncraft.constraint import FrozenDict
 from syncraft.algebra import (
-    Either, Left, Right, Error, Algebra
+    Either, Left, Right, Error, Algebra, Incomplete
 )
 from dataclasses import dataclass, field, replace
 from enum import Enum
-from functools import reduce
-from syncraft.syntax import Syntax
 
-from syncraft.
+from syncraft.syntax import Syntax, token
+
+from syncraft.ast import Token, TokenSpec, AST, TokenProtocol, SyncraftError
 from syncraft.constraint import Bindable
 
 
 T = TypeVar('T', bound=TokenProtocol)
+
+
 @dataclass(frozen=True)
 class ParserState(Bindable, Generic[T]):
     """Immutable state for the SQL token stream during parsing.
@@ -32,14 +35,30 @@ class ParserState(Bindable, Generic[T]):
     """
     input: Tuple[T, ...] = field(default_factory=tuple)
     index: int = 0
+    final: bool = False  # Whether this is a final state (for error reporting)
+
+    def __repr__(self) -> str:
+        return (f"ParserState("
+                f"input=[{self.before() + (' ' if len(self.before())>0 else '')}\u25cf{(' ' if len(self.after()) > 0 else '') + self.after()}], "
+                f"ended={self.ended()}, "
+                f"pending={self.pending()})")
 
+    def __str__(self) -> str:
+        return self.__repr__()
 
+    def __add__(self, other: 'ParserState[T]') -> 'ParserState[T]':
+        if not isinstance(other, ParserState):
+            raise SyncraftError("Can only concatenate ParserState with another ParserState", offending=self, expect="ParserState")
+        if self.final:
+            raise SyncraftError("Cannot concatenate to a final ParserState", offending=self, expect="not final")
+        return replace(self, input=self.input + other.input, final=other.final)
+
     def token_sample_string(self)-> str:
         def encode_tokens(*tokens:T) -> str:
             return ",".join(f"{token.token_type.name}({token.text})" for token in tokens)
         return encode_tokens(*self.input[self.index:self.index + 2])
 
-    def before(self, length: Optional[int] =
+    def before(self, length: Optional[int] = 3)->str:
         """Return a string with up to ``length`` tokens before the cursor.
 
         Args:
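
Note: the new final flag and __add__ overload let a token stream be assembled from several chunks before parsing completes. A minimal, self-contained sketch of the concatenation semantics (the State class below is an illustrative stand-in, not syncraft's ParserState):

# Stand-in sketch of the chunked-input semantics added above.
from dataclasses import dataclass, replace
from typing import Tuple

@dataclass(frozen=True)
class State:
    input: Tuple[str, ...] = ()
    index: int = 0
    final: bool = False  # True once no further chunks will arrive

    def __add__(self, other: "State") -> "State":
        if self.final:
            raise ValueError("cannot concatenate to a final state")
        return replace(self, input=self.input + other.input, final=other.final)

combined = State(input=("SELECT", "a")) + State(input=("FROM", "t"), final=True)
assert combined.input == ("SELECT", "a", "FROM", "t") and combined.final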
@@ -51,7 +70,7 @@ class ParserState(Bindable, Generic[T]):
         length = min(self.index, length) if length is not None else self.index
         return " ".join(token.text for token in self.input[self.index - length:self.index])
 
-    def after(self, length: Optional[int] =
+    def after(self, length: Optional[int] = 3)->str:
         """Return a string with up to ``length`` tokens from the cursor on.
 
         Args:
@@ -61,7 +80,8 @@ class ParserState(Bindable, Generic[T]):
             str: Space-separated token texts starting at the current index.
         """
         length = min(length, len(self.input) - self.index) if length is not None else len(self.input) - self.index
-
+        ret = " ".join(token.text for token in self.input[self.index:self.index + length])
+        return ret
 
 
     def current(self)->T:
@@ -73,29 +93,26 @@ class ParserState(Bindable, Generic[T]):
         Raises:
             IndexError: If attempting to read past the end of the stream.
         """
-        if self.
-            raise
+        if self.index >= len(self.input):
+            raise SyncraftError("Attempted to access token beyond end of stream", offending=self, expect="index < len(input)")
         return self.input[self.index]
 
+
+    def pending(self) -> bool:
+        return self.index >= len(self.input) and not self.final
+
     def ended(self) -> bool:
         """Whether the cursor is at or past the end of the token stream."""
-        return self.index >= len(self.input)
+        return self.index >= len(self.input) and self.final
 
     def advance(self) -> ParserState[T]:
         """Return a new state advanced by one token (bounded at end)."""
         return replace(self, index=min(self.index + 1, len(self.input)))
 
-    def delta(self, new_state: ParserState[T]) -> Tuple[T, ...]:
-        assert self.input is new_state.input, "Cannot calculate differences between different input streams"
-        assert 0 <= self.index <= new_state.index <= len(self.input), "Segment indices out of bounds"
-        return self.input[self.index:new_state.index]
 
-    def copy(self) -> ParserState[T]:
-        return self.__class__(input=self.input, index=self.index)
-
     @classmethod
     def from_tokens(cls, tokens: Tuple[T, ...]) -> ParserState[T]:
-        return cls(input=tokens, index=0)
+        return cls(input=tokens, index=0, final=True)
 
 
 
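
Note: with final in play, exhausting the input is no longer necessarily the end of the stream: pending() means "out of tokens, but more may arrive", while ended() now additionally requires final. A small stand-in sketch of the two predicates:

# Stand-in predicates mirroring ParserState.pending()/ended() above.
def pending(index: int, n_tokens: int, final: bool) -> bool:
    return index >= n_tokens and not final

def ended(index: int, n_tokens: int, final: bool) -> bool:
    return index >= n_tokens and final

assert pending(2, 2, final=False) and not ended(2, 2, final=False)  # stream still open
assert ended(2, 2, final=True) and not pending(2, 2, final=True)    # stream closed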
@@ -104,7 +121,7 @@ class ParserState(Bindable, Generic[T]):
 @dataclass(frozen=True)
 class Parser(Algebra[T, ParserState[T]]):
     @classmethod
-    def state(cls, sql: str, dialect: str) -> ParserState[T]:
+    def state(cls, sql: str, dialect: str) -> ParserState[T]: # type: ignore
         """Tokenize SQL text into an initial ``ParserState``.
 
         Uses ``sqlglot.tokenize`` for the given dialect and wraps tokens into
@@ -122,6 +139,8 @@ class Parser(Algebra[T, ParserState[T]]):
 
     @classmethod
     def token(cls,
+              *,
+              cache: Cache,
               token_type: Optional[Enum] = None,
               text: Optional[str] = None,
               case_sensitive: bool = False,
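
Note: the bare * in the new signature makes cache (and all later parameters) keyword-only, which is why call sites elsewhere in this diff switch to token_type=... as well. A generic illustration of the mechanism, unrelated to syncraft's actual Cache type:

# A bare `*` rejects positional arguments after it.
def token(*, cache: object, token_type: object = None) -> tuple:
    return cache, token_type

token(cache="c")   # OK
# token("c")       # TypeError: token() takes 0 positional arguments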
@@ -143,14 +162,19 @@ class Parser(Algebra[T, ParserState[T]]):
             Algebra[T, ParserState[T]]: An algebra yielding the matched token.
         """
         spec = TokenSpec(token_type=token_type, text=text, case_sensitive=case_sensitive, regex=regex)
-        def token_run(state: ParserState[T], use_cache:bool) -> Either[Any, Tuple[T, ParserState[T]]]:
-
-
-
-
-
-
-
+        def token_run(state: ParserState[T], use_cache:bool) -> Generator[Incomplete[ParserState[T]],ParserState[T], Either[Any, Tuple[T, ParserState[T]]]]:
+            while True:
+                if state.ended():
+                    return Left(state)
+                elif state.pending():
+                    state = yield Incomplete(state)
+                else:
+                    token = state.current()
+                    if token is None or not spec.is_valid(token):
+                        return Left(state)
+                    else:
+                        return Right((Token(token_type = token.token_type, text=token.text), state.advance())) # type: ignore
+        captured: Algebra[T, ParserState[T]] = cls(token_run, name=cls.__name__ + f'.token({token_type}, {text})', cache=cache)
         def error_fn(err: Any) -> Error:
             if isinstance(err, ParserState):
                 return Error(message=f"Cannot match token at {err}", this=captured, state=err)
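
Note: token_run is now a generator rather than a plain function: on a pending state it yields Incomplete and suspends until the driver sends back a state extended with more tokens. A self-contained sketch of that protocol, with a plain tuple standing in for ParserState and a string standing in for Incomplete:

# Generator protocol sketch: yield on "pending", resume once more input arrives.
def match_one(tokens, final):
    i = 0
    while True:
        if i >= len(tokens) and final:
            return None                         # ended: genuine failure
        elif i >= len(tokens):
            tokens, final = yield "incomplete"  # pending: ask the driver for more
        else:
            return tokens[i]                    # a token is available

gen = match_one((), final=False)
assert gen.send(None) == "incomplete"  # parser suspends, waiting for tokens
try:
    gen.send((("SELECT",), True))      # feed a final chunk
except StopIteration as done:
    assert done.value == "SELECT"      # the generator's return value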
@@ -161,107 +185,7 @@ class Parser(Algebra[T, ParserState[T]]):
         return captured
 
 
-    @classmethod
-    def until(cls,
-              *open_close: Tuple[Algebra[Any, ParserState[T]], Algebra[Any, ParserState[T]]],
-              terminator: Optional[Algebra[Any, ParserState[T]]] = None,
-              inclusive: bool = True,
-              strict: bool = True) -> Algebra[Any, ParserState[T]]:
-        """Consume tokens until a terminator while respecting nested pairs.
-
-        Tracks nesting of one or more opener/closer parser pairs. When not
-        nested, an optional ``terminator`` may end the scan. If ``inclusive``
-        is true, boundary tokens (openers/closers/terminator) are included in
-        the returned tuple. If ``strict`` is true, the next token must match an
-        opener before scanning continues; otherwise content may start
-        immediately.
-
-        Args:
-            open_close: One or more pairs of (open, close) parsers.
-            terminator: Optional parser that ends scanning at top level.
-            inclusive: Include matched structural tokens in the result.
-            strict: Require the very next token to be an opener when provided.
-
-        Returns:
-            Algebra[Any, ParserState[T]]: An algebra yielding a tuple of
-            collected tokens upon success.
-        """
-        def until_run(state: ParserState[T], use_cache:bool) -> Either[Any, Tuple[Any, ParserState[T]]]:
-            # Use a stack to enforce proper nesting across multiple open/close pairs.
-            tokens: List[Any] = []
-            if not terminator and len(open_close) == 0:
-                return Left(Error(this=until_run, message="No terminator and no open/close parsers, nothing to parse", state=state))
-
-            # Helper to try matching any of the parsers once, returning early on first match
-            def try_match(s: ParserState[T], *parsers: Algebra[Any, ParserState[T]]) -> Tuple[bool, Optional[int], Optional[Any], ParserState[T]]:
-                for i, p in enumerate(parsers):
-                    res = p.run(s, use_cache)
-                    if isinstance(res, Right):
-                        val, ns = res.value
-                        return True, i, val, ns
-                return False, None, None, s
-
-            opens, closes = zip(*open_close) if len(open_close) > 0 else ((), ())
-            tmp_state: ParserState[T] = state.copy()
-            stack: List[int] = []  # indices into open_close indicating expected closer
-
-            # If strict, require the very next token to be an opener of any kind
-            if strict and len(opens) > 0:
-                c = reduce(lambda a, b: a.or_else(b), opens).run(tmp_state, use_cache)
-                if c.is_left():
-                    return Left(Error(this=until_run, message="No opening parser matched", state=tmp_state))
-
-            while not tmp_state.ended():
-                # Try to open
-                o_matched, o_idx, o_tok, o_state = try_match(tmp_state, *opens)
-                if o_matched and o_idx is not None:
-                    stack.append(o_idx)
-                    if inclusive:
-                        tokens.append(o_tok)
-                    tmp_state = o_state
-                    continue
-
-                # Try to close
-                c_matched, c_idx, c_tok, c_state = try_match(tmp_state, *closes)
-                if c_matched and c_idx is not None:
-                    if not stack or stack[-1] != c_idx:
-                        return Left(Error(this=until_run, message="Mismatched closing parser", state=tmp_state))
-                    stack.pop()
-                    if inclusive:
-                        tokens.append(c_tok)
-                    tmp_state = c_state
-                    # After closing, if stack empty, we may terminate on a terminator
-                    if len(stack) == 0:
-                        if terminator:
-                            term = terminator.run(tmp_state, use_cache)
-                            if isinstance(term, Right):
-                                if inclusive:
-                                    tokens.append(term.value[0])
-                                return Right((tuple(tokens), term.value[1]))
-                        else:
-                            return Right((tuple(tokens), tmp_state))
-                    continue
-
-                # If nothing structural matched, check termination when not nested
-                if len(stack) == 0:
-                    if terminator:
-                        term2 = terminator.run(tmp_state, use_cache)
-                        if isinstance(term2, Right):
-                            if inclusive:
-                                tokens.append(term2.value[0])
-                            return Right((tuple(tokens), term2.value[1]))
-                    else:
-                        return Right((tuple(tokens), tmp_state))
-
-                # Otherwise, consume one token as payload and continue
-                tokens.append(tmp_state.current())
-                tmp_state = tmp_state.advance()
 
-            # Reached end of input
-            if len(stack) != 0:
-                return Left(Error(this=until_run, message="Unterminated group", state=tmp_state))
-            return Right((tuple(tokens), tmp_state))
-        return cls(until_run, name=cls.__name__ + '.until')
 
 def sqlglot(parser: Syntax[Any, Any],
             dialect: str) -> Syntax[List[exp.Expression], ParserState[Any]]:
@@ -282,52 +206,6 @@ def sqlglot(parser: Syntax[Any, Any],
     return parser.map(lambda tokens: [e for e in gp.parse(raw_tokens=tokens) if e is not None])
 
 
-def parse(syntax: Syntax[Any, Any], sql: str, dialect: str) -> Tuple[AST, FrozenDict[str, Tuple[AST, ...]]] | Tuple[Any, None]:
-    """Parse SQL text with a ``Syntax`` using the ``Parser`` backend.
-
-    Tokenizes the SQL with the specified dialect and executes ``syntax``.
-
-    Args:
-        syntax: The high-level syntax to run.
-        sql: SQL text to tokenize and parse.
-        dialect: sqlglot dialect name used for tokenization.
-
-    Returns:
-        Tuple[AST, FrozenDict[str, Tuple[AST, ...]]] | Tuple[Any, None]:
-            The produced AST and collected marks, or a tuple signaling failure.
-    """
-    from syncraft.syntax import run
-    return run(syntax, Parser, True, sql=sql, dialect=dialect)
-
-
-
-
-def token(token_type: Optional[Enum] = None,
-          text: Optional[str] = None,
-          case_sensitive: bool = False,
-          regex: Optional[re.Pattern[str]] = None
-          ) -> Syntax[Any, Any]:
-    """Build a ``Syntax`` that matches a single token.
-
-    Convenience wrapper around ``Parser.token``. You can match by
-    type, exact text, or regex.
-
-    Args:
-        token_type: Expected token enum type.
-        text: Exact token text to match.
-        case_sensitive: Whether text matching respects case.
-        regex: Pattern to match token text.
-
-    Returns:
-        Syntax[Any, Any]: A syntax that matches one token.
-    """
-    token_type_txt = token_type.name if token_type is not None else None
-    token_value_txt = text if text is not None else None
-    msg = 'token(' + ','.join([x for x in [token_type_txt, token_value_txt, str(regex)] if x is not None]) + ')'
-    return Syntax(
-        lambda cls: cls.factory('token', token_type=token_type, text=text, case_sensitive=case_sensitive, regex=regex)
-    ).describe(name=msg, fixity='prefix')
-
 
 def identifier(value: str | None = None) -> Syntax[Any, Any]:
     """Match an identifier token, optionally with exact text.
@@ -339,9 +217,9 @@ def identifier(value: str | None = None) -> Syntax[Any, Any]:
         Syntax[Any, Any]: A syntax matching one identifier token.
     """
     if value is None:
-        return token(TokenType.IDENTIFIER)
+        return token(token_type=TokenType.IDENTIFIER)
     else:
-        return token(TokenType.IDENTIFIER, text=value)
+        return token(token_type=TokenType.IDENTIFIER, text=value)
 
 def variable(value: str | None = None) -> Syntax[Any, Any]:
     """Match a variable token, optionally with exact text.
@@ -353,69 +231,42 @@ def variable(value: str | None = None) -> Syntax[Any, Any]:
         Syntax[Any, Any]: A syntax matching one variable token.
     """
     if value is None:
-        return token(TokenType.VAR)
+        return token(token_type=TokenType.VAR)
     else:
-        return token(TokenType.VAR, text=value)
-
-def literal(lit: str) -> Syntax[Any, Any]:
-    """Match an exact literal string (case-sensitive)."""
-    return token(token_type=None, text=lit, case_sensitive=True)
-
-def regex(regex: re.Pattern[str]) -> Syntax[Any, Any]:
-    """Match a token whose text satisfies the given regular expression."""
-    return token(token_type=None, regex=regex, case_sensitive=True)
-
-def lift(value: Any)-> Syntax[Any, Any]:
-    """Lift a Python value into the nearest matching token syntax.
+        return token(token_type=TokenType.VAR, text=value)
 
-    - ``str`` -> ``literal``
-    - ``re.Pattern`` -> ``token`` with regex
-    - ``Enum`` -> ``token`` with type
-    - otherwise -> succeed with the value
-    """
-    if isinstance(value, str):
-        return literal(value)
-    elif isinstance(value, re.Pattern):
-        return token(regex=value)
-    elif isinstance(value, Enum):
-        return token(value)
-    else:
-        return Syntax(lambda cls: cls.success(value))
 
 def number() -> Syntax[Any, Any]:
     """Match a number token."""
-    return token(TokenType.NUMBER)
+    return token(token_type=TokenType.NUMBER)
 
 
 def string() -> Syntax[Any, Any]:
     """Match a string literal token."""
-    return token(TokenType.STRING)
+    return token(token_type=TokenType.STRING)
+
+
 
 
 
-def until(*open_close: Tuple[Syntax[Tuple[T, ...] | T, ParserState[T]], Syntax[Tuple[T, ...] | T, ParserState[T]]],
-          terminator: Optional[Syntax[Tuple[T, ...] | T, ParserState[T]]] = None,
-          inclusive: bool = True,
-          strict: bool = True) -> Syntax[Any, Any]:
-    """Syntax wrapper to scan until a terminator while handling nesting.
 
-
-
+def parse(syntax: Syntax[Any, Any], sql: str, dialect: str) -> Tuple[Any, None | FrozenDict[str, Tuple[AST, ...]]]:
+    """Parse SQL text with a ``Syntax`` using the ``Parser`` backend.
+
+    Tokenizes the SQL with the specified dialect and executes ``syntax``.
 
     Args:
-
-
-
-        strict: Require the very next token to be an opener when provided.
+        syntax: The high-level syntax to run.
+        sql: SQL text to tokenize and parse.
+        dialect: sqlglot dialect name used for tokenization.
 
     Returns:
-
+        Tuple[AST, FrozenDict[str, Tuple[AST, ...]]] | Tuple[Any, None]:
+            The produced AST and collected marks, or a tuple signaling failure.
     """
-
-
-
-
-
-
-    ).describe(name="until", fixity='prefix')
-
+    from syncraft.syntax import run
+    v, s = run(syntax=syntax, alg=Parser, use_cache=True, sql=sql, dialect=dialect)
+    if s is not None:
+        return v, s.binding.bound()
+    else:
+        return v, None