syncraft 0.2.2-py3-none-any.whl → 0.2.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- syncraft/__init__.py +59 -0
- syncraft/algebra.py +230 -25
- syncraft/ast.py +101 -4
- syncraft/constraint.py +41 -0
- syncraft/finder.py +71 -14
- syncraft/generator.py +181 -4
- syncraft/parser.py +162 -0
- syncraft/syntax.py +339 -105
- syncraft-0.2.4.dist-info/METADATA +113 -0
- syncraft-0.2.4.dist-info/RECORD +16 -0
- syncraft-0.2.2.dist-info/METADATA +0 -34
- syncraft-0.2.2.dist-info/RECORD +0 -16
- {syncraft-0.2.2.dist-info → syncraft-0.2.4.dist-info}/WHEEL +0 -0
- {syncraft-0.2.2.dist-info → syncraft-0.2.4.dist-info}/licenses/LICENSE +0 -0
- {syncraft-0.2.2.dist-info → syncraft-0.2.4.dist-info}/top_level.txt +0 -0
syncraft/parser.py
CHANGED
@@ -21,6 +21,15 @@ from syncraft.constraint import Bindable
 T = TypeVar('T', bound=TokenProtocol)
 @dataclass(frozen=True)
 class ParserState(Bindable, Generic[T]):
+    """Immutable state for the SQL token stream during parsing.
+
+    Keeps a tuple of tokens and the current index. The state is passed through
+    parser combinators and can be copied or advanced safely.
+
+    Attributes:
+        input: The full, immutable sequence of tokens.
+        index: Current position within ``input``.
+    """
     input: Tuple[T, ...] = field(default_factory=tuple)
     index: int = 0
 
@@ -31,23 +40,49 @@ class ParserState(Bindable, Generic[T]):
         return encode_tokens(*self.input[self.index:self.index + 2])
 
     def before(self, length: Optional[int] = 5)->str:
+        """Return a string with up to ``length`` tokens before the cursor.
+
+        Args:
+            length: Maximum number of tokens to include.
+
+        Returns:
+            str: Space-separated token texts before the current index.
+        """
         length = min(self.index, length) if length is not None else self.index
         return " ".join(token.text for token in self.input[self.index - length:self.index])
 
     def after(self, length: Optional[int] = 5)->str:
+        """Return a string with up to ``length`` tokens from the cursor on.
+
+        Args:
+            length: Maximum number of tokens to include.
+
+        Returns:
+            str: Space-separated token texts starting at the current index.
+        """
         length = min(length, len(self.input) - self.index) if length is not None else len(self.input) - self.index
         return " ".join(token.text for token in self.input[self.index:self.index + length])
 
 
     def current(self)->T:
+        """Get the current token at ``index``.
+
+        Returns:
+            T: The token at the current index.
+
+        Raises:
+            IndexError: If attempting to read past the end of the stream.
+        """
         if self.ended():
             raise IndexError("Attempted to access token beyond end of stream")
         return self.input[self.index]
 
     def ended(self) -> bool:
+        """Whether the cursor is at or past the end of the token stream."""
         return self.index >= len(self.input)
 
     def advance(self) -> ParserState[T]:
+        """Return a new state advanced by one token (bounded at end)."""
         return replace(self, index=min(self.index + 1, len(self.input)))
 
     def delta(self, new_state: ParserState[T]) -> Tuple[T, ...]:
@@ -70,6 +105,18 @@ class ParserState(Bindable, Generic[T]):
 class Parser(Algebra[T, ParserState[T]]):
     @classmethod
     def state(cls, sql: str, dialect: str) -> ParserState[T]:
+        """Tokenize SQL text into an initial ``ParserState``.
+
+        Uses ``sqlglot.tokenize`` for the given dialect and wraps tokens into
+        the project's ``Token`` type.
+
+        Args:
+            sql: The SQL text to tokenize.
+            dialect: The sqlglot dialect name (e.g. "sqlite", "duckdb").
+
+        Returns:
+            ParserState[T]: Initial parser state at index 0.
+        """
         tokens = tuple([Token(token_type=token.token_type, text=token.text) for token in tokenize(sql, dialect=dialect)])
         return ParserState.from_tokens(tokens) # type: ignore
 
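As a usage sketch (not part of the diff) of the newly documented state construction and cursor helpers, assuming ``Parser`` is importable from ``syncraft.parser`` and that a sqlglot dialect name such as ``"sqlite"`` is available:

    from syncraft.parser import Parser

    state = Parser.state("SELECT a FROM t", dialect="sqlite")
    print(state.current().text)   # text of the first token
    nxt = state.advance()         # returns a new ParserState; the original is unchanged
    print(nxt.before())           # up to 5 token texts before the cursor
    print(nxt.after())            # up to 5 token texts from the cursor onward
    print(nxt.ended())            # False until the cursor passes the last token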
@@ -80,6 +127,21 @@ class Parser(Algebra[T, ParserState[T]]):
               case_sensitive: bool = False,
               regex: Optional[re.Pattern[str]] = None
               )-> Algebra[T, ParserState[T]]:
+        """Match a single token according to a specification.
+
+        Succeeds when the current token satisfies the provided
+        ``TokenSpec`` (by type, exact text, or regex). On failure,
+        an informative ``Error`` is produced with location context.
+
+        Args:
+            token_type: Expected enum type of the token.
+            text: Exact token text to match.
+            case_sensitive: Whether text matching is case sensitive.
+            regex: Regular expression pattern to match token text.
+
+        Returns:
+            Algebra[T, ParserState[T]]: An algebra yielding the matched token.
+        """
         spec = TokenSpec(token_type=token_type, text=text, case_sensitive=case_sensitive, regex=regex)
         def token_run(state: ParserState[T], use_cache:bool) -> Either[Any, Tuple[T, ParserState[T]]]:
             if state.ended():
@@ -105,6 +167,25 @@ class Parser(Algebra[T, ParserState[T]]):
               terminator: Optional[Algebra[Any, ParserState[T]]] = None,
               inclusive: bool = True,
               strict: bool = True) -> Algebra[Any, ParserState[T]]:
+        """Consume tokens until a terminator while respecting nested pairs.
+
+        Tracks nesting of one or more opener/closer parser pairs. When not
+        nested, an optional ``terminator`` may end the scan. If ``inclusive``
+        is true, boundary tokens (openers/closers/terminator) are included in
+        the returned tuple. If ``strict`` is true, the next token must match an
+        opener before scanning continues; otherwise content may start
+        immediately.
+
+        Args:
+            open_close: One or more pairs of (open, close) parsers.
+            terminator: Optional parser that ends scanning at top level.
+            inclusive: Include matched structural tokens in the result.
+            strict: Require the very next token to be an opener when provided.
+
+        Returns:
+            Algebra[Any, ParserState[T]]: An algebra yielding a tuple of
+            collected tokens upon success.
+        """
         def until_run(state: ParserState[T], use_cache:bool) -> Either[Any, Tuple[Any, ParserState[T]]]:
             # Use a stack to enforce proper nesting across multiple open/close pairs.
             tokens: List[Any] = []
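As a usage sketch (not part of the diff) of the nesting behaviour described above, written with the Syntax-level ``until`` and ``literal`` helpers documented further down; helper availability and the exact result shape are assumptions:

    from syncraft.parser import until, literal

    # Collect a balanced parenthesized group, including the parentheses
    # themselves; nested "(" ... ")" pairs are tracked on a stack.
    paren_group = until((literal("("), literal(")")), inclusive=True, strict=True)

    # Scan up to (but not including) a top-level ";", while still skipping
    # over any nested parentheses along the way.
    until_semicolon = until((literal("("), literal(")")),
                            terminator=literal(";"),
                            inclusive=False,
                            strict=False)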
@@ -184,11 +265,37 @@ class Parser(Algebra[T, ParserState[T]]):
 
 def sqlglot(parser: Syntax[Any, Any],
             dialect: str) -> Syntax[List[exp.Expression], ParserState[Any]]:
+    """Map token tuples into sqlglot expressions for a given dialect.
+
+    Wraps a ``Syntax`` so its result is parsed by ``sqlglot.Parser``
+    using ``raw_tokens`` and returns only non-``None`` expressions.
+
+    Args:
+        parser: A syntax that produces a sequence of tokens.
+        dialect: sqlglot dialect name used to parse tokens.
+
+    Returns:
+        Syntax[List[exp.Expression], ParserState[Any]]: Syntax yielding a list
+        of parsed expressions.
+    """
     gp = GlotParser(dialect=dialect)
     return parser.map(lambda tokens: [e for e in gp.parse(raw_tokens=tokens) if e is not None])
 
 
 def parse(syntax: Syntax[Any, Any], sql: str, dialect: str) -> Tuple[AST, FrozenDict[str, Tuple[AST, ...]]] | Tuple[Any, None]:
+    """Parse SQL text with a ``Syntax`` using the ``Parser`` backend.
+
+    Tokenizes the SQL with the specified dialect and executes ``syntax``.
+
+    Args:
+        syntax: The high-level syntax to run.
+        sql: SQL text to tokenize and parse.
+        dialect: sqlglot dialect name used for tokenization.
+
+    Returns:
+        Tuple[AST, FrozenDict[str, Tuple[AST, ...]]] | Tuple[Any, None]:
+            The produced AST and collected marks, or a tuple signaling failure.
+    """
     from syncraft.syntax import run
     return run(syntax, Parser, True, sql=sql, dialect=dialect)
 
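A minimal usage sketch (not part of the diff) for ``parse``, assuming ``parse`` and ``identifier`` are importable from ``syncraft.parser``; the exact AST shape is not shown in this diff:

    from syncraft.parser import parse, identifier

    ast, marks = parse(identifier(), "my_table", dialect="sqlite")
    # On success, `ast` is the AST produced for the matched identifier token and
    # `marks` maps mark names to captured sub-ASTs; on failure `marks` is None.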
@@ -200,6 +307,20 @@ def token(token_type: Optional[Enum] = None,
           case_sensitive: bool = False,
           regex: Optional[re.Pattern[str]] = None
           ) -> Syntax[Any, Any]:
+    """Build a ``Syntax`` that matches a single token.
+
+    Convenience wrapper around ``Parser.token``. You can match by
+    type, exact text, or regex.
+
+    Args:
+        token_type: Expected token enum type.
+        text: Exact token text to match.
+        case_sensitive: Whether text matching respects case.
+        regex: Pattern to match token text.
+
+    Returns:
+        Syntax[Any, Any]: A syntax that matches one token.
+    """
     token_type_txt = token_type.name if token_type is not None else None
     token_value_txt = text if text is not None else None
     msg = 'token(' + ','.join([x for x in [token_type_txt, token_value_txt, str(regex)] if x is not None]) + ')'
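A short sketch (not part of the diff) of the three matching modes; the ``TokenType`` import path is an assumption based on the sqlglot tokens used elsewhere in this file:

    import re
    from sqlglot.tokens import TokenType
    from syncraft.parser import token

    by_type  = token(TokenType.SELECT)                    # match on the token's type
    by_text  = token(text="FROM")                         # exact text, case-insensitive by default
    by_regex = token(regex=re.compile(r"\d+(\.\d+)?"))    # match the token text against a pattern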
@@ -209,24 +330,49 @@ def token(token_type: Optional[Enum] = None,
 
 
 def identifier(value: str | None = None) -> Syntax[Any, Any]:
+    """Match an identifier token, optionally with exact text.
+
+    Args:
+        value: Exact identifier text to match, or ``None`` for any identifier.
+
+    Returns:
+        Syntax[Any, Any]: A syntax matching one identifier token.
+    """
     if value is None:
         return token(TokenType.IDENTIFIER)
     else:
         return token(TokenType.IDENTIFIER, text=value)
 
 def variable(value: str | None = None) -> Syntax[Any, Any]:
+    """Match a variable token, optionally with exact text.
+
+    Args:
+        value: Exact variable text to match, or ``None`` for any variable.
+
+    Returns:
+        Syntax[Any, Any]: A syntax matching one variable token.
+    """
     if value is None:
         return token(TokenType.VAR)
     else:
         return token(TokenType.VAR, text=value)
 
 def literal(lit: str) -> Syntax[Any, Any]:
+    """Match an exact literal string (case-sensitive)."""
     return token(token_type=None, text=lit, case_sensitive=True)
 
 def regex(regex: re.Pattern[str]) -> Syntax[Any, Any]:
+    """Match a token whose text satisfies the given regular expression."""
     return token(token_type=None, regex=regex, case_sensitive=True)
 
 def lift(value: Any)-> Syntax[Any, Any]:
+    """Lift a Python value into the nearest matching token syntax.
+
+    - ``str`` -> ``literal``
+    - ``re.Pattern`` -> ``token`` with regex
+    - ``Enum`` -> ``token`` with type
+    - otherwise -> succeed with the value
+    """
     if isinstance(value, str):
         return literal(value)
     elif isinstance(value, re.Pattern):
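A small sketch (not part of the diff) of the dispatch described in the ``lift`` docstring; the imports are assumptions:

    import re
    from sqlglot.tokens import TokenType
    from syncraft.parser import lift

    s1 = lift("SELECT")               # str        -> literal("SELECT"), case-sensitive
    s2 = lift(re.compile(r"\d+"))     # re.Pattern -> token matched by regex
    s3 = lift(TokenType.IDENTIFIER)   # Enum       -> token(TokenType.IDENTIFIER)
    s4 = lift(42)                     # other      -> a syntax that simply succeeds with 42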
@@ -237,10 +383,12 @@ def lift(value: Any)-> Syntax[Any, Any]:
     return Syntax(lambda cls: cls.success(value))
 
 def number() -> Syntax[Any, Any]:
+    """Match a number token."""
     return token(TokenType.NUMBER)
 
 
 def string() -> Syntax[Any, Any]:
+    """Match a string literal token."""
     return token(TokenType.STRING)
 
 
@@ -249,6 +397,20 @@ def until(*open_close: Tuple[Syntax[Tuple[T, ...] | T, ParserState[T]], Syntax[T
           terminator: Optional[Syntax[Tuple[T, ...] | T, ParserState[T]]] = None,
           inclusive: bool = True,
           strict: bool = True) -> Syntax[Any, Any]:
+    """Syntax wrapper to scan until a terminator while handling nesting.
+
+    Equivalent to ``Parser.until`` but at the ``Syntax`` layer, converting the
+    provided syntaxes into parser algebras under the hood.
+
+    Args:
+        open_close: One or more pairs of (open, close) syntaxes.
+        terminator: Optional syntax that ends scanning at top level.
+        inclusive: Include matched boundary tokens in the result.
+        strict: Require the very next token to be an opener when provided.
+
+    Returns:
+        Syntax[Any, Any]: A syntax yielding a tuple of collected tokens.
+    """
     return Syntax(
         lambda cls: cls.factory('until',
                                 *[(left.alg(cls), right.alg(cls)) for left, right in open_close],