syncraft 0.2.5__py3-none-any.whl → 0.2.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of syncraft might be problematic. Click here for more details.
- syncraft/__init__.py +30 -9
- syncraft/algebra.py +143 -214
- syncraft/ast.py +62 -7
- syncraft/cache.py +113 -0
- syncraft/constraint.py +184 -134
- syncraft/dev.py +9 -0
- syncraft/finder.py +17 -12
- syncraft/generator.py +80 -78
- syncraft/lexer.py +131 -0
- syncraft/parser.py +75 -224
- syncraft/syntax.py +187 -100
- syncraft/utils.py +214 -0
- syncraft/walker.py +147 -0
- syncraft-0.2.6.dist-info/METADATA +56 -0
- syncraft-0.2.6.dist-info/RECORD +20 -0
- syncraft/diagnostic.py +0 -70
- syncraft-0.2.5.dist-info/METADATA +0 -113
- syncraft-0.2.5.dist-info/RECORD +0 -16
- {syncraft-0.2.5.dist-info → syncraft-0.2.6.dist-info}/WHEEL +0 -0
- {syncraft-0.2.5.dist-info → syncraft-0.2.6.dist-info}/licenses/LICENSE +0 -0
- {syncraft-0.2.5.dist-info → syncraft-0.2.6.dist-info}/top_level.txt +0 -0
syncraft/finder.py
CHANGED
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
from typing import (
|
|
4
|
-
Any, Tuple, Generator as YieldGen, TypeVar, Generic
|
|
4
|
+
Any, Tuple, Generator as PyGenerator, TypeVar, Generic
|
|
5
5
|
)
|
|
6
6
|
from dataclasses import dataclass
|
|
7
7
|
from syncraft.algebra import (
|
|
8
|
-
Algebra, Either, Right,
|
|
8
|
+
Algebra, Either, Right, Incomplete
|
|
9
9
|
)
|
|
10
10
|
from syncraft.ast import TokenProtocol, ParseResult, Choice, Many, Then, Marked, Collect
|
|
11
11
|
|
|
12
12
|
from syncraft.generator import GenState, Generator
|
|
13
|
-
|
|
13
|
+
from syncraft.cache import Cache
|
|
14
14
|
from syncraft.syntax import Syntax
|
|
15
15
|
|
|
16
16
|
|
|
@@ -24,7 +24,7 @@ class Finder(Generator[T], Generic[T]):
|
|
|
24
24
|
such as ``matches`` and ``find``.
|
|
25
25
|
"""
|
|
26
26
|
@classmethod
|
|
27
|
-
def anything(cls)->Algebra[Any, GenState[T]]:
|
|
27
|
+
def anything(cls, cache: Cache)->Algebra[Any, GenState[T]]:
|
|
28
28
|
"""Match any node and return it unchanged.
|
|
29
29
|
|
|
30
30
|
Succeeds on any input ``GenState`` and returns the current AST node as
|
|
@@ -35,15 +35,16 @@ class Finder(Generator[T], Generic[T]):
|
|
|
35
35
|
Algebra[Any, GenState[T]]: An algebra that always succeeds with the
|
|
36
36
|
tuple ``(input.ast, input)``.
|
|
37
37
|
"""
|
|
38
|
-
def anything_run(input: GenState[T], use_cache:bool) -> Either[Any, Tuple[Any, GenState[T]]]:
|
|
38
|
+
def anything_run(input: GenState[T], use_cache:bool) -> PyGenerator[Incomplete[GenState[T]] ,GenState[T],Either[Any, Tuple[Any, GenState[T]]]]:
|
|
39
|
+
yield from ()
|
|
39
40
|
return Right((input.ast, input))
|
|
40
|
-
return cls(anything_run, name=cls.__name__ + '.anything')
|
|
41
|
+
return cls(anything_run, name=cls.__name__ + '.anything', cache=cache)
|
|
41
42
|
|
|
42
43
|
|
|
43
44
|
|
|
44
45
|
#: A ``Syntax`` that matches any node and returns it as the result without
|
|
45
46
|
#: consuming or modifying state.
|
|
46
|
-
anything = Syntax(lambda cls: cls.factory('anything')).describe(name="
|
|
47
|
+
anything = Syntax(lambda cls, cache: cls.factory('anything', cache=cache)).describe(name="anything", fixity='infix')
|
|
47
48
|
|
|
48
49
|
def _matches(alg: Algebra[Any, GenState[Any]], data: ParseResult[Any])-> bool:
|
|
49
50
|
state = GenState[Any].from_ast(ast = data, restore_pruned=True)
|
|
@@ -51,7 +52,7 @@ def _matches(alg: Algebra[Any, GenState[Any]], data: ParseResult[Any])-> bool:
|
|
|
51
52
|
return isinstance(result, Right)
|
|
52
53
|
|
|
53
54
|
|
|
54
|
-
def _find(alg: Algebra[Any, GenState[Any]], data: ParseResult[Any]) -> YieldGen[ParseResult[Any], None, None]:
|
|
55
|
+
def _find(alg: Algebra[Any, GenState[Any]], data: ParseResult[Any]) -> PyGenerator[ParseResult[Any], None, None]:
|
|
55
56
|
if not isinstance(data, (Marked, Collect)):
|
|
56
57
|
if _matches(alg, data):
|
|
57
58
|
yield data
|
|
@@ -88,14 +89,14 @@ def matches(syntax: Syntax[Any, Any], data: ParseResult[Any])-> bool:
|
|
|
88
89
|
Returns:
|
|
89
90
|
bool: ``True`` if the syntax succeeds on ``data``, ``False`` otherwise.
|
|
90
91
|
"""
|
|
91
|
-
gen = syntax(Finder)
|
|
92
|
+
gen = syntax(Finder, Cache())
|
|
92
93
|
if isinstance(data, (Marked, Collect)):
|
|
93
94
|
return _matches(gen, data.value)
|
|
94
95
|
else:
|
|
95
96
|
return _matches(gen, data)
|
|
96
97
|
|
|
97
98
|
|
|
98
|
-
def find(syntax: Syntax[Any, Any], data: ParseResult[Any]) -> YieldGen[ParseResult[Any], None, None]:
|
|
99
|
+
def find(syntax: Syntax[Any, Any], data: ParseResult[Any]) -> PyGenerator[ParseResult[Any], None, None]:
|
|
99
100
|
"""Yield all subtrees that match a syntax.
|
|
100
101
|
|
|
101
102
|
Performs a depth‑first traversal of ``data`` and yields each node where the
|
|
@@ -111,5 +112,9 @@ def find(syntax: Syntax[Any, Any], data: ParseResult[Any]) -> YieldGen[ParseResu
|
|
|
111
112
|
ParseResult[Any]: Each node that satisfies ``syntax`` (pre‑order: the
|
|
112
113
|
current node is tested before visiting its children).
|
|
113
114
|
"""
|
|
114
|
-
gen = syntax(Finder)
|
|
115
|
-
yield from _find(gen, data)
|
|
115
|
+
gen = syntax(Finder, Cache())
|
|
116
|
+
yield from _find(gen, data)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
|
syncraft/generator.py
CHANGED
|
@@ -2,19 +2,20 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
from typing import (
|
|
4
4
|
Any, TypeVar, Tuple, Optional, Callable, Generic,
|
|
5
|
-
List,
|
|
5
|
+
List, Generator as PyGenerator
|
|
6
6
|
)
|
|
7
7
|
from functools import cached_property
|
|
8
8
|
from dataclasses import dataclass, replace
|
|
9
9
|
from syncraft.algebra import (
|
|
10
|
-
Algebra, Either, Left, Right, Error
|
|
10
|
+
Algebra, Either, Left, Right, Error, Incomplete
|
|
11
11
|
)
|
|
12
|
+
from syncraft.cache import Cache
|
|
12
13
|
|
|
13
14
|
from syncraft.ast import (
|
|
14
15
|
ParseResult, AST, Token, TokenSpec,
|
|
15
16
|
Nothing, TokenProtocol,
|
|
16
17
|
Choice, Many, ChoiceKind,
|
|
17
|
-
Then, ThenKind,
|
|
18
|
+
Then, ThenKind, SyncraftError
|
|
18
19
|
)
|
|
19
20
|
from syncraft.constraint import FrozenDict
|
|
20
21
|
from syncraft.syntax import Syntax
|
|
@@ -142,27 +143,6 @@ class GenState(Bindable, Generic[T]):
|
|
|
142
143
|
if isinstance(self.ast, Then) and (self.ast.kind != ThenKind.LEFT or self.restore_pruned):
|
|
143
144
|
return replace(self, ast=self.ast.right)
|
|
144
145
|
return replace(self, ast=None)
|
|
145
|
-
|
|
146
|
-
def down(self, index: int) -> GenState[T]:
|
|
147
|
-
"""Descend through wrapper nodes to reach the contained value.
|
|
148
|
-
|
|
149
|
-
Currently unwraps ``Marked`` nodes. Raises ``TypeError`` for other
|
|
150
|
-
node types.
|
|
151
|
-
|
|
152
|
-
Args:
|
|
153
|
-
index: Placeholder for a future multi-child descent API.
|
|
154
|
-
|
|
155
|
-
Returns:
|
|
156
|
-
GenState[T]: State focused on the unwrapped child or unchanged when
|
|
157
|
-
pruned.
|
|
158
|
-
"""
|
|
159
|
-
if self.ast is None:
|
|
160
|
-
return self
|
|
161
|
-
match self.ast:
|
|
162
|
-
case Marked(value=value):
|
|
163
|
-
return replace(self, ast=value)
|
|
164
|
-
case _:
|
|
165
|
-
raise TypeError(f"Invalid AST type({self.ast}) for down traversal")
|
|
166
146
|
|
|
167
147
|
@classmethod
|
|
168
148
|
def from_ast(cls,
|
|
@@ -236,8 +216,9 @@ class TokenGen(TokenSpec):
|
|
|
236
216
|
|
|
237
217
|
@dataclass(frozen=True)
|
|
238
218
|
class Generator(Algebra[ParseResult[T], GenState[T]]):
|
|
219
|
+
|
|
239
220
|
@classmethod
|
|
240
|
-
def state(cls, ast: Optional[ParseResult[T]] = None, seed: int = 0, restore_pruned: bool = False)->GenState[T]:
|
|
221
|
+
def state(cls, ast: Optional[ParseResult[T]] = None, seed: int = 0, restore_pruned: bool = False)->GenState[T]: # type: ignore
|
|
241
222
|
"""Create an initial ``GenState`` for generation or checking.
|
|
242
223
|
|
|
243
224
|
Args:
|
|
@@ -263,32 +244,26 @@ class Generator(Algebra[ParseResult[T], GenState[T]]):
|
|
|
263
244
|
Returns:
|
|
264
245
|
Algebra[B, GenState[T]]: An algebra yielding the final result.
|
|
265
246
|
"""
|
|
266
|
-
def flat_map_run(input: GenState[T], use_cache:bool) -> Either[Any, Tuple[B, GenState[T]]]:
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
message=str(e),
|
|
287
|
-
this=self,
|
|
288
|
-
state=input,
|
|
289
|
-
error=e
|
|
290
|
-
))
|
|
291
|
-
return self.__class__(run_f = flat_map_run, name=self.name) # type: ignore
|
|
247
|
+
def flat_map_run(input: GenState[T], use_cache:bool) -> PyGenerator[Incomplete[GenState[T]], GenState[T], Either[Any, Tuple[B, GenState[T]]]]:
|
|
248
|
+
if not isinstance(input.ast, Then) or isinstance(input.ast, Nothing):
|
|
249
|
+
return Left(Error(this=self,
|
|
250
|
+
message=f"Expect Then got {input.ast}",
|
|
251
|
+
state=input))
|
|
252
|
+
lft = input.left()
|
|
253
|
+
self_result = yield from self.run(lft, use_cache=use_cache)
|
|
254
|
+
match self_result:
|
|
255
|
+
case Left(error):
|
|
256
|
+
return Left(error)
|
|
257
|
+
case Right((value, next_input)):
|
|
258
|
+
r = input.right()
|
|
259
|
+
other_result = yield from f(value).run(r, use_cache)
|
|
260
|
+
match other_result:
|
|
261
|
+
case Left(e):
|
|
262
|
+
return Left(e)
|
|
263
|
+
case Right((result, next_input)):
|
|
264
|
+
return Right((result, next_input))
|
|
265
|
+
raise SyncraftError("flat_map should always return a value or an error.", offending=self_result, expect=(Left, Right))
|
|
266
|
+
return self.__class__(flat_map_run, name=self.name, cache=self.cache) # type: ignore
|
|
292
267
|
|
|
293
268
|
|
|
294
269
|
def many(self, *, at_least: int, at_most: Optional[int]) -> Algebra[Many[ParseResult[T]], GenState[T]]:
|
|
@@ -311,15 +286,16 @@ class Generator(Algebra[ParseResult[T], GenState[T]]):
|
|
|
311
286
|
ValueError: If bounds are invalid.
|
|
312
287
|
"""
|
|
313
288
|
if at_least <=0 or (at_most is not None and at_most < at_least):
|
|
314
|
-
raise
|
|
315
|
-
def many_run(input: GenState[T], use_cache:bool) -> Either[Any, Tuple[Many[ParseResult[T]], GenState[T]]]:
|
|
289
|
+
raise SyncraftError(f"Invalid arguments for many: at_least={at_least}, at_most={at_most}", offending=(at_least, at_most), expect="at_least>0 and (at_most is None or at_most>=at_least)")
|
|
290
|
+
def many_run(input: GenState[T], use_cache:bool) -> PyGenerator[Incomplete[GenState[T]], GenState[T], Either[Any, Tuple[Many[ParseResult[T]], GenState[T]]]]:
|
|
316
291
|
if input.pruned:
|
|
317
292
|
upper = at_most if at_most is not None else at_least + 2
|
|
318
293
|
count = input.rng("many").randint(at_least, upper)
|
|
319
294
|
ret: List[Any] = []
|
|
320
295
|
for i in range(count):
|
|
321
296
|
forked_input = input.fork(tag=len(ret))
|
|
322
|
-
|
|
297
|
+
self_result = yield from self.run(forked_input, use_cache)
|
|
298
|
+
match self_result:
|
|
323
299
|
case Right((value, _)):
|
|
324
300
|
ret.append(value)
|
|
325
301
|
case Left(_):
|
|
@@ -331,8 +307,9 @@ class Generator(Algebra[ParseResult[T], GenState[T]]):
|
|
|
331
307
|
message=f"Expect Many got {input.ast}",
|
|
332
308
|
state=input))
|
|
333
309
|
ret = []
|
|
334
|
-
for x in input.ast.value:
|
|
335
|
-
|
|
310
|
+
for x in input.ast.value:
|
|
311
|
+
self_result = yield from self.run(input.inject(x), use_cache)
|
|
312
|
+
match self_result:
|
|
336
313
|
case Right((value, _)):
|
|
337
314
|
ret.append(value)
|
|
338
315
|
if at_most is not None and len(ret) > at_most:
|
|
@@ -350,7 +327,7 @@ class Generator(Algebra[ParseResult[T], GenState[T]]):
|
|
|
350
327
|
state=input.inject(x)
|
|
351
328
|
))
|
|
352
329
|
return Right((Many(value=tuple(ret)), input))
|
|
353
|
-
return self.__class__(many_run, name=f"many({self.name})") # type: ignore
|
|
330
|
+
return self.__class__(many_run, name=f"many({self.name})", cache=self.cache) # type: ignore
|
|
354
331
|
|
|
355
332
|
|
|
356
333
|
def or_else(self, # type: ignore
|
|
@@ -368,58 +345,70 @@ class Generator(Algebra[ParseResult[T], GenState[T]]):
|
|
|
368
345
|
Algebra[Choice[ParseResult[T], ParseResult[T]], GenState[T]]: An
|
|
369
346
|
algebra yielding which branch succeeded and its value.
|
|
370
347
|
"""
|
|
371
|
-
def or_else_run(input: GenState[T], use_cache:bool) -> Either[Any, Tuple[Choice[ParseResult[T], ParseResult[T]], GenState[T]]]:
|
|
348
|
+
def or_else_run(input: GenState[T], use_cache:bool) -> PyGenerator[Incomplete[GenState[T]], GenState[T], Either[Any, Tuple[Choice[ParseResult[T], ParseResult[T]], GenState[T]]]]:
|
|
372
349
|
def exec(kind: ChoiceKind | None,
|
|
373
350
|
left: GenState[T],
|
|
374
|
-
right: GenState[T])->Either[Any, Tuple[Choice[ParseResult[T], ParseResult[T]], GenState[T]]]:
|
|
351
|
+
right: GenState[T]) -> PyGenerator[Incomplete[GenState[T]], GenState[T], Either[Any, Tuple[Choice[ParseResult[T], ParseResult[T]], GenState[T]]]]:
|
|
375
352
|
match kind:
|
|
376
353
|
case ChoiceKind.LEFT:
|
|
377
|
-
|
|
354
|
+
self_result = yield from self.run(left, use_cache)
|
|
355
|
+
match self_result:
|
|
378
356
|
case Right((value, next_input)):
|
|
379
357
|
return Right((Choice(kind=ChoiceKind.LEFT, value=value), next_input))
|
|
380
358
|
case Left(error):
|
|
381
359
|
return Left(error)
|
|
382
360
|
case ChoiceKind.RIGHT:
|
|
383
|
-
|
|
361
|
+
other_result = yield from other.run(right, use_cache)
|
|
362
|
+
match other_result:
|
|
384
363
|
case Right((value, next_input)):
|
|
385
364
|
return Right((Choice(kind=ChoiceKind.RIGHT, value=value), next_input))
|
|
386
365
|
case Left(error):
|
|
387
366
|
return Left(error)
|
|
388
367
|
case None:
|
|
389
|
-
|
|
368
|
+
self_result = yield from self.run(left, use_cache)
|
|
369
|
+
match self_result:
|
|
390
370
|
case Right((value, next_input)):
|
|
391
371
|
return Right((Choice(kind=ChoiceKind.LEFT, value=value), next_input))
|
|
392
372
|
case Left(error):
|
|
393
|
-
if isinstance(error, Error)
|
|
394
|
-
|
|
395
|
-
|
|
373
|
+
if isinstance(error, Error):
|
|
374
|
+
if error.fatal:
|
|
375
|
+
return Left(error)
|
|
376
|
+
elif error.committed:
|
|
377
|
+
return Left(replace(error, committed=False))
|
|
378
|
+
other_result = yield from other.run(right, use_cache)
|
|
379
|
+
match other_result:
|
|
396
380
|
case Right((value, next_input)):
|
|
397
381
|
return Right((Choice(kind=ChoiceKind.RIGHT, value=value), next_input))
|
|
398
382
|
case Left(error):
|
|
399
383
|
return Left(error)
|
|
400
|
-
raise
|
|
384
|
+
raise SyncraftError(f"Invalid ChoiceKind: {kind}", offending=kind, expect=(ChoiceKind.LEFT, ChoiceKind.RIGHT, None))
|
|
401
385
|
|
|
402
386
|
if input.pruned:
|
|
403
387
|
forked_input = input.fork(tag="or_else")
|
|
404
388
|
which = forked_input.rng("or_else").choice((ChoiceKind.LEFT, ChoiceKind.RIGHT))
|
|
405
|
-
|
|
389
|
+
result = yield from exec(which, forked_input, forked_input)
|
|
390
|
+
return result
|
|
406
391
|
else:
|
|
407
392
|
if not isinstance(input.ast, Choice) or isinstance(input.ast, Nothing):
|
|
408
393
|
return Left(Error(this=self,
|
|
409
394
|
message=f"Expect Choice got {input.ast}",
|
|
410
395
|
state=input))
|
|
411
396
|
else:
|
|
412
|
-
|
|
397
|
+
result = yield from exec(input.ast.kind,
|
|
413
398
|
input.inject(input.ast.value),
|
|
414
399
|
input.inject(input.ast.value))
|
|
415
|
-
|
|
400
|
+
return result
|
|
401
|
+
|
|
402
|
+
return self.__class__(or_else_run, name=f"or_else({self.name} | {other.name})", cache=self.cache | other.cache) # type: ignore
|
|
416
403
|
|
|
417
404
|
@classmethod
|
|
418
405
|
def token(cls,
|
|
406
|
+
*,
|
|
407
|
+
cache: Cache,
|
|
419
408
|
token_type: Optional[TokenType] = None,
|
|
420
409
|
text: Optional[str] = None,
|
|
421
410
|
case_sensitive: bool = False,
|
|
422
|
-
regex: Optional[re.Pattern[str]] = None
|
|
411
|
+
regex: Optional[re.Pattern[str]] = None,
|
|
423
412
|
)-> Algebra[ParseResult[T], GenState[T]]:
|
|
424
413
|
"""Match or synthesize a single token.
|
|
425
414
|
|
|
@@ -439,7 +428,8 @@ class Generator(Algebra[ParseResult[T], GenState[T]]):
|
|
|
439
428
|
"""
|
|
440
429
|
gen = TokenGen(token_type=token_type, text=text, case_sensitive=case_sensitive, regex=regex)
|
|
441
430
|
lazy_self: Algebra[ParseResult[T], GenState[T]]
|
|
442
|
-
def token_run(input: GenState[T], use_cache:bool) -> Either[Any, Tuple[ParseResult[Token], GenState[T]]]:
|
|
431
|
+
def token_run(input: GenState[T], use_cache:bool) -> PyGenerator[Incomplete[GenState[T]], GenState[T], Either[Any, Tuple[ParseResult[Token], GenState[T]]]]:
|
|
432
|
+
yield from ()
|
|
443
433
|
if input.pruned:
|
|
444
434
|
return Right((gen.gen(), input))
|
|
445
435
|
else:
|
|
@@ -449,7 +439,7 @@ class Generator(Algebra[ParseResult[T], GenState[T]]):
|
|
|
449
439
|
message=f"Expected a Token({gen.text}), but got {current}.",
|
|
450
440
|
state=input))
|
|
451
441
|
return Right((current, input))
|
|
452
|
-
lazy_self = cls(token_run, name=cls.__name__ + f'.token({token_type or text or regex})') # type: ignore
|
|
442
|
+
lazy_self = cls(token_run, name=cls.__name__ + f'.token({token_type or text or regex})', cache=cache) # type: ignore
|
|
453
443
|
return lazy_self
|
|
454
444
|
|
|
455
445
|
|
|
@@ -459,7 +449,7 @@ def generate_with(
|
|
|
459
449
|
data: Optional[ParseResult[Any]] = None,
|
|
460
450
|
seed: int = 0,
|
|
461
451
|
restore_pruned: bool = False
|
|
462
|
-
) -> Tuple[AST, FrozenDict[str, Tuple[AST, ...]]]
|
|
452
|
+
) -> Tuple[AST, None | FrozenDict[str, Tuple[AST, ...]]]:
|
|
463
453
|
"""
|
|
464
454
|
Generate an AST from the given syntax, optionally constrained by a partial parse result.
|
|
465
455
|
|
|
@@ -473,13 +463,17 @@ def generate_with(
|
|
|
473
463
|
A tuple of (AST, variable bindings) if successful, or (None, None) on failure.
|
|
474
464
|
"""
|
|
475
465
|
from syncraft.syntax import run
|
|
476
|
-
|
|
466
|
+
v, s = run(syntax=syntax, alg=Generator, use_cache=not restore_pruned, ast=data, seed=seed, restore_pruned=restore_pruned)
|
|
467
|
+
if s is not None:
|
|
468
|
+
return v, s.binding.bound()
|
|
469
|
+
else:
|
|
470
|
+
return v, None
|
|
477
471
|
|
|
478
472
|
|
|
479
473
|
def validate(
|
|
480
474
|
syntax: Syntax[Any, Any],
|
|
481
475
|
data: ParseResult[Any]
|
|
482
|
-
) -> Tuple[AST, FrozenDict[str, Tuple[AST, ...]]]
|
|
476
|
+
) -> Tuple[AST, None | FrozenDict[str, Tuple[AST, ...]]]:
|
|
483
477
|
"""
|
|
484
478
|
Validate a parse result (AST) against the given syntax.
|
|
485
479
|
|
|
@@ -491,12 +485,16 @@ def validate(
|
|
|
491
485
|
A tuple of (AST, variable bindings) if valid, or (None, None) if invalid.
|
|
492
486
|
"""
|
|
493
487
|
from syncraft.syntax import run
|
|
494
|
-
|
|
488
|
+
v, s = run(syntax=syntax, alg=Generator, use_cache=True, ast=data, seed=0, restore_pruned=True)
|
|
489
|
+
if s is not None:
|
|
490
|
+
return v, s.binding.bound()
|
|
491
|
+
else:
|
|
492
|
+
return v, None
|
|
495
493
|
|
|
496
494
|
|
|
497
495
|
def generate(
|
|
498
496
|
syntax
|
|
499
|
-
) -> Tuple[AST, FrozenDict[str, Tuple[AST, ...]]]
|
|
497
|
+
) -> Tuple[AST, None | FrozenDict[str, Tuple[AST, ...]]]:
|
|
500
498
|
"""
|
|
501
499
|
Generate a random AST that conforms to the given syntax.
|
|
502
500
|
|
|
@@ -507,4 +505,8 @@ def generate(
|
|
|
507
505
|
A tuple of (AST, variable bindings) if successful, or (None, None) on failure.
|
|
508
506
|
"""
|
|
509
507
|
from syncraft.syntax import run
|
|
510
|
-
|
|
508
|
+
v, s = run(syntax=syntax, alg=Generator, use_cache=False, ast=None, seed=random.randint(0, 2**32 - 1), restore_pruned=False)
|
|
509
|
+
if s is not None:
|
|
510
|
+
return v, s.binding.bound()
|
|
511
|
+
else:
|
|
512
|
+
return v, None
|
syncraft/lexer.py
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from typing import Optional, Tuple, List, Iterable
|
|
4
|
+
import re
|
|
5
|
+
from enum import Enum, auto
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
import re
|
|
9
|
+
from dataclasses import dataclass, fields, field
|
|
10
|
+
from enum import Enum
|
|
11
|
+
from typing import Optional, Callable, Dict, List, Tuple, Any, Pattern
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# -----------------------
|
|
15
|
+
# Rule decorator
|
|
16
|
+
# -----------------------
|
|
17
|
+
def rule(state: str = "DEFAULT", next_state: Optional[str] = None):
    """Create a marker that attaches lexer-state metadata to a token rule.

    Python decorators can only precede ``def``/``class`` statements, so a
    rule cannot be written as ``@rule(...)`` above an annotated assignment.
    Instead, call the returned wrapper on the regex string::

        WS: str = rule(state="DEFAULT")(r"[ \t\n]+")

    Args:
        state: Name of the lexer state in which the rule is active.
        next_state: State to switch to after the rule matches, or ``None``
            to stay in the current state.

    Returns:
        A one-argument callable.  Given a ``str`` it returns a dataclass
        field whose default is that string and whose metadata carries the
        rule info under ``"_rule_meta"``.  Given any other object it sets
        a ``_rule_meta`` attribute on it and returns the object unchanged.
    """
    def wrapper(f):
        meta = {"state": state, "next_state": next_state}
        if isinstance(f, str):
            # Regex strings cannot carry attributes; wrap them in a field so
            # Lexer.build() can read the metadata back via fields().
            return field(default=f, metadata={"_rule_meta": meta})
        setattr(f, "_rule_meta", meta)
        return f
    return wrapper


@dataclass
class Lexer:
    """Base class for declarative, state-aware regex lexers.

    Subclasses declare one dataclass field per token whose default value is
    the token's regex (optionally wrapped with ``rule(...)``).  Call
    ``build()`` to obtain a ready-to-use instance.
    """

    # Enum class with one member per token rule; assigned by build().
    # Deliberately left un-annotated so @dataclass does not treat it as a field.
    TokenType = None
    # State name -> ordered (compiled pattern, action) pairs for that state.
    _states: Dict[str, List[Tuple[re.Pattern, Callable]]] = field(default_factory=dict, init=False)

    @classmethod
    def build(cls) -> "Lexer":
        """Compile the declared rules and return a lexer in state ``DEFAULT``.

        Only fields whose default is a ``str`` are treated as token rules;
        a rule without ``rule(...)`` metadata belongs to state ``DEFAULT``.
        As a side effect, ``cls.TokenType`` is (re)generated.
        """
        rule_fields = [f for f in fields(cls) if isinstance(f.default, str)]

        # 1. generate TokenType enum from the rule fields
        cls.TokenType = Enum(
            cls.__name__ + "Type",
            {f.name: f.name for f in rule_fields}
        )

        # 2. collect compiled rules per state, keeping declaration order
        states: Dict[str, List[Tuple[re.Pattern, Callable]]] = {}
        for f in rule_fields:
            meta = f.metadata.get("_rule_meta", {})
            state = meta.get("state", "DEFAULT")
            next_state = meta.get("next_state")
            states.setdefault(state, []).append(
                (re.compile(f.default), cls._make_action(f.name, next_state))
            )

        # 3. create the instance without going through __init__
        instance = cls.__new__(cls)
        instance._states = states
        instance._current_state = "DEFAULT"
        return instance

    @staticmethod
    def _make_action(name: str, next_state: Optional[str] = None):
        """Return the callback run when the rule called ``name`` matches.

        The callback takes ``(lexer, token)``, switches ``lexer`` to
        ``next_state`` when one is given, and returns
        ``(lexer.TokenType[name], token)``.
        """
        def action(lexer: Any, token: str):
            if next_state:
                lexer.switch(next_state)
            return (lexer.TokenType[name], token)
        return action

    def switch(self, state: str) -> None:
        """Make ``state`` the active lexer state.

        Raises:
            ValueError: If no rule was declared for ``state``.
        """
        if state not in self._states:
            raise ValueError(f"No such state: {state}")
        self._current_state = state

    def tokenize(self, text: str) -> List[Tuple[Enum, str]]:
        """Split ``text`` into ``(token_type, lexeme)`` pairs.

        Scanning starts in whatever state the lexer is currently in and the
        state is not reset afterwards.  At each position the rules of the
        active state are tried in declaration order and the first one that
        matches a non-empty string wins.

        Raises:
            SyntaxError: If no rule of the active state matches at some
                position; the message contains the unconsumed remainder.
            ValueError: If a matched rule names an unknown ``next_state``.
        """
        tokens: List[Tuple[Enum, str]] = []
        pos = 0
        end = len(text)
        while pos < end:
            # Look the rules up on every token: the previous action may have
            # switched the active state.
            for pattern, action in self._states[self._current_state]:
                m = pattern.match(text, pos)
                # Ignore empty matches so the loop always makes progress.
                if m is not None and m.end() > pos:
                    break
            else:
                raise SyntaxError(f"Unrecognized input: {text[pos:]!r}")
            pos = m.end()
            tokens.append(action(self, m.group()))
        return tokens


# -----------------------
# Example SQLite-like lexer
# -----------------------
# Not frozen: the base class is a non-frozen dataclass (a frozen dataclass
# cannot inherit from it) and the lexer mutates its current state.
@dataclass
class SQLiteLexer(Lexer):
    """Small example lexer with a ``DEFAULT`` state and a ``STRING`` state."""

    # Default state tokens
    WS: str = rule(state="DEFAULT")(r"[ \t\n]+")

    NUMBER: str = rule(state="DEFAULT")(r"\d+(\.\d+)?([eE][+-]?\d+)?")

    IDENT: str = rule(state="DEFAULT")(r"[a-zA-Z_][a-zA-Z0-9_]*")

    # Opening quote: enter the STRING state
    STRING_QUOTE: str = rule(state="DEFAULT", next_state="STRING")(r"'")

    PLUS: str = rule(state="DEFAULT")(r"\+")

    EQ: str = rule(state="DEFAULT")(r"=")

    # String state tokens
    STRING_TEXT: str = rule(state="STRING")(r"[^']+")

    # Closing quote: return to the DEFAULT state
    STRING_END: str = rule(state="STRING", next_state="DEFAULT")(r"'")

    # Example comment (line comment)
    COMMENT: str = rule(state="DEFAULT")(r"--[^\n]*")
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
# -----------------------
|
|
123
|
+
# Usage example
|
|
124
|
+
# -----------------------
|
|
125
|
+
if __name__ == "__main__":
    # Demo: tokenize a short SQL-like snippet and print one token per line,
    # with the token name left-aligned in a 12-character column.
    lexer = SQLiteLexer.build()
    sql = "abc + 123 'hello' -- comment"

    tokens = lexer.tokenize(sql)
    for tok_type, value in tokens:
        print(f"{tok_type.name:12} : {value!r}")
|