syncraft 0.1.9__tar.gz → 0.1.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of syncraft might be problematic. Click here for more details.

@@ -1,11 +1,11 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: syncraft
3
- Version: 0.1.9
3
+ Version: 0.1.11
4
4
  Summary: Parser combinator library
5
5
  Author-email: Michael Afmokt <michael@esacca.com>
6
6
  License-Expression: MIT
7
7
  Keywords: parser,combinator,sql,sqlite,generator,printer
8
- Requires-Python: >=3.9
8
+ Requires-Python: >=3.10
9
9
  Description-Content-Type: text/markdown
10
10
  License-File: LICENSE
11
11
  Requires-Dist: rich>=14.1.0
@@ -1,13 +1,13 @@
1
1
  [project]
2
2
  name = "syncraft"
3
- version = "0.1.9"
3
+ version = "0.1.11"
4
4
  description = "Parser combinator library"
5
5
  license = "MIT"
6
6
  license-files = ["LICENSE"]
7
7
  readme = "README.md"
8
8
  authors = [{name = "Michael Afmokt", email = "michael@esacca.com"}]
9
9
  keywords = ["parser", "combinator", "sql", "sqlite", "generator", 'printer']
10
- requires-python = ">=3.9"
10
+ requires-python = ">=3.10"
11
11
  dependencies = [
12
12
  "rich>=14.1.0",
13
13
  "rstr>=3.2.2",
@@ -0,0 +1,202 @@
1
+
2
+
3
+ from __future__ import annotations
4
+ import re
5
+ from typing import (
6
+ Optional, Any, TypeVar, Tuple, runtime_checkable, Dict,
7
+ Protocol, Generic, Callable, Union
8
+ )
9
+ from syncraft.algebra import (
10
+ NamedResult, OrResult,ThenResult, ManyResult, ThenKind,
11
+ Lens
12
+ )
13
+ from dataclasses import dataclass, field, replace, is_dataclass, asdict
14
+ from enum import Enum
15
+ from functools import cached_property
16
+
17
@runtime_checkable
class TokenProtocol(Protocol):
    """Structural interface for a token.

    Any object exposing ``token_type`` and ``text`` satisfies this protocol.
    Because the class is ``runtime_checkable``, it can also be used with
    ``isinstance``.
    """

    @property
    def token_type(self) -> Enum:
        """Enum member classifying the token."""
        ...

    @property
    def text(self) -> str:
        """Text carried by the token."""
        ...
23
+
24
+
25
@dataclass(frozen=True)
class Token:
    """Immutable token: an enum-valued kind together with its text."""

    token_type: Enum
    text: str

    def __str__(self) -> str:
        """Render as ``KIND(text)`` using the enum member's name."""
        kind_name = self.token_type.name
        return f"{kind_name}({self.text})"

    def __repr__(self) -> str:
        """Use the same rendering as ``__str__``."""
        return self.__str__()
34
+
35
@dataclass(frozen=True)
class TokenSpec:
    """Declarative description of the tokens that are acceptable.

    Every field is optional; a field left as ``None`` places no constraint.

    Fields:
        token_type: required token type, or ``None`` for any type.
        text: required text (compared after ``strip()``), or ``None``.
        case_sensitive: when False the text comparison ignores case.
        regex: pattern that must fully match the token's text, or ``None``.
    """

    token_type: Optional[Enum] = None
    text: Optional[str] = None
    case_sensitive: bool = False
    regex: Optional[re.Pattern[str]] = None

    def is_valid(self, token: TokenProtocol) -> bool:
        """Return True when *token* satisfies this specification.

        The type constraint (if any) must hold.  For the value, the token
        is accepted when no value constraint is given, or when it matches
        ``text`` or fully matches ``regex``.

        A spec that only provides ``regex`` now actually enforces it;
        previously a missing ``text`` made every token value acceptable and
        the regex was never consulted.
        """
        type_match = self.token_type is None or token.token_type == self.token_type
        if not type_match:
            return False

        # No value constraint at all: the type check alone decides.
        if self.text is None and self.regex is None:
            return True

        if self.text is not None:
            expected = self.text.strip()
            actual = token.text.strip()
            if not self.case_sensitive:
                expected, actual = expected.upper(), actual.upper()
            if expected == actual:
                return True

        # The regex is matched against the unstripped token text.
        return self.regex is not None and self.regex.fullmatch(token.text) is not None
48
+
49
+
50
+
51
+
52
# Type variable for token types; bound to TokenProtocol.
T = TypeVar('T', bound=TokenProtocol)


# Recursive alias for the shapes a parse result may take: a ThenResult,
# NamedResult, ManyResult or OrResult wrapping further parse results, a
# tuple of tokens, or a single token.  The self-references are written as
# quoted strings because the alias is not yet bound while it is being built.
ParseResult = Union[
    ThenResult['ParseResult[T]', 'ParseResult[T]'],
    NamedResult['ParseResult[T]', Any],
    ManyResult['ParseResult[T]'],
    OrResult['ParseResult[T]'],
    Tuple[T, ...],
    T,
]
63
+
64
+
65
+
66
+
67
+
68
+
69
+
70
@dataclass(frozen=True)
class NamedRecord:
    """Immutable pairing of a lens with the value recorded for a name."""

    # Lens stored alongside the value.
    lens: Lens[Any, Any]
    # The recorded value.
    value: Any
74
+
75
@dataclass(frozen=True)
class Walker:
    """Walk a parse result, collecting named records and applying updates.

    ``lens`` is the lens accumulated on the way down to the node currently
    being visited; ``None`` means nothing has been accumulated yet.
    """
    lens: Optional[Lens[Any, Any]] = None
    def get(self, root: ParseResult[Any]) -> Dict[str, NamedRecord]:
        """Collect ``{name: NamedRecord}`` for the tree under *root*.

        Only ``NamedResult`` nodes whose ``aggregator`` is not ``None`` add an
        entry of their own; all other matched shapes return the merged
        entries of their children.  A *root* matching none of the cases
        yields an empty dict.

        Raises:
            ValueError: a ``NamedResult`` has a callable ``backward_map`` but
                no callable ``forward_map``.
        """
        match root:
            case ManyResult(value=children):
                new_named: Dict[str, NamedRecord] = {}
                for i, child in enumerate(children):
                    # Each child is visited with the lens extended by its index.
                    new_walker = replace(self, lens=(self.lens / ManyResult.lens(i)) if self.lens else ManyResult.lens(i))
                    # `|=` lets later children override earlier entries of the same name.
                    new_named |= new_walker.get(child)
                return new_named
            case OrResult(value=value):
                new_walker = replace(self, lens=(self.lens / OrResult.lens()) if self.lens else OrResult.lens())
                return new_walker.get(value)
            case ThenResult(left=left,
                            right=right,
                            kind=kind):
                # Both sides are visited with the same extended lens; on a
                # name clash the right side's entry wins.
                new_walker = replace(self, lens=(self.lens / ThenResult.lens(kind)) if self.lens else ThenResult.lens(kind))
                return new_walker.get(left) | new_walker.get(right)
            case NamedResult(name=name,
                             value=value,
                             forward_map=forward_map,
                             backward_map=backward_map,
                             aggregator=aggregator):
                this_lens = (self.lens / NamedResult.lens()) if self.lens else NamedResult.lens()
                if callable(forward_map) and callable(backward_map):
                    this_lens = this_lens.bimap(forward_map, backward_map)
                elif callable(forward_map):
                    # Without a backward map, the backward direction ignores
                    # its argument and returns this node's current value.
                    this_lens = this_lens.bimap(forward_map, lambda _: value)
                elif callable(backward_map):
                    raise ValueError("backward_map provided without forward_map")
                new_walker = replace(self, lens=this_lens)
                child_named = new_walker.get(value)
                if aggregator is not None:
                    # The node's own record holds the aggregator applied to
                    # the records collected beneath it.
                    return child_named | {name: NamedRecord(lens=this_lens,
                                                            value=aggregator(child_named))}
                else:
                    return child_named
        # Any other shape contributes no names.
        return {}

    def set(self, root: ParseResult[Any], updated_values: Dict[str, Any]) -> ParseResult[Any]:
        """Return *root* with the values in *updated_values* applied by name.

        The name -> record table is computed once from the incoming *root*.
        Names missing from that table are ignored.

        Raises:
            TypeError: the value supplied for an aggregator-backed name is
                neither a dict nor a dataclass instance.
        """
        named_records = self.get(root)
        def apply_update(name: str, value: Any, root: ParseResult[Any]) -> ParseResult[Any]:
            if name not in named_records:
                # Skip unknown names safely
                return root
            record = named_records[name]
            target_named: NamedResult[Any, Any] = record.lens.get(root)
            assert isinstance(target_named, NamedResult)

            if target_named.aggregator is not None:
                # Break apart dataclass/dict into child fields
                if isinstance(value, dict):
                    child_updates = value
                elif is_dataclass(value) and not isinstance(value, type):
                    child_updates = asdict(value)
                else:
                    raise TypeError(f"Unsupported aggregator value for '{name}': {type(value)}")

                # Recursively apply each child update
                for child_name, child_value in child_updates.items():
                    root = apply_update(child_name, child_value, root)
                return root

            else:
                # Leaf: just replace the value
                # NOTE(review): get() only registers names whose NamedResult
                # has an aggregator, so confirm when this branch is expected
                # to run.
                updated_named = replace(target_named, value=value)
                return record.lens.set(root, updated_named)

        # Updates are applied in dict order, each on the result of the previous one.
        for name, value in updated_values.items():
            root = apply_update(name, value, root)

        return root
148
+
149
+ @dataclass(frozen=True)
150
+ class AST(Generic[T]):
151
+ focus: ParseResult[T]
152
+ pruned: bool = False
153
+ parent: Optional[AST[T]] = None
154
+
155
+ def up(self)->Optional[AST[T]]:
156
+ return self.parent
157
+
158
+ def left(self) -> Optional[AST[T]]:
159
+ match self.focus:
160
+ case ThenResult(left=left, kind=kind):
161
+ return replace(self, focus=left, parent=self, pruned = self.pruned or kind == ThenKind.RIGHT)
162
+ case _:
163
+ raise TypeError(f"Invalid focus type({self.focus}) for left traversal")
164
+
165
+ def right(self) -> Optional[AST[T]]:
166
+ match self.focus:
167
+ case ThenResult(right=right, kind=kind):
168
+ return replace(self, focus=right, parent=self, pruned = self.pruned or kind == ThenKind.LEFT)
169
+ case _:
170
+ raise TypeError(f"Invalid focus type({self.focus}) for right traversal")
171
+
172
+
173
+ def down(self, index: int) -> Optional[AST[T]]:
174
+ match self.focus:
175
+ case ManyResult(value=children):
176
+ if 0 <= index < len(children):
177
+ return replace(self, focus=children[index], parent=self, pruned=self.pruned)
178
+ else:
179
+ raise IndexError(f"Index {index} out of bounds for ManyResult with {len(children)} children")
180
+ case OrResult(value=value):
181
+ if index == 0:
182
+ return replace(self, focus=value, parent=self, pruned=self.pruned)
183
+ else:
184
+ raise IndexError(f"Index {index} out of bounds for OrResult")
185
+ case _:
186
+ raise TypeError(f"Invalid focus type({self.focus}) for down traversal")
187
+
188
+ def how_many(self)->int:
189
+ match self.focus:
190
+ case ManyResult(value=children):
191
+ return len(children)
192
+ case _:
193
+ raise TypeError(f"Invalid focus type({self.focus}) for how_many")
194
+
195
+
196
+
197
+ @cached_property
198
+ def root(self) -> AST[T]:
199
+ while self.parent is not None:
200
+ self = self.parent
201
+ return self
202
+
@@ -156,7 +156,7 @@ class DSL(Generic[A, S], Insptectable):
156
156
  def sep_by(self, sep: DSL[Any, S]) -> DSL[ThenResult[A, ManyResult[ThenResult[None, A]]], S]:
157
157
  return (self + (sep >> self).many()).describe(
158
158
  name='sep_by',
159
- fixity='infix',
159
+ fixity='prefix',
160
160
  parameter=[self, sep]
161
161
  )
162
162
 
@@ -0,0 +1,239 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import (
4
+ Any, TypeVar, Tuple, Optional, Callable, Generic, Union, Iterable, Hashable,
5
+ cast, List
6
+ )
7
+ from functools import cached_property
8
+ from dataclasses import dataclass, replace
9
+ from syncraft.algebra import (
10
+ Algebra, ThenResult, Either, Left, Right, Error, Insptectable,
11
+ NamedResult, OrResult, ManyResult, ThenKind
12
+ )
13
+ from syncraft.ast import TokenProtocol, ParseResult, AST, Token, TokenSpec
14
+ from sqlglot import TokenType
15
+ import re
16
+ import rstr
17
+ from functools import lru_cache
18
+ import random
19
+
20
# Unconstrained type variable for the result type produced by a mapped algebra.
B = TypeVar('B')
# Type variable for token types; bound to TokenProtocol.
T = TypeVar('T', bound=TokenProtocol)

# Recursive alias for the shapes a generated result may take: a ThenResult,
# ManyResult or OrResult wrapping further results, an iterable of tokens, or
# a single token.  The self-references are quoted forward references.
GenResult = Union[
    ThenResult['GenResult[T]', 'GenResult[T]'],
    ManyResult['GenResult[T]'],
    OrResult['GenResult[T]'],
    Iterable[T],
    T
]
30
+
31
@dataclass(frozen=True)
class GenState(Generic[T], Insptectable):
    """Immutable generator state: an optional AST cursor plus an RNG seed.

    ``ast`` is the cursor into an existing tree used as a template, or
    ``None`` when there is none.  ``seed`` feeds the random generators
    handed out by :meth:`rng` and is re-derived by :meth:`fork`.
    """

    ast: Optional[AST[T]]
    seed: int

    @staticmethod
    def _derive_seed(seed: int, tag: Any) -> int:
        """Mix *seed* and *tag* into a new seed that is stable across runs.

        The builtin ``hash`` is randomized per process for strings, so using
        it with string tags would make a fixed seed produce different output
        on every run.  A digest of the ``repr`` is used instead.
        """
        import hashlib

        payload = repr((seed, tag)).encode("utf-8")
        return int.from_bytes(hashlib.blake2b(payload, digest_size=8).digest(), "big")

    def fork(self, tag: Any) -> GenState[T]:
        """Return a copy whose seed is derived from this seed and *tag*."""
        return replace(self, seed=self._derive_seed(self.seed, tag))

    def rng(self, tag: Any = None) -> random.Random:
        """Return a fresh ``random.Random`` seeded from this state.

        With no *tag* the state's own seed is used; otherwise the seed is
        first mixed with *tag*.
        """
        return random.Random(self.seed if tag is None else self._derive_seed(self.seed, tag))

    def to_string(self, interested: Callable[[Any], bool]) -> str | None:
        """Return a short description showing the current focus."""
        return f"GenState(current={self.focus})"

    @cached_property
    def pruned(self) -> bool:
        """True when there is no AST or the AST cursor is marked pruned."""
        return self.ast is None or self.ast.pruned

    @property
    def focus(self) -> Optional[ParseResult[T]]:
        """The AST cursor's focus, or ``None`` when there is no AST."""
        if self.ast is None:
            return None
        return self.ast.focus

    def left(self) -> GenState[T]:
        """Move the cursor left; a state without an AST is returned unchanged."""
        if self.ast is None:
            return self
        return replace(self, ast=self.ast.left())

    def right(self) -> GenState[T]:
        """Move the cursor right; a state without an AST is returned unchanged."""
        if self.ast is None:
            return self
        return replace(self, ast=self.ast.right())

    def up(self) -> GenState[T]:
        """Move the cursor to its parent; a state without an AST is returned unchanged."""
        if self.ast is None:
            return self
        return replace(self, ast=self.ast.up())

    def down(self, index: int) -> GenState[T]:
        """Move the cursor to child *index*; a state without an AST is returned unchanged."""
        if self.ast is None:
            return self
        return replace(self, ast=self.ast.down(index))

    @cached_property
    def how_many(self) -> int:
        """Child count reported by the AST cursor, or 0 when there is no AST."""
        if self.ast is None:
            return 0
        return self.ast.how_many()

    @classmethod
    def from_ast(cls, ast: Optional[AST[T]], seed: int = 0) -> GenState[T]:
        """Build a state from an optional AST cursor and a seed."""
        return cls(ast=ast, seed=seed)

    @classmethod
    def from_parse_result(cls, parse_result: Optional[ParseResult[T]], seed: int = 0) -> GenState[T]:
        """Build a state from an optional parse result and a seed.

        Only ``None`` means "no data": a falsy but real parse result (for
        example an empty tuple) is still wrapped in an ``AST``.
        """
        return cls.from_ast(AST(parse_result) if parse_result is not None else None, seed)
91
+
92
+
93
+
94
+
95
+
96
# The cache is bounded: `text` may be arbitrary (including randomly generated
# strings), so an unbounded cache would grow for the life of the process.
@lru_cache(maxsize=4096)
def token_type_from_string(token_type: Optional[TokenType], text: str, case_sensitive:bool)-> TokenType:
    """Resolve the token type to attach to *text*.

    A *token_type* that is a ``TokenType`` other than ``TokenType.VAR`` is
    returned unchanged.  Otherwise the first ``TokenType`` member whose value
    equals *text*, ignoring case, is returned, falling back to
    ``TokenType.VAR`` when none matches.

    NOTE(review): *case_sensitive* takes part in the cache key only; the
    lookup itself always ignores case.  Confirm whether that is intended.
    """
    if isinstance(token_type, TokenType) and token_type != TokenType.VAR:
        return token_type

    lowered = text.lower()  # invariant across the scan below
    for candidate in TokenType:
        if candidate.value == text or str(candidate.value).lower() == lowered:
            return candidate
    return TokenType.VAR
104
+
105
+
106
@dataclass(frozen=True)
class TokenGen(TokenSpec):
    """A ``TokenSpec`` that can also produce a concrete ``Token``."""

    def __str__(self) -> str:
        """Render the spec's fields, using empty strings for unset ones."""
        type_name = self.token_type.name if self.token_type else ""
        literal = self.text if self.text else ""
        pattern = self.regex.pattern if self.regex else ""
        return f"TokenGen({type_name}, {literal}, {self.case_sensitive}, {pattern})"

    def __repr__(self) -> str:
        """Use the same rendering as ``__str__``."""
        return self.__str__()

    def _choose_text(self) -> str:
        """Pick the token text: fixed text, regex sample, type value, or "VALUE"."""
        if self.text is not None:
            return self.text
        if self.regex is not None:
            try:
                return rstr.xeger(self.regex)
            except Exception:
                # Best effort: if sampling fails, fall back to the pattern source.
                return self.regex.pattern
        if self.token_type is not None:
            return str(self.token_type.value)
        return "VALUE"

    def gen(self) -> Token:
        """Create a ``Token`` whose text and type follow this spec."""
        chosen = self._choose_text()
        resolved_type = token_type_from_string(self.token_type,
                                               chosen,
                                               self.case_sensitive)
        return Token(token_type=resolved_type, text=chosen)
137
+
138
+
139
+
140
+
141
+ @dataclass(frozen=True)
142
+ class Generator(Algebra[GenResult[T], GenState[T]]):
143
+ def flat_map(self, f: Callable[[GenResult[T]], Algebra[B, GenState[T]]]) -> Algebra[B, GenState[T]]:
144
+ def flat_map_run(input: GenState[T], use_cache:bool) -> Either[Any, Tuple[B, GenState[T]]]:
145
+ match self.run(input.left(), use_cache=use_cache):
146
+ case Left(error):
147
+ return Left(error)
148
+ case Right((value, next_input)):
149
+ r = input.right()
150
+ return f(value).run(r, use_cache)
151
+ raise ValueError("flat_map should always return a value or an error.")
152
+ return Generator(run_f = flat_map_run, name=self.name) # type: ignore
153
+
154
+
155
+
156
+ def many(self, *, at_least: int, at_most: Optional[int]) -> Algebra[ManyResult[GenResult[T]], GenState[T]]:
157
+ assert at_least > 0, "at_least must be greater than 0"
158
+ assert at_most is None or at_least <= at_most, "at_least must be less than or equal to at_most"
159
+ def many_run(input: GenState[T], use_cache:bool) -> Either[Any, Tuple[ManyResult[GenResult[T]], GenState[T]]]:
160
+ if input.pruned:
161
+ upper = at_most if at_most is not None else at_least + 2
162
+ count = input.rng("many").randint(at_least, upper)
163
+ ret: List[Any] = []
164
+ for i in range(count):
165
+ forked_input = input.down(0).fork(tag=len(ret))
166
+ match self.run(forked_input, use_cache):
167
+ case Right((value, next_input)):
168
+ ret.append(value)
169
+ case Left(_):
170
+ pass
171
+ return Right((ManyResult(tuple(ret)), input))
172
+ else:
173
+ ret = []
174
+ for index in range(input.how_many):
175
+ match self.run(input.down(index), use_cache):
176
+ case Right((value, next_input)):
177
+ ret.append(value)
178
+ case Left(_):
179
+ pass
180
+ return Right((ManyResult(tuple(ret)), input))
181
+ return self.__class__(many_run, name=f"many({self.name})") # type: ignore
182
+
183
+
184
+ def or_else(self, # type: ignore
185
+ other: Algebra[GenResult[T], GenState[T]]
186
+ ) -> Algebra[OrResult[GenResult[T]], GenState[T]]:
187
+ def or_else_run(input: GenState[T], use_cache:bool) -> Either[Any, Tuple[OrResult[GenResult[T]], GenState[T]]]:
188
+ if input.pruned:
189
+ forked_input = input.fork(tag="or_else")
190
+ match forked_input.rng("or_else").choice((self, other)).run(forked_input, use_cache):
191
+ case Right((value, next_input)):
192
+ return Right((OrResult(value), next_input))
193
+ case Left(error):
194
+ return Left(error)
195
+ else:
196
+ match self.run(input.down(0), use_cache):
197
+ case Right((value, next_input)):
198
+ return Right((OrResult(value), next_input))
199
+ case Left(error):
200
+ match other.run(input.down(0), use_cache):
201
+ case Right((value, next_input)):
202
+ return Right((OrResult(value), next_input))
203
+ case Left(error):
204
+ return Left(error)
205
+ raise ValueError("or_else should always return a value or an error.")
206
+ return self.__class__(or_else_run, name=f"free_or({self.name} | {other.name})") # type: ignore
207
+
208
+ @classmethod
209
+ def token(cls,
210
+ token_type: Optional[TokenType] = None,
211
+ text: Optional[str] = None,
212
+ case_sensitive: bool = False,
213
+ regex: Optional[re.Pattern[str]] = None
214
+ )-> Algebra[GenResult[T], GenState[T]]:
215
+ gen = TokenGen(token_type=token_type, text=text, case_sensitive=case_sensitive, regex=regex)
216
+ lazy_self: Algebra[GenResult[T], GenState[T]]
217
+ def token_run(input: GenState[T], use_cache:bool) -> Either[Any, Tuple[GenResult[Token], GenState[T]]]:
218
+ if input.pruned:
219
+ return Right((gen.gen(), input))
220
+ else:
221
+ current = input.focus
222
+ if not isinstance(current, Token) or not gen.is_valid(current):
223
+ return Left(Error(None,
224
+ message=f"Expected a Token, but got {type(current)}.",
225
+ state=input))
226
+ return Right((current, input))
227
+ lazy_self = cls(token_run, name=cls.__name__ + f'.token({token_type or text or regex})') # type: ignore
228
+ return lazy_self
229
+
230
+
231
+
232
def generate(gen: Algebra[Any, Any], data: Optional[AST[Any]] = None, seed: int = 0) -> AST[Any] | Any:
    """Run *gen* once and return what it produced.

    The generator is run, with caching disabled, on a state built from
    *data* and *seed*.  On success the produced value is wrapped in an
    ``AST``; on failure the content of the ``Left`` is returned.
    """
    outcome = gen.run(GenState.from_ast(data, seed), use_cache=False)
    if not isinstance(outcome, Right):
        assert isinstance(outcome, Left), "Generator must return Either[Any, Tuple[Any, Any]]"
        return outcome.value
    produced = outcome.value[0]  # first element of the (value, state) pair
    return AST(produced)
239
+