syncraft 0.1.11__tar.gz → 0.1.13__tar.gz

This diff compares the contents of two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of syncraft might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: syncraft
3
- Version: 0.1.11
3
+ Version: 0.1.13
4
4
  Summary: Parser combinator library
5
5
  Author-email: Michael Afmokt <michael@esacca.com>
6
6
  License-Expression: MIT
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "syncraft"
3
- version = "0.1.11"
3
+ version = "0.1.13"
4
4
  description = "Parser combinator library"
5
5
  license = "MIT"
6
6
  license-files = ["LICENSE"]
@@ -28,7 +28,7 @@ from typing import (
28
28
  )
29
29
 
30
30
  import traceback
31
- from dataclasses import dataclass, fields, replace
31
+ from dataclasses import dataclass, fields, replace, field
32
32
  from functools import cached_property
33
33
  from weakref import WeakKeyDictionary
34
34
  from abc import ABC, abstractmethod
@@ -54,15 +54,11 @@ class Insptectable(ABC):
54
54
 
55
55
  A = TypeVar('A') # Result type
56
56
  B = TypeVar('B') # Result type for mapping
57
- C = TypeVar('C') # Result type for composing lenses
57
+
58
58
  S = TypeVar('S') # State type for the Algebra
59
59
 
60
60
 
61
61
 
62
-
63
- class StructuralResult:
64
- pass
65
-
66
62
  class FrozenDict(Generic[A]):
67
63
  def __init__(self, items: Mapping[str, A]):
68
64
  for k, v in items.items():
@@ -119,63 +115,41 @@ class Lens(Generic[S, A]):
119
115
 
120
116
  def __rtruediv__(self, other: Lens[B, S])->Lens[B, A]:
121
117
  return other.__truediv__(self)
122
-
123
-
118
+
119
+ class StructuralResult:
120
+ def bimap(self, ctx: Any)->Tuple[Any, Callable[[Any], StructuralResult]]:
121
+ return (self, lambda x: self)
124
122
 
125
- @dataclass(eq=True, frozen=True)
126
- class NamedResult(Generic[A, B], StructuralResult):
127
- value: A
123
+
124
+ @dataclass(frozen=True)
125
+ class NamedResult(Generic[A], StructuralResult):
128
126
  name: str
129
- forward_map: Callable[[A], B] | None = None
130
- backward_map: Callable[[B], A] | None = None
131
- aggregator: Callable[..., Any] | None = None
132
-
133
- def __post_init__(self)->None:
134
- if (self.forward_map or self.backward_map) and self.aggregator is not None:
135
- raise ValueError("NamedResult can have either bimap or aggregator, never both.")
136
-
137
- @staticmethod
138
- def lens() -> Lens[NamedResult[A, B], A]:
139
- def get(data: NamedResult[A, B]) -> A:
140
- return data.value
141
-
142
- def set(data: NamedResult[A, B], value: A) -> NamedResult[A, B]:
143
- return replace(data, value = value)
144
-
145
- return Lens(get=get, set=set)
146
-
147
-
127
+ value: A
128
+ def bimap(self, ctx: Any)->Tuple[NamedResult[Any], Callable[[NamedResult[Any]], StructuralResult]]:
129
+ value, backward = self.value.bimap(ctx) if isinstance(self.value, StructuralResult) else (self.value, lambda x: x)
130
+ return NamedResult(self.name, value), lambda data: NamedResult(self.name, backward(data))
148
131
 
149
132
  @dataclass(eq=True, frozen=True)
150
133
  class ManyResult(Generic[A], StructuralResult):
151
134
  value: Tuple[A, ...]
135
+ def bimap(self, ctx: Any)->Tuple[List[Any], Callable[[List[Any]], StructuralResult]]:
136
+ transformed = [v.bimap(ctx) if isinstance(v, StructuralResult) else (v, lambda x: x) for v in self.value]
137
+ backmaps = [b for (_, b) in transformed]
138
+ ret = [a for (a, _) in transformed]
139
+ def backward(data: List[Any]) -> StructuralResult:
140
+ if len(data) != len(transformed):
141
+ raise ValueError("Incompatible data length")
142
+ return ManyResult(value=tuple([backmaps[i](x) for i, x in enumerate(data)]))
143
+ return ret, lambda data: backward(data)
152
144
 
153
- @staticmethod
154
- def lens(index: int) -> Lens[ManyResult[A], A]:
155
- def get(data: ManyResult[A]) -> A:
156
- return data.value[index]
157
-
158
- def set(data: ManyResult[A], value: A) -> ManyResult[A]:
159
- new_value = list(data.value)
160
- new_value[index] = value
161
- return ManyResult(value=tuple(new_value))
162
-
163
- return Lens(get=get, set=set)
164
145
 
165
146
 
166
147
  @dataclass(eq=True, frozen=True)
167
148
  class OrResult(Generic[A], StructuralResult):
168
149
  value: A
169
-
170
- @staticmethod
171
- def lens() -> Lens[OrResult[A], A]:
172
- def get(data: OrResult[A]) -> A:
173
- return data.value
174
-
175
- def set(data: OrResult[A], value: A) -> OrResult[A]:
176
- return OrResult(value=value)
177
-
178
- return Lens(get=get, set=set)
150
+ def bimap(self, ctx: Any) -> Tuple[Any, Callable[[Any], StructuralResult]]:
151
+ value, backward = self.value.bimap(ctx) if isinstance(self.value, StructuralResult) else (self.value, lambda x: x)
152
+ return value, lambda data: OrResult(value=backward(data))
179
153
 
180
154
 
181
155
  class ThenKind(Enum):
@@ -188,89 +162,54 @@ class ThenResult(Generic[A, B], StructuralResult):
188
162
  kind: ThenKind
189
163
  left: A
190
164
  right: B
191
- @cached_property
192
- def flatten(self)-> Tuple[Any, ...]:
193
- def _flatten_side(side: Any)->Tuple[Any, ...]:
194
- return side.flatten if isinstance(side, ThenResult) else (side,)
165
+ def bimap(self, ctx: Any) -> Tuple[Any, Callable[[Any], StructuralResult]]:
166
+ def branch(b: Any) -> Tuple[Any, Callable[[Any], StructuralResult]]:
167
+ if isinstance(b, ThenResult):
168
+ value, backward = b.bimap(ctx)
169
+ x, y = ThenResult.flat((value, backward))
170
+ return x, lambda data: ThenResult(self.kind, y(data), self.right)
171
+ elif isinstance(b, StructuralResult):
172
+ return b.bimap(ctx)
173
+ else:
174
+ return b, lambda x: x
195
175
  match self.kind:
196
176
  case ThenKind.BOTH:
197
- return _flatten_side(self.left) + _flatten_side(self.right)
177
+ left_value, left_bmap = branch(self.left)
178
+ right_value, right_bmap = branch(self.right)
179
+ def backward(x: Tuple[Any, Any]) -> StructuralResult:
180
+ return ThenResult(self.kind, left_bmap(x[0]), right_bmap(x[1]))
181
+ x, y = ThenResult.flat((left_value, right_value))
182
+ return x, lambda data: backward(y(data))
198
183
  case ThenKind.LEFT:
199
- return _flatten_side(self.left)
184
+ left_value, left_bmap = branch(self.left)
185
+ return left_value, lambda data: ThenResult(self.kind, left_bmap(data), self.right)
200
186
  case ThenKind.RIGHT:
201
- return _flatten_side(self.right)
202
-
187
+ right_value, right_bmap = branch(self.right)
188
+ return right_value, lambda data: ThenResult(self.kind, self.left, right_bmap(data))
203
189
  @staticmethod
204
- def lens(kind: ThenKind) -> Lens[ThenResult[A, B], Tuple[A, B]] | Lens[ThenResult[A, B], A] | Lens[ThenResult[A, B], B]:
205
- def both_lens() -> Lens[ThenResult[A, B], Tuple[A, B]]:
206
- def get(data: ThenResult[A, B]) -> Tuple[A, B]:
207
- match data:
208
- case ThenResult(left=left, right=right, kind=ThenKind.BOTH):
209
- return left, right
210
- case _:
211
- raise ValueError(f"Unexpected ThenResult type: {type(data)}")
212
-
213
- def set(data: ThenResult[A, B], value: Tuple[A, B] | ThenResult[A, B]) -> ThenResult[A, B]:
214
- match data:
215
- case ThenResult(left=_, right=_, kind=ThenKind.BOTH):
216
- if isinstance(value, tuple) and len(value) == 2:
217
- return ThenResult(left=value[0], right=value[1], kind=ThenKind.BOTH)
218
- elif isinstance(value, ThenResult):
219
- return ThenResult(left=value.left, right=value.right, kind=ThenKind.BOTH)
220
- else:
221
- raise ValueError(f"Expected a tuple or ThenResult, got: {type(value)}")
222
- case _:
223
- raise ValueError(f"Unexpected ThenResult type: {type(data)}")
224
- return Lens(get=get, set=set)
225
-
226
- def left_lens()-> Lens[ThenResult[A, B], A]:
227
- def left_get(data: ThenResult[A, B]) -> A:
228
- match data:
229
- case ThenResult(left=left, right=_, kind=ThenKind.LEFT):
230
- return left
231
- case _:
232
- raise ValueError(f"Unexpected ParseResult type: {type(data)}")
233
-
234
- def left_set(data: ThenResult[A, B], v: A) -> ThenResult[A, B]:
235
- match data:
236
- case ThenResult(kind=ThenKind.LEFT):
237
- return replace(data, left=v)
238
- case _:
239
- raise ValueError(f"Unexpected ParseResult type: {type(data)}")
240
- return
241
- return Lens(
242
- get=left_get,
243
- set=left_set
244
- )
245
-
246
- def right_lens()-> Lens[ThenResult[A, B], B]:
247
- def right_get(data: ThenResult[A, B]) -> B:
248
- match data:
249
- case ThenResult(left=_, right=right, kind=ThenKind.RIGHT):
250
- return right
251
- case _:
252
- raise ValueError(f"Unexpected ParseResult type: {type(data)}")
253
-
254
- def right_set(data: ThenResult[A, B], v: B) -> ThenResult[A, B]:
255
- match data:
256
- case ThenResult(kind=ThenKind.RIGHT):
257
- return replace(data, right=v)
258
- case _:
259
- raise ValueError(f"Unexpected ParseResult type: {type(data)}")
260
- return
261
- return Lens(
262
- get=right_get,
263
- set=right_set
264
- )
265
- match kind:
266
- case ThenKind.BOTH:
267
- return both_lens()
268
- case ThenKind.LEFT:
269
- return left_lens()
270
- case ThenKind.RIGHT:
271
- return right_lens()
272
- case _:
273
- raise ValueError(f"Unknown ThenKind: {kind}")
190
+ def flat(array: Tuple[Any, Any]) -> Tuple[Tuple[Any, ...], Callable[[Tuple[Any, ...]], Tuple[Any, Any]]]:
191
+ index: Dict[int, int] = {}
192
+ ret: List[Any] = []
193
+ for e in array:
194
+ if isinstance(e, tuple):
195
+ index[len(ret)] = len(e)
196
+ ret.extend(e)
197
+ else:
198
+ ret.append(e)
199
+ def backward(data: Tuple[Any, ...]) -> Tuple[Any, Any]:
200
+ tmp: List[Any] = []
201
+ skip: int = 0
202
+ for i, e in enumerate(data):
203
+ if skip <= 0:
204
+ if i in index:
205
+ tmp.append(tuple(data[i:i + index[i]]))
206
+ skip = index[i] - 1
207
+ else:
208
+ tmp.append(e)
209
+ else:
210
+ skip -= 1
211
+ return tuple(tmp)
212
+ return tuple(ret), backward
274
213
 
275
214
 
276
215
  InProgress = object() # Marker for in-progress state, used to prevent re-entrance in recursive calls
@@ -0,0 +1,125 @@
1
+
2
+
3
+ from __future__ import annotations
4
+ import re
5
+ from typing import (
6
+ Optional, Any, TypeVar, Tuple, runtime_checkable, Dict,
7
+ Protocol, Generic, Callable, Union
8
+ )
9
+ from syncraft.algebra import (
10
+ OrResult,ThenResult, ManyResult, ThenKind,NamedResult, StructuralResult,
11
+ Lens
12
+ )
13
+ from dataclasses import dataclass, replace, is_dataclass, asdict
14
+ from enum import Enum
15
+ from functools import cached_property
16
+
17
+ @runtime_checkable
18
+ class TokenProtocol(Protocol):
19
+ @property
20
+ def token_type(self) -> Enum: ...
21
+ @property
22
+ def text(self) -> str: ...
23
+
24
+
25
+ @dataclass(frozen=True)
26
+ class Token:
27
+ token_type: Enum
28
+ text: str
29
+ def __str__(self) -> str:
30
+ return f"{self.token_type.name}({self.text})"
31
+
32
+ def __repr__(self) -> str:
33
+ return self.__str__()
34
+
35
+ @dataclass(frozen=True)
36
+ class TokenSpec:
37
+ token_type: Optional[Enum] = None
38
+ text: Optional[str] = None
39
+ case_sensitive: bool = False
40
+ regex: Optional[re.Pattern[str]] = None
41
+
42
+ def is_valid(self, token: TokenProtocol) -> bool:
43
+ type_match = self.token_type is None or token.token_type == self.token_type
44
+ value_match = self.text is None or (token.text.strip() == self.text.strip() if self.case_sensitive else
45
+ token.text.strip().upper() == self.text.strip().upper())
46
+ value_match = value_match or (self.regex is not None and self.regex.fullmatch(token.text) is not None)
47
+ return type_match and value_match
48
+
49
+
50
+
51
+
52
+ T = TypeVar('T', bound=TokenProtocol)
53
+
54
+
55
+ ParseResult = Union[
56
+ ThenResult['ParseResult[T]', 'ParseResult[T]'],
57
+ NamedResult['ParseResult[T]'],
58
+ ManyResult['ParseResult[T]'],
59
+ OrResult['ParseResult[T]'],
60
+ T,
61
+ ]
62
+ @dataclass(frozen=True)
63
+ class AST(Generic[T]):
64
+ focus: ParseResult[T]
65
+ pruned: bool = False
66
+ parent: Optional[AST[T]] = None
67
+
68
+ def bimap(self, ctx: Any) -> Tuple[Any, Callable[[Any], AST[T]]]:
69
+ value, backward = self.focus.bimap(ctx) if isinstance(self.focus, StructuralResult) else (self.focus, lambda x: x)
70
+ def back2ast(data: Any) -> AST[T]:
71
+ return replace(self, focus=backward(data)) # type: ignore
72
+ return value, back2ast
73
+
74
+ def up(self)->Optional[AST[T]]:
75
+ return self.parent
76
+
77
+ def left(self) -> Optional[AST[T]]:
78
+ focus = self.focus.value if isinstance(self.focus, NamedResult) else self.focus
79
+ match focus:
80
+ case ThenResult(left=left, kind=kind):
81
+ return replace(self, focus=left, parent=self, pruned = self.pruned or kind == ThenKind.RIGHT)
82
+ case _:
83
+ raise TypeError(f"Invalid focus type({self.focus}) for left traversal")
84
+
85
+ def right(self) -> Optional[AST[T]]:
86
+ focus = self.focus.value if isinstance(self.focus, NamedResult) else self.focus
87
+ match focus:
88
+ case ThenResult(right=right, kind=kind):
89
+ return replace(self, focus=right, parent=self, pruned = self.pruned or kind == ThenKind.LEFT)
90
+ case _:
91
+ raise TypeError(f"Invalid focus type({self.focus}) for right traversal")
92
+
93
+
94
+ def down(self, index: int) -> Optional[AST[T]]:
95
+ focus = self.focus.value if isinstance(self.focus, NamedResult) else self.focus
96
+ match focus:
97
+ case ManyResult(value=children):
98
+ if 0 <= index < len(children):
99
+ return replace(self, focus=children[index], parent=self, pruned=self.pruned)
100
+ else:
101
+ raise IndexError(f"Index {index} out of bounds for ManyResult with {len(children)} children")
102
+ case OrResult(value=value):
103
+ if index == 0:
104
+ return replace(self, focus=value, parent=self, pruned=self.pruned)
105
+ else:
106
+ raise IndexError(f"Index {index} out of bounds for OrResult")
107
+ case _:
108
+ raise TypeError(f"Invalid focus type({self.focus}) for down traversal")
109
+
110
+ def how_many(self)->int:
111
+ focus = self.focus.value if isinstance(self.focus, NamedResult) else self.focus
112
+ match focus:
113
+ case ManyResult(value=children):
114
+ return len(children)
115
+ case _:
116
+ raise TypeError(f"Invalid focus type({self.focus}) for how_many")
117
+
118
+
119
+
120
+ @cached_property
121
+ def root(self) -> AST[T]:
122
+ while self.parent is not None:
123
+ self = self.parent
124
+ return self
125
+
@@ -6,7 +6,7 @@ from typing import (
6
6
  )
7
7
  from dataclasses import dataclass, field
8
8
  from functools import reduce
9
- from syncraft.algebra import Algebra, Error, Either, Insptectable, NamedResult, ThenResult, ManyResult, ThenKind
9
+ from syncraft.algebra import Algebra, Error, Either, Insptectable, ThenResult, ManyResult, ThenKind
10
10
  from types import MethodType, FunctionType
11
11
 
12
12
 
@@ -1,14 +1,14 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  from typing import (
4
- Any, TypeVar, Tuple, Optional, Callable, Generic, Union, Iterable, Hashable,
5
- cast, List
4
+ Any, TypeVar, Tuple, Optional, Callable, Generic, Union, Iterable,
5
+ List
6
6
  )
7
7
  from functools import cached_property
8
8
  from dataclasses import dataclass, replace
9
9
  from syncraft.algebra import (
10
10
  Algebra, ThenResult, Either, Left, Right, Error, Insptectable,
11
- NamedResult, OrResult, ManyResult, ThenKind
11
+ OrResult, ManyResult
12
12
  )
13
13
  from syncraft.ast import TokenProtocol, ParseResult, AST, Token, TokenSpec
14
14
  from sqlglot import TokenType
@@ -142,7 +142,8 @@ class TokenGen(TokenSpec):
142
142
  class Generator(Algebra[GenResult[T], GenState[T]]):
143
143
  def flat_map(self, f: Callable[[GenResult[T]], Algebra[B, GenState[T]]]) -> Algebra[B, GenState[T]]:
144
144
  def flat_map_run(input: GenState[T], use_cache:bool) -> Either[Any, Tuple[B, GenState[T]]]:
145
- match self.run(input.left(), use_cache=use_cache):
145
+ lft = input.left()
146
+ match self.run(lft, use_cache=use_cache):
146
147
  case Left(error):
147
148
  return Left(error)
148
149
  case Right((value, next_input)):
@@ -175,8 +176,20 @@ class Generator(Algebra[GenResult[T], GenState[T]]):
175
176
  match self.run(input.down(index), use_cache):
176
177
  case Right((value, next_input)):
177
178
  ret.append(value)
179
+ if at_most is not None and len(ret) > at_most:
180
+ return Left(Error(
181
+ message=f"Expected at most {at_most} matches, got {len(ret)}",
182
+ this=self,
183
+ state=input.down(index)
184
+ ))
178
185
  case Left(_):
179
186
  pass
187
+ if len(ret) < at_least:
188
+ return Left(Error(
189
+ message=f"Expected at least {at_least} matches, got {len(ret)}",
190
+ this=self,
191
+ state=input.down(index)
192
+ ))
180
193
  return Right((ManyResult(tuple(ret)), input))
181
194
  return self.__class__(many_run, name=f"many({self.name})") # type: ignore
182
195
 
@@ -203,7 +216,7 @@ class Generator(Algebra[GenResult[T], GenState[T]]):
203
216
  case Left(error):
204
217
  return Left(error)
205
218
  raise ValueError("or_else should always return a value or an error.")
206
- return self.__class__(or_else_run, name=f"free_or({self.name} | {other.name})") # type: ignore
219
+ return self.__class__(or_else_run, name=f"or_else({self.name} | {other.name})") # type: ignore
207
220
 
208
221
  @classmethod
209
222
  def token(cls,
@@ -111,52 +111,79 @@ class Parser(Algebra[Tuple[T,...] | T, ParserState[T]]):
111
111
  inclusive: bool = True,
112
112
  strict: bool = True) -> Algebra[Any, ParserState[T]]:
113
113
  def until_run(state: ParserState[T], use_cache:bool) -> Either[Any, Tuple[Any, ParserState[T]]]:
114
- counters = [0] * len(open_close)
114
+ # Use a stack to enforce proper nesting across multiple open/close pairs.
115
115
  tokens: List[Any] = []
116
116
  if not terminator and len(open_close) == 0:
117
- return Left(Error(this=until_run, message="No terminator and no open/close parsers, nothing to parse", state=state))
118
- def run_oc(s: ParserState[T],
119
- sign: int,
120
- *oc: Algebra[Any, ParserState[T]])->Tuple[bool, ParserState[T]]:
121
- matched = False
122
- for i, p in enumerate(oc):
123
- new = p.run(s, use_cache)
124
- if isinstance(new, Right):
125
- matched = True
126
- counters[i] += sign
127
- if inclusive:
128
- tokens.append(new.value[0])
129
- s = new.value[1]
130
- return matched, s
117
+ return Left(Error(this=until_run, message="No terminator and no open/close parsers, nothing to parse", state=state))
118
+
119
+ # Helper to try matching any of the parsers once, returning early on first match
120
+ def try_match(s: ParserState[T], *parsers: Algebra[Any, ParserState[T]]) -> Tuple[bool, Optional[int], Optional[Any], ParserState[T]]:
121
+ for i, p in enumerate(parsers):
122
+ res = p.run(s, use_cache)
123
+ if isinstance(res, Right):
124
+ val, ns = res.value
125
+ return True, i, val, ns
126
+ return False, None, None, s
127
+
131
128
  opens, closes = zip(*open_close) if len(open_close) > 0 else ((), ())
132
129
  tmp_state: ParserState[T] = state.copy()
133
- if strict:
134
- c = reduce(lambda a, b: a.or_else(b), opens).run(tmp_state)
130
+ stack: List[int] = [] # indices into open_close indicating expected closer
131
+
132
+ # If strict, require the very next token to be an opener of any kind
133
+ if strict and len(opens) > 0:
134
+ c = reduce(lambda a, b: a.or_else(b), opens).run(tmp_state, use_cache)
135
135
  if c.is_left():
136
- return Left(Error(
137
- this=until_run,
138
- message="No opening parser matched",
139
- state=tmp_state
140
- ))
136
+ return Left(Error(this=until_run, message="No opening parser matched", state=tmp_state))
137
+
141
138
  while not tmp_state.ended():
142
- mopen, tmp_state = run_oc(tmp_state, 1, *opens)
143
- mclose, tmp_state = run_oc(tmp_state, -1, *closes)
144
- matched = mopen or mclose
145
- if all(c == 0 for c in counters):
146
- if terminator :
147
- new = terminator.run(tmp_state, use_cache)
148
- if isinstance(new, Right):
149
- matched = True
139
+ # Try to open
140
+ o_matched, o_idx, o_tok, o_state = try_match(tmp_state, *opens)
141
+ if o_matched and o_idx is not None:
142
+ stack.append(o_idx)
143
+ if inclusive:
144
+ tokens.append(o_tok)
145
+ tmp_state = o_state
146
+ continue
147
+
148
+ # Try to close
149
+ c_matched, c_idx, c_tok, c_state = try_match(tmp_state, *closes)
150
+ if c_matched and c_idx is not None:
151
+ if not stack or stack[-1] != c_idx:
152
+ return Left(Error(this=until_run, message="Mismatched closing parser", state=tmp_state))
153
+ stack.pop()
154
+ if inclusive:
155
+ tokens.append(c_tok)
156
+ tmp_state = c_state
157
+ # After closing, if stack empty, we may terminate on a terminator
158
+ if len(stack) == 0:
159
+ if terminator:
160
+ term = terminator.run(tmp_state, use_cache)
161
+ if isinstance(term, Right):
162
+ if inclusive:
163
+ tokens.append(term.value[0])
164
+ return Right((tuple(tokens), term.value[1]))
165
+ else:
166
+ return Right((tuple(tokens), tmp_state))
167
+ continue
168
+
169
+ # If nothing structural matched, check termination when not nested
170
+ if len(stack) == 0:
171
+ if terminator:
172
+ term2 = terminator.run(tmp_state, use_cache)
173
+ if isinstance(term2, Right):
150
174
  if inclusive:
151
- tokens.append(new.value[0])
152
- return Right((tuple(tokens), new.value[1]))
175
+ tokens.append(term2.value[0])
176
+ return Right((tuple(tokens), term2.value[1]))
153
177
  else:
154
178
  return Right((tuple(tokens), tmp_state))
155
- elif any(c < 0 for c in counters):
156
- return Left(Error(this=until_run, message="Unmatched closing parser", state=tmp_state))
157
- if not matched:
158
- tokens.append(tmp_state.current())
159
- tmp_state = tmp_state.advance()
179
+
180
+ # Otherwise, consume one token as payload and continue
181
+ tokens.append(tmp_state.current())
182
+ tmp_state = tmp_state.advance()
183
+
184
+ # Reached end of input
185
+ if len(stack) != 0:
186
+ return Left(Error(this=until_run, message="Unterminated group", state=tmp_state))
160
187
  return Right((tuple(tokens), tmp_state))
161
188
  return cls(until_run, name=cls.__name__ + '.until')
162
189
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: syncraft
3
- Version: 0.1.11
3
+ Version: 0.1.13
4
4
  Summary: Parser combinator library
5
5
  Author-email: Michael Afmokt <michael@esacca.com>
6
6
  License-Expression: MIT
@@ -16,4 +16,5 @@ syncraft.egg-info/SOURCES.txt
16
16
  syncraft.egg-info/dependency_links.txt
17
17
  syncraft.egg-info/requires.txt
18
18
  syncraft.egg-info/top_level.txt
19
- tests/test_parse.py
19
+ tests/test_parse.py
20
+ tests/test_until.py
@@ -1,7 +1,6 @@
1
1
  from syncraft.parser import AST, literal, variable, parse, Parser
2
2
  import syncraft.generator as gen
3
3
  from typing import Any
4
- from rich import print
5
4
 
6
5
  IF = literal("if")
7
6
  ELSE = literal("else")
@@ -32,5 +31,5 @@ def test_many_or()->None:
32
31
  syntax = (IF.many() | THEN.many()).many() // END
33
32
  sql = "if if then end"
34
33
  ast:AST[Any] = parse(syntax(Parser), sql, dialect='sqlite')
35
- generated = gen.generate(syntax(gen.Generator))
36
- # assert ast == generated, "Parsed and generated results do not match."
34
+ generated = gen.generate(syntax(gen.Generator), ast)
35
+ assert ast == generated, "Parsed and generated results do not match."
@@ -0,0 +1,40 @@
1
+ from typing import Any
2
+ from syncraft.parser import parse, until, literal, Parser
3
+ from syncraft.ast import AST
4
+
5
+ # Define common pair DSLs
6
+ LP, RP = literal("("), literal(")")
7
+ LB, RB = literal("["), literal("]")
8
+
9
+
10
+ def test_until_accepts_proper_nesting() -> None:
11
+ sql = "([])"
12
+ syntax = until((LP, RP), (LB, RB))
13
+ ast: AST[Any] | Any = parse(syntax(Parser), sql, dialect="sqlite")
14
+ assert isinstance(ast, AST), f"Expected AST for proper nesting, got {ast}"
15
+
16
+
17
+ def test_until_rejects_mismatched_pairs() -> None:
18
+ # Mismatched: ( ] should fail immediately
19
+ sql = "(]"
20
+ syntax = until((LP, RP), (LB, RB))
21
+ res = parse(syntax(Parser), sql, dialect="sqlite")
22
+ from syncraft.algebra import Error
23
+ assert isinstance(res, Error), "Mismatched pairs should be rejected with an Error"
24
+
25
+ def test_until_rejects_unterminated_group() -> None:
26
+ # Unterminated: ( ... EOF
27
+ sql = "("
28
+ syntax = until((LP, RP))
29
+ res = parse(syntax(Parser), sql, dialect="sqlite")
30
+ from syncraft.algebra import Error
31
+ assert isinstance(res, Error), "Unterminated group should be rejected with an Error"
32
+
33
+ def test_until_rejects_crossing_pairs() -> None:
34
+ # Crossing/interleaved: ([)] should be rejected
35
+ sql = "([)]"
36
+ syntax = until((LP, RP), (LB, RB))
37
+ # Use postgres dialect so [ and ] are tokenized distinctly (not as bracketed identifier)
38
+ res = parse(syntax(Parser), sql, dialect="postgres")
39
+ from syncraft.algebra import Error
40
+ assert isinstance(res, Error), "Crossing pairs should be rejected with an Error"
@@ -1,202 +0,0 @@
1
-
2
-
3
- from __future__ import annotations
4
- import re
5
- from typing import (
6
- Optional, Any, TypeVar, Tuple, runtime_checkable, Dict,
7
- Protocol, Generic, Callable, Union
8
- )
9
- from syncraft.algebra import (
10
- NamedResult, OrResult,ThenResult, ManyResult, ThenKind,
11
- Lens
12
- )
13
- from dataclasses import dataclass, field, replace, is_dataclass, asdict
14
- from enum import Enum
15
- from functools import cached_property
16
-
17
- @runtime_checkable
18
- class TokenProtocol(Protocol):
19
- @property
20
- def token_type(self) -> Enum: ...
21
- @property
22
- def text(self) -> str: ...
23
-
24
-
25
- @dataclass(frozen=True)
26
- class Token:
27
- token_type: Enum
28
- text: str
29
- def __str__(self) -> str:
30
- return f"{self.token_type.name}({self.text})"
31
-
32
- def __repr__(self) -> str:
33
- return self.__str__()
34
-
35
- @dataclass(frozen=True)
36
- class TokenSpec:
37
- token_type: Optional[Enum] = None
38
- text: Optional[str] = None
39
- case_sensitive: bool = False
40
- regex: Optional[re.Pattern[str]] = None
41
-
42
- def is_valid(self, token: TokenProtocol) -> bool:
43
- type_match = self.token_type is None or token.token_type == self.token_type
44
- value_match = self.text is None or (token.text.strip() == self.text.strip() if self.case_sensitive else
45
- token.text.strip().upper() == self.text.strip().upper())
46
- value_match = value_match or (self.regex is not None and self.regex.fullmatch(token.text) is not None)
47
- return type_match and value_match
48
-
49
-
50
-
51
-
52
- T = TypeVar('T', bound=TokenProtocol)
53
-
54
-
55
- ParseResult = Union[
56
- ThenResult['ParseResult[T]', 'ParseResult[T]'],
57
- NamedResult['ParseResult[T]', Any],
58
- ManyResult['ParseResult[T]'],
59
- OrResult['ParseResult[T]'],
60
- Tuple[T, ...],
61
- T,
62
- ]
63
-
64
-
65
-
66
-
67
-
68
-
69
-
70
- @dataclass(frozen=True)
71
- class NamedRecord:
72
- lens: Lens[Any, Any]
73
- value: Any
74
-
75
- @dataclass(frozen=True)
76
- class Walker:
77
- lens: Optional[Lens[Any, Any]] = None
78
- def get(self, root: ParseResult[Any]) -> Dict[str, NamedRecord]:
79
- match root:
80
- case ManyResult(value=children):
81
- new_named: Dict[str, NamedRecord] = {}
82
- for i, child in enumerate(children):
83
- new_walker = replace(self, lens=(self.lens / ManyResult.lens(i)) if self.lens else ManyResult.lens(i))
84
- new_named |= new_walker.get(child)
85
- return new_named
86
- case OrResult(value=value):
87
- new_walker = replace(self, lens=(self.lens / OrResult.lens()) if self.lens else OrResult.lens())
88
- return new_walker.get(value)
89
- case ThenResult(left=left,
90
- right=right,
91
- kind=kind):
92
- new_walker = replace(self, lens=(self.lens / ThenResult.lens(kind)) if self.lens else ThenResult.lens(kind))
93
- return new_walker.get(left) | new_walker.get(right)
94
- case NamedResult(name=name,
95
- value=value,
96
- forward_map=forward_map,
97
- backward_map=backward_map,
98
- aggregator=aggregator):
99
- this_lens = (self.lens / NamedResult.lens()) if self.lens else NamedResult.lens()
100
- if callable(forward_map) and callable(backward_map):
101
- this_lens = this_lens.bimap(forward_map, backward_map)
102
- elif callable(forward_map):
103
- this_lens = this_lens.bimap(forward_map, lambda _: value)
104
- elif callable(backward_map):
105
- raise ValueError("backward_map provided without forward_map")
106
- new_walker = replace(self, lens=this_lens)
107
- child_named = new_walker.get(value)
108
- if aggregator is not None:
109
- return child_named | {name: NamedRecord(lens=this_lens,
110
- value=aggregator(child_named))}
111
- else:
112
- return child_named
113
- return {}
114
-
115
- def set(self, root: ParseResult[Any], updated_values: Dict[str, Any]) -> ParseResult[Any]:
116
- named_records = self.get(root)
117
- def apply_update(name: str, value: Any, root: ParseResult[Any]) -> ParseResult[Any]:
118
- if name not in named_records:
119
- # Skip unknown names safely
120
- return root
121
- record = named_records[name]
122
- target_named: NamedResult[Any, Any] = record.lens.get(root)
123
- assert isinstance(target_named, NamedResult)
124
-
125
- if target_named.aggregator is not None:
126
- # Break apart dataclass/dict into child fields
127
- if isinstance(value, dict):
128
- child_updates = value
129
- elif is_dataclass(value) and not isinstance(value, type):
130
- child_updates = asdict(value)
131
- else:
132
- raise TypeError(f"Unsupported aggregator value for '{name}': {type(value)}")
133
-
134
- # Recursively apply each child update
135
- for child_name, child_value in child_updates.items():
136
- root = apply_update(child_name, child_value, root)
137
- return root
138
-
139
- else:
140
- # Leaf: just replace the value
141
- updated_named = replace(target_named, value=value)
142
- return record.lens.set(root, updated_named)
143
-
144
- for name, value in updated_values.items():
145
- root = apply_update(name, value, root)
146
-
147
- return root
148
-
149
- @dataclass(frozen=True)
150
- class AST(Generic[T]):
151
- focus: ParseResult[T]
152
- pruned: bool = False
153
- parent: Optional[AST[T]] = None
154
-
155
- def up(self)->Optional[AST[T]]:
156
- return self.parent
157
-
158
- def left(self) -> Optional[AST[T]]:
159
- match self.focus:
160
- case ThenResult(left=left, kind=kind):
161
- return replace(self, focus=left, parent=self, pruned = self.pruned or kind == ThenKind.RIGHT)
162
- case _:
163
- raise TypeError(f"Invalid focus type({self.focus}) for left traversal")
164
-
165
- def right(self) -> Optional[AST[T]]:
166
- match self.focus:
167
- case ThenResult(right=right, kind=kind):
168
- return replace(self, focus=right, parent=self, pruned = self.pruned or kind == ThenKind.LEFT)
169
- case _:
170
- raise TypeError(f"Invalid focus type({self.focus}) for right traversal")
171
-
172
-
173
- def down(self, index: int) -> Optional[AST[T]]:
174
- match self.focus:
175
- case ManyResult(value=children):
176
- if 0 <= index < len(children):
177
- return replace(self, focus=children[index], parent=self, pruned=self.pruned)
178
- else:
179
- raise IndexError(f"Index {index} out of bounds for ManyResult with {len(children)} children")
180
- case OrResult(value=value):
181
- if index == 0:
182
- return replace(self, focus=value, parent=self, pruned=self.pruned)
183
- else:
184
- raise IndexError(f"Index {index} out of bounds for OrResult")
185
- case _:
186
- raise TypeError(f"Invalid focus type({self.focus}) for down traversal")
187
-
188
- def how_many(self)->int:
189
- match self.focus:
190
- case ManyResult(value=children):
191
- return len(children)
192
- case _:
193
- raise TypeError(f"Invalid focus type({self.focus}) for how_many")
194
-
195
-
196
-
197
- @cached_property
198
- def root(self) -> AST[T]:
199
- while self.parent is not None:
200
- self = self.parent
201
- return self
202
-
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes