syncraft 0.1.12__tar.gz → 0.1.14__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of syncraft might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: syncraft
3
- Version: 0.1.12
3
+ Version: 0.1.14
4
4
  Summary: Parser combinator library
5
5
  Author-email: Michael Afmokt <michael@esacca.com>
6
6
  License-Expression: MIT
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "syncraft"
3
- version = "0.1.12"
3
+ version = "0.1.14"
4
4
  description = "Parser combinator library"
5
5
  license = "MIT"
6
6
  license-files = ["LICENSE"]
@@ -28,7 +28,7 @@ from typing import (
28
28
  )
29
29
 
30
30
  import traceback
31
- from dataclasses import dataclass, fields, replace
31
+ from dataclasses import dataclass, fields, replace, field
32
32
  from functools import cached_property
33
33
  from weakref import WeakKeyDictionary
34
34
  from abc import ABC, abstractmethod
@@ -54,15 +54,11 @@ class Insptectable(ABC):
54
54
 
55
55
  A = TypeVar('A') # Result type
56
56
  B = TypeVar('B') # Result type for mapping
57
- C = TypeVar('C') # Result type for composing lenses
57
+
58
58
  S = TypeVar('S') # State type for the Algebra
59
59
 
60
60
 
61
61
 
62
-
63
- class StructuralResult:
64
- pass
65
-
66
62
  class FrozenDict(Generic[A]):
67
63
  def __init__(self, items: Mapping[str, A]):
68
64
  for k, v in items.items():
@@ -119,63 +115,47 @@ class Lens(Generic[S, A]):
119
115
 
120
116
  def __rtruediv__(self, other: Lens[B, S])->Lens[B, A]:
121
117
  return other.__truediv__(self)
122
-
123
-
118
+
119
+ class StructuralResult:
120
+ def bimap(self, ctx: Any)->Tuple[Any, Callable[[Any], StructuralResult]]:
121
+ return (self, lambda x: self)
124
122
 
125
- @dataclass(eq=True, frozen=True)
126
- class NamedResult(Generic[A, B], StructuralResult):
127
- value: A
123
+
124
+ @dataclass(frozen=True)
125
+ class NamedResult(Generic[A], StructuralResult):
128
126
  name: str
129
- forward_map: Callable[[A], B] | None = None
130
- backward_map: Callable[[B], A] | None = None
131
- aggregator: Callable[..., Any] | None = None
132
-
133
- def __post_init__(self)->None:
134
- if (self.forward_map or self.backward_map) and self.aggregator is not None:
135
- raise ValueError("NamedResult can have either bimap or aggregator, never both.")
136
-
137
- @staticmethod
138
- def lens() -> Lens[NamedResult[A, B], A]:
139
- def get(data: NamedResult[A, B]) -> A:
140
- return data.value
141
-
142
- def set(data: NamedResult[A, B], value: A) -> NamedResult[A, B]:
143
- return replace(data, value = value)
144
-
145
- return Lens(get=get, set=set)
146
-
147
-
127
+ value: A
128
+ def bimap(self, ctx: Any)->Tuple[NamedResult[Any], Callable[[NamedResult[Any]], StructuralResult]]:
129
+ value, backward = self.value.bimap(ctx) if isinstance(self.value, StructuralResult) else (self.value, lambda x: x)
130
+ def named_back(data: Any)->NamedResult[Any]:
131
+ v = backward(data)
132
+ if isinstance(v, NamedResult):
133
+ return replace(v, name=self.name)
134
+ else:
135
+ return NamedResult(name=self.name, value=v)
136
+ return NamedResult(self.name, value), named_back
148
137
 
149
138
  @dataclass(eq=True, frozen=True)
150
139
  class ManyResult(Generic[A], StructuralResult):
151
140
  value: Tuple[A, ...]
141
+ def bimap(self, ctx: Any)->Tuple[List[Any], Callable[[List[Any]], StructuralResult]]:
142
+ transformed = [v.bimap(ctx) if isinstance(v, StructuralResult) else (v, lambda x: x) for v in self.value]
143
+ backmaps = [b for (_, b) in transformed]
144
+ ret = [a for (a, _) in transformed]
145
+ def backward(data: List[Any]) -> StructuralResult:
146
+ if len(data) != len(transformed):
147
+ raise ValueError("Incompatible data length")
148
+ return ManyResult(value=tuple([backmaps[i](x) for i, x in enumerate(data)]))
149
+ return ret, lambda data: backward(data)
152
150
 
153
- @staticmethod
154
- def lens(index: int) -> Lens[ManyResult[A], A]:
155
- def get(data: ManyResult[A]) -> A:
156
- return data.value[index]
157
-
158
- def set(data: ManyResult[A], value: A) -> ManyResult[A]:
159
- new_value = list(data.value)
160
- new_value[index] = value
161
- return ManyResult(value=tuple(new_value))
162
-
163
- return Lens(get=get, set=set)
164
151
 
165
152
 
166
153
  @dataclass(eq=True, frozen=True)
167
154
  class OrResult(Generic[A], StructuralResult):
168
155
  value: A
169
-
170
- @staticmethod
171
- def lens() -> Lens[OrResult[A], A]:
172
- def get(data: OrResult[A]) -> A:
173
- return data.value
174
-
175
- def set(data: OrResult[A], value: A) -> OrResult[A]:
176
- return OrResult(value=value)
177
-
178
- return Lens(get=get, set=set)
156
+ def bimap(self, ctx: Any) -> Tuple[Any, Callable[[Any], StructuralResult]]:
157
+ value, backward = self.value.bimap(ctx) if isinstance(self.value, StructuralResult) else (self.value, lambda x: x)
158
+ return value, lambda data: OrResult(value=backward(data))
179
159
 
180
160
 
181
161
  class ThenKind(Enum):
@@ -188,89 +168,57 @@ class ThenResult(Generic[A, B], StructuralResult):
188
168
  kind: ThenKind
189
169
  left: A
190
170
  right: B
191
- @cached_property
192
- def flatten(self)-> Tuple[Any, ...]:
193
- def _flatten_side(side: Any)->Tuple[Any, ...]:
194
- return side.flatten if isinstance(side, ThenResult) else (side,)
171
+ def bimap(self, ctx: Any) -> Tuple[Any, Callable[[Any], StructuralResult]]:
172
+ def branch(b: Any) -> Tuple[Any, Callable[[Any], StructuralResult]]:
173
+ if isinstance(b, ThenResult):
174
+ value, backward = b.bimap(ctx)
175
+ if isinstance(value, tuple):
176
+ x, y = ThenResult.flat(value)
177
+ return x, lambda data: ThenResult(self.kind, y(data), self.right)
178
+ else:
179
+ return value, backward
180
+ elif isinstance(b, StructuralResult):
181
+ return b.bimap(ctx)
182
+ else:
183
+ return b, lambda x: x
195
184
  match self.kind:
196
185
  case ThenKind.BOTH:
197
- return _flatten_side(self.left) + _flatten_side(self.right)
186
+ left_value, left_bmap = branch(self.left)
187
+ right_value, right_bmap = branch(self.right)
188
+ def backward(x: Tuple[Any, Any]) -> StructuralResult:
189
+ return ThenResult(self.kind, left_bmap(x[0]), right_bmap(x[1]))
190
+ x, y = ThenResult.flat((left_value, right_value))
191
+ return x, lambda data: backward(y(data))
198
192
  case ThenKind.LEFT:
199
- return _flatten_side(self.left)
193
+ left_value, left_bmap = branch(self.left)
194
+ return left_value, lambda data: ThenResult(self.kind, left_bmap(data), self.right)
200
195
  case ThenKind.RIGHT:
201
- return _flatten_side(self.right)
202
-
196
+ right_value, right_bmap = branch(self.right)
197
+ return right_value, lambda data: ThenResult(self.kind, self.left, right_bmap(data))
203
198
  @staticmethod
204
- def lens(kind: ThenKind) -> Lens[ThenResult[A, B], Tuple[A, B]] | Lens[ThenResult[A, B], A] | Lens[ThenResult[A, B], B]:
205
- def both_lens() -> Lens[ThenResult[A, B], Tuple[A, B]]:
206
- def get(data: ThenResult[A, B]) -> Tuple[A, B]:
207
- match data:
208
- case ThenResult(left=left, right=right, kind=ThenKind.BOTH):
209
- return left, right
210
- case _:
211
- raise ValueError(f"Unexpected ThenResult type: {type(data)}")
212
-
213
- def set(data: ThenResult[A, B], value: Tuple[A, B] | ThenResult[A, B]) -> ThenResult[A, B]:
214
- match data:
215
- case ThenResult(left=_, right=_, kind=ThenKind.BOTH):
216
- if isinstance(value, tuple) and len(value) == 2:
217
- return ThenResult(left=value[0], right=value[1], kind=ThenKind.BOTH)
218
- elif isinstance(value, ThenResult):
219
- return ThenResult(left=value.left, right=value.right, kind=ThenKind.BOTH)
220
- else:
221
- raise ValueError(f"Expected a tuple or ThenResult, got: {type(value)}")
222
- case _:
223
- raise ValueError(f"Unexpected ThenResult type: {type(data)}")
224
- return Lens(get=get, set=set)
225
-
226
- def left_lens()-> Lens[ThenResult[A, B], A]:
227
- def left_get(data: ThenResult[A, B]) -> A:
228
- match data:
229
- case ThenResult(left=left, right=_, kind=ThenKind.LEFT):
230
- return left
231
- case _:
232
- raise ValueError(f"Unexpected ParseResult type: {type(data)}")
233
-
234
- def left_set(data: ThenResult[A, B], v: A) -> ThenResult[A, B]:
235
- match data:
236
- case ThenResult(kind=ThenKind.LEFT):
237
- return replace(data, left=v)
238
- case _:
239
- raise ValueError(f"Unexpected ParseResult type: {type(data)}")
240
- return
241
- return Lens(
242
- get=left_get,
243
- set=left_set
244
- )
245
-
246
- def right_lens()-> Lens[ThenResult[A, B], B]:
247
- def right_get(data: ThenResult[A, B]) -> B:
248
- match data:
249
- case ThenResult(left=_, right=right, kind=ThenKind.RIGHT):
250
- return right
251
- case _:
252
- raise ValueError(f"Unexpected ParseResult type: {type(data)}")
253
-
254
- def right_set(data: ThenResult[A, B], v: B) -> ThenResult[A, B]:
255
- match data:
256
- case ThenResult(kind=ThenKind.RIGHT):
257
- return replace(data, right=v)
258
- case _:
259
- raise ValueError(f"Unexpected ParseResult type: {type(data)}")
260
- return
261
- return Lens(
262
- get=right_get,
263
- set=right_set
264
- )
265
- match kind:
266
- case ThenKind.BOTH:
267
- return both_lens()
268
- case ThenKind.LEFT:
269
- return left_lens()
270
- case ThenKind.RIGHT:
271
- return right_lens()
272
- case _:
273
- raise ValueError(f"Unknown ThenKind: {kind}")
199
+ def flat(array: Tuple[Any, Any]) -> Tuple[Tuple[Any, ...], Callable[[Tuple[Any, ...]], Tuple[Any, Any]]]:
200
+ index: Dict[int, int] = {}
201
+ ret: List[Any] = []
202
+ for e in array:
203
+ if isinstance(e, tuple):
204
+ index[len(ret)] = len(e)
205
+ ret.extend(e)
206
+ else:
207
+ ret.append(e)
208
+ def backward(data: Tuple[Any, ...]) -> Tuple[Any, Any]:
209
+ tmp: List[Any] = []
210
+ skip: int = 0
211
+ for i, e in enumerate(data):
212
+ if skip <= 0:
213
+ if i in index:
214
+ tmp.append(tuple(data[i:i + index[i]]))
215
+ skip = index[i] - 1
216
+ else:
217
+ tmp.append(e)
218
+ else:
219
+ skip -= 1
220
+ return tuple(tmp)
221
+ return tuple(ret), backward
274
222
 
275
223
 
276
224
  InProgress = object() # Marker for in-progress state, used to prevent re-entrance in recursive calls
@@ -0,0 +1,132 @@
1
+
2
+
3
+ from __future__ import annotations
4
+ import re
5
+ from typing import (
6
+ Optional, Any, TypeVar, Tuple, runtime_checkable, Dict,
7
+ Protocol, Generic, Callable, Union, cast
8
+ )
9
+ from syncraft.algebra import (
10
+ OrResult,ThenResult, ManyResult, ThenKind,NamedResult, StructuralResult,
11
+ Lens
12
+ )
13
+ from dataclasses import dataclass, replace, is_dataclass, asdict
14
+ from enum import Enum
15
+ from functools import cached_property
16
+
17
+ @runtime_checkable
18
+ class TokenProtocol(Protocol):
19
+ @property
20
+ def token_type(self) -> Enum: ...
21
+ @property
22
+ def text(self) -> str: ...
23
+
24
+
25
+ @dataclass(frozen=True)
26
+ class Token:
27
+ token_type: Enum
28
+ text: str
29
+ def __str__(self) -> str:
30
+ return f"{self.token_type.name}({self.text})"
31
+
32
+ def __repr__(self) -> str:
33
+ return self.__str__()
34
+
35
+ @dataclass(frozen=True)
36
+ class TokenSpec:
37
+ token_type: Optional[Enum] = None
38
+ text: Optional[str] = None
39
+ case_sensitive: bool = False
40
+ regex: Optional[re.Pattern[str]] = None
41
+
42
+ def is_valid(self, token: TokenProtocol) -> bool:
43
+ type_match = self.token_type is None or token.token_type == self.token_type
44
+ value_match = self.text is None or (token.text.strip() == self.text.strip() if self.case_sensitive else
45
+ token.text.strip().upper() == self.text.strip().upper())
46
+ value_match = value_match or (self.regex is not None and self.regex.fullmatch(token.text) is not None)
47
+ return type_match and value_match
48
+
49
+
50
+
51
+
52
+ T = TypeVar('T', bound=TokenProtocol)
53
+
54
+
55
+ ParseResult = Union[
56
+ ThenResult['ParseResult[T]', 'ParseResult[T]'],
57
+ NamedResult['ParseResult[T]'],
58
+ ManyResult['ParseResult[T]'],
59
+ OrResult['ParseResult[T]'],
60
+ T,
61
+ ]
62
+ @dataclass(frozen=True)
63
+ class AST(Generic[T]):
64
+ focus: ParseResult[T]
65
+ pruned: bool = False
66
+ parent: Optional[AST[T]] = None
67
+
68
+
69
+ def bimap(self, ctx: Any) -> Tuple[Any, Callable[[Any], AST[T]]]:
70
+ value, backward = self.focus.bimap(ctx) if isinstance(self.focus, StructuralResult) else (self.focus, lambda x: x)
71
+ def back2ast(data: Any) -> AST[T]:
72
+ return replace(self, focus=backward(data)) # type: ignore
73
+ return value, back2ast
74
+
75
+ def wrapper(self)-> Callable[[Any], Any]:
76
+ if isinstance(self.focus, NamedResult):
77
+ focus = cast(NamedResult[Any], self.focus)
78
+ return lambda x: NamedResult(name = focus.name, value = x)
79
+ else:
80
+ return lambda x: x
81
+
82
+ def is_named(self) -> bool:
83
+ return isinstance(self.focus, NamedResult)
84
+
85
+ def left(self) -> Optional[AST[T]]:
86
+ match self.focus:
87
+ case ThenResult(left=left, kind=kind):
88
+ return replace(self, focus=left, parent=self, pruned = self.pruned or kind == ThenKind.RIGHT)
89
+ case _:
90
+ raise TypeError(f"Invalid focus type({self.focus}) for left traversal")
91
+
92
+ def right(self) -> Optional[AST[T]]:
93
+ match self.focus:
94
+ case ThenResult(right=right, kind=kind):
95
+ return replace(self, focus=right, parent=self, pruned = self.pruned or kind == ThenKind.LEFT)
96
+ case _:
97
+ raise TypeError(f"Invalid focus type({self.focus}) for right traversal")
98
+
99
+
100
+ def down(self, index: int) -> Optional[AST[T]]:
101
+ match self.focus:
102
+ case ManyResult(value=children):
103
+ if 0 <= index < len(children):
104
+ return replace(self, focus=children[index], parent=self, pruned=self.pruned)
105
+ else:
106
+ raise IndexError(f"Index {index} out of bounds for ManyResult with {len(children)} children")
107
+ case OrResult(value=value):
108
+ if index == 0:
109
+ return replace(self, focus=value, parent=self, pruned=self.pruned)
110
+ else:
111
+ raise IndexError(f"Index {index} out of bounds for OrResult")
112
+ case NamedResult(value=value):
113
+ return replace(self, focus=value, parent=self, pruned=self.pruned)
114
+ case _:
115
+ raise TypeError(f"Invalid focus type({self.focus}) for down traversal")
116
+
117
+ def how_many(self)->int:
118
+ focus = self.focus.value if isinstance(self.focus, NamedResult) else self.focus
119
+ match focus:
120
+ case ManyResult(value=children):
121
+ return len(children)
122
+ case _:
123
+ raise TypeError(f"Invalid focus type({self.focus}) for how_many")
124
+
125
+
126
+
127
+ @cached_property
128
+ def root(self) -> AST[T]:
129
+ while self.parent is not None:
130
+ self = self.parent
131
+ return self
132
+
@@ -4,9 +4,9 @@ from typing import (
4
4
  Optional, List, Any, TypeVar, Generic, Callable, Tuple, cast,
5
5
  Type, Literal
6
6
  )
7
- from dataclasses import dataclass, field
7
+ from dataclasses import dataclass, field, replace
8
8
  from functools import reduce
9
- from syncraft.algebra import Algebra, Error, Either, Insptectable, NamedResult, ThenResult, ManyResult, ThenKind
9
+ from syncraft.algebra import Algebra, Error, Either, Insptectable, ThenResult, ManyResult, ThenKind, NamedResult
10
10
  from types import MethodType, FunctionType
11
11
 
12
12
 
@@ -226,75 +226,14 @@ class DSL(Generic[A, S], Insptectable):
226
226
 
227
227
 
228
228
  ######################################################################## data processing combinators #########################################################
229
-
230
- def _attach(self,
231
- name: str,
232
- *,
233
- forward_map: Callable[[B], C] | None,
234
- backward_map: Callable[[C], B] | None,
235
- aggregator_f: Callable[..., Any] | None,
236
- ) -> DSL[NamedResult[A, C], S]:
237
- def attach_f(x: A | NamedResult[A, B]) -> NamedResult[A, C]:
238
- if isinstance(x, NamedResult):
239
- if x.backward_map is not None:
240
- b_f = x.backward_map
241
- def combined_bf(a: Any)->A:
242
- if backward_map is not None:
243
- return b_f(backward_map(a))
244
- else:
245
- return b_f(a)
246
- if x.forward_map is not None:
247
- f_f = x.forward_map
248
- def combined_ff(a: Any)->Any:
249
- if forward_map is not None:
250
- return forward_map(f_f(a))
251
- else:
252
- return f_f(a)
253
- if x.aggregator is not None:
254
- agg_f = x.aggregator
255
- def combined_agg(a: Any)->Any:
256
- if aggregator_f is not None:
257
- return aggregator_f(agg_f(a))
258
- else:
259
- return agg_f(a)
260
- return NamedResult(
261
- name=name,
262
- value=x.value,
263
- forward_map=forward_map if x.forward_map is None else combined_ff, # type: ignore
264
- backward_map=backward_map if x.backward_map is None else combined_bf, # type: ignore
265
- aggregator=aggregator_f if x.aggregator is None else combined_agg
266
- )
229
+ def bind(self, name: str) -> DSL[NamedResult[A], S]:
230
+ def bind_f(value: A) -> NamedResult[A]:
231
+ if isinstance(value, NamedResult):
232
+ return replace(value, name=name)
267
233
  else:
268
- return NamedResult(
269
- name=name,
270
- value=x,
271
- forward_map=forward_map, # type: ignore
272
- backward_map=backward_map, # type: ignore
273
- aggregator=aggregator_f,
274
-
275
- )
276
-
277
- return self.map(attach_f)
278
-
279
- def bind(self, name: str) -> DSL[NamedResult[Any, Any], S]:
280
- return self._attach(name,
281
- forward_map=None,
282
- backward_map=None,
283
- aggregator_f=None,
284
- ).describe(name=f'bind("{name}")', fixity='postfix', parameter=[self])
285
-
286
- def bimap(self, name: str, *, forward_map: Callable[[Any], Any], backward_map: Callable[[Any], Any]) -> DSL[NamedResult[Any, Any], S]:
287
- return self._attach(name,
288
- forward_map=forward_map,
289
- backward_map=backward_map,
290
- aggregator_f=None,
291
- ).describe(name=f'bimap("{name}")', fixity='postfix', parameter=[self])
292
- def to(self, name: str, aggregator_f: Callable[..., Any]) -> DSL[NamedResult[A, Any], S]:
293
- return self._attach(name,
294
- forward_map=None,
295
- backward_map=None,
296
- aggregator_f=aggregator_f,
297
- ).describe(name=f'to("{name}")', fixity='postfix', parameter=[self])
234
+ return NamedResult(name=name, value=value)
235
+ return self.map(bind_f).describe(name=f'bind("{name}")', fixity='postfix', parameter=[self])
236
+
298
237
 
299
238
  def dump_error(self, formatter: Optional[Callable[[Error], None]] = None) -> DSL[A, S]:
300
239
  def dump_error_run(err: Any)->Any:
@@ -1,14 +1,14 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  from typing import (
4
- Any, TypeVar, Tuple, Optional, Callable, Generic, Union, Iterable, Hashable,
5
- cast, List
4
+ Any, TypeVar, Tuple, Optional, Callable, Generic, Union,
5
+ List
6
6
  )
7
7
  from functools import cached_property
8
8
  from dataclasses import dataclass, replace
9
9
  from syncraft.algebra import (
10
10
  Algebra, ThenResult, Either, Left, Right, Error, Insptectable,
11
- NamedResult, OrResult, ManyResult, ThenKind
11
+ OrResult, ManyResult
12
12
  )
13
13
  from syncraft.ast import TokenProtocol, ParseResult, AST, Token, TokenSpec
14
14
  from sqlglot import TokenType
@@ -24,7 +24,7 @@ GenResult = Union[
24
24
  ThenResult['GenResult[T]', 'GenResult[T]'],
25
25
  ManyResult['GenResult[T]'],
26
26
  OrResult['GenResult[T]'],
27
- Iterable[T],
27
+
28
28
  T
29
29
  ]
30
30
 
@@ -52,7 +52,16 @@ class GenState(Generic[T], Insptectable):
52
52
  if self.ast is None:
53
53
  return None
54
54
  return self.ast.focus
55
+
56
+ @property
57
+ def is_named(self)->bool:
58
+ return self.ast is not None and self.ast.is_named()
55
59
 
60
+ def wrapper(self)->Callable[[Any], Any]:
61
+ if self.ast is not None:
62
+ return self.ast.wrapper()
63
+ else:
64
+ return lambda x: x
56
65
 
57
66
  def left(self)-> GenState[T]:
58
67
  if self.ast is None:
@@ -64,10 +73,7 @@ class GenState(Generic[T], Insptectable):
64
73
  return self
65
74
  return replace(self, ast=self.ast.right())
66
75
 
67
- def up(self)->GenState[T]:
68
- if self.ast is None:
69
- return self
70
- return replace(self, ast=self.ast.up())
76
+
71
77
 
72
78
  def down(self, index: int) -> GenState[T]:
73
79
  if self.ast is None:
@@ -142,13 +148,19 @@ class TokenGen(TokenSpec):
142
148
  class Generator(Algebra[GenResult[T], GenState[T]]):
143
149
  def flat_map(self, f: Callable[[GenResult[T]], Algebra[B, GenState[T]]]) -> Algebra[B, GenState[T]]:
144
150
  def flat_map_run(input: GenState[T], use_cache:bool) -> Either[Any, Tuple[B, GenState[T]]]:
145
- lft = input.left()
151
+ wrapper = input.wrapper()
152
+ input = input if not input.is_named else input.down(0) # If the input is named, we need to go down to the first child
153
+ lft = input.left()
146
154
  match self.run(lft, use_cache=use_cache):
147
155
  case Left(error):
148
156
  return Left(error)
149
157
  case Right((value, next_input)):
150
- r = input.right()
151
- return f(value).run(r, use_cache)
158
+ r = input.right()
159
+ match f(value).run(r, use_cache):
160
+ case Left(e):
161
+ return Left(e)
162
+ case Right((result, next_input)):
163
+ return Right((wrapper(result), next_input))
152
164
  raise ValueError("flat_map should always return a value or an error.")
153
165
  return Generator(run_f = flat_map_run, name=self.name) # type: ignore
154
166
 
@@ -158,6 +170,8 @@ class Generator(Algebra[GenResult[T], GenState[T]]):
158
170
  assert at_least > 0, "at_least must be greater than 0"
159
171
  assert at_most is None or at_least <= at_most, "at_least must be less than or equal to at_most"
160
172
  def many_run(input: GenState[T], use_cache:bool) -> Either[Any, Tuple[ManyResult[GenResult[T]], GenState[T]]]:
173
+ wrapper = input.wrapper()
174
+ input = input if not input.is_named else input.down(0) # If the input is named, we need to go down to the first child
161
175
  if input.pruned:
162
176
  upper = at_most if at_most is not None else at_least + 2
163
177
  count = input.rng("many").randint(at_least, upper)
@@ -169,7 +183,7 @@ class Generator(Algebra[GenResult[T], GenState[T]]):
169
183
  ret.append(value)
170
184
  case Left(_):
171
185
  pass
172
- return Right((ManyResult(tuple(ret)), input))
186
+ return Right((wrapper(ManyResult(tuple(ret))), input))
173
187
  else:
174
188
  ret = []
175
189
  for index in range(input.how_many):
@@ -190,7 +204,7 @@ class Generator(Algebra[GenResult[T], GenState[T]]):
190
204
  this=self,
191
205
  state=input.down(index)
192
206
  ))
193
- return Right((ManyResult(tuple(ret)), input))
207
+ return Right((wrapper(ManyResult(tuple(ret))), input))
194
208
  return self.__class__(many_run, name=f"many({self.name})") # type: ignore
195
209
 
196
210
 
@@ -198,21 +212,23 @@ class Generator(Algebra[GenResult[T], GenState[T]]):
198
212
  other: Algebra[GenResult[T], GenState[T]]
199
213
  ) -> Algebra[OrResult[GenResult[T]], GenState[T]]:
200
214
  def or_else_run(input: GenState[T], use_cache:bool) -> Either[Any, Tuple[OrResult[GenResult[T]], GenState[T]]]:
215
+ wrapper = input.wrapper()
216
+ input = input if not input.is_named else input.down(0) # If the input is named, we need to go down to the first child
201
217
  if input.pruned:
202
218
  forked_input = input.fork(tag="or_else")
203
219
  match forked_input.rng("or_else").choice((self, other)).run(forked_input, use_cache):
204
220
  case Right((value, next_input)):
205
- return Right((OrResult(value), next_input))
221
+ return Right((wrapper(OrResult(value)), next_input))
206
222
  case Left(error):
207
223
  return Left(error)
208
224
  else:
209
225
  match self.run(input.down(0), use_cache):
210
226
  case Right((value, next_input)):
211
- return Right((OrResult(value), next_input))
227
+ return Right((wrapper(OrResult(value)), next_input))
212
228
  case Left(error):
213
229
  match other.run(input.down(0), use_cache):
214
230
  case Right((value, next_input)):
215
- return Right((OrResult(value), next_input))
231
+ return Right((wrapper(OrResult(value)), next_input))
216
232
  case Left(error):
217
233
  return Left(error)
218
234
  raise ValueError("or_else should always return a value or an error.")
@@ -228,6 +244,8 @@ class Generator(Algebra[GenResult[T], GenState[T]]):
228
244
  gen = TokenGen(token_type=token_type, text=text, case_sensitive=case_sensitive, regex=regex)
229
245
  lazy_self: Algebra[GenResult[T], GenState[T]]
230
246
  def token_run(input: GenState[T], use_cache:bool) -> Either[Any, Tuple[GenResult[Token], GenState[T]]]:
247
+ wrapper = input.wrapper()
248
+ input = input if not input.is_named else input.down(0) # If the input is named, we need to go down to the first child
231
249
  if input.pruned:
232
250
  return Right((gen.gen(), input))
233
251
  else:
@@ -236,7 +254,7 @@ class Generator(Algebra[GenResult[T], GenState[T]]):
236
254
  return Left(Error(None,
237
255
  message=f"Expected a Token, but got {type(current)}.",
238
256
  state=input))
239
- return Right((current, input))
257
+ return Right((wrapper(current), input))
240
258
  lazy_self = cls(token_run, name=cls.__name__ + f'.token({token_type or text or regex})') # type: ignore
241
259
  return lazy_self
242
260
 
@@ -77,23 +77,23 @@ class ParserState(Generic[T], Insptectable):
77
77
 
78
78
 
79
79
  @dataclass(frozen=True)
80
- class Parser(Algebra[Tuple[T,...] | T, ParserState[T]]):
80
+ class Parser(Algebra[T, ParserState[T]]):
81
81
  @classmethod
82
82
  def token(cls,
83
83
  token_type: Optional[Enum] = None,
84
84
  text: Optional[str] = None,
85
85
  case_sensitive: bool = False,
86
86
  regex: Optional[re.Pattern[str]] = None
87
- )-> Algebra[Tuple[T,...] | T, ParserState[T]]:
87
+ )-> Algebra[T, ParserState[T]]:
88
88
  spec = TokenSpec(token_type=token_type, text=text, case_sensitive=case_sensitive, regex=regex)
89
- def token_run(state: ParserState[T], use_cache:bool) -> Either[Any, Tuple[Tuple[T,...] | T, ParserState[T]]]:
89
+ def token_run(state: ParserState[T], use_cache:bool) -> Either[Any, Tuple[T, ParserState[T]]]:
90
90
  if state.ended():
91
91
  return Left(state)
92
92
  token = state.current()
93
93
  if token is None or not spec.is_valid(token):
94
94
  return Left(state)
95
95
  return Right((Token(token_type = token.token_type, text=token.text), state.advance())) # type: ignore
96
- captured: Algebra[Tuple[T,...] | T, ParserState[T]] = cls(token_run, name=cls.__name__ + f'.token({token_type}, {text})')
96
+ captured: Algebra[T, ParserState[T]] = cls(token_run, name=cls.__name__ + f'.token({token_type}, {text})')
97
97
  def error_fn(err: Any) -> Error:
98
98
  if isinstance(err, ParserState):
99
99
  return Error(message=f"Cannot match token at {err}", this=captured, state=err)
@@ -111,52 +111,79 @@ class Parser(Algebra[Tuple[T,...] | T, ParserState[T]]):
111
111
  inclusive: bool = True,
112
112
  strict: bool = True) -> Algebra[Any, ParserState[T]]:
113
113
  def until_run(state: ParserState[T], use_cache:bool) -> Either[Any, Tuple[Any, ParserState[T]]]:
114
- counters = [0] * len(open_close)
114
+ # Use a stack to enforce proper nesting across multiple open/close pairs.
115
115
  tokens: List[Any] = []
116
116
  if not terminator and len(open_close) == 0:
117
- return Left(Error(this=until_run, message="No terminator and no open/close parsers, nothing to parse", state=state))
118
- def run_oc(s: ParserState[T],
119
- sign: int,
120
- *oc: Algebra[Any, ParserState[T]])->Tuple[bool, ParserState[T]]:
121
- matched = False
122
- for i, p in enumerate(oc):
123
- new = p.run(s, use_cache)
124
- if isinstance(new, Right):
125
- matched = True
126
- counters[i] += sign
127
- if inclusive:
128
- tokens.append(new.value[0])
129
- s = new.value[1]
130
- return matched, s
117
+ return Left(Error(this=until_run, message="No terminator and no open/close parsers, nothing to parse", state=state))
118
+
119
+ # Helper to try matching any of the parsers once, returning early on first match
120
+ def try_match(s: ParserState[T], *parsers: Algebra[Any, ParserState[T]]) -> Tuple[bool, Optional[int], Optional[Any], ParserState[T]]:
121
+ for i, p in enumerate(parsers):
122
+ res = p.run(s, use_cache)
123
+ if isinstance(res, Right):
124
+ val, ns = res.value
125
+ return True, i, val, ns
126
+ return False, None, None, s
127
+
131
128
  opens, closes = zip(*open_close) if len(open_close) > 0 else ((), ())
132
129
  tmp_state: ParserState[T] = state.copy()
133
- if strict:
134
- c = reduce(lambda a, b: a.or_else(b), opens).run(tmp_state)
130
+ stack: List[int] = [] # indices into open_close indicating expected closer
131
+
132
+ # If strict, require the very next token to be an opener of any kind
133
+ if strict and len(opens) > 0:
134
+ c = reduce(lambda a, b: a.or_else(b), opens).run(tmp_state, use_cache)
135
135
  if c.is_left():
136
- return Left(Error(
137
- this=until_run,
138
- message="No opening parser matched",
139
- state=tmp_state
140
- ))
136
+ return Left(Error(this=until_run, message="No opening parser matched", state=tmp_state))
137
+
141
138
  while not tmp_state.ended():
142
- mopen, tmp_state = run_oc(tmp_state, 1, *opens)
143
- mclose, tmp_state = run_oc(tmp_state, -1, *closes)
144
- matched = mopen or mclose
145
- if all(c == 0 for c in counters):
146
- if terminator :
147
- new = terminator.run(tmp_state, use_cache)
148
- if isinstance(new, Right):
149
- matched = True
139
+ # Try to open
140
+ o_matched, o_idx, o_tok, o_state = try_match(tmp_state, *opens)
141
+ if o_matched and o_idx is not None:
142
+ stack.append(o_idx)
143
+ if inclusive:
144
+ tokens.append(o_tok)
145
+ tmp_state = o_state
146
+ continue
147
+
148
+ # Try to close
149
+ c_matched, c_idx, c_tok, c_state = try_match(tmp_state, *closes)
150
+ if c_matched and c_idx is not None:
151
+ if not stack or stack[-1] != c_idx:
152
+ return Left(Error(this=until_run, message="Mismatched closing parser", state=tmp_state))
153
+ stack.pop()
154
+ if inclusive:
155
+ tokens.append(c_tok)
156
+ tmp_state = c_state
157
+ # After closing, if stack empty, we may terminate on a terminator
158
+ if len(stack) == 0:
159
+ if terminator:
160
+ term = terminator.run(tmp_state, use_cache)
161
+ if isinstance(term, Right):
162
+ if inclusive:
163
+ tokens.append(term.value[0])
164
+ return Right((tuple(tokens), term.value[1]))
165
+ else:
166
+ return Right((tuple(tokens), tmp_state))
167
+ continue
168
+
169
+ # If nothing structural matched, check termination when not nested
170
+ if len(stack) == 0:
171
+ if terminator:
172
+ term2 = terminator.run(tmp_state, use_cache)
173
+ if isinstance(term2, Right):
150
174
  if inclusive:
151
- tokens.append(new.value[0])
152
- return Right((tuple(tokens), new.value[1]))
175
+ tokens.append(term2.value[0])
176
+ return Right((tuple(tokens), term2.value[1]))
153
177
  else:
154
178
  return Right((tuple(tokens), tmp_state))
155
- elif any(c < 0 for c in counters):
156
- return Left(Error(this=until_run, message="Unmatched closing parser", state=tmp_state))
157
- if not matched:
158
- tokens.append(tmp_state.current())
159
- tmp_state = tmp_state.advance()
179
+
180
+ # Otherwise, consume one token as payload and continue
181
+ tokens.append(tmp_state.current())
182
+ tmp_state = tmp_state.advance()
183
+
184
+ # Reached end of input
185
+ if len(stack) != 0:
186
+ return Left(Error(this=until_run, message="Unterminated group", state=tmp_state))
160
187
  return Right((tuple(tokens), tmp_state))
161
188
  return cls(until_run, name=cls.__name__ + '.until')
162
189
 
@@ -280,7 +280,7 @@ HAVING = dsl.lift(TokenType.HAVING)
280
280
  HINT = dsl.lift(TokenType.HINT)
281
281
  IGNORE = dsl.lift(TokenType.IGNORE)
282
282
  ILIKE = dsl.lift(TokenType.ILIKE)
283
- ILIKE_ANY = dsl.lift(TokenType.ILIKE_ANY)
283
+
284
284
  IN = dsl.lift(TokenType.IN)
285
285
  INDEX = dsl.lift(TokenType.INDEX)
286
286
  INNER = dsl.lift(TokenType.INNER)
@@ -301,7 +301,7 @@ LANGUAGE = dsl.lift(TokenType.LANGUAGE)
301
301
  LATERAL = dsl.lift(TokenType.LATERAL)
302
302
  LEFT = dsl.lift(TokenType.LEFT)
303
303
  LIKE = dsl.lift(TokenType.LIKE)
304
- LIKE_ANY = dsl.lift(TokenType.LIKE_ANY)
304
+
305
305
  LIMIT = dsl.lift(TokenType.LIMIT)
306
306
  LIST = dsl.lift(TokenType.LIST)
307
307
  LOAD = dsl.lift(TokenType.LOAD)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: syncraft
3
- Version: 0.1.12
3
+ Version: 0.1.14
4
4
  Summary: Parser combinator library
5
5
  Author-email: Michael Afmokt <michael@esacca.com>
6
6
  License-Expression: MIT
@@ -16,4 +16,5 @@ syncraft.egg-info/SOURCES.txt
16
16
  syncraft.egg-info/dependency_links.txt
17
17
  syncraft.egg-info/requires.txt
18
18
  syncraft.egg-info/top_level.txt
19
- tests/test_parse.py
19
+ tests/test_parse.py
20
+ tests/test_until.py
@@ -1,7 +1,6 @@
1
1
  from syncraft.parser import AST, literal, variable, parse, Parser
2
2
  import syncraft.generator as gen
3
3
  from typing import Any
4
- from rich import print
5
4
 
6
5
  IF = literal("if")
7
6
  ELSE = literal("else")
@@ -32,5 +31,5 @@ def test_many_or()->None:
32
31
  syntax = (IF.many() | THEN.many()).many() // END
33
32
  sql = "if if then end"
34
33
  ast:AST[Any] = parse(syntax(Parser), sql, dialect='sqlite')
35
- generated = gen.generate(syntax(gen.Generator))
36
- # assert ast == generated, "Parsed and generated results do not match."
34
+ generated = gen.generate(syntax(gen.Generator), ast)
35
+ assert ast == generated, "Parsed and generated results do not match."
@@ -0,0 +1,40 @@
1
+ from typing import Any
2
+ from syncraft.parser import parse, until, literal, Parser
3
+ from syncraft.ast import AST
4
+
5
+ # Define common pair DSLs
6
+ LP, RP = literal("("), literal(")")
7
+ LB, RB = literal("["), literal("]")
8
+
9
+
10
+ def test_until_accepts_proper_nesting() -> None:
11
+ sql = "([])"
12
+ syntax = until((LP, RP), (LB, RB))
13
+ ast: AST[Any] | Any = parse(syntax(Parser), sql, dialect="sqlite")
14
+ assert isinstance(ast, AST), f"Expected AST for proper nesting, got {ast}"
15
+
16
+
17
+ def test_until_rejects_mismatched_pairs() -> None:
18
+ # Mismatched: ( ] should fail immediately
19
+ sql = "(]"
20
+ syntax = until((LP, RP), (LB, RB))
21
+ res = parse(syntax(Parser), sql, dialect="sqlite")
22
+ from syncraft.algebra import Error
23
+ assert isinstance(res, Error), "Mismatched pairs should be rejected with an Error"
24
+
25
+ def test_until_rejects_unterminated_group() -> None:
26
+ # Unterminated: ( ... EOF
27
+ sql = "("
28
+ syntax = until((LP, RP))
29
+ res = parse(syntax(Parser), sql, dialect="sqlite")
30
+ from syncraft.algebra import Error
31
+ assert isinstance(res, Error), "Unterminated group should be rejected with an Error"
32
+
33
+ def test_until_rejects_crossing_pairs() -> None:
34
+ # Crossing/interleaved: ([)] should be rejected
35
+ sql = "([)]"
36
+ syntax = until((LP, RP), (LB, RB))
37
+ # Use postgres dialect so [ and ] are tokenized distinctly (not as bracketed identifier)
38
+ res = parse(syntax(Parser), sql, dialect="postgres")
39
+ from syncraft.algebra import Error
40
+ assert isinstance(res, Error), "Crossing pairs should be rejected with an Error"
@@ -1,202 +0,0 @@
1
-
2
-
3
- from __future__ import annotations
4
- import re
5
- from typing import (
6
- Optional, Any, TypeVar, Tuple, runtime_checkable, Dict,
7
- Protocol, Generic, Callable, Union
8
- )
9
- from syncraft.algebra import (
10
- NamedResult, OrResult,ThenResult, ManyResult, ThenKind,
11
- Lens
12
- )
13
- from dataclasses import dataclass, field, replace, is_dataclass, asdict
14
- from enum import Enum
15
- from functools import cached_property
16
-
17
- @runtime_checkable
18
- class TokenProtocol(Protocol):
19
- @property
20
- def token_type(self) -> Enum: ...
21
- @property
22
- def text(self) -> str: ...
23
-
24
-
25
- @dataclass(frozen=True)
26
- class Token:
27
- token_type: Enum
28
- text: str
29
- def __str__(self) -> str:
30
- return f"{self.token_type.name}({self.text})"
31
-
32
- def __repr__(self) -> str:
33
- return self.__str__()
34
-
35
- @dataclass(frozen=True)
36
- class TokenSpec:
37
- token_type: Optional[Enum] = None
38
- text: Optional[str] = None
39
- case_sensitive: bool = False
40
- regex: Optional[re.Pattern[str]] = None
41
-
42
- def is_valid(self, token: TokenProtocol) -> bool:
43
- type_match = self.token_type is None or token.token_type == self.token_type
44
- value_match = self.text is None or (token.text.strip() == self.text.strip() if self.case_sensitive else
45
- token.text.strip().upper() == self.text.strip().upper())
46
- value_match = value_match or (self.regex is not None and self.regex.fullmatch(token.text) is not None)
47
- return type_match and value_match
48
-
49
-
50
-
51
-
52
- T = TypeVar('T', bound=TokenProtocol)
53
-
54
-
55
- ParseResult = Union[
56
- ThenResult['ParseResult[T]', 'ParseResult[T]'],
57
- NamedResult['ParseResult[T]', Any],
58
- ManyResult['ParseResult[T]'],
59
- OrResult['ParseResult[T]'],
60
- Tuple[T, ...],
61
- T,
62
- ]
63
-
64
-
65
-
66
-
67
-
68
-
69
-
70
- @dataclass(frozen=True)
71
- class NamedRecord:
72
- lens: Lens[Any, Any]
73
- value: Any
74
-
75
- @dataclass(frozen=True)
76
- class Walker:
77
- lens: Optional[Lens[Any, Any]] = None
78
- def get(self, root: ParseResult[Any]) -> Dict[str, NamedRecord]:
79
- match root:
80
- case ManyResult(value=children):
81
- new_named: Dict[str, NamedRecord] = {}
82
- for i, child in enumerate(children):
83
- new_walker = replace(self, lens=(self.lens / ManyResult.lens(i)) if self.lens else ManyResult.lens(i))
84
- new_named |= new_walker.get(child)
85
- return new_named
86
- case OrResult(value=value):
87
- new_walker = replace(self, lens=(self.lens / OrResult.lens()) if self.lens else OrResult.lens())
88
- return new_walker.get(value)
89
- case ThenResult(left=left,
90
- right=right,
91
- kind=kind):
92
- new_walker = replace(self, lens=(self.lens / ThenResult.lens(kind)) if self.lens else ThenResult.lens(kind))
93
- return new_walker.get(left) | new_walker.get(right)
94
- case NamedResult(name=name,
95
- value=value,
96
- forward_map=forward_map,
97
- backward_map=backward_map,
98
- aggregator=aggregator):
99
- this_lens = (self.lens / NamedResult.lens()) if self.lens else NamedResult.lens()
100
- if callable(forward_map) and callable(backward_map):
101
- this_lens = this_lens.bimap(forward_map, backward_map)
102
- elif callable(forward_map):
103
- this_lens = this_lens.bimap(forward_map, lambda _: value)
104
- elif callable(backward_map):
105
- raise ValueError("backward_map provided without forward_map")
106
- new_walker = replace(self, lens=this_lens)
107
- child_named = new_walker.get(value)
108
- if aggregator is not None:
109
- return child_named | {name: NamedRecord(lens=this_lens,
110
- value=aggregator(child_named))}
111
- else:
112
- return child_named
113
- return {}
114
-
115
- def set(self, root: ParseResult[Any], updated_values: Dict[str, Any]) -> ParseResult[Any]:
116
- named_records = self.get(root)
117
- def apply_update(name: str, value: Any, root: ParseResult[Any]) -> ParseResult[Any]:
118
- if name not in named_records:
119
- # Skip unknown names safely
120
- return root
121
- record = named_records[name]
122
- target_named: NamedResult[Any, Any] = record.lens.get(root)
123
- assert isinstance(target_named, NamedResult)
124
-
125
- if target_named.aggregator is not None:
126
- # Break apart dataclass/dict into child fields
127
- if isinstance(value, dict):
128
- child_updates = value
129
- elif is_dataclass(value) and not isinstance(value, type):
130
- child_updates = asdict(value)
131
- else:
132
- raise TypeError(f"Unsupported aggregator value for '{name}': {type(value)}")
133
-
134
- # Recursively apply each child update
135
- for child_name, child_value in child_updates.items():
136
- root = apply_update(child_name, child_value, root)
137
- return root
138
-
139
- else:
140
- # Leaf: just replace the value
141
- updated_named = replace(target_named, value=value)
142
- return record.lens.set(root, updated_named)
143
-
144
- for name, value in updated_values.items():
145
- root = apply_update(name, value, root)
146
-
147
- return root
148
-
149
- @dataclass(frozen=True)
150
- class AST(Generic[T]):
151
- focus: ParseResult[T]
152
- pruned: bool = False
153
- parent: Optional[AST[T]] = None
154
-
155
- def up(self)->Optional[AST[T]]:
156
- return self.parent
157
-
158
- def left(self) -> Optional[AST[T]]:
159
- match self.focus:
160
- case ThenResult(left=left, kind=kind):
161
- return replace(self, focus=left, parent=self, pruned = self.pruned or kind == ThenKind.RIGHT)
162
- case _:
163
- raise TypeError(f"Invalid focus type({self.focus}) for left traversal")
164
-
165
- def right(self) -> Optional[AST[T]]:
166
- match self.focus:
167
- case ThenResult(right=right, kind=kind):
168
- return replace(self, focus=right, parent=self, pruned = self.pruned or kind == ThenKind.LEFT)
169
- case _:
170
- raise TypeError(f"Invalid focus type({self.focus}) for right traversal")
171
-
172
-
173
- def down(self, index: int) -> Optional[AST[T]]:
174
- match self.focus:
175
- case ManyResult(value=children):
176
- if 0 <= index < len(children):
177
- return replace(self, focus=children[index], parent=self, pruned=self.pruned)
178
- else:
179
- raise IndexError(f"Index {index} out of bounds for ManyResult with {len(children)} children")
180
- case OrResult(value=value):
181
- if index == 0:
182
- return replace(self, focus=value, parent=self, pruned=self.pruned)
183
- else:
184
- raise IndexError(f"Index {index} out of bounds for OrResult")
185
- case _:
186
- raise TypeError(f"Invalid focus type({self.focus}) for down traversal")
187
-
188
- def how_many(self)->int:
189
- match self.focus:
190
- case ManyResult(value=children):
191
- return len(children)
192
- case _:
193
- raise TypeError(f"Invalid focus type({self.focus}) for how_many")
194
-
195
-
196
-
197
- @cached_property
198
- def root(self) -> AST[T]:
199
- while self.parent is not None:
200
- self = self.parent
201
- return self
202
-
File without changes
File without changes
File without changes
File without changes
File without changes