tengwar 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tengwar/__init__.py +20 -0
- tengwar/__main__.py +8 -0
- tengwar/ast_nodes.py +351 -0
- tengwar/binary_ast.py +654 -0
- tengwar/errors.py +43 -0
- tengwar/interpreter.py +1845 -0
- tengwar/lexer.py +483 -0
- tengwar/mcp_server.py +496 -0
- tengwar/parser.py +603 -0
- tengwar/repl.py +152 -0
- tengwar/vm.py +425 -0
- tengwar-0.3.1.dist-info/METADATA +202 -0
- tengwar-0.3.1.dist-info/RECORD +17 -0
- tengwar-0.3.1.dist-info/WHEEL +5 -0
- tengwar-0.3.1.dist-info/entry_points.txt +2 -0
- tengwar-0.3.1.dist-info/licenses/LICENSE +21 -0
- tengwar-0.3.1.dist-info/top_level.txt +1 -0
tengwar/parser.py
ADDED
|
@@ -0,0 +1,603 @@
|
|
|
1
|
+
"""
|
|
2
|
+
TENGWAR Parser
|
|
3
|
+
|
|
4
|
+
Transforms token stream into AST. The syntax is strict prefix notation
|
|
5
|
+
with explicit structure — no ambiguity, no operator precedence needed.
|
|
6
|
+
|
|
7
|
+
Grammar (simplified):
|
|
8
|
+
program := expr*
|
|
9
|
+
expr := atom | list | bind_expr | tuple | vector
|
|
10
|
+
atom := INT | FLOAT | STRING | TRUE | FALSE | SYMBOL | HASH_ID | ADDR_REF | UNIT
|
|
11
|
+
list := '(' head expr* ')'
|
|
12
|
+
bind_expr := expr '→' target
|
|
13
|
+
tuple := '⟨' expr* '⟩'
|
|
14
|
+
vector := '⟦' expr* '⟧'
|
|
15
|
+
head := LAMBDA | COND | MATCH | SEQ | PARALLEL | MODULE | RECURSE
|
|
16
|
+
| DEFINE | TYPE | PROOF | EFFECT | MUTATE | IMPORT | operator | expr
|
|
17
|
+
operator := '+' | '-' | '*' | '/' | '%' | '=' | '!=' | '<' | '>' | '<=' | '>=' | '&' | '|' | '!'
|
|
18
|
+
"""
|
|
19
|
+
from typing import List, Optional, Tuple as PyTuple
|
|
20
|
+
from .lexer import Token, TokenType
|
|
21
|
+
from .ast_nodes import *
|
|
22
|
+
from .errors import ParseError
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class Parser:
    """Recursive-descent parser that turns a TENGWAR token list into an AST.

    The parser keeps a cursor (``pos``) into ``tokens``. COMMENT tokens are
    skipped whenever the cursor is inspected, so a comment may sit anywhere
    in the stream, including directly before a closing delimiter.
    """

    # Operator tokens that can appear as values
    OPERATOR_TOKENS = {
        TokenType.PLUS, TokenType.MINUS, TokenType.STAR,
        TokenType.SLASH, TokenType.PERCENT, TokenType.EQ,
        TokenType.NEQ, TokenType.LT, TokenType.GT,
        TokenType.LTE, TokenType.GTE, TokenType.AND,
        TokenType.OR, TokenType.NOT,
    }

    # Operators that may head a (op ...) or {op ...} form. NOT is excluded
    # because it is routed to its own single-operand handling.
    _OPERATOR_HEAD_TOKENS = frozenset(OPERATOR_TOKENS - {TokenType.NOT})

    # Special forms introduced by a dedicated token type, mapped to the NAME
    # of the parsing method (looked up on the instance so overrides apply).
    _TOKEN_FORMS = {
        TokenType.LAMBDA: 'parse_lambda',
        TokenType.COND: 'parse_cond',
        TokenType.MATCH: 'parse_match',
        TokenType.SEQ: 'parse_seq',
        TokenType.PARALLEL: 'parse_parallel',
        TokenType.MODULE: 'parse_module',
        TokenType.DEFINE: 'parse_define',
        TokenType.RECURSE: 'parse_recurse',
        TokenType.PROOF: 'parse_proof',
        TokenType.MUTATE: 'parse_mutate',
        TokenType.IMPORT: 'parse_import',
    }

    # Special forms introduced by a plain SYMBOL head, keyed by its text.
    _SYMBOL_FORMS = {
        'let': 'parse_let',
        'pipe': 'parse_pipe',
        'throw': 'parse_throw',
        'catch': 'parse_catch',
        'cond': 'parse_cond_multi',
        'defn': 'parse_defn',
        'for': 'parse_for',
        'py-import': 'parse_py_import',
    }

    def __init__(self, tokens: List[Token]):
        self.tokens = tokens
        self.pos = 0

    # ------------------------------------------------------------------
    # Cursor helpers
    # ------------------------------------------------------------------

    def _skip_comments(self) -> None:
        """Move the cursor past any COMMENT tokens it is currently on."""
        tokens = self.tokens
        while (self.pos < len(tokens)
               and tokens[self.pos].type == TokenType.COMMENT):
            self.pos += 1

    def _unexpected_end(self) -> ParseError:
        """Build the error reported when the cursor runs off the token list."""
        if self.tokens:
            last = self.tokens[-1]
            return ParseError("Unexpected end of input", last.line, last.col)
        return ParseError("Unexpected end of input", 0, 0)

    def peek(self) -> Token:
        """Return the next non-comment token without consuming it.

        Raises ParseError if the token list is exhausted.
        """
        self._skip_comments()
        if self.pos >= len(self.tokens):
            raise self._unexpected_end()
        return self.tokens[self.pos]

    def peek_type(self) -> TokenType:
        """Return the type of the next non-comment token."""
        return self.peek().type

    def advance(self) -> Token:
        """Consume and return the next non-comment token."""
        tok = self.peek()
        self.pos += 1
        return tok

    def expect(self, type: TokenType) -> Token:
        """Consume the next token and require it to be of the given type.

        Raises ParseError (at the offending token's position) on a mismatch.
        """
        tok = self.advance()
        if tok.type != type:
            raise ParseError(
                f"Expected {type.name}, got {tok.type.name} ({tok.value!r})",
                tok.line, tok.col
            )
        return tok

    def at_end(self) -> bool:
        """True when the cursor is on EOF or past the last token."""
        self._skip_comments()
        if self.pos >= len(self.tokens):
            return True
        return self.tokens[self.pos].type == TokenType.EOF

    def _bind_if_arrow(self, expr: ASTNode) -> ASTNode:
        """Wrap expr in a Bind when the next token is an arrow, else return it."""
        if not self.at_end() and self.peek_type() == TokenType.ARROW:
            return self.parse_bind(expr)
        return expr

    def _parse_until(self, closer: TokenType, allow_bind: bool = False) -> list:
        """Parse expressions up to `closer`, consume it, and return the list.

        With allow_bind, each expression may be followed by `→ target`.
        """
        items = []
        while self.peek_type() != closer:
            item = self.parse_expr()
            if allow_bind:
                item = self._bind_if_arrow(item)
            items.append(item)
        self.expect(closer)
        return items

    # ------------------------------------------------------------------
    # Entry points
    # ------------------------------------------------------------------

    def parse(self) -> Program:
        """Parse full program"""
        body = []
        while not self.at_end():
            expr = self.parse_expr()
            # Guard kept from the original in case an override returns None.
            if expr is not None:
                # Check for bind: expr → target
                body.append(self._bind_if_arrow(expr))
        return Program(body=body, type=None)

    def parse_expr(self) -> ASTNode:
        """Parse a single expression"""
        tok = self.peek()
        kind = tok.type

        if kind == TokenType.LPAREN:
            return self.parse_list()
        if kind == TokenType.LANGLE:
            return self.parse_tuple()
        if kind == TokenType.LBRACKET:
            return self.parse_vector()
        if kind == TokenType.LBRACE:
            return self.parse_short_lambda()
        if kind == TokenType.UNIT:
            self.advance()
            return UnitLit(line=tok.line, col=tok.col)
        if kind == TokenType.LAMBDA:
            return self.parse_bare_lambda()
        if kind in self.OPERATOR_TOKENS:
            # Bare operator as value: + → Symbol("+")
            self.advance()
            return Symbol(name=tok.value, line=tok.line, col=tok.col)
        return self.parse_atom()

    def parse_atom(self) -> ASTNode:
        """Parse an atomic expression

        Raises ParseError for any token that cannot start an atom.
        """
        tok = self.advance()
        kind = tok.type
        line, col = tok.line, tok.col

        if kind == TokenType.INT:
            return IntLit(value=int(tok.value), line=line, col=col)
        if kind == TokenType.FLOAT:
            return FloatLit(value=float(tok.value), line=line, col=col)
        if kind == TokenType.STRING:
            return StrLit(value=tok.value, line=line, col=col)
        if kind == TokenType.TEMPLATE:
            return self._parse_template(tok)
        if kind == TokenType.TRUE:
            return BoolLit(value=True, line=line, col=col)
        if kind == TokenType.FALSE:
            return BoolLit(value=False, line=line, col=col)
        if kind == TokenType.SYMBOL:
            return Symbol(name=tok.value, line=line, col=col)
        if kind == TokenType.HASH_ID:
            return HashId(hash=tok.value, line=line, col=col)
        if kind == TokenType.ADDR_REF:
            return AddrRef(addr=tok.value, line=line, col=col)
        if kind == TokenType.UNDERSCORE:
            return Symbol(name='_', line=line, col=col)
        if kind == TokenType.UNIT:
            return UnitLit(line=line, col=col)
        raise ParseError(
            f"Unexpected token: {tok.type.name} ({tok.value!r})",
            line, col
        )

    def _parse_template(self, tok: Token) -> Apply:
        """Desugar a template string: $"Hi {name}!" to (fmt "Hi {}!" name)

        The token value is split on NUL: the first field is the format
        string, every following field is the source of one embedded
        expression. Blank fields are skipped. Only the first expression of
        each field is parsed, and embedded nodes carry positions relative to
        their own fragment, not the enclosing file.
        """
        # Imported here, as in the original, rather than at module level.
        from .lexer import tokenize as lex_tokenize

        fmt_str, *expr_sources = tok.value.split('\x00')
        parsed_exprs = []
        for src in expr_sources:
            src = src.strip()
            if src:
                sub_parser = Parser(lex_tokenize(src))
                parsed_exprs.append(sub_parser.parse_expr())
        args = [StrLit(value=fmt_str, line=tok.line, col=tok.col)] + parsed_exprs
        return Apply(
            func=Symbol(name='fmt', line=tok.line, col=tok.col),
            args=args,
            line=tok.line, col=tok.col
        )

    def parse_list(self) -> ASTNode:
        """Parse a parenthesized expression: (head args...)"""
        lparen = self.expect(TokenType.LPAREN)
        line, col = lparen.line, lparen.col

        if self.peek_type() == TokenType.RPAREN:
            # () is unit
            self.advance()
            return UnitLit(line=line, col=col)

        head = self.peek()

        # Special forms: first by token type, then by symbol text
        form = self._TOKEN_FORMS.get(head.type)
        if form is None and head.type == TokenType.SYMBOL:
            form = self._SYMBOL_FORMS.get(head.value)
        if form is not None:
            return getattr(self, form)(line, col)

        if head.type in self._OPERATOR_HEAD_TOKENS:
            return self.parse_binop(line, col)
        if head.type == TokenType.NOT:
            return self.parse_unop(line, col)
        return self.parse_apply(line, col)

    # ------------------------------------------------------------------
    # Lambdas
    # ------------------------------------------------------------------

    def parse_lambda(self, line: int, col: int) -> Lambda:
        """(λ param1 param2 ... body) or (λ param1 param2 ... τ(...) body)

        Params are hash IDs or symbols. A hash ID / symbol directly followed
        by ')' is taken as the body; any other kind of expression ends the
        parameter list and is the body. A lambda without a body gets a unit
        body.
        """
        self.advance()  # consume λ

        params = []
        type_ann = None

        while self.peek_type() != TokenType.RPAREN:
            kind = self.peek_type()
            if kind in (TokenType.HASH_ID, TokenType.SYMBOL):
                candidate = self.parse_expr()
                if self.peek_type() == TokenType.RPAREN:
                    # Last thing before ')' — this was the body
                    self.expect(TokenType.RPAREN)
                    return Lambda(params=params, body=candidate,
                                  type_ann=type_ann, line=line, col=col)
                # Otherwise it was a parameter
                params.append(candidate)
            elif kind == TokenType.TYPE:
                # τ type annotation
                self.advance()
                type_ann = self.parse_expr()
            else:
                # This must be the body, optionally followed by a bind
                body = self._bind_if_arrow(self.parse_expr())
                self.expect(TokenType.RPAREN)
                return Lambda(params=params, body=body, type_ann=type_ann,
                              line=line, col=col)

        # If we get here, lambda with no body
        self.expect(TokenType.RPAREN)
        return Lambda(params=params, body=UnitLit(line=line, col=col),
                      type_ann=type_ann, line=line, col=col)

    def parse_bare_lambda(self) -> Lambda:
        """λ without parens — must be inside a list already

        Every leading hash ID / symbol is a parameter; the next expression
        is the body.
        """
        tok = self.advance()  # consume λ
        params = []
        while self.peek_type() in (TokenType.HASH_ID, TokenType.SYMBOL):
            params.append(self.parse_atom())
        body = self.parse_expr()
        return Lambda(params=params, body=body, line=tok.line, col=tok.col)

    def parse_short_lambda(self) -> Lambda:
        """{expr} — short lambda with _ as implicit parameter
        {< _ 5} becomes (λ _ (< _ 5))
        {+ _ 1} becomes (λ _ (+ _ 1))
        {f _} becomes (λ _ (f _))

        Raises ParseError when an operator head has neither 1 nor 2 operands.
        """
        tok = self.expect(TokenType.LBRACE)
        line, col = tok.line, tok.col

        # Parse contents like a list expression: head args...
        head = self.peek()

        if head.type in self._OPERATOR_HEAD_TOKENS:
            # Operator expression: {+ _ 1} → (+ _ 1)
            op = self.advance()
            args = []
            while self.peek_type() != TokenType.RBRACE:
                args.append(self.parse_expr())
            if len(args) == 2:
                body = BinOp(op=op.value, left=args[0], right=args[1],
                             line=op.line, col=op.col)
            elif len(args) == 1:
                body = UnOp(op=op.value, operand=args[0],
                            line=op.line, col=op.col)
            else:
                raise ParseError(f"Short lambda operator expects 1-2 args, got {len(args)}",
                                 line, col)
        elif head.type == TokenType.NOT:
            op = self.advance()
            operand = self.parse_expr()
            body = UnOp(op=op.value, operand=operand, line=op.line, col=op.col)
        else:
            # Could be a function call {f _ args} or a single expression {_.1}
            func = self.parse_expr()
            if self.peek_type() == TokenType.RBRACE:
                # Single expression: {_.1} → (λ _ _.1)
                body = func
            else:
                # Function call: {f _ args} → (f _ args)
                args = []
                while self.peek_type() != TokenType.RBRACE:
                    args.append(self.parse_expr())
                body = Apply(func=func, args=args, line=line, col=col)

        self.expect(TokenType.RBRACE)

        # For now, always use single _ param
        param = Symbol(name='_', line=line, col=col)
        return Lambda(params=[param], body=body, line=line, col=col)

    # ------------------------------------------------------------------
    # Special forms
    # ------------------------------------------------------------------

    def parse_cond(self, line: int, col: int) -> Cond:
        """(? condition then else) — a missing else branch becomes unit"""
        self.advance()  # consume ?
        condition = self.parse_expr()
        then_branch = self.parse_expr()
        if self.peek_type() != TokenType.RPAREN:
            else_branch = self.parse_expr()
        else:
            else_branch = UnitLit(line=line, col=col)
        self.expect(TokenType.RPAREN)
        return Cond(condition=condition, then_branch=then_branch,
                    else_branch=else_branch, line=line, col=col)

    def parse_match(self, line: int, col: int) -> Match:
        """(~ expr pattern1 result1 pattern2 result2 ...)

        Patterns and results are read as alternating expressions; each pair
        is stored as a (pattern, result) tuple in `cases`.
        """
        self.advance()  # consume ~
        expr = self.parse_expr()
        cases = []
        while self.peek_type() != TokenType.RPAREN:
            pattern = self.parse_expr()
            body = self.parse_expr()
            cases.append((pattern, body))
        self.expect(TokenType.RPAREN)
        return Match(expr=expr, cases=cases, line=line, col=col)

    def parse_cond_multi(self, line: int, col: int) -> ASTNode:
        """Multi-branch conditional. Desugars to nested Cond nodes.
        Flat syntax: (cond test1 val1 test2 val2 ... _ default)
        A test of 'else' or '_' is a catch-all; a trailing unpaired
        expression is treated as the default value.

        Returns unit for an empty form, and the bare default value when the
        only branch is a catch-all, so the result is not always a Cond.
        """
        self.advance()  # consume 'cond'
        branches = []
        while self.peek_type() != TokenType.RPAREN:
            test = self.parse_expr()
            if self.peek_type() == TokenType.RPAREN:
                # Odd — last is default value with implicit _ test
                branches.append((Symbol(name='_', line=line, col=col), test))
                break
            val = self.parse_expr()
            branches.append((test, val))
        self.expect(TokenType.RPAREN)

        # Build nested Cond from right to left. A catch-all replaces whatever
        # was built from the branches after it.
        result = UnitLit(line=line, col=col)
        for test, val in reversed(branches):
            if isinstance(test, Symbol) and test.name in ('else', '_'):
                result = val
            else:
                result = Cond(condition=test, then_branch=val,
                              else_branch=result, line=line, col=col)
        return result

    def parse_seq(self, line: int, col: int) -> Seq:
        """(>> expr1 expr2 ...) — each expr may be followed by → target"""
        self.advance()  # consume >>
        exprs = self._parse_until(TokenType.RPAREN, allow_bind=True)
        return Seq(exprs=exprs, line=line, col=col)

    def parse_parallel(self, line: int, col: int) -> Parallel:
        """(∥ expr1 expr2 ...)"""
        self.advance()  # consume ∥
        exprs = self._parse_until(TokenType.RPAREN)
        return Parallel(exprs=exprs, line=line, col=col)

    def parse_module(self, line: int, col: int) -> Module:
        """(□ name? ...definitions) — name defaults to the empty string"""
        self.advance()  # consume □
        name = ""
        if self.peek_type() == TokenType.SYMBOL:
            name = self.advance().value

        body = self._parse_until(TokenType.RPAREN, allow_bind=True)
        return Module(name=name, body=body, line=line, col=col)

    def parse_define(self, line: int, col: int) -> Define:
        """(:= name value)"""
        self.advance()  # consume :=
        name = self.parse_expr()
        value = self.parse_expr()
        self.expect(TokenType.RPAREN)
        return Define(name=name, value=value, line=line, col=col)

    def parse_recurse(self, line: int, col: int) -> Recurse:
        """(↺ name body)"""
        self.advance()  # consume ↺
        name = self.parse_expr()
        body = self.parse_expr()
        self.expect(TokenType.RPAREN)
        return Recurse(name=name, body=body, line=line, col=col)

    def parse_proof(self, line: int, col: int) -> Proof:
        """(⊢ assertion)"""
        self.advance()  # consume ⊢
        assertion = self.parse_expr()
        self.expect(TokenType.RPAREN)
        return Proof(assertion=assertion, line=line, col=col)

    def parse_mutate(self, line: int, col: int) -> Mutate:
        """(μ name value)"""
        self.advance()  # consume μ
        name = self.parse_expr()
        value = self.parse_expr()
        self.expect(TokenType.RPAREN)
        return Mutate(name=name, value=value, line=line, col=col)

    def parse_import(self, line: int, col: int) -> Import:
        """(⇐ @addr)"""
        self.advance()  # consume ⇐
        addr = self.parse_expr()
        self.expect(TokenType.RPAREN)
        return Import(addr=addr, line=line, col=col)

    def parse_let(self, line: int, col: int) -> Let:
        """(let x 1 y 2 ... body)

        The last expression is the body; everything before it must form
        name/value pairs. Raises ParseError when the body is missing or the
        binding terms do not pair up.
        """
        self.advance()  # consume 'let'
        exprs = self._parse_until(TokenType.RPAREN)
        if not exprs:
            raise ParseError("let requires at least a body", line, col)
        *pairs, body = exprs
        if len(pairs) % 2 != 0:
            raise ParseError("let requires even number of binding terms", line, col)
        # Even-indexed terms are names, odd-indexed terms their values
        bindings = list(zip(pairs[0::2], pairs[1::2]))
        return Let(bindings=bindings, body=body, line=line, col=col)

    def parse_pipe(self, line: int, col: int) -> Pipe:
        """(pipe value f1 f2 f3)"""
        self.advance()  # consume 'pipe'
        value = self.parse_expr()
        funcs = self._parse_until(TokenType.RPAREN)
        return Pipe(value=value, funcs=funcs, line=line, col=col)

    def parse_throw(self, line: int, col: int) -> Throw:
        """(throw expr)"""
        self.advance()  # consume 'throw'
        expr = self.parse_expr()
        self.expect(TokenType.RPAREN)
        return Throw(expr=expr, line=line, col=col)

    def parse_catch(self, line: int, col: int) -> Catch:
        """(catch expr handler)"""
        self.advance()  # consume 'catch'
        expr = self.parse_expr()
        handler = self.parse_expr()
        self.expect(TokenType.RPAREN)
        return Catch(expr=expr, handler=handler, line=line, col=col)

    def parse_py_import(self, line: int, col: int) -> PyImportNode:
        """(py-import "module") or (py-import "module" alias)

        The alias is only recorded when it parses as a Symbol; any other
        expression in that position is consumed and the alias stays "".
        """
        self.advance()  # consume 'py-import'
        module_name = self.parse_expr()
        alias = ""
        if self.peek_type() != TokenType.RPAREN:
            alias_node = self.parse_expr()
            if isinstance(alias_node, Symbol):
                alias = alias_node.name
        self.expect(TokenType.RPAREN)
        return PyImportNode(module_name=module_name, alias=alias, line=line, col=col)

    def parse_defn(self, line: int, col: int) -> Define:
        """(defn name params... body) — short form for a named function.

        Builds a Define whose value is a Lambda over params and body.
        Raises ParseError when nothing follows the name.
        """
        self.advance()  # consume 'defn'
        name = self.parse_expr()  # function name
        exprs = self._parse_until(TokenType.RPAREN)
        if not exprs:
            raise ParseError("defn requires at least a body", line, col)
        # Last expr is body, rest are params
        *params, body = exprs
        lam = Lambda(params=params, body=body, line=line, col=col)
        return Define(name=name, value=lam, line=line, col=col)

    def parse_for(self, line: int, col: int) -> Apply:
        """(for x coll body) → (map (fn x body) coll)
        (for x coll when pred body) → (map (fn x body) (filter (fn x pred) coll))
        Comprehension-like syntax."""
        self.advance()  # consume 'for'
        var = self.parse_expr()   # binding variable
        coll = self.parse_expr()  # collection

        # Optional 'when' filter
        when_pred = None
        nxt = self.peek()
        if nxt.type == TokenType.SYMBOL and nxt.value == 'when':
            self.advance()  # consume 'when'
            when_pred = self.parse_expr()

        body = self.parse_expr()
        self.expect(TokenType.RPAREN)

        source = coll
        if when_pred is not None:
            filter_fn = Lambda(params=[var], body=when_pred, line=line, col=col)
            source = Apply(func=Symbol(name='filter', line=line, col=col),
                           args=[filter_fn, coll], line=line, col=col)
        map_fn = Lambda(params=[var], body=body, line=line, col=col)
        return Apply(func=Symbol(name='map', line=line, col=col),
                     args=[map_fn, source], line=line, col=col)

    # ------------------------------------------------------------------
    # Operators, application, binding, collections
    # ------------------------------------------------------------------

    def parse_binop(self, line: int, col: int) -> BinOp:
        """(op left right)"""
        op = self.advance()
        left = self.parse_expr()
        right = self.parse_expr()
        self.expect(TokenType.RPAREN)
        return BinOp(op=op.value, left=left, right=right, line=line, col=col)

    def parse_unop(self, line: int, col: int) -> UnOp:
        """(! operand)"""
        op = self.advance()
        operand = self.parse_expr()
        self.expect(TokenType.RPAREN)
        return UnOp(op=op.value, operand=operand, line=line, col=col)

    def parse_apply(self, line: int, col: int) -> Apply:
        """(func arg1 arg2 ...)"""
        func = self.parse_expr()
        args = self._parse_until(TokenType.RPAREN)
        return Apply(func=func, args=args, line=line, col=col)

    def parse_bind(self, expr: ASTNode) -> Bind:
        """expr → target — the Bind node is positioned at the arrow"""
        arrow = self.expect(TokenType.ARROW)
        target = self.parse_expr()
        return Bind(expr=expr, target=target, line=arrow.line, col=arrow.col)

    def parse_tuple(self) -> Tuple:
        """⟨expr1 expr2 ...⟩"""
        tok = self.expect(TokenType.LANGLE)
        elements = self._parse_until(TokenType.RANGLE)
        return Tuple(elements=elements, line=tok.line, col=tok.col)

    def parse_vector(self) -> Vector:
        """⟦expr1 expr2 ...⟧"""
        tok = self.expect(TokenType.LBRACKET)
        elements = self._parse_until(TokenType.RBRACKET)
        return Vector(elements=elements, line=tok.line, col=tok.col)
|
|
599
|
+
|
|
600
|
+
|
|
601
|
+
def parse(tokens: List[Token]) -> Program:
    """Build a Parser over `tokens` and return the Program it parses."""
    parser = Parser(tokens)
    program = parser.parse()
    return program
|