tengwar 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tengwar/__init__.py +20 -0
- tengwar/__main__.py +8 -0
- tengwar/ast_nodes.py +351 -0
- tengwar/binary_ast.py +654 -0
- tengwar/errors.py +43 -0
- tengwar/interpreter.py +1845 -0
- tengwar/lexer.py +483 -0
- tengwar/mcp_server.py +496 -0
- tengwar/parser.py +603 -0
- tengwar/repl.py +152 -0
- tengwar/vm.py +425 -0
- tengwar-0.3.1.dist-info/METADATA +202 -0
- tengwar-0.3.1.dist-info/RECORD +17 -0
- tengwar-0.3.1.dist-info/WHEEL +5 -0
- tengwar-0.3.1.dist-info/entry_points.txt +2 -0
- tengwar-0.3.1.dist-info/licenses/LICENSE +21 -0
- tengwar-0.3.1.dist-info/top_level.txt +1 -0
tengwar/binary_ast.py
ADDED
|
@@ -0,0 +1,654 @@
|
|
|
1
|
+
"""
|
|
2
|
+
TENGWAR Binary AST Protocol (TBAP)
|
|
3
|
+
|
|
4
|
+
The killer AI feature: instead of generating text like
|
|
5
|
+
(filter {> _ 5} (map {* _ 2} data))
|
|
6
|
+
and parsing it, an AI emits compact binary opcodes that map
|
|
7
|
+
directly to AST nodes.
|
|
8
|
+
|
|
9
|
+
Benefits:
|
|
10
|
+
- ZERO syntax errors possible (structurally valid by construction)
|
|
11
|
+
- ZERO parsing overhead (binary → AST in O(n), no lexer/parser)
|
|
12
|
+
- ~60% fewer bytes than text representation
|
|
13
|
+
- Can be embedded in tool-call JSON as base64
|
|
14
|
+
- Streamable: AI can emit opcodes as it generates
|
|
15
|
+
|
|
16
|
+
Wire format:
|
|
17
|
+
Each node is: [opcode: u8] [payload...]
|
|
18
|
+
Opcodes map 1:1 to AST node types.
|
|
19
|
+
Strings are length-prefixed (u16 length + utf8 bytes).
|
|
20
|
+
Integers are varint-encoded.
|
|
21
|
+
Lists use a count prefix.
|
|
22
|
+
|
|
23
|
+
This is something NO other language offers to AI.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
import struct
|
|
27
|
+
from typing import List, Tuple
|
|
28
|
+
from .ast_nodes import *
|
|
29
|
+
from .interpreter import (
|
|
30
|
+
Interpreter, TengwarValue, TengwarInt, TengwarFloat, TengwarStr,
|
|
31
|
+
TengwarBool, TengwarUnit, TengwarVector, TengwarTuple, TengwarDict,
|
|
32
|
+
TengwarClosure, TengwarBuiltin
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
# === OPCODES ===
|
|
36
|
+
# One byte per AST node type
|
|
37
|
+
|
|
38
|
+
class Op:
    """One-byte opcodes of the TBAP wire format, one per AST node kind.

    The trailing comment on each opcode describes the payload that follows
    the opcode byte in the stream.
    """

    # --- Literals: 0x00-0x0F ---
    INT = 0x01            # varint
    FLOAT = 0x02          # f64
    STRING = 0x03         # u16 length, utf-8 bytes
    TRUE = 0x04           # (no payload)
    FALSE = 0x05          # (no payload)
    UNIT = 0x06           # (no payload)

    # --- Identifiers: 0x10-0x1F ---
    SYMBOL = 0x10         # u16 length, utf-8 bytes
    HASH_ID = 0x11        # u16 length, utf-8 bytes
    ADDR_REF = 0x12       # u16 length, utf-8 bytes

    # --- Core forms: 0x20-0x3F ---
    LAMBDA = 0x20         # u8 param_count, params, body
    APPLY = 0x21          # func, u8 arg_count, args
    COND = 0x22           # condition, then, else
    MATCH = 0x23          # expr, u8 case_count, (pattern, body)*
    SEQ = 0x24            # u8 count, exprs
    PARALLEL = 0x25       # u8 count, exprs
    BIND = 0x26           # expr, target
    DEFINE = 0x27         # name, value
    RECURSE = 0x28        # name, body
    MODULE = 0x29         # u16 name_len, name, u8 count, body
    LET = 0x2A            # u8 binding_count, (name, value)*, body
    PIPE = 0x2B           # value, u8 func_count, funcs
    THROW = 0x2C          # expr
    CATCH = 0x2D          # expr, handler
    PY_IMPORT = 0x2E      # module_name, u16 alias_len, alias
    PROOF = 0x2F          # assertion
    MUTATE = 0x30         # name, value

    # --- Collections: 0x40-0x4F ---
    TUPLE = 0x40          # u8 count, elements
    VECTOR = 0x41         # u16 count, elements

    # --- Operators: 0x50-0x6F ---
    BINOP = 0x50          # u8 op_id, left, right
    UNOP = 0x51           # u8 op_id, operand

    # --- Short lambda: 0x70-0x7F ---
    SHORT_LAMBDA = 0x70   # body (the single parameter `_` is implicit)

    # --- Whole program ---
    PROGRAM = 0xFF        # u16 count, exprs
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
# Operator symbol <-> numeric id tables used by the BINOP / UNOP payloads.
# An operator's id is its position in the tuple below, so the order is part
# of the wire format and must not change.
OP_IDS = {
    symbol: op_id
    for op_id, symbol in enumerate((
        '+', '-', '*', '/', '%',
        '=', '!=', '<', '>', '<=', '>=',
        '&', '|', '!',
    ))
}
# Reverse lookup (id -> symbol) for the decoder.
ID_OPS = dict(zip(OP_IDS.values(), OP_IDS.keys()))
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
# === ENCODER: AST → Binary ===
|
|
96
|
+
|
|
97
|
+
class Encoder:
    """Encode a Tengwar AST to the TBAP binary format.

    Output accumulates in ``self.buf``. Every call to :meth:`encode` appends
    to that buffer and returns everything written so far, so use a fresh
    ``Encoder`` for each independent program.
    """

    def __init__(self):
        self.buf = bytearray()

    def _write_u8(self, v: int):
        """Append one unsigned byte (opcode, operator id or u8 count).

        Raises:
            ValueError: if ``v`` does not fit in a single byte. Masking the
                value instead would silently truncate an element count and
                desynchronize everything that follows in the stream.
        """
        if not 0 <= v <= 0xFF:
            raise ValueError(f"Value {v} does not fit in a u8 field")
        self.buf.append(v)

    def _write_u16(self, v: int):
        """Append a big-endian unsigned 16-bit integer.

        ``struct.pack`` raises ``struct.error`` when ``v`` is out of range.
        """
        self.buf.extend(struct.pack('>H', v))

    def _write_varint(self, v: int):
        """Append a signed, variable-length integer.

        Layout (as read back by ``Decoder._read_varint``):

        * zero or more continuation bytes ``0x80 | <7 magnitude bits>``,
          least-significant group first;
        * one final byte with bit 0x80 clear. Bit 0x40 of that byte is the
          sign flag and the low 6 bits are the top magnitude bits;
        * for negative numbers only, a trailing ``0x00`` terminator byte.
        """
        magnitude = -v if v < 0 else v
        # The final byte can only carry 6 magnitude bits because bit 0x40 is
        # the sign flag, so keep emitting continuation bytes until the
        # remainder fits in 6 bits. (Stopping at < 0x80 would let a final
        # chunk of 64..127 collide with the sign flag.)
        while magnitude >= 0x40:
            self.buf.append((magnitude & 0x7F) | 0x80)
            magnitude >>= 7
        if v < 0:
            self.buf.append(magnitude | 0x40)  # sign flag set
            self.buf.append(0x00)  # terminator the decoder skips
        else:
            self.buf.append(magnitude)

    def _write_f64(self, v: float):
        """Append a big-endian IEEE-754 double."""
        self.buf.extend(struct.pack('>d', v))

    def _write_string(self, s: str):
        """Append a string as a u16 byte length followed by its UTF-8 bytes."""
        encoded = s.encode('utf-8')
        self._write_u16(len(encoded))
        self.buf.extend(encoded)

    def _write_operator(self, op: str):
        """Append the u8 id of operator ``op``.

        Raises:
            ValueError: if ``op`` has no entry in ``OP_IDS``. Falling back
                to id 0 would silently turn the operator into ``+``.
        """
        try:
            op_id = OP_IDS[op]
        except KeyError:
            raise ValueError(f"Cannot encode operator: {op!r}") from None
        self._write_u8(op_id)

    def _encode_counted(self, nodes):
        """Append a u8 element count followed by each encoded child node."""
        self._write_u8(len(nodes))
        for child in nodes:
            self._encode_node(child)

    def encode(self, node: ASTNode) -> bytes:
        """Encode an AST node (and its subtree) and return the buffer as bytes.

        Raises:
            ValueError: for a node type, operator or element count that the
                format cannot represent.
        """
        self._encode_node(node)
        return bytes(self.buf)

    def _encode_node(self, node):
        """Append the opcode and payload for ``node``, recursing into children."""
        if isinstance(node, Program):
            self._write_u8(Op.PROGRAM)
            self._write_u16(len(node.body))
            for expr in node.body:
                self._encode_node(expr)

        elif isinstance(node, IntLit):
            self._write_u8(Op.INT)
            self._write_varint(node.value)

        elif isinstance(node, FloatLit):
            self._write_u8(Op.FLOAT)
            self._write_f64(node.value)

        elif isinstance(node, StrLit):
            self._write_u8(Op.STRING)
            self._write_string(node.value)

        elif isinstance(node, BoolLit):
            self._write_u8(Op.TRUE if node.value else Op.FALSE)

        elif isinstance(node, UnitLit):
            self._write_u8(Op.UNIT)

        elif isinstance(node, Symbol):
            self._write_u8(Op.SYMBOL)
            self._write_string(node.name)

        elif isinstance(node, HashId):
            self._write_u8(Op.HASH_ID)
            self._write_string(node.hash)

        elif isinstance(node, AddrRef):
            self._write_u8(Op.ADDR_REF)
            self._write_string(node.addr)

        elif isinstance(node, Lambda):
            self._write_u8(Op.LAMBDA)
            self._encode_counted(node.params)
            self._encode_node(node.body)

        elif isinstance(node, Apply):
            self._write_u8(Op.APPLY)
            self._encode_node(node.func)
            self._encode_counted(node.args)

        elif isinstance(node, Cond):
            self._write_u8(Op.COND)
            self._encode_node(node.condition)
            self._encode_node(node.then_branch)
            self._encode_node(node.else_branch)

        elif isinstance(node, Match):
            self._write_u8(Op.MATCH)
            self._encode_node(node.expr)
            self._write_u8(len(node.cases))
            for pattern, body in node.cases:
                self._encode_node(pattern)
                self._encode_node(body)

        elif isinstance(node, Seq):
            self._write_u8(Op.SEQ)
            self._encode_counted(node.exprs)

        elif isinstance(node, Parallel):
            self._write_u8(Op.PARALLEL)
            self._encode_counted(node.exprs)

        elif isinstance(node, Bind):
            self._write_u8(Op.BIND)
            self._encode_node(node.expr)
            self._encode_node(node.target)

        elif isinstance(node, Define):
            self._write_u8(Op.DEFINE)
            self._encode_node(node.name)
            self._encode_node(node.value)

        elif isinstance(node, Recurse):
            self._write_u8(Op.RECURSE)
            self._encode_node(node.name)
            self._encode_node(node.body)

        elif isinstance(node, Module):
            self._write_u8(Op.MODULE)
            self._write_string(node.name)
            self._encode_counted(node.body)

        elif isinstance(node, Let):
            self._write_u8(Op.LET)
            self._write_u8(len(node.bindings))
            for name_node, val_node in node.bindings:
                self._encode_node(name_node)
                self._encode_node(val_node)
            self._encode_node(node.body)

        elif isinstance(node, Pipe):
            self._write_u8(Op.PIPE)
            self._encode_node(node.value)
            self._encode_counted(node.funcs)

        elif isinstance(node, Throw):
            self._write_u8(Op.THROW)
            self._encode_node(node.expr)

        elif isinstance(node, Catch):
            self._write_u8(Op.CATCH)
            self._encode_node(node.expr)
            self._encode_node(node.handler)

        elif isinstance(node, PyImportNode):
            self._write_u8(Op.PY_IMPORT)
            self._encode_node(node.module_name)
            self._write_string(node.alias)

        elif isinstance(node, Proof):
            self._write_u8(Op.PROOF)
            self._encode_node(node.assertion)

        elif isinstance(node, Mutate):
            self._write_u8(Op.MUTATE)
            self._encode_node(node.name)
            self._encode_node(node.value)

        elif isinstance(node, Tuple):
            self._write_u8(Op.TUPLE)
            self._encode_counted(node.elements)

        elif isinstance(node, Vector):
            self._write_u8(Op.VECTOR)
            # Vectors use a u16 count, unlike the other collections.
            self._write_u16(len(node.elements))
            for e in node.elements:
                self._encode_node(e)

        elif isinstance(node, BinOp):
            self._write_u8(Op.BINOP)
            self._write_operator(node.op)
            self._encode_node(node.left)
            self._encode_node(node.right)

        elif isinstance(node, UnOp):
            self._write_u8(Op.UNOP)
            self._write_operator(node.op)
            self._encode_node(node.operand)

        else:
            raise ValueError(f"Cannot encode: {type(node).__name__}")
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
# === DECODER: Binary → AST ===
|
|
302
|
+
|
|
303
|
+
class Decoder:
    """Decode TBAP binary data back to a Tengwar AST.

    ``self.pos`` is the index of the next unread byte of ``self.data``.

    Raises (from any read/decode method):
        ValueError: if the input ends before a field is complete, or an
            unknown opcode is encountered.
    """

    def __init__(self, data: bytes):
        self.data = data
        self.pos = 0

    def _take(self, size: int) -> bytes:
        """Consume and return exactly ``size`` bytes.

        Every fixed-size read goes through here so that truncated input is
        reported uniformly instead of surfacing as an ``IndexError``, a
        ``struct.error`` or (for strings) a silently shortened value.

        Raises:
            ValueError: if fewer than ``size`` bytes remain.
        """
        end = self.pos + size
        if end > len(self.data):
            available = max(len(self.data) - self.pos, 0)
            raise ValueError(
                f"Truncated input: need {size} byte(s) at position "
                f"{self.pos}, only {available} available"
            )
        chunk = self.data[self.pos:end]
        self.pos = end
        return chunk

    def _read_u8(self) -> int:
        """Read one unsigned byte."""
        return self._take(1)[0]

    def _read_u16(self) -> int:
        """Read a big-endian unsigned 16-bit integer."""
        return struct.unpack('>H', self._take(2))[0]

    def _read_varint(self) -> int:
        """Read a signed variable-length integer.

        Bytes with bit 0x80 set contribute 7 magnitude bits each,
        least-significant group first. The first byte with bit 0x80 clear
        ends the number: if its bit 0x40 is set the value is negative, its
        low 6 bits are the top magnitude bits, and one terminator byte
        after it is skipped.
        """
        result = 0
        shift = 0
        while True:
            b = self._read_u8()
            if b & 0x80:
                result |= (b & 0x7F) << shift
                shift += 7
                continue
            if b & 0x40:
                result |= (b & 0x3F) << shift
                self.pos += 1  # skip the terminator byte without inspecting it
                return -result
            result |= b << shift
            return result

    def _read_f64(self) -> float:
        """Read a big-endian IEEE-754 double."""
        return struct.unpack('>d', self._take(8))[0]

    def _read_string(self) -> str:
        """Read a u16 byte length followed by that many UTF-8 bytes."""
        length = self._read_u16()
        return self._take(length).decode('utf-8')

    def _read_nodes(self, count: int) -> list:
        """Decode ``count`` consecutive nodes into a list."""
        return [self._decode_node() for _ in range(count)]

    def decode(self) -> ASTNode:
        """Decode one node (with its whole subtree) starting at ``self.pos``."""
        return self._decode_node()

    def _decode_node(self) -> ASTNode:
        """Read one opcode and its payload and build the matching AST node."""
        op = self._read_u8()

        if op == Op.PROGRAM:
            count = self._read_u16()
            return Program(body=self._read_nodes(count))

        elif op == Op.INT:
            return IntLit(value=self._read_varint())

        elif op == Op.FLOAT:
            return FloatLit(value=self._read_f64())

        elif op == Op.STRING:
            return StrLit(value=self._read_string())

        elif op == Op.TRUE:
            return BoolLit(value=True)

        elif op == Op.FALSE:
            return BoolLit(value=False)

        elif op == Op.UNIT:
            return UnitLit()

        elif op == Op.SYMBOL:
            return Symbol(name=self._read_string())

        elif op == Op.HASH_ID:
            return HashId(hash=self._read_string())

        elif op == Op.ADDR_REF:
            return AddrRef(addr=self._read_string())

        elif op == Op.LAMBDA:
            param_count = self._read_u8()
            params = self._read_nodes(param_count)
            body = self._decode_node()
            return Lambda(params=params, body=body)

        elif op == Op.APPLY:
            func = self._decode_node()
            arg_count = self._read_u8()
            args = self._read_nodes(arg_count)
            return Apply(func=func, args=args)

        elif op == Op.COND:
            condition = self._decode_node()
            then_branch = self._decode_node()
            else_branch = self._decode_node()
            return Cond(condition=condition, then_branch=then_branch, else_branch=else_branch)

        elif op == Op.MATCH:
            expr = self._decode_node()
            case_count = self._read_u8()
            cases = []
            for _ in range(case_count):
                pattern = self._decode_node()
                body = self._decode_node()
                cases.append((pattern, body))
            return Match(expr=expr, cases=cases)

        elif op == Op.SEQ:
            count = self._read_u8()
            return Seq(exprs=self._read_nodes(count))

        elif op == Op.PARALLEL:
            count = self._read_u8()
            return Parallel(exprs=self._read_nodes(count))

        elif op == Op.BIND:
            expr = self._decode_node()
            target = self._decode_node()
            return Bind(expr=expr, target=target)

        elif op == Op.DEFINE:
            name = self._decode_node()
            value = self._decode_node()
            return Define(name=name, value=value)

        elif op == Op.RECURSE:
            name = self._decode_node()
            body = self._decode_node()
            return Recurse(name=name, body=body)

        elif op == Op.MODULE:
            name = self._read_string()
            count = self._read_u8()
            return Module(name=name, body=self._read_nodes(count))

        elif op == Op.LET:
            binding_count = self._read_u8()
            bindings = []
            for _ in range(binding_count):
                name = self._decode_node()
                value = self._decode_node()
                bindings.append((name, value))
            body = self._decode_node()
            return Let(bindings=bindings, body=body)

        elif op == Op.PIPE:
            value = self._decode_node()
            func_count = self._read_u8()
            funcs = self._read_nodes(func_count)
            return Pipe(value=value, funcs=funcs)

        elif op == Op.THROW:
            return Throw(expr=self._decode_node())

        elif op == Op.CATCH:
            expr = self._decode_node()
            handler = self._decode_node()
            return Catch(expr=expr, handler=handler)

        elif op == Op.PY_IMPORT:
            module_name = self._decode_node()
            alias = self._read_string()
            return PyImportNode(module_name=module_name, alias=alias)

        elif op == Op.PROOF:
            return Proof(assertion=self._decode_node())

        elif op == Op.MUTATE:
            name = self._decode_node()
            value = self._decode_node()
            return Mutate(name=name, value=value)

        elif op == Op.TUPLE:
            count = self._read_u8()
            return Tuple(elements=self._read_nodes(count))

        elif op == Op.VECTOR:
            # Vectors carry a u16 count, unlike the other collections.
            count = self._read_u16()
            return Vector(elements=self._read_nodes(count))

        elif op == Op.BINOP:
            op_id = self._read_u8()
            left = self._decode_node()
            right = self._decode_node()
            # NOTE(review): an unrecognized operator id is decoded as '+'
            # rather than rejected; kept as-is for compatibility.
            return BinOp(op=ID_OPS.get(op_id, '+'), left=left, right=right)

        elif op == Op.UNOP:
            op_id = self._read_u8()
            operand = self._decode_node()
            # NOTE(review): an unrecognized operator id is decoded as '!'.
            return UnOp(op=ID_OPS.get(op_id, '!'), operand=operand)

        elif op == Op.SHORT_LAMBDA:
            # Only the body is on the wire; the single parameter `_` is implied.
            body = self._decode_node()
            param = Symbol(name='_')
            return Lambda(params=[param], body=body)

        else:
            raise ValueError(f"Unknown opcode: 0x{op:02x} at position {self.pos-1}")
|
|
514
|
+
|
|
515
|
+
|
|
516
|
+
# === CONVENIENCE FUNCTIONS ===
|
|
517
|
+
|
|
518
|
+
def encode(source: str) -> bytes:
    """Compile Tengwar source text to TBAP bytes.

    The text is tokenized, parsed into an AST, and the AST is serialized
    with a fresh ``Encoder``.
    """
    # The lexer and parser are imported at call time, not at module import.
    from .lexer import tokenize
    from .parser import parse

    return Encoder().encode(parse(tokenize(source)))
|
|
525
|
+
|
|
526
|
+
|
|
527
|
+
def decode(data: bytes) -> ASTNode:
    """Rebuild the AST stored in ``data`` using a one-shot ``Decoder``."""
    reader = Decoder(data)
    return reader.decode()
|
|
530
|
+
|
|
531
|
+
|
|
532
|
+
def run_binary(data: bytes, interpreter: Interpreter = None) -> TengwarValue:
    """Decode a binary AST and evaluate it.

    Args:
        data: TBAP-encoded program.
        interpreter: interpreter to evaluate with; a new ``Interpreter`` is
            created when omitted.

    Returns:
        Whatever ``interpreter.eval`` returns for the decoded AST.
    """
    program = decode(data)
    runtime = Interpreter() if interpreter is None else interpreter
    return runtime.eval(program)
|
|
538
|
+
|
|
539
|
+
|
|
540
|
+
def encode_b64(source: str) -> str:
    """Compile Tengwar source and return the binary form as base64 text.

    Intended for embedding the program in JSON / tool calls.
    """
    from base64 import b64encode

    raw = encode(source)
    return b64encode(raw).decode('ascii')
|
|
544
|
+
|
|
545
|
+
|
|
546
|
+
def run_b64(b64_data: str, interpreter: Interpreter = None) -> TengwarValue:
    """Decode a base64-wrapped binary AST and evaluate it.

    ``interpreter`` is passed through to ``run_binary`` unchanged.
    """
    from base64 import b64decode

    return run_binary(b64decode(b64_data), interpreter)
|
|
551
|
+
|
|
552
|
+
|
|
553
|
+
# === AST BUILDER (for AI to construct ASTs programmatically) ===
|
|
554
|
+
|
|
555
|
+
class ASTBuilder:
    """
    Fluent API for building Tengwar ASTs without text.

    An AI can use this to construct programs directly:

        b = ASTBuilder()
        program = b.program(
            b.define("double", b.lam(["x"], b.binop("*", b.sym("x"), b.int(2)))),
            b.apply(b.sym("double"), b.int(21))
        )
        result = b.run(program)  # → 42
    """

    # NOTE: the literal helpers named after builtins (``int``, ``float``,
    # ``str``, ``bool``) are deliberately defined at the END of the class.
    # Annotations are evaluated in the class namespace, so any ``name: str``
    # written after ``def str`` would refer to the method, not the type.

    def sym(self, name: str) -> Symbol:
        """Build a symbol reference."""
        return Symbol(name=name)

    def lam(self, params: list, body: ASTNode) -> Lambda:
        """Build a lambda.

        ``params`` may mix plain strings and ready-made nodes; strings are
        wrapped in ``Symbol`` and anything else is passed through untouched.
        """
        # Inside a method body `str` is the builtin: class-level names are
        # not visible from function scopes, so no `__builtins__` lookup is
        # needed.
        return Lambda(
            params=[Symbol(name=p) if isinstance(p, str) else p for p in params],
            body=body,
        )

    def apply(self, func: ASTNode, *args: ASTNode) -> Apply:
        """Build a function application of ``func`` to ``args``."""
        return Apply(func=func, args=list(args))

    def binop(self, op: str, left: ASTNode, right: ASTNode) -> BinOp:
        """Build a binary operation; ``op`` is the operator symbol."""
        return BinOp(op=op, left=left, right=right)

    def unop(self, op: str, operand: ASTNode) -> UnOp:
        """Build a unary operation; ``op`` is the operator symbol."""
        return UnOp(op=op, operand=operand)

    def cond(self, condition: ASTNode, then_b: ASTNode, else_b: ASTNode) -> Cond:
        """Build a conditional with both branches."""
        return Cond(condition=condition, then_branch=then_b, else_branch=else_b)

    def seq(self, *exprs: ASTNode) -> Seq:
        """Build a sequence node from the given expressions."""
        return Seq(exprs=list(exprs))

    def bind(self, expr: ASTNode, target: ASTNode) -> Bind:
        """Build a bind node from ``expr`` and ``target``."""
        return Bind(expr=expr, target=target)

    def define(self, name: str, value: ASTNode) -> Define:
        """Build a definition; ``name`` is wrapped in a ``Symbol``."""
        return Define(name=Symbol(name=name), value=value)

    def recurse(self, name: str, body: ASTNode) -> Recurse:
        """Build a recurse node; ``name`` is wrapped in a ``Symbol``."""
        return Recurse(name=Symbol(name=name), body=body)

    def let(self, bindings: list, body: ASTNode) -> Let:
        """Build a let node.

        bindings: [("name", value_node), ...] - each name is wrapped in a
        ``Symbol``.
        """
        return Let(
            bindings=[(Symbol(name=n), v) for n, v in bindings],
            body=body,
        )

    def pipe(self, value: ASTNode, *funcs: ASTNode) -> Pipe:
        """Build a pipe of ``value`` through ``funcs``."""
        return Pipe(value=value, funcs=list(funcs))

    def vec(self, *elements: ASTNode) -> Vector:
        """Build a vector literal."""
        return Vector(elements=list(elements))

    def tup(self, *elements: ASTNode) -> Tuple:
        """Build a tuple literal."""
        return Tuple(elements=list(elements))

    def match(self, expr: ASTNode, *cases) -> Match:
        """Build a match node. cases: (pattern, body) pairs, one per argument."""
        return Match(expr=expr, cases=list(cases))

    def throw(self, expr: ASTNode) -> Throw:
        """Build a throw node."""
        return Throw(expr=expr)

    def catch(self, expr: ASTNode, handler: ASTNode) -> Catch:
        """Build a catch node wrapping ``expr`` with ``handler``."""
        return Catch(expr=expr, handler=handler)

    def program(self, *exprs: ASTNode) -> Program:
        """Build a top-level program from the given expressions."""
        return Program(body=list(exprs))

    def encode(self, node: ASTNode) -> bytes:
        """Encode AST to binary using a fresh ``Encoder``."""
        return Encoder().encode(node)

    def run(self, node: ASTNode, interpreter: Interpreter = None) -> TengwarValue:
        """Execute an AST directly.

        A new ``Interpreter`` is created when none is supplied.
        """
        if interpreter is None:
            interpreter = Interpreter()
        return interpreter.eval(node)

    # --- Literal helpers (kept last; see the NOTE at the top of the class) ---

    def unit(self) -> UnitLit:
        """Build the unit literal."""
        return UnitLit()

    def int(self, v: int) -> IntLit:
        """Build an integer literal."""
        return IntLit(value=v)

    def float(self, v: float) -> FloatLit:
        """Build a float literal."""
        return FloatLit(value=v)

    def str(self, v: str) -> StrLit:
        """Build a string literal."""
        return StrLit(value=v)

    def bool(self, v: bool) -> BoolLit:
        """Build a boolean literal."""
        return BoolLit(value=v)
|
tengwar/errors.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
"""
|
|
2
|
+
TENGWAR Error Types
|
|
3
|
+
|
|
4
|
+
Structured error reporting with precise source locations.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class TengwarError(Exception):
    """Base class for Tengwar errors, carrying a source location.

    Attributes are ``message`` (the bare text), ``line`` and ``col``. The
    exception's string form is ``"[line:col] message"``.
    """

    def __init__(self, message: str, line: int = 0, col: int = 0):
        location = f"[{line}:{col}]"
        super().__init__(f"{location} {message}")
        self.message, self.line, self.col = message, line, col
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class LexError(TengwarError):
    """Error category for tokenization (lexing) problems."""
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class ParseError(TengwarError):
    """Error category for parsing problems."""
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class TypeError_(TengwarError):
    """Error category for type-checking problems.

    The trailing underscore keeps the name distinct from the built-in
    ``TypeError``.
    """
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class RuntimeError_(TengwarError):
    """Error category for failures during runtime evaluation.

    The trailing underscore keeps the name distinct from the built-in
    ``RuntimeError``.
    """
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class ProofError(TengwarError):
    """Error category for a proof obligation that failed."""
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class EffectError(TengwarError):
    """Error category for an unhandled or disallowed effect."""
|