sutra-dev 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,553 @@
1
+ """AST node definitions for the Sutra compiler.
2
+
3
+ These are intentionally lean dataclasses. The parser builds them, the
4
+ validator walks them. A more elaborate visitor framework can come later
5
+ when we start lowering to IR.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from dataclasses import dataclass, field
11
+ from typing import List, Optional, Union
12
+
13
+ from .diagnostics import SourceSpan
14
+
15
+
16
+ # ============================================================
17
+ # Base
18
+ # ============================================================
19
+
20
+
21
@dataclass
class Node:
    # Root of the AST dataclass hierarchy in this module. Its single
    # field is inherited by every subclass, so each node carries a
    # `SourceSpan`.
    span: SourceSpan
24
+
25
+
26
+ # ============================================================
27
+ # Types
28
+ # ============================================================
29
+
30
+
31
@dataclass
class TypeRef(Node):
    """Reference to a type written in a declaration or an expression.

    Generic instantiations such as `List<vector>` or `Identity<Cat>`
    keep their arguments in `type_args`; `name` is always the base
    type name.
    """

    # Base type name.
    name: str
    # Arguments of a generic instantiation; defaults to a fresh empty list.
    type_args: List["TypeRef"] = field(default_factory=list)
41
+
42
+
43
+ # ============================================================
44
+ # Expressions
45
+ # ============================================================
46
+
47
+
48
@dataclass
class Expr(Node):
    # Base class shared by the expression nodes below; it adds no
    # fields beyond the inherited `span`.
    pass
51
+
52
+
53
@dataclass
class IntLiteral(Expr):
    # Integer literal; `value` is the literal as a Python int.
    value: int
56
+
57
+
58
@dataclass
class FloatLiteral(Expr):
    # Floating-point literal; `value` is the literal as a Python float.
    value: float
61
+
62
+
63
@dataclass
class ImaginaryLiteral(Expr):
    # A numeric literal carrying an `i` suffix (`5i`, `3.14i`), i.e. a
    # pure-imaginary value. At runtime the scalar is allocated on
    # synthetic[AXIS_IMAG] and the real axis is left at zero.
    #
    # The suffix only binds when the character after it cannot continue
    # an identifier, which keeps a plain variable named `i` usable:
    # `5 * i` is still a multiplication, whereas `5i` is a single token.
    value: float
72
+
73
+
74
@dataclass
class ComplexLiteral(Expr):
    # Never produced directly by user-facing syntax: the simplifier
    # creates this node when it folds an int/float literal plus or
    # minus an imaginary literal. Codegen lowers it to one
    # `_VSA.make_complex(re, im)` call, so the emitted source performs
    # a single allocation instead of a vector-add.
    re: float
    im: float
83
+
84
+
85
@dataclass
class StringLiteral(Expr):
    # String literal; `value` is the literal's text.
    value: str
88
+
89
+
90
@dataclass
class CharLiteral(Expr):
    # `value` is the Unicode code point. At runtime a char is a
    # number-axis scalar (synthetic[AXIS_REAL] = code point) together
    # with a char-flag bit on synthetic[AXIS_CHAR_FLAG]; that flag is
    # what distinguishes chars from plain ints.
    value: int
96
+
97
+
98
@dataclass
class BoolLiteral(Expr):
    # Boolean literal; `value` is the literal as a Python bool.
    value: bool
101
+
102
+
103
@dataclass
class UnknownLiteral(Expr):
    # The `unknown` keyword: the neutral point of the truth axis, 0.0,
    # sitting between false (-1) and true (+1). It is a first-class
    # value for three-valued logic (see the `trit` primitive class).
    # There is no payload field because the literal kind itself
    # implies the value.
    pass
110
+
111
+
112
@dataclass
class WaitLiteral(Expr):
    # The `wait` keyword, an explicit deferred-initializer marker.
    #
    # It is legal only as the right-hand side of a var-decl
    # (`int i = wait;`) and means "this name is declared now; an
    # assignment will follow before any read". Any other use is a
    # parse-time error.
    #
    # The validator enforces definite assignment. Codegen emits the
    # zero value of the type at the declaration site, and the later
    # assignment overrides it.
    #
    # This resolves deferred-init candidate D from
    # planning/open-questions/no-null.md.
    pass
123
+
124
+
125
@dataclass
class InterpolatedString(Expr):
    """Interpolated string such as `$"foo {x} bar"`.

    The literal text and the interpolated expressions alternate and
    are kept together, in source order, in `parts`.
    """

    # Each item is either a `str` (literal chunk) or an `Expr`
    # (interpolation).
    parts: List[Union[str, Expr]]
134
+
135
+
136
@dataclass
class Identifier(Expr):
    # A name used as an expression; `name` is the identifier text.
    name: str
139
+
140
+
141
@dataclass
class ThisExpr(Expr):
    # Expression node with no fields beyond the inherited `span`.
    # NOTE(review): presumably the `this` keyword, going by the class
    # name -- confirm against the parser.
    pass
144
+
145
+
146
@dataclass
class MemberAccess(Expr):
    # Access of the member named `member` on the expression `obj`.
    # NOTE(review): surface syntax (presumably `obj.member`) is not
    # shown in this file -- confirm against the parser.
    obj: Expr
    member: str
150
+
151
+
152
@dataclass
class Call(Expr):
    # Call expression. Both lists are required constructor arguments
    # (neither has a default), so callers pass them explicitly even
    # when they are empty.
    callee: Expr
    # Explicit generic type arguments supplied at the call site.
    type_args: List[TypeRef]
    # Argument expressions.
    args: List[Expr]
157
+
158
+
159
@dataclass
class CastExpr(Expr):
    """Safe cast, written `(Type) expr`."""

    # Type being cast to.
    target_type: TypeRef
    # Expression being cast.
    expr: Expr
165
+
166
+
167
@dataclass
class UnsafeCastExpr(Expr):
    """Unsafe cast, written `unsafeCast<Type>(expr)`."""

    # Type being cast to.
    target_type: TypeRef
    # Expression being cast.
    expr: Expr
173
+
174
+
175
@dataclass
class UnsafeOverrideExpr(Expr):
    # Wraps a single operand expression.
    # NOTE(review): the surface syntax and semantics of the "unsafe
    # override" form are not described in this file -- confirm against
    # the parser/validator before relying on them.
    expr: Expr
178
+
179
+
180
@dataclass
class DefuzzyExpr(Expr):
    # Wraps a single operand expression.
    # NOTE(review): the surface syntax and semantics of the "defuzzy"
    # form are not described in this file -- confirm against the
    # parser/validator before relying on them.
    expr: Expr
183
+
184
+
185
@dataclass
class EmbedExpr(Expr):
    # Wraps a single operand expression.
    # NOTE(review): the surface syntax and semantics of the "embed"
    # form are not described in this file -- confirm against the
    # parser/validator before relying on them.
    expr: Expr
188
+
189
+
190
@dataclass
class BinaryOp(Expr):
    # Operator spelling, one of:
    #   "+", "-", "*", "/", "%",
    #   "==", "!=", "<", ">", "<=", ">=",
    #   "&&", "||"
    op: str
    # Left-hand and right-hand operands.
    left: Expr
    right: Expr
195
+
196
+
197
@dataclass
class UnaryOp(Expr):
    # Operator spelling: "!", "-" or "+".
    op: str
    # Expression the operator applies to.
    operand: Expr
201
+
202
+
203
@dataclass
class PostfixOp(Expr):
    # Operator spelling: "++" or "--".
    op: str
    # Expression the operator applies to.
    operand: Expr
207
+
208
+
209
@dataclass
class Assignment(Expr):
    # Operator spelling: "=", "+=", "-=", "*=" or "/=".
    op: str
    # Expression being assigned to.
    target: Expr
    # Expression supplying the assigned value.
    value: Expr
214
+
215
+
216
@dataclass
class Parenthesized(Expr):
    # Wraps exactly one inner expression.
    # NOTE(review): presumably an explicit `( expr )` grouping, going
    # by the class name -- confirm against the parser.
    inner: Expr
219
+
220
+
221
@dataclass
class ArrayLiteral(Expr):
    """Inline sequence of expressions, written `[a, b, c]`.

    Serves argmax-cosine calls and similar list-of-vectors operands.
    The node stores only the raw element expressions; the element
    type is inferred where the literal is used.
    """

    # Element expressions in source order; defaults to a fresh empty list.
    elements: List[Expr] = field(default_factory=list)
231
+
232
+
233
@dataclass
class Subscript(Expr):
    """Postfix subscript access, written `target[index]`.

    Covers map lookups (`BEHAVIOR_OF[winner]`) and, in future, array
    indexing. The AST does not say whether the lookup is exact-match,
    cosine-nearest or integer indexing: that is a runtime concern of
    the target type.
    """

    # Expression being subscripted.
    target: Expr
    # Expression inside the brackets.
    index: Expr
244
+
245
+
246
@dataclass
class MapLiteral(Expr):
    """Inline map literal, written `{k1: v1, k2: v2, ...}`.

    Keys and values live in two parallel lists so that the validator's
    generic AST walker reaches every child expression. The empty
    literal `{}` leaves both lists empty.

    A map literal can only occur in expression position (after `=`,
    after `return`, as a function argument, and so on). At statement
    position a bare `{...}` is always parsed as a block, so a map
    literal there has to be wrapped in a declaration or a call.
    """

    # Key expressions in source order; defaults to a fresh empty list.
    keys: List[Expr] = field(default_factory=list)
    # Value expressions, parallel to `keys`; defaults to a fresh empty list.
    values: List[Expr] = field(default_factory=list)
262
+
263
+
264
+ # ============================================================
265
+ # Statements
266
+ # ============================================================
267
+
268
+
269
@dataclass
class Stmt(Node):
    # Base class shared by the statement nodes below; it adds no
    # fields beyond the inherited `span`.
    pass
272
+
273
+
274
@dataclass
class Block(Stmt):
    # Statements contained in the block. Required: there is no default.
    statements: List[Stmt]
277
+
278
+
279
@dataclass
class ExprStmt(Stmt):
    # Statement consisting of a single expression.
    expr: Expr
282
+
283
+
284
@dataclass
class ReturnStmt(Stmt):
    # Returned expression, or None.
    # NOTE(review): None presumably means a bare `return;` -- confirm
    # against the parser.
    value: Optional[Expr]
287
+
288
+
289
@dataclass
class IfStmt(Stmt):
    # Expression deciding which branch applies.
    condition: Expr
    # Block for the "then" side.
    then_branch: Block
    # None, a plain Block, or a nested IfStmt. The nested form lets an
    # if-chain be stored without an intermediate block.
    else_branch: Optional[Union["IfStmt", Block]]
294
+
295
+
296
@dataclass
class WhileStmt(Stmt):
    # Loop condition expression.
    condition: Expr
    # Loop body.
    body: Block
300
+
301
+
302
@dataclass
class ForStmt(Stmt):
    # Initializer: a var decl, an expr stmt, or None.
    init: Optional[Stmt]
    # Condition expression, or None.
    condition: Optional[Expr]
    # Step expression, or None.
    step: Optional[Expr]
    # Loop body.
    body: Block
308
+
309
+
310
@dataclass
class ForeachStmt(Stmt):
    # Declared type of the loop variable; None means it was written
    # with `var`.
    var_type: Optional[TypeRef]
    # Name of the loop variable.
    var_name: str
    # Expression being iterated over.
    iterable: Expr
    # Loop body.
    body: Block
316
+
317
+
318
@dataclass
class LoopStmt(Stmt):
    """The unified `loop` construct of Sutra.

    Accepted forms:
        loop (10) { ... }          → bounded, count is IntLiteral, no index
        loop (10 as i) { ... }     → bounded with index variable
        loop (condition) { ... }   → eigenrotation (convergence-based)

    Which kind a node represents is decided by `count`:
      - `count` set      → bounded loop, unrolled at compile time
      - `count` is None  → condition-based, compiled to geometric rotation
    """
    # Integer expression for bounded loops; None for while-style loops.
    # NOTE(review): the forms above say IntLiteral while the annotation
    # allows any Expr -- confirm which the parser actually produces.
    count: Optional[Expr]
    # Name bound by `as i`; None when no index variable is used.
    index_var: Optional[str]
    # Boolean expression for while-style loops; None for bounded loops.
    condition: Optional[Expr]
    # Loop body.
    body: Block
335
+
336
+
337
@dataclass
class DoWhileStmt(Stmt):
    # Field order is body first, then condition; positional
    # construction must follow it.
    body: Block
    condition: Expr
341
+
342
+
343
@dataclass
class TryStmt(Stmt):
    # Block guarded by the `try`.
    try_body: Block
    # Block for the catch side. The node has no field for a caught
    # value or exception type.
    catch_body: Block
347
+
348
+
349
+
350
+
351
@dataclass
class LoopStateParam(Node):
    """A single state parameter in a loop function declaration.

    It has the same shape as `Param` plus an optional default
    initializer. The default is used when the loop is called without
    a value for this parameter, which is common for accumulator state
    such as a running max, sum or count.
    """
    # Declared type of the parameter.
    type_ref: TypeRef
    # Parameter name.
    name: str
    # Default initializer expression, or None when there is none.
    default: Optional[Expr]
362
+
363
+
364
@dataclass
class LoopFunctionDecl(Node):
    """Declaration of a loop function; there are four kinds.

    Surface syntax is kind keyword, name, paren-list, body:
        do_while addNumber(x < 11, int x) { pass x + 1; }
        while_loop ... (cond, ...state) { ... }
        iterative_loop ... (count, ...state) { ... }
        foreach_loop ... (array, ...state) { ... }

    Meaning of `condition` per kind:
      - do_while / while_loop: a boolean expression over the state
        params; iteration continues until it becomes false.
      - iterative_loop: an integer expression capping the tick count;
        the body reads the current tick through the `iterator` keyword.
      - foreach_loop: an array expression, one element per tick
        (binding details TBD).

    Inside the body, `pass <exprs>;` (PassStmt) is the tail-recursive
    yield.
    """
    # One of "do_while", "while_loop", "iterative_loop", "foreach_loop".
    kind: str
    # Name of the loop function.
    name: str
    # First item of the paren-list; its meaning depends on `kind`.
    condition: Expr
    # State parameters in declaration order.
    state_params: List[LoopStateParam]
    # Loop body.
    body: Block
388
+
389
+
390
@dataclass
class ReplaceMarker(Node):
    """Stand-in for the `replace` keyword inside a `pass` argument list.

    In `pass <expr>, replace, <expr>;` the first and third state
    params are updated to the given expressions, while the second
    keeps its input value. This is how a body updates only some of
    the state params on a tick.
    """
398
+
399
+
400
@dataclass
class PassStmt(Stmt):
    """Tail-recursive yield in a loop body: `pass expr1, expr2, ...;`.

    One value must be supplied per state parameter, in declaration
    order. A value is either an expression (the parameter's new value)
    or a ReplaceMarker (the parameter keeps its input value).

    The statement triggers the next iteration of the enclosing loop
    function. It is forbidden outside a loop function declaration
    body.
    """
    # Per-state-parameter values, in declaration order.
    values: List[Union[Expr, ReplaceMarker]]
412
+
413
+
414
@dataclass
class LoopCallStmt(Stmt):
    """Invocation of a loop function: `loop name(cond_arg, state_arg, ...);`.

    `condition_arg` is evaluated once before the first tick, then
    re-evaluated against the new state on every later tick.

    The state arguments MUST be identifiers, i.e. names of slot
    variables in the caller's scope. When the loop completes, its
    final state values are written back into those caller variables
    (by-reference).
    """
    # Name of the loop function being invoked.
    name: str
    # Condition argument expression.
    condition_arg: Expr
    # Caller-scope variable names passed as state, stored as plain strings.
    state_arg_names: List[str]
427
+
428
+
429
+ # ============================================================
430
+ # Declarations
431
+ # ============================================================
432
+
433
+
434
@dataclass
class Modifiers:
    # Declaration modifier flags; all default to False. This is a
    # plain dataclass, not a Node, so it carries no `span`.
    is_public: bool = False
    is_private: bool = False
    is_static: bool = False
439
+
440
+
441
@dataclass
class Param(Node):
    # Declared type of the parameter.
    type_ref: TypeRef
    # Parameter name.
    name: str
445
+
446
+
447
@dataclass
class VarDecl(Stmt):
    """Variable declaration in any of its surface forms.

    Forms: `var x = ...;`, `const x = ...;`, `TYPE x = ...;`,
    `var x : TYPE;`, `var[N] x : TYPE;`, or `role x = ...;`.

    The 2026-04-22 extensions (colon-syntax, array-slot form and
    role-declaration form) reuse this node type and are told apart by
    extra flags:

    - `is_role`: a semantic role binding. `role X = expr;` yields a
      value that should semantically be treated as a learned matrix
      operator. Learned-matrix binding is deferred, so today it
      behaves exactly like `vector X = expr;`. Once that binding
      lands, this flag is what tells codegen to emit the matrix-fit
      path.
    - `is_var_colon`: declared as `var X : TYPE`, the new
      rotation-bound form, with an optional initializer. Without an
      initializer a zero value of the given type is allocated; this is
      the "var as storage slot" semantics from the surface-syntax
      decision.
    - `array_size`: when not None, a `var[N] X : TYPE` array
      declaration allocating N slots. Its semantics are still pending;
      today codegen simply treats it as a Python list of N
      zero-values.
    """

    # True for `const` declarations.
    is_const: bool
    # True when declared with `var` (type inferred).
    is_var_inferred: bool
    # Declared type; None only when `is_var_inferred` is True.
    type_ref: Optional[TypeRef]
    # Declared name.
    name: str
    # Initializer expression, or None when there is none.
    initializer: Optional[Expr]
    is_role: bool = False
    is_var_colon: bool = False
    array_size: Optional[int] = None
    # NOTE(review): nothing in this file describes `is_slot` -- confirm
    # its meaning against the parser/codegen.
    is_slot: bool = False
480
+
481
+
482
@dataclass
class FunctionDecl(Node):
    # Visibility / static flags.
    modifiers: Modifiers
    # Declared return type.
    return_type: TypeRef
    # Function name; when `is_operator` is set this is the operator
    # spelling, e.g. "+".
    name: str
    # Names of the generic type parameters.
    type_params: List[str]
    # Parameters in declaration order.
    params: List[Param]
    # Function body; an empty Block for `intrinsic` declarations.
    body: Block
    is_operator: bool = False
    is_implicit_conversion: bool = False
    # Set for `intrinsic` declarations, which are signature-only and
    # semicolon-terminated, with no Sutra body. Stdlib files use them
    # for leaf primitives implemented in the runtime class
    # (`_VSA.<name>(...)`).
    is_intrinsic: bool = False
497
+
498
+
499
@dataclass
class MethodDecl(Node):
    # Visibility / static flags.
    modifiers: Modifiers
    # Declared return type.
    return_type: TypeRef
    # Method name.
    name: str
    # Names of the generic type parameters.
    type_params: List[str]
    # Parameters in declaration order.
    params: List[Param]
    # Method body.
    body: Block
    is_operator: bool = False
    # Set for `static intrinsic method ...;` declarations, which are
    # signature-only and semicolon-terminated, with no Sutra body.
    # Stdlib class-as-namespace bodies use them for leaf primitives
    # implemented in the runtime class (`_VSA.<name>(...)`). This is
    # the class-method counterpart of FunctionDecl's `is_intrinsic`.
    is_intrinsic: bool = False
514
+
515
+
516
@dataclass
class ClassDecl(Node):
    """User-defined ontology class: `class Name extends Parent { ... }`.

    Body content: method declarations are accepted inside the class
    body, while field declarations and operator implementations are
    still deferred.

    Methods written in the body are collected on this node's `methods`
    list and are visited by the validator through the existing
    `visit_MethodDecl`. Codegen routing for `ClassName.method(...)`
    dispatch is the next slice and is not wired yet, so until it lands
    a call to a method on a class fails at codegen with a clear
    pointer.

    At runtime an instance of a user class is a plain vector. The
    declaration itself is compile-time metadata: the validator
    registers the class name and follows the extends-chain to resolve
    references in type position.
    """
    # Class name.
    name: str
    # Target of `extends`; required in the MVP.
    parent_name: str
    # Methods declared in the class body; defaults to a fresh empty list.
    methods: List["MethodDecl"] = field(default_factory=list)
    # Loop function declarations attached to the class; defaults to a
    # fresh empty list.
    loop_functions: List["LoopFunctionDecl"] = field(default_factory=list)
540
+
541
+
542
+ # ============================================================
543
+ # Module
544
+ # ============================================================
545
+
546
+
547
# Element type of `Module.items`.
TopLevel = Union[
    FunctionDecl,
    MethodDecl,
    VarDecl,
    Stmt,
    ClassDecl,
]
548
+
549
+
550
@dataclass
class Module:
    # Root container. It does not inherit from Node; it declares its
    # own `span` after `items`, so positional construction is
    # Module(items, span).
    items: List[TopLevel]
    span: SourceSpan