sutra-dev 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2436 @@
1
+ """AST → Python source translator — backend-agnostic base.
2
+
3
+ This module walks a parsed Sutra `Module` and emits Python source.
4
+ Concrete backends (the CPU IR codegen and the PyTorch codegen)
5
+ subclass `BaseCodegen` and override `_emit_prelude` (and a few
6
+ per-backend hook methods for literal lowering — `_char_literal_src`,
7
+ `_embed_expr_src`, `_logical_op_src`, `_bool_literal_src`, etc.) to
8
+ target their specific runtime. The AST walker and the builtin-call
9
+ table are shared across backends.
10
+
11
+ See also `codegen.py` (the canonical CPU IR codegen) and
12
+ `codegen_pytorch.py` (the GPU/PyTorch codegen).
13
+
14
+ Unsupported AST nodes raise `CodegenNotSupported` with the source
15
+ span of the offending node so the CLI can print a compiler-style
16
+ `line:col` diagnostic instead of silently emitting wrong Python.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ from typing import List, Optional
22
+
23
+ from . import ast_nodes as ast
24
+
25
+
26
# Immutable set of math-function names.
# NOTE(review): nothing in the visible part of this module reads this set;
# presumably the call-translation path consults it to reject or reroute
# these functions — confirm against the rest of the file.
_TRANSCENDENTALS_DISABLED = frozenset({
    "log", "sqrt", "exp", "sin", "cos", "tan", "pow",
})
29
+
30
+
31
+ # ============================================================
32
+ # Error type
33
+ # ============================================================
34
+
35
+
36
def _is_bind_call(expr) -> bool:
    """Return True when `expr` is a direct two-argument `bind(...)` call.

    Only a `Call` whose callee is the bare identifier `bind` matches; a
    `_VSA.bind(...)` reached through a MemberAccess does not (those do
    not appear in .su source). Used by the fused bundle-of-binds
    lowering in `_translate_call`.
    """
    if not isinstance(expr, ast.Call):
        return False
    callee = expr.callee
    if not isinstance(callee, ast.Identifier):
        return False
    return callee.name == "bind" and len(expr.args) == 2
45
+
46
+
47
class CodegenNotSupported(Exception):
    """Signals that the translator met an AST node it cannot lower.

    The exception text is prefixed with the `line:column` of the start
    of the offending node's span, giving the CLI a compiler-style
    diagnostic. The span does not carry the file path (that lives on
    `Diagnostic` in the parser's diagnostic bag), so a caller that
    knows the source path should prepend it when formatting for the
    user.

    Attributes:
        node: the AST node that could not be lowered.
        message: the bare message, without the location prefix.
    """

    def __init__(self, node: ast.Node, message: str):
        self.node = node
        self.message = message
        start = node.span.start
        location = f"{start.line}:{start.column}"
        super().__init__(f"{location}: codegen: {message}")
64
+
65
+
66
+ # ============================================================
67
+ # Builtin name → Python expression template
68
+ # ============================================================
69
+ #
70
+ # Each entry maps a Sutra builtin identifier to a callable that takes
71
+ # the already-translated argument strings and returns the Python
72
+ # expression to emit. Keeping this as a single table means the list of
73
+ # supported builtins is easy to audit against `planning/sutra-spec/21-builtins.md`.
74
+
75
+ def _builtin_basis_vector(args: List[str]) -> str:
76
+ return f"_VSA.embed({args[0]})"
77
+
78
+
79
+ def _builtin_permutation_key(args: List[str]) -> str:
80
+ return f"_VSA.make_sign_flip_key({args[0]})"
81
+
82
+
83
+ def _builtin_permute(args: List[str]) -> str:
84
+ return f"_VSA.sign_flip({args[0]}, {args[1]})"
85
+
86
+
87
+ def _builtin_bind(args: List[str]) -> str:
88
+ return f"_VSA.bind({args[0]}, {args[1]})"
89
+
90
+
91
+ def _builtin_unbind(args: List[str]) -> str:
92
+ return f"_VSA.unbind({args[0]}, {args[1]})"
93
+
94
+
95
+ def _builtin_bundle(args: List[str]) -> str:
96
+ return f"_VSA.bundle({', '.join(args)})"
97
+
98
+
99
+ def _builtin_zero_vector(args: List[str]) -> str:
100
+ # Zero vector in the runtime's d-dim substrate. Produced by the
101
+ # simplifier for `displacement(a, a)` and as an absorption element
102
+ # for bundle/addition. Not yet user-callable from .su, but the
103
+ # builtin path is ready for it.
104
+ return "_VSA.zero_vector()"
105
+
106
+
107
+ def _builtin_displacement(args: List[str]) -> str:
108
+ # displacement(a, b) = a - b (vector subtraction).
109
+ # Matches the cartography-paper primitive: a displacement is the
110
+ # rank-0 case of a learned role matrix. king - man + woman is
111
+ # expressed as bundle(displacement(king, man), woman).
112
+ return f"({args[0]} - {args[1]})"
113
+
114
+
115
+ def _builtin_similarity(args: List[str]) -> str:
116
+ return f"_VSA.similarity({args[0]}, {args[1]})"
117
+
118
+
119
+ def _and_chain(parts: List[str]) -> str:
120
+ """Fold `parts` into a left-associative `_VSA.logical_and` chain.
121
+ `[x, y, z]` -> `_VSA.logical_and(_VSA.logical_and(x, y), z)`."""
122
+ if len(parts) == 0:
123
+ return "_VSA.make_truth(1.0)"
124
+ if len(parts) == 1:
125
+ return parts[0]
126
+ expr = parts[0]
127
+ for p in parts[1:]:
128
+ expr = f"_VSA.logical_and({expr}, {p})"
129
+ return expr
130
+
131
+
132
def _builtin_equals(args: List[str]) -> str:
    """Lower N-ary `Equals(a, b, c, ...)` to a chain of adjacent comparisons.

    Each neighbouring pair becomes `_VSA.eq(left, right)` and the pairs
    are fuzzy-ANDed together via `_and_chain`. With fewer than two
    arguments the result is `_VSA.make_truth(1.0)`. The parser's
    chained-comparison reduction produces this for `a == b == c == ...`.
    """
    if len(args) < 2:
        return "_VSA.make_truth(1.0)"
    comparisons = [
        "_VSA.eq({}, {})".format(left, right)
        for left, right in zip(args, args[1:])
    ]
    return _and_chain(comparisons)
141
+
142
+
143
def _builtin_has_order(args: List[str]) -> str:
    """Lower `hasOrder(a, b, c, ...)` to a strict-ascending check.

    Each adjacent pair `(left, right)` becomes `_VSA.gt(right, left)`:
    the runtime's `_VSA.gt` is intrinsic, and `a < b` is just `b > a`.
    The pairs are fuzzy-ANDed via `_and_chain`. With fewer than two
    arguments the result is `_VSA.make_truth(1.0)`. The parser's
    chained-comparison reduction produces this for `a < b < c < ...`
    (or `a > b > c > ...` with the args reversed, so the reduction is
    always ascending).
    """
    if len(args) < 2:
        return "_VSA.make_truth(1.0)"
    comparisons = [
        "_VSA.gt({}, {})".format(right, left)
        for left, right in zip(args, args[1:])
    ]
    return _and_chain(comparisons)
154
+
155
+
156
def _builtin_has_order_or_equal(args: List[str]) -> str:
    """Lower `hasOrderOrEqual(a, b, c, ...)`, the non-strict ascending check.

    For now this delegates to `_builtin_has_order`: the current K3-tanh
    `<=` collapses to `<` (both produce tanh(0)=0 on exact ties). The
    body is meant to switch once a real `le` semantics lands.
    """
    strict_form = _builtin_has_order(args)
    return strict_form
162
+
163
+
164
+
165
+
166
+ def _builtin_snap(args: List[str]) -> str:
167
+ return f"_VSA.snap({args[0]})"
168
+
169
+
170
+ def _builtin_identity_permutation(args: List[str]) -> str:
171
+ return "_np.ones(_VSA.dim)"
172
+
173
+
174
+ def _builtin_argmax_cosine(args: List[str]) -> str:
175
+ return f"_argmax_cosine({args[0]}, {args[1]})"
176
+
177
+
178
+ def _builtin_select(args: List[str]) -> str:
179
+ # Spec: planning/sutra-spec/26-select-and-gate.md.
180
+ # `select(scores, options)` is softmax-weighted superposition — the
181
+ # named conditional-branching primitive. No defuzz; the result is a
182
+ # vector usable as the input to further operations.
183
+ return f"_select_softmax({args[0]}, {args[1]})"
184
+
185
+
186
+ def _builtin_compose(args: List[str]) -> str:
187
+ # `compose` over the permutation primitive class is elementwise
188
+ # multiplication of the two underlying ±1 mask vectors.
189
+ return f"({args[0]} * {args[1]})"
190
+
191
+
192
+ def _builtin_make_rotation(args: List[str]) -> str:
193
+ # make_rotation(angle, n_planes) → orthogonal matrix
194
+ if len(args) == 1:
195
+ return f"_VSA.make_random_rotation(angle={args[0]})"
196
+ return f"_VSA.make_random_rotation(angle={args[0]}, n_planes={args[1]})"
197
+
198
+
199
+ def _builtin_compile_prototypes(args: List[str]) -> str:
200
+ return f"_VSA.compile_prototypes({args[0]})"
201
+
202
+
203
+ def _builtin_geometric_loop(args: List[str]) -> str:
204
+ # geometric_loop(initial_state, rotation, compiled_prototypes)
205
+ # Optional 4th arg: target_name
206
+ if len(args) >= 4:
207
+ return (f"_VSA.loop({args[0]}, {args[1]}, {args[2]}, "
208
+ f"target_name={args[3]})")
209
+ return f"_VSA.loop({args[0]}, {args[1]}, {args[2]})"
210
+
211
+
212
+ def _builtin_real_number(args: List[str]) -> str:
213
+ # Canonical-axis constructor: a scalar real number as an extended-
214
+ # state vector with x at synthetic[0], zeros elsewhere. Part of the
215
+ # int/float/complex shared-axis allocation — see project memory
216
+ # project_sutra_complex_numbers_first_class.md.
217
+ return f"_VSA.make_real({args[0]})"
218
+
219
+
220
+ def _builtin_complex_number(args: List[str]) -> str:
221
+ # Canonical-axis constructor: a complex number with re at
222
+ # synthetic[0] and im at synthetic[1]. Sutra's first-class complex.
223
+ return f"_VSA.make_complex({args[0]}, {args[1]})"
224
+
225
+
226
+ def _builtin_truth_value(args: List[str]) -> str:
227
+ # Canonical-axis constructor: a scalar truth value at synthetic[2].
228
+ # Higher = more true; 0 = neither; negative = false-leaning. The
229
+ # axis is orthogonal to real/imag by construction.
230
+ return f"_VSA.make_truth({args[0]})"
231
+
232
+
233
# Builtin dispatch table: Sutra builtin name -> (lowering function, arg count).
# The lowering function receives the already-translated argument strings
# and returns the Python expression text to emit. The second element is
# the number of arguments the builtin takes; `None` marks entries whose
# count varies (see the per-entry notes).
# NOTE(review): where and how the count is enforced is not visible in
# this part of the file — confirm against `_translate_call`.
BUILTINS = {
    "basis_vector": (_builtin_basis_vector, 1),
    "permutation_key": (_builtin_permutation_key, 1),
    "identity_permutation": (_builtin_identity_permutation, 0),
    "permute": (_builtin_permute, 2),
    "bind": (_builtin_bind, 2),
    "unbind": (_builtin_unbind, 2),
    "bundle": (_builtin_bundle, None),  # variadic, at least 1
    "zero_vector": (_builtin_zero_vector, 0),
    "displacement": (_builtin_displacement, 2),  # a - b (vector subtract)
    "similarity": (_builtin_similarity, 2),
    # Chained-comparison reductions; each takes any number of operands.
    "Equals": (_builtin_equals, None),
    "hasOrder": (_builtin_has_order, None),
    "hasOrderOrEqual": (_builtin_has_order_or_equal, None),
    "snap": (_builtin_snap, 1),
    "argmax_cosine": (_builtin_argmax_cosine, 2),
    "select": (_builtin_select, 2),
    "compose": (_builtin_compose, 2),
    "make_rotation": (_builtin_make_rotation, None),  # 1-2 args
    "compile_prototypes": (_builtin_compile_prototypes, 1),
    "geometric_loop": (_builtin_geometric_loop, None),  # 3-4 args
    # Canonical-axis constructors. Lower to _VSA.make_real / make_complex /
    # make_truth — runtime methods provided by the _VSA runtime class.
    # A backend that doesn't implement them will fail at runtime with a
    # clear AttributeError.
    "real_number": (_builtin_real_number, 1),
    "complex_number": (_builtin_complex_number, 2),
    "truth_value": (_builtin_truth_value, 1),
}
262
+
263
+
264
+ # ============================================================
265
+ # Translator
266
+ # ============================================================
267
+
268
+
269
+ class BaseCodegen:
270
+ """Stateful walker that emits Python source for one Sutra module.
271
+
272
+ Instances are single-use — call `translate(module)` and then read
273
+ `.output`. Not thread-safe, not reusable.
274
+ """
275
+
276
+ def __init__(self, *, runtime_dim: int = 50, runtime_seed: int = 42,
277
+ runtime_n_kc: int = 2000,
278
+ runtime_use_hemibrain: bool = False,
279
+ loop_max_iterations: int = 50) -> None:
280
+ self.runtime_dim = runtime_dim
281
+ self.runtime_seed = runtime_seed
282
+ self.runtime_n_kc = runtime_n_kc
283
+ self.runtime_use_hemibrain = runtime_use_hemibrain
284
+ # Compile-time loop unroll depth. Defaults to 50 but is
285
+ # configurable via the CLI's --loop-T flag and via
286
+ # [project.compile] loop_max_iterations in atman.toml. Larger
287
+ # values cost a longer emitted tensor-op graph but no runtime
288
+ # overhead beyond unroll length, since the soft-halt cell
289
+ # freezes state once halt-cum saturates.
290
+ self._LOOP_T = loop_max_iterations
291
+ self._lines: List[str] = []
292
+ self._indent = 0
293
+ # Maps variable names to the *key* type of a map-typed declaration
294
+ # so subscript expressions know whether to use the identity-based
295
+ # vector-map helper or a plain dict lookup.
296
+ self._map_key_type: dict[str, str] = {}
297
+ # Set of variable names declared with type `dict<K, V>`. A dict
298
+ # in Sutra is a rotation-hashmap — subscript access (d[k])
299
+ # dispatches to _VSA.hashmap_get, assignment (d[k] = v)
300
+ # dispatches to _VSA.hashmap_set (functional update).
301
+ self._dict_declared: set[str] = set()
302
+ # Set of variable names declared with type `Axon`. An axon's
303
+ # instance methods route specially: `a.add(k, v)` (statement)
304
+ # rebinds `a` to `_VSA.axon_add(a, k, v)`; `a.item(k)`
305
+ # (expression) emits `_VSA.axon_item(a, k)`. See
306
+ # planning/sutra-spec/axons.md.
307
+ self._axon_declared: set[str] = set()
308
+ # For each axon-typed local in the current function scope,
309
+ # the set of literal-string keys whose `.add(K, V);` statements
310
+ # are elidable (never read via `.item(K)` and the axon doesn't
311
+ # escape the function). Populated by `_compute_axon_elision`
312
+ # at function entry; consumed by `_translate_stmt` on
313
+ # `obj.add(K, V);` to skip emission. The spec calls this
314
+ # "the compiler treats `a.item(k) = v` as SSA-rename when no
315
+ # boundary crossing forces materialization" — see
316
+ # planning/sutra-spec/axons.md §"The mutating-looking syntax
317
+ # is sugar; the compiler usually elides the axon entirely."
318
+ self._axon_elide_keys: dict[str, set[str]] = {}
319
+ # Maps (class_name, method_name) -> return type name, for class
320
+ # methods declared in user code or in the stdlib. Used by the
321
+ # general void-method-as-augmented-assignment dispatch:
322
+ # `obj.m(args);` (statement) where m returns void emits
323
+ # `obj = Class_m(obj, args)`. Populated alongside the
324
+ # _class_static_methods / _class_instance_methods registers.
325
+ self._class_method_return_types: dict[tuple[str, str], str] = {}
326
+ # Maps variable names to their declared primitive-class type
327
+ # string (`"complex"`, `"int"`, `"fuzzy"`, ...). Used by
328
+ # `*` dispatch: if either operand is known to be a complex,
329
+ # the BinaryOp lowers to _VSA.complex_mul instead of Python
330
+ # element-wise multiply. Populated in _translate_var_decl.
331
+ self._var_type: dict[str, str] = {}
332
+ # Per-function-scope slot-table: maps slot-declared variable
333
+ # name -> slot index. Populated when a `slot TYPE name = expr;`
334
+ # is translated; reset at function entry. Each slot variable
335
+ # gets a unique 2D Givens plane in the function-scope
336
+ # `_slot_state` vector. Used by the Identifier emit path
337
+ # (slot var -> _VSA.slot_load) and the Assignment emit path
338
+ # (target is slot var -> _VSA.slot_store + reassign).
339
+ self._slot_vars: dict[str, int] = {}
340
+ # When unrolling a `loop (N) { ... }` with N a compile-time
341
+ # integer literal, this is set to the current iteration's value
342
+ # (1-based: 1, 2, ..., N) before each copy of the body is
343
+ # translated. The Identifier translation path checks this when
344
+ # it sees the name `iterator` and substitutes the constant.
345
+ # Outside an unrolling context this stays None, and a reference
346
+ # to `iterator` raises CodegenNotSupported.
347
+ self._iterator_value: Optional[int] = None
348
+ # Set to True while translating an `iterative_loop` function
349
+ # body, so the `iterator` keyword translates to the runtime
350
+ # Python local `_iterator` rather than a compile-time constant.
351
+ # Restored on exit.
352
+ self._iterator_runtime_in_scope: bool = False
353
+ # Set to True while translating a `foreach_loop` function body,
354
+ # so the `element` keyword translates to the runtime Python local
355
+ # `_element` (the current array element this tick).
356
+ self._element_runtime_in_scope: bool = False
357
+ self._loop_state_stack: List[tuple[str, List[str]]] = []
358
+ # Registry of loop function declarations seen so far in the
359
+ # module, name -> LoopFunctionDecl. Used by LoopCallStmt
360
+ # translation to look up the state-param shape for the call's
361
+ # writeback. Populated in _translate_loop_function_decl.
362
+ self._loop_decls: dict[str, "ast.LoopFunctionDecl"] = {}
363
+ # Current function's return type name (e.g. "vector", "string").
364
+ # Set in _translate_function_decl / _translate_loop_function_decl.
365
+ # Halt-propagation (`return value * _program_halt`) only applies
366
+ # when the return is a vector — strings/ints/bools cannot be
367
+ # multiplied by a float halt accumulator.
368
+ self._current_return_type: str | None = None
369
+ # Static methods declared inside class bodies. Maps
370
+ # class_name -> set of static method names. Populated by
371
+ # _translate_top_level when seeing a ClassDecl. Used by
372
+ # _translate_call to dispatch `Math.foo(x)` to the mangled
373
+ # top-level function `Math_foo(x)` that the codegen emits.
374
+ # Non-static class methods are tracked separately and rejected
375
+ # at call time today (instance dispatch isn't wired yet).
376
+ self._class_static_methods: dict[str, set[str]] = {}
377
+ # Intrinsic static methods declared inside class bodies. These
378
+ # have no Sutra body — the runtime class implements them. A
379
+ # call `Math.log(x)` for an intrinsic dispatches directly to
380
+ # `_VSA.log(x)` without going through a mangled wrapper.
381
+ self._class_intrinsic_methods: dict[str, set[str]] = {}
382
+ # Non-static (instance) methods declared inside class bodies.
383
+ # Same shape as _class_static_methods. Calls of the form
384
+ # `this.method(args)` from inside another method on the same
385
+ # class dispatch to `{Class}_{method}(this, *args)`. Top-level
386
+ # `Class.method(instance, args)` also works the same way (the
387
+ # instance is the explicit first arg). True instance-syntax
388
+ # dispatch (`g.method(args)` for a typed variable `g`) needs
389
+ # variable type tracking which isn't wired today.
390
+ self._class_instance_methods: dict[str, set[str]] = {}
391
+ # Name of the class whose method body is currently being
392
+ # emitted. Used by `this.method(args)` dispatch to know which
393
+ # class to mangle with. None when not inside a class method.
394
+ self._current_class_name: Optional[str] = None
395
+
396
+ # -- emission helpers -------------------------------------------------
397
+
398
+ def _emit(self, line: str = "") -> None:
399
+ if line:
400
+ self._lines.append(" " * self._indent + line)
401
+ else:
402
+ self._lines.append("")
403
+
404
+ @property
405
+ def output(self) -> str:
406
+ return "\n".join(self._lines) + "\n"
407
+
408
+ def _emit_select_helper(self) -> None:
409
+ """Emit `_select_softmax(scores, options)` — the runtime for the
410
+ spec-level `select` primitive (planning/sutra-spec/26-select-and-gate.md).
411
+ Softmax weights, weighted sum of option vectors, no defuzz."""
412
+ self._emit("def _select_softmax(scores, options):")
413
+ self._indent += 1
414
+ self._emit('"""Softmax-weighted superposition of option vectors."""')
415
+ self._emit("s = _np.asarray(scores, dtype=float)")
416
+ self._emit("s = s - _np.max(s)")
417
+ self._emit("w = _np.exp(s)")
418
+ self._emit("w = w / _np.sum(w)")
419
+ self._emit("opts = _np.asarray(options, dtype=float)")
420
+ self._emit("return (w[:, None] * opts).sum(axis=0)")
421
+ self._indent -= 1
422
+
423
+ # -- public entry point -----------------------------------------------
424
+
425
+ def translate(self, module: ast.Module) -> str:
426
+ self._emit_prelude()
427
+ self._emit()
428
+ # Pre-pass A: pull in stdlib class intrinsics (e.g.
429
+ # `Tensor.MatrixMul`, `Tensor.matmul`, etc.) so namespaced
430
+ # stdlib calls dispatch to `_VSA.<name>` even though the
431
+ # stdlib class isn't declared in the user's module AST.
432
+ try:
433
+ from .stdlib_loader import stdlib_class_intrinsic_methods
434
+ for cls_name, method_names in stdlib_class_intrinsic_methods().items():
435
+ self._class_static_methods.setdefault(
436
+ cls_name, set()
437
+ ).update(method_names)
438
+ self._class_intrinsic_methods.setdefault(
439
+ cls_name, set()
440
+ ).update(method_names)
441
+ except Exception:
442
+ # If stdlib loading fails for any reason, fall back to
443
+ # user-class-only dispatch. Stdlib failures show up
444
+ # elsewhere with clearer diagnostics.
445
+ pass
446
+ # Pre-pass B: register every class's method names so call sites
447
+ # can dispatch even when the class declaration comes after the
448
+ # calling function in the file. Static methods land in
449
+ # _class_static_methods (intrinsic ones also in
450
+ # _class_intrinsic_methods so the call site goes to _VSA.<name>
451
+ # directly). Non-static methods land in _class_instance_methods
452
+ # so `this.method(args)` from inside another method on the same
453
+ # class can dispatch.
454
+ for item in module.items:
455
+ if isinstance(item, ast.ClassDecl):
456
+ for m in item.methods:
457
+ if m.is_operator or m.type_params:
458
+ continue
459
+ # Track return type for the augmented-assignment
460
+ # rule: void-returning instance methods called as
461
+ # statements rebind their receiver.
462
+ if m.return_type is not None:
463
+ self._class_method_return_types[(item.name, m.name)] = (
464
+ m.return_type.name
465
+ )
466
+ if m.modifiers.is_static:
467
+ self._class_static_methods.setdefault(
468
+ item.name, set()
469
+ ).add(m.name)
470
+ if m.is_intrinsic:
471
+ self._class_intrinsic_methods.setdefault(
472
+ item.name, set()
473
+ ).add(m.name)
474
+ else:
475
+ self._class_instance_methods.setdefault(
476
+ item.name, set()
477
+ ).add(m.name)
478
+ for item in module.items:
479
+ self._translate_top_level(item)
480
+ self._emit()
481
+ return self.output
482
+
483
+ # -- prelude ----------------------------------------------------------
484
+
485
+ def _emit_prelude(self) -> None:
486
+ """Emit the top-of-module prelude for this backend.
487
+
488
+ Each concrete backend (CPU IR, PyTorch) is responsible for
489
+ importing its runtime, instantiating the _VSA class, and
490
+ emitting any helper functions the translator references
491
+ (`_argmax_cosine`, `_select_softmax`, `_vector_map_lookup`, ...).
492
+ Called from `translate(module)` before the top-level walk.
493
+ """
494
+ raise NotImplementedError(
495
+ "_emit_prelude must be implemented by a concrete backend subclass"
496
+ )
497
+
498
+ # -- top level --------------------------------------------------------
499
+
500
def _translate_top_level(self, item: ast.TopLevel) -> None:
    """Dispatch one top-level module item to its translator.

    The type checks run in a fixed order: VarDecl, FunctionDecl,
    LoopFunctionDecl, MethodDecl, ClassDecl, then any other Stmt.

    Raises:
        CodegenNotSupported: for a bare `MethodDecl`, or for an item
            that matches none of the handled kinds.
    """
    if isinstance(item, ast.VarDecl):
        self._translate_var_decl(item, at_top_level=True)
        return
    if isinstance(item, ast.FunctionDecl):
        self._translate_function_decl(item)
        return
    if isinstance(item, ast.LoopFunctionDecl):
        self._translate_loop_function_decl(item)
        return
    if isinstance(item, ast.MethodDecl):
        raise CodegenNotSupported(
            item, "method declarations are not supported by the V1 codegen"
        )
    if isinstance(item, ast.ClassDecl):
        # All methods are emitted first, then the class's loop functions.
        for method in item.methods:
            self._translate_class_method(item.name, method)
        for loop_fn in item.loop_functions:
            self._translate_loop_function_decl(loop_fn, class_name=item.name)
        return
    if isinstance(item, ast.Stmt):
        # Statements at top level (ExprStmt, etc.) — lower as a stmt.
        self._translate_stmt(item)
        return
    raise CodegenNotSupported(
        item, f"unsupported top-level item: {type(item).__name__}"
    )
524
+
525
+ # -- declarations -----------------------------------------------------
526
+
527
+ def _fuzzy_literal_init_src(self, decl: ast.VarDecl) -> str | None:
528
+ """Hook: emit a fuzzy-typed var decl whose initializer is a literal.
529
+
530
+ Per 2026-04-23 design, `fuzzy x = 0.7;` is conceptually
531
+ `fuzzy x = true * 0.7;` — a truth-axis vector scaled by 0.7.
532
+ The scalar-times-true folds at compile time to a single
533
+ vector allocation on the truth axis. Backends that have a
534
+ truth-axis runtime override this to emit `_VSA.make_truth(v)`.
535
+
536
+ Returns the full assignment RHS string (e.g.
537
+ `"_VSA.make_truth(0.7)"`) if the rewrite applies, or None to
538
+ fall through to the default codegen path. Base returns None.
539
+ """
540
+ return None
541
+
542
+ def _translate_var_decl(self, decl: ast.VarDecl, *, at_top_level: bool) -> None:
543
+ if decl.is_slot:
544
+ if at_top_level:
545
+ raise CodegenNotSupported(
546
+ decl,
547
+ "slot declarations are only valid at function scope; "
548
+ "top-level slot vars don't have a state vector to "
549
+ "thread through.",
550
+ )
551
+ slot_idx = len(self._slot_vars)
552
+ self._slot_vars[decl.name] = slot_idx
553
+ init_src = (
554
+ self._translate_expr(decl.initializer)
555
+ if decl.initializer is not None
556
+ else "0.0"
557
+ )
558
+ self._emit(
559
+ f"_slot_state = _VSA.slot_store(_slot_state, {slot_idx}, "
560
+ f"{init_src})"
561
+ )
562
+ return
563
+
564
+ # Track map<K, V> declarations so that a later subscript on this
565
+ # name can dispatch to the right lookup helper.
566
+ if decl.type_ref is not None and decl.type_ref.name == "map":
567
+ if len(decl.type_ref.type_args) >= 1:
568
+ self._map_key_type[decl.name] = decl.type_ref.type_args[0].name
569
+ # Record the declared type so binary-op dispatch can reason
570
+ # about the value's primitive class later. Needed for `*`
571
+ # to route complex multiplication through _VSA.complex_mul
572
+ # instead of Python element-wise multiply.
573
+ if decl.type_ref is not None:
574
+ self._var_type[decl.name] = decl.type_ref.name
575
+ # Track dict<K, V> declarations so that d[k] / d[k] = v
576
+ # dispatch to the rotation-hashmap runtime.
577
+ if decl.type_ref is not None and decl.type_ref.name == "dict":
578
+ self._dict_declared.add(decl.name)
579
+ # Uninitialized `dict<K, V> d;` emits `d = _VSA.hashmap_new()`.
580
+ # Initialized form falls through to the initializer translation.
581
+ if decl.initializer is None:
582
+ self._emit(f"{decl.name} = _VSA.hashmap_new()")
583
+ return
584
+ # Track Axon declarations so that a.add(...) / a.item(...) on
585
+ # the typed local route to the runtime axon methods.
586
+ if decl.type_ref is not None and decl.type_ref.name == "Axon":
587
+ self._axon_declared.add(decl.name)
588
+ if decl.initializer is None:
589
+ self._emit(f"{decl.name} = _VSA.axon_new()")
590
+ return
591
+
592
+ fuzzy_src = self._fuzzy_literal_init_src(decl)
593
+ if fuzzy_src is not None:
594
+ self._emit(f"{decl.name} = {fuzzy_src}")
595
+ return
596
+
597
+ # `int x = wait;` — explicit deferred initializer. The
598
+ # validator enforces that a real assignment happens before
599
+ # any read of `x`, so the value emitted here is a placeholder.
600
+ # We reuse the same zero-of-type emission used for the
601
+ # uninitialized var-colon form: same lowering, different
602
+ # ergonomics (the `wait` keyword is the explicit signal in
603
+ # source). Both backends inherit this path; only the validator
604
+ # treats `wait` differently from "no initializer."
605
+ is_wait_init = isinstance(decl.initializer, ast.WaitLiteral)
606
+
607
+ if (decl.initializer is None and decl.is_var_colon) or is_wait_init:
608
+ type_name = decl.type_ref.name if decl.type_ref is not None else "vector"
609
+ # Vector types get a zero d-dim array per slot.
610
+ if type_name == "vector":
611
+ if decl.array_size is not None:
612
+ self._emit(
613
+ f"{decl.name} = [_np.zeros(_VSA.dim) "
614
+ f"for _ in range({decl.array_size})]"
615
+ )
616
+ else:
617
+ self._emit(f"{decl.name} = _np.zeros(_VSA.dim)")
618
+ return
619
+ # Fuzzy / bool / trit / complex are (per spec target) scalars
620
+ # on canonical axes. `trit` defaults to 0 — "explicit
621
+ # neutrality," the first-class neutral on the truth axis.
622
+ # `complex` defaults to 0+0i — the origin of the plane.
623
+ # Until the full runtime lands for these in every backend,
624
+ # use a plain float zero as the placeholder; the numpy /
625
+ # pytorch backends' make_truth / make_complex paths are
626
+ # used by initialized declarations.
627
+ if type_name in ("fuzzy", "bool", "int", "scalar", "number",
628
+ "trit", "complex"):
629
+ if decl.array_size is not None:
630
+ self._emit(f"{decl.name} = [0.0] * {decl.array_size}")
631
+ else:
632
+ self._emit(f"{decl.name} = 0.0")
633
+ return
634
+ # Unknown colon-typed slot — fall through to the uninitialized
635
+ # error below with a clearer message.
636
+
637
+ if decl.initializer is None:
638
+ raise CodegenNotSupported(
639
+ decl,
640
+ f"uninitialized declaration `{decl.name}` is only supported "
641
+ f"for `var x : TYPE;` with TYPE in (vector, fuzzy, bool, "
642
+ f"int, scalar). Add an initializer or use a supported type."
643
+ )
644
+ init_src = self._translate_expr(decl.initializer, map_key_type=(
645
+ decl.type_ref.type_args[0].name
646
+ if decl.type_ref is not None
647
+ and decl.type_ref.name == "map"
648
+ and len(decl.type_ref.type_args) >= 1
649
+ else None
650
+ ))
651
+ # `role x = expr;` for now emits identical code to `vector x = expr;`.
652
+ # When learned-matrix binding lands (STATUS "Deferred"), the is_role
653
+ # flag will switch this branch to emit the matrix-fit path instead.
654
+ # `var[N] x = expr;` with an initializer would need a
655
+ # broadcast-or-replicate semantics that is not yet specified;
656
+ # reject for now so the spec work lands before the codegen does.
657
+ if decl.array_size is not None and decl.initializer is not None:
658
+ raise CodegenNotSupported(
659
+ decl,
660
+ f"`var[{decl.array_size}] {decl.name} = ...;` initialized "
661
+ "array declarations are not yet specified. Use "
662
+ f"`var[{decl.array_size}] {decl.name} : TYPE;` for a "
663
+ "zero-initialized slot array."
664
+ )
665
+ self._emit(f"{decl.name} = {init_src}")
666
+
667
def _translate_class_method(self, class_name: str, decl: ast.MethodDecl) -> None:
    """Emit a class-body method as a mangled top-level Python function.

    A method `m` of class `C` is emitted as `def C_m(...)` at the
    current indent level. For example
    `class Math { static method scalar twice(x) {...} }` emits
    `def Math_twice(x): ...`, and call sites of the form
    `Math.twice(5)` are routed to that mangled name in
    `_translate_call`.

    Non-static methods are emitted the same way, with `this` inserted
    as an implicit first parameter.

    Intrinsic methods (declared `static intrinsic method ...;`) have
    no Sutra body — the runtime class implements them — so nothing is
    emitted for them; they are only registered in
    `_class_intrinsic_methods` so call-site dispatch can route
    `Math.log(x)` to `_VSA.log(x)`.

    The per-function state (`_slot_vars`, `_current_return_type`,
    `_current_class_name`) and the indent level are restored when the
    body has been emitted, including when translating a statement
    raises.

    Raises:
        CodegenNotSupported: for operator methods, generic methods,
            and non-static intrinsic methods.
    """
    if decl.is_operator:
        raise CodegenNotSupported(
            decl,
            "operator method declarations are not supported by the V1 codegen",
        )
    if decl.type_params:
        raise CodegenNotSupported(
            decl,
            "generic method declarations are not supported by the V1 codegen",
        )
    is_static = decl.modifiers.is_static
    # Register in the lookup tables the call-dispatch path consults.
    if is_static:
        self._class_static_methods.setdefault(class_name, set()).add(decl.name)
        if decl.is_intrinsic:
            # Signature-only declaration; the runtime class implements
            # the body, so no function is emitted. The registration is
            # repeated here so this method also works on its own.
            self._class_intrinsic_methods.setdefault(
                class_name, set()
            ).add(decl.name)
            return
    else:
        if decl.is_intrinsic:
            raise CodegenNotSupported(
                decl,
                f"non-static intrinsic methods are not supported — "
                f"intrinsics live on the runtime class which has no "
                f"per-instance state. Mark `{class_name}.{decl.name}` "
                f"as `static intrinsic method` instead.",
            )
        self._class_instance_methods.setdefault(class_name, set()).add(decl.name)

    # Non-static methods get `this` as an implicit first parameter.
    # Static methods don't.
    param_names = [p.name for p in decl.params]
    if not is_static:
        param_names = ["this", *param_names]
    mangled = f"{class_name}_{decl.name}"
    self._emit(f"def {mangled}({', '.join(param_names)}):")
    self._indent += 1
    # Swap in fresh per-function state; the outer values are put back
    # in the `finally` below.
    outer_slot_vars = self._slot_vars
    self._slot_vars = {}
    outer_return_type = self._current_return_type
    self._current_return_type = (
        decl.return_type.name if decl.return_type else None
    )
    outer_class_name = self._current_class_name
    self._current_class_name = class_name
    try:
        self._emit("_program_halt = 1.0")
        if _has_slot_decl(decl.body):
            self._emit("_slot_state = _VSA.zero_vector()")
        if not decl.body.statements:
            # Kept so the emitted output is unchanged; the body already
            # holds the `_program_halt` assignment.
            self._emit("pass")
        else:
            for stmt in decl.body.statements:
                self._translate_stmt(stmt)
    finally:
        self._slot_vars = outer_slot_vars
        self._current_return_type = outer_return_type
        self._current_class_name = outer_class_name
        self._indent -= 1
746
+
747
def _translate_function_decl(self, decl: ast.FunctionDecl) -> None:
    """Emit a Sutra function declaration as a Python `def`.

    The emitted function is named `decl.name` and takes the declared
    parameter names. Its body starts with `_program_halt = 1.0`,
    followed by a `_slot_state` zero vector when the body contains a
    slot declaration, then the translated statements (or `pass` for an
    empty body).

    Typed parameters are recorded in `_var_type`, and `Axon`-typed ones
    also in `_axon_declared`, so receiver-based dispatch inside the
    body can resolve calls such as `a.item("k")`.

    While the body is translated, `_slot_vars`, `_current_return_type`
    and `_axon_elide_keys` are swapped to per-function values. They
    (and the indent level) are restored even if translating the body
    raises.

    Args:
        decl: The function declaration to translate.

    Raises:
        CodegenNotSupported: for operator declarations and generic
            function declarations.
    """
    if decl.is_operator:
        raise CodegenNotSupported(
            decl, "operator declarations are not supported by the V1 codegen"
        )
    if decl.type_params:
        raise CodegenNotSupported(
            decl, "generic function declarations are not supported by the V1 codegen"
        )

    param_names = [p.name for p in decl.params]
    self._emit(f"def {decl.name}({', '.join(param_names)}):")

    # Register parameter types so instance-method dispatch (Axon-typed
    # params) and the general typed-receiver path find them. Without
    # this, `function int f(Axon a) { return a.item("k"); }` would not
    # route `a.item(...)` to the runtime axon_item method.
    # NOTE(review): these registrations are not undone when the
    # function ends — confirm that is intended for same-named
    # parameters in later functions.
    for p in decl.params:
        if p.type_ref is not None:
            self._var_type[p.name] = p.type_ref.name
            if p.type_ref.name == "Axon":
                self._axon_declared.add(p.name)

    # Capture the enclosing scope's state before entering the body so
    # the `finally` below can always put it back.
    outer_slot_vars = self._slot_vars
    outer_return_type = self._current_return_type
    outer_axon_elide = self._axon_elide_keys
    self._indent += 1
    try:
        # Fresh slot table for this function scope. If the body has any
        # slot declarations it needs a `_slot_state` local, initialized
        # to a zero vector before the first slot_store.
        self._slot_vars = {}
        self._current_return_type = (
            decl.return_type.name if decl.return_type else None
        )
        self._axon_elide_keys = self._compute_axon_elision(decl)
        self._emit("_program_halt = 1.0")
        if _has_slot_decl(decl.body):
            self._emit("_slot_state = _VSA.zero_vector()")
        if not decl.body.statements:
            self._emit("pass")
        else:
            for stmt in decl.body.statements:
                self._translate_stmt(stmt)
    finally:
        self._slot_vars = outer_slot_vars
        self._current_return_type = outer_return_type
        self._axon_elide_keys = outer_axon_elide
        self._indent -= 1
790
+
791
def _compute_axon_elision(
    self, decl: ast.FunctionDecl
) -> dict[str, set[str]]:
    """Find `add` writes on axon-typed locals that can be skipped.

    Within a single function body, an `a.add("k", v);` statement on an
    axon-typed local is dead if the literal key `"k"` is never read via
    `a.item("k")` AND the axon `a` doesn't escape (returned, passed to
    another function, assigned to something else, or used as the
    receiver of any method other than `add` / `item`).

    The analysis is deliberately conservative and flow-insensitive:

    * any escape keeps ALL keys of that local materialized;
    * any `item` read with a non-literal (runtime-computed) key keeps
      ALL keys of that local materialized;
    * only string-literal `add` keys are ever candidates.

    Args:
        decl: The function whose parameters and body are analysed.

    Returns:
        A dict mapping each axon-typed parameter/local name to the set
        of string-literal keys whose `add` writes are dead in this
        function. Locals that escape or are read dynamically map to an
        empty set.
    """

    def child_nodes(node):
        """Yield every AST node reachable through a public attribute.

        Attributes holding a node are yielded directly; attributes
        holding a list contribute their node elements. Attributes whose
        access raises are skipped: this is a best-effort structural
        walk over node classes this method knows nothing about.
        """
        for attr_name in dir(node):
            if attr_name.startswith("_"):
                continue
            try:
                val = getattr(node, attr_name)
            except Exception:
                continue
            if isinstance(val, ast.Node):
                yield val
            elif isinstance(val, list):
                for item in val:
                    if isinstance(item, ast.Node):
                        yield item

    # Axon-typed parameters, plus every `Axon` declaration found
    # anywhere in the body (including nested statements).
    axon_locals: set[str] = {
        p.name
        for p in decl.params
        if p.type_ref is not None and p.type_ref.name == "Axon"
    }

    def collect_decls(node) -> None:
        if (isinstance(node, ast.VarDecl)
                and node.type_ref is not None
                and node.type_ref.name == "Axon"):
            axon_locals.add(node.name)
        for child in child_nodes(node):
            collect_decls(child)

    for stmt in decl.body.statements:
        collect_decls(stmt)

    # Per-axon bookkeeping; every axon starts out with no reads, no
    # writes and not known to escape.
    reads: dict[str, set[str]] = {name: set() for name in axon_locals}
    writes: dict[str, set[str]] = {name: set() for name in axon_locals}
    escaped: set[str] = set()
    any_dynamic_read: set[str] = set()

    def visit_expr(node, position: str) -> None:
        """Record reads, writes and escapes found in an expression.

        `position` is one of:
          'value' — node's evaluated value flows into something
                    (an arg, a return, an assignment RHS, etc.)
          'recv'  — node is the receiver of a member access
          'lhs'   — node is the LHS of an assignment.
        Identifiers in 'value' position that name an axon local cause
        that axon to be marked as escaped.
        """
        if node is None:
            return
        if isinstance(node, ast.Identifier):
            if node.name in axon_locals and position == "value":
                escaped.add(node.name)
            return
        if isinstance(node, ast.MemberAccess):
            # `obj.member` — obj is the receiver. The member name is a
            # plain string field on MemberAccess, not an Identifier.
            visit_expr(node.obj, "recv")
            return
        if isinstance(node, ast.Call):
            callee = node.callee
            receiver_is_axon = (
                isinstance(callee, ast.MemberAccess)
                and isinstance(callee.obj, ast.Identifier)
                and callee.obj.name in axon_locals
            )
            if receiver_is_axon and callee.member in ("add", "item"):
                # `a.add(K, V)` / `a.item(K)` are understood by this
                # analysis, so the receiver does NOT escape. The
                # arguments are ordinary values and may themselves
                # make some axon escape.
                var = callee.obj.name
                visit_expr(callee.obj, "recv")
                has_literal_key = (
                    len(node.args) >= 1
                    and isinstance(node.args[0], ast.StringLiteral)
                )
                if callee.member == "add":
                    # Args: (key, value). Track the literal key.
                    if has_literal_key:
                        writes[var].add(node.args[0].value)
                elif has_literal_key:
                    reads[var].add(node.args[0].value)
                else:
                    # Non-literal key: all writes are needed.
                    any_dynamic_read.add(var)
                for arg in node.args:
                    visit_expr(arg, "value")
                return
            if receiver_is_axon:
                # Any other method on an axon local hands the axon to
                # code this analysis cannot see into. Treat that as an
                # escape so none of its writes are elided.
                escaped.add(callee.obj.name)
            # Generic call: callee and every argument are values.
            visit_expr(callee, "value")
            for arg in node.args:
                visit_expr(arg, "value")
            return
        if isinstance(node, ast.Assignment):
            # LHS in 'lhs' position; RHS in 'value' position.
            visit_expr(node.target, "lhs")
            visit_expr(node.value, "value")
            return
        # Fallback: visit every sub-expression in value position, which
        # is the conservative choice for node kinds not handled above.
        for child in child_nodes(node):
            visit_expr(child, "value")

    def visit_stmt(stmt) -> None:
        if isinstance(stmt, ast.VarDecl):
            # `Axon a = expr;` — the declared name is not a use; the
            # initializer is in value position.
            if stmt.initializer is not None:
                visit_expr(stmt.initializer, "value")
            return
        if isinstance(stmt, ast.ReturnStmt):
            # Returning an axon counts as escape.
            visit_expr(stmt.value, "value")
            return
        if isinstance(stmt, ast.ExprStmt):
            expr = stmt.expr
            # `a = expr;` — LHS is in lhs position, RHS in value.
            if isinstance(expr, ast.Assignment):
                visit_expr(expr.target, "lhs")
                visit_expr(expr.value, "value")
                return
            # `a.add(...);` / `a.item(...);` are recognised by the
            # Call branch of visit_expr.
            visit_expr(expr, "value")
            return
        # All other statement kinds: walk inner expressions and nested
        # statements.
        for child in child_nodes(stmt):
            if isinstance(child, ast.Stmt):
                visit_stmt(child)
            else:
                visit_expr(child, "value")

    for stmt in decl.body.statements:
        visit_stmt(stmt)

    return {
        name: (
            set()
            if name in escaped or name in any_dynamic_read
            else writes[name] - reads[name]
        )
        for name in axon_locals
    }
991
+
992
+
993
# _LOOP_T is now a per-instance attribute set in __init__ from the
# `loop_max_iterations` kwarg (default 50). The class attribute is
# kept as a fallback for any subclass that constructs the codegen
# without going through __init__.
# NOTE(review): the driver emitted by `_translate_loop_function_decl`
# is an uncapped `while True:` and does not read this value;
# presumably it is consumed by the bounded `loop(count)` path — confirm.
_LOOP_T = 50
998
+
999
def _translate_loop_function_decl(
    self, decl: "ast.LoopFunctionDecl", *, class_name: Optional[str] = None
) -> None:
    """Emit a Python function for a loop function declaration.

    The emitted function is named `_loop_<name>` (or
    `_loop_<Class>_<name>` when `class_name` is given), takes one
    `_init_<state>` parameter per state parameter (preceded by the
    array parameter for `foreach_loop`), and returns a tuple of the
    final state values followed by `_halted`.

    Shape of the emitted body:

    1. state locals are initialised from the `_init_*` parameters and
       `_halted` starts at 0.0;
    2. for `do_while`, the body is emitted once ahead of the driver;
    3. a `while True:` driver snapshots the state, computes `_keep`
       from the kind-specific condition, accumulates `_halted`, runs
       the body, blends each state with its snapshot according to
       `_halted`, and breaks once `float(_halted) >= 0.99`.

    The declaration is registered in `_loop_decls` under `name` (or
    the dotted `Class.name`) so loop call sites can look up its shape.

    The loop kind and, for `foreach_loop`, the array parameter are
    validated before anything is emitted, and the translator's loop
    stack, iterator/element flags and indent level are restored even
    if translating the condition or body raises.

    Args:
        decl: The loop function declaration to translate.
        class_name: Enclosing class name when the loop is declared in
            a class body; `None` for a free-standing loop function.

    Raises:
        CodegenNotSupported: for an unknown loop kind, or a
            `foreach_loop` whose condition is not a plain identifier.
    """
    if class_name is not None:
        registry_key = f"{class_name}.{decl.name}"
        py_loop_name = f"_loop_{class_name}_{decl.name}"
    else:
        registry_key = decl.name
        py_loop_name = f"_loop_{decl.name}"
    # Register so LoopCallStmt knows the state-param shape.
    self._loop_decls[registry_key] = decl

    # Reject malformed declarations up front, before any output is
    # produced or translator state is touched.
    if decl.kind not in (
        "do_while", "while_loop", "iterative_loop", "foreach_loop"
    ):
        raise CodegenNotSupported(
            decl, f"unknown loop kind `{decl.kind}`"
        )
    if (decl.kind == "foreach_loop"
            and not isinstance(decl.condition, ast.Identifier)):
        raise CodegenNotSupported(
            decl.condition,
            "foreach_loop's first parameter must be a plain "
            "identifier naming the array (e.g. `arr`). Got "
            f"{type(decl.condition).__name__}.",
        )

    state_names = [p.name for p in decl.state_params]
    init_param_names = [f"_init_{n}" for n in state_names]

    # foreach_loop adds the array as the first Python parameter
    # (before the state inits). The condition Expr names it.
    py_params = list(init_param_names)
    if decl.kind == "foreach_loop":
        py_params.insert(0, decl.condition.name)

    base_indent = self._indent
    try:
        self._emit(f"def {py_loop_name}({', '.join(py_params)}):")
        self._indent += 1
        self._emit(
            f'"""Loop function `{decl.name}` (kind={decl.kind}).'
        )
        self._emit("")
        self._emit(
            f"T-step soft-halt cell. Returns ({', '.join(state_names) or 'no state'}, halted)."
        )
        self._emit('"""')
        # State locals init from caller args.
        for state_name, init_name in zip(state_names, init_param_names):
            self._emit(f"{state_name} = {init_name}")
        self._emit("_halted = 0.0")

        # Push (loop_name, state_names) so PassStmt and tail-call
        # ReturnStmt translation know what to assign and which loop
        # name a `return NAME(args)` surface targets.
        self._loop_state_stack.append((decl.name, state_names))
        # For iterative_loop, `iterator` in the body resolves to the
        # runtime Python local `_iterator`; likewise `element` for
        # foreach_loop.
        prior_iter_runtime = self._iterator_runtime_in_scope
        prior_elem_runtime = self._element_runtime_in_scope
        try:
            if decl.kind == "iterative_loop":
                self._iterator_runtime_in_scope = True
            if decl.kind == "foreach_loop":
                self._element_runtime_in_scope = True

            # do_while: body runs once unconditionally first.
            if decl.kind == "do_while":
                self._emit(
                    "# do_while: body runs once unconditionally first."
                )
                for inner in decl.body.statements:
                    self._translate_stmt(inner)

            # Loop driver. It reads `_halted` at each iteration
            # boundary to decide whether to continue; no iteration cap
            # is emitted. `_t` is the iteration counter used by the
            # iterative_loop / foreach_loop conditions.
            self._emit("_t = 0")
            self._emit("while True:")
            self._indent += 1
            # Snapshot pre-step state for soft-mux freeze on halt.
            for state_name in state_names:
                self._emit(f"_pre_{state_name} = {state_name}")

            # Evaluate condition (semantics depend on kind).
            if decl.kind in ("do_while", "while_loop"):
                cond_src = self._translate_expr(decl.condition)
                self._emit(f"_cond = {cond_src}")
                self._emit("_cond_truth = _VSA.truth_axis(_cond)")
                self._emit("_keep = _VSA.heaviside(_cond_truth)")
            elif decl.kind == "iterative_loop":
                # condition is the count; iterator = _t + 1 (1-indexed).
                count_src = self._translate_expr(decl.condition)
                self._emit(
                    "# iterative_loop: tick = _t+1, halt when tick > count."
                )
                self._emit("_iterator = _t + 1")
                # (count - iterator + 1) is positive while
                # iterator <= count and zero or negative once past.
                self._emit(
                    f"_keep = _VSA.heaviside(int({count_src}) - _iterator + 1)"
                )
            else:
                # foreach_loop (kind validated above): the condition is
                # the identifier naming the array parameter. Each tick
                # halts when _t >= length and binds `_element` to
                # arr[_t], with the index clamped so halted ticks still
                # read a valid position.
                arr_param_name = decl.condition.name
                self._emit(
                    f"# foreach_loop: array param `{arr_param_name}`,"
                )
                self._emit(
                    f"# bind `element` to {arr_param_name}[_t] each tick."
                )
                self._emit(
                    f"_length = _VSA.array_length({arr_param_name})"
                )
                # (_length - _t) is positive while _t < _length.
                self._emit("_keep = _VSA.heaviside(_length - _t)")
                self._emit(
                    f"_element = _VSA.array_get({arr_param_name}, "
                    f"min(_t, max(_length - 1, 0)))"
                )

            self._emit("_halt_term = 1.0 - _keep")
            # Accumulate the halt signal through the runtime's
            # saturate_unit rather than Python's min().
            self._emit(
                "_halted = _VSA.saturate_unit(_halted + _halt_term)"
            )
            # Body re-runs each tick; PassStmt updates state locals.
            for inner in decl.body.statements:
                self._translate_stmt(inner)
            # Soft mux: blend each state back towards its pre-step
            # snapshot in proportion to `_halted`, so a halted tick
            # leaves the state unchanged.
            for state_name in state_names:
                self._emit(
                    f"{state_name} = (1.0 - _halted) * {state_name} "
                    f"+ _halted * _pre_{state_name}"
                )
            # Self-halt: leave the driver once the halt signal has
            # saturated. A loop that never halts never terminates.
            self._emit("_t += 1")
            self._emit("if float(_halted) >= 0.99:")
            self._indent += 1
            self._emit("break")
            self._indent -= 1
            self._indent -= 1  # close the while loop
        finally:
            # Pop state stack and restore iterator/element flags.
            self._loop_state_stack.pop()
            self._iterator_runtime_in_scope = prior_iter_runtime
            self._element_runtime_in_scope = prior_elem_runtime

        # Return final state values + halted (last).
        return_items = state_names + ["_halted"]
        self._emit(f"return ({', '.join(return_items)},)")
    finally:
        # Close the function; also unwinds any deeper indent left
        # behind by an exception part-way through the driver.
        self._indent = base_indent
1159
+
1160
def _translate_loop_call(self, stmt: "ast.LoopCallStmt") -> None:
    """Lower a `loop NAME(cond, state...)` statement.

    Emits a call to the Python function generated for the loop
    declaration registered under `stmt.name`, then writes the returned
    state values back into the caller's slots and folds the returned
    halt flag into `_program_halt`.

    Each state argument must name a slot variable of the caller: its
    current slot value is passed in as the initial state and the final
    state is stored back into the same slot. For `foreach_loop` the
    condition argument is the array and is passed as the first Python
    argument (an array literal goes through `_VSA.array_from_literal`).
    For every other kind the condition argument is only evaluated into
    `_`; the loop function uses its own declared condition.

    Raises:
        CodegenNotSupported: if the loop is not declared, the number
            of state arguments differs from the declaration, or a
            state argument is not a slot variable in the caller.
    """
    loop_decl = self._loop_decls.get(stmt.name)
    if loop_decl is None:
        raise CodegenNotSupported(
            stmt,
            f"loop function `{stmt.name}` is not declared. Loop "
            f"functions must be declared with one of `do_while`, "
            f"`while_loop`, `iterative_loop`, `foreach_loop` keywords "
            f"before being invoked with `loop NAME(...)`.",
        )

    expected = len(loop_decl.state_params)
    supplied = len(stmt.state_arg_names)
    if supplied != expected:
        raise CodegenNotSupported(
            stmt,
            f"loop call `{stmt.name}` expects "
            f"{expected} state arg(s), got "
            f"{supplied}",
        )

    # Resolve every state argument to its slot index in the caller.
    slot_indices = []
    for state_arg in stmt.state_arg_names:
        if state_arg not in self._slot_vars:
            raise CodegenNotSupported(
                stmt,
                f"loop call state argument `{state_arg}` must be a "
                f"slot variable in the caller scope; was not declared "
                f"with `slot TYPE name = ...`.",
            )
        slot_indices.append(self._slot_vars[state_arg])

    is_foreach = loop_decl.kind == "foreach_loop"

    # Translate the condition argument. A foreach array literal is
    # routed through array_from_literal rather than emitted as a plain
    # Python list.
    if is_foreach and isinstance(stmt.condition_arg, ast.ArrayLiteral):
        element_srcs = []
        for element in stmt.condition_arg.elements:
            element_srcs.append(self._translate_expr(element))
        cond_src = f"_VSA.array_from_literal({', '.join(element_srcs)})"
    else:
        cond_src = self._translate_expr(stmt.condition_arg)
    self._emit(f"# loop call: {stmt.name}({cond_src}, ...)")

    # Current slot values become the loop's initial state.
    call_args = [
        f"_VSA.slot_load(_slot_state, {slot_idx})"
        for slot_idx in slot_indices
    ]
    # One distinct unpack target per state arg, halt flag last.
    ret_names = [f"_loopret_{name}" for name in stmt.state_arg_names]
    ret_names.append("_loopret_halt")
    # Class-bodied loops have dotted source names (`Greeter.run`);
    # `.` becomes `_` to form a valid Python identifier.
    py_loop_name = "_loop_" + stmt.name.replace(".", "_")

    if is_foreach:
        # The array goes first, ahead of the state inits.
        call_args = [cond_src] + call_args
    else:
        # Other kinds: the condition is evaluated and discarded.
        self._emit("# Condition arg evaluated for side effects; runtime")
        self._emit("# uses the loop's decl-time condition expression.")
        self._emit(f"_ = {cond_src}")
    unpack_target = f"({', '.join(ret_names)},)"
    self._emit(
        f"{unpack_target} = {py_loop_name}({', '.join(call_args)})"
    )

    # Write the final state back into the caller's slots; zip stops
    # before the trailing halt name.
    for slot_idx, ret_name in zip(slot_indices, ret_names):
        self._emit(
            f"_slot_state = _VSA.slot_store(_slot_state, {slot_idx}, "
            f"{ret_name})"
        )
    # Fold the loop's halt flag into the function-scope accumulator
    # that scales the function's return value.
    self._emit("_program_halt = _program_halt * _loopret_halt")
1251
+
1252
+ # -- statements -------------------------------------------------------
1253
+
1254
+ def _translate_stmt(self, stmt: ast.Stmt) -> None:
1255
+ # PassStmt: tail-recursive yield in a loop function body.
1256
+ # Translates to assignment of the loop's state locals.
1257
+ # Handled here rather than in a more general dispatcher so that
1258
+ # it errors clearly outside a loop body.
1259
+ if isinstance(stmt, ast.PassStmt):
1260
+ if not self._loop_state_stack:
1261
+ raise CodegenNotSupported(
1262
+ stmt,
1263
+ "`pass` is only valid inside a loop function body. "
1264
+ "See planning/open-questions/loop-function-declarations.md.",
1265
+ )
1266
+ _loop_name, state_names = self._loop_state_stack[-1]
1267
+ if len(stmt.values) != len(state_names):
1268
+ raise CodegenNotSupported(
1269
+ stmt,
1270
+ f"`pass` expects {len(state_names)} value(s) (one per "
1271
+ f"state parameter `{', '.join(state_names)}`), got "
1272
+ f"{len(stmt.values)}",
1273
+ )
1274
+ for state_name, value in zip(state_names, stmt.values):
1275
+ if isinstance(value, ast.ReplaceMarker):
1276
+ # `replace` keyword: restore the parameter's input value.
1277
+ self._emit(f"{state_name} = _init_{state_name}")
1278
+ else:
1279
+ value_src = self._translate_expr(value)
1280
+ self._emit(f"{state_name} = {value_src}")
1281
+ return
1282
+ # LoopCallStmt: invoke a loop function and write back state.
1283
+ if isinstance(stmt, ast.LoopCallStmt):
1284
+ self._translate_loop_call(stmt)
1285
+ return
1286
+ if isinstance(stmt, ast.VarDecl):
1287
+ self._translate_var_decl(stmt, at_top_level=False)
1288
+ return
1289
+ if isinstance(stmt, ast.ReturnStmt):
1290
+ if (self._loop_state_stack
1291
+ and stmt.value is not None
1292
+ and isinstance(stmt.value, ast.Call)
1293
+ and isinstance(stmt.value.callee, ast.Identifier)):
1294
+ loop_name, state_names = self._loop_state_stack[-1]
1295
+ if stmt.value.callee.name == loop_name:
1296
+ args = stmt.value.args
1297
+ if len(args) != len(state_names):
1298
+ raise CodegenNotSupported(
1299
+ stmt,
1300
+ f"tail call `return {loop_name}(...)` expects "
1301
+ f"{len(state_names)} arg(s) (one per state "
1302
+ f"parameter `{', '.join(state_names)}`), got "
1303
+ f"{len(args)}",
1304
+ )
1305
+ for state_name, value in zip(state_names, args):
1306
+ if isinstance(value, ast.ReplaceMarker):
1307
+ self._emit(f"{state_name} = _init_{state_name}")
1308
+ else:
1309
+ value_src = self._translate_expr(value)
1310
+ self._emit(f"{state_name} = {value_src}")
1311
+ return
1312
+ if stmt.value is None:
1313
+ self._emit("return")
1314
+ else:
1315
+ # Multiply the returned value by _program_halt so that
1316
+ # any unconverged loop in this function (halted≈0)
1317
+ # wipes the output. For functions without loops the
1318
+ # accumulator stays 1.0 and this is a no-op. String
1319
+ # returns can't be multiplied by a float (codebook
1320
+ # nearest-string lookup at the edge yields a host
1321
+ # str), so we emit a bare return for those — halt
1322
+ # wipe doesn't apply at the string boundary anyway,
1323
+ # since a wiped-vector lookup already returns the
1324
+ # nearest-string of zero (which is the right
1325
+ # behavior).
1326
+ if self._current_return_type == "string":
1327
+ self._emit(f"return {self._translate_expr(stmt.value)}")
1328
+ else:
1329
+ self._emit(
1330
+ f"return ({self._translate_expr(stmt.value)}) "
1331
+ f"* _program_halt"
1332
+ )
1333
+ return
1334
+ if isinstance(stmt, ast.ExprStmt):
1335
+ expr = stmt.expr
1336
+ # `a.add(k, v);` as a statement on an Axon-typed local rebinds
1337
+ # `a` to the new axon. This is the augmented-assignment shape
1338
+ # for void-returning instance methods — see the spec rule in
1339
+ # planning/sutra-spec/axons.md ("axons are completely
1340
+ # un-imperative aside from the ergonomics"). For axons,
1341
+ # `add` is the mutating method; `item` is read-only and does
1342
+ # not rebind. The general "any void-returning instance method
1343
+ # on any class is augmented assignment" rule is not yet
1344
+ # implemented for non-axon classes.
1345
+ if (isinstance(expr, ast.Call)
1346
+ and isinstance(expr.callee, ast.MemberAccess)
1347
+ and isinstance(expr.callee.obj, ast.Identifier)
1348
+ and expr.callee.obj.name in self._axon_declared):
1349
+ obj_name = expr.callee.obj.name
1350
+ method_name = expr.callee.member
1351
+ runtime_name = {
1352
+ "add": "axon_add",
1353
+ "item": "axon_item",
1354
+ }.get(method_name)
1355
+ if runtime_name is not None:
1356
+ # SSA-elision: if this `add` writes a literal key
1357
+ # that's never read in this function (and the
1358
+ # axon doesn't escape), skip emission entirely.
1359
+ # The key flows nowhere; computing the bind would
1360
+ # be pure waste. See `_compute_axon_elision`.
1361
+ if (method_name == "add"
1362
+ and len(expr.args) >= 1
1363
+ and isinstance(expr.args[0], ast.StringLiteral)
1364
+ and expr.args[0].value in self._axon_elide_keys.get(obj_name, set())):
1365
+ return
1366
+ arg_srcs = [self._translate_expr(a) for a in expr.args]
1367
+ all_args = [obj_name] + arg_srcs
1368
+ if method_name == "add":
1369
+ # Mutating instance method → augmented assignment.
1370
+ self._emit(
1371
+ f"{obj_name} = _VSA.{runtime_name}"
1372
+ f"({', '.join(all_args)})"
1373
+ )
1374
+ else:
1375
+ # Read-only instance method as a discarded
1376
+ # statement (rare). Emit the call without rebind.
1377
+ self._emit(
1378
+ f"_VSA.{runtime_name}({', '.join(all_args)})"
1379
+ )
1380
+ return
1381
+ # General void-returning-instance-method rule: for any
1382
+ # class C, an instance method declared `method void m(...)`
1383
+ # called as a statement `obj.m(args);` rebinds the
1384
+ # receiver to the static form's return value. The static
1385
+ # form (mangled `C_m` or runtime `_VSA.m` for intrinsics)
1386
+ # takes the receiver as its first arg and returns the new
1387
+ # receiver value. This is the user's compilation rule:
1388
+ # "every void-returning instance method is an augmented
1389
+ # assignment." See planning/sutra-spec/axons.md and the
1390
+ # broader class-system note on it.
1391
+ if (isinstance(expr, ast.Call)
1392
+ and isinstance(expr.callee, ast.MemberAccess)
1393
+ and isinstance(expr.callee.obj, ast.Identifier)
1394
+ and expr.callee.obj.name in self._var_type):
1395
+ obj_name = expr.callee.obj.name
1396
+ obj_class = self._var_type[obj_name]
1397
+ method_name = expr.callee.member
1398
+ return_type = self._class_method_return_types.get(
1399
+ (obj_class, method_name)
1400
+ )
1401
+ if return_type == "void":
1402
+ arg_srcs = [self._translate_expr(a) for a in expr.args]
1403
+ all_args = [obj_name] + arg_srcs
1404
+ if (obj_class in self._class_intrinsic_methods
1405
+ and method_name in self._class_intrinsic_methods[obj_class]):
1406
+ self._emit(
1407
+ f"{obj_name} = _VSA.{method_name}"
1408
+ f"({', '.join(all_args)})"
1409
+ )
1410
+ return
1411
+ if (obj_class in self._class_instance_methods
1412
+ and method_name in self._class_instance_methods[obj_class]):
1413
+ self._emit(
1414
+ f"{obj_name} = {obj_class}_{method_name}"
1415
+ f"({', '.join(all_args)})"
1416
+ )
1417
+ return
1418
+ if isinstance(expr, ast.Assignment):
1419
+ # dict[key] = value dispatches to the rotation-hashmap
1420
+ # runtime's functional-update form (hashmap_set returns
1421
+ # a new accumulator). Only simple `=` is supported on
1422
+ # dict subscripts — compound assignment (`d[k] += v`) is
1423
+ # not yet specified.
1424
+ if (isinstance(expr.target, ast.Subscript)
1425
+ and isinstance(expr.target.target, ast.Identifier)
1426
+ and expr.target.target.name in self._dict_declared):
1427
+ if expr.op != "=":
1428
+ raise CodegenNotSupported(
1429
+ stmt,
1430
+ f"compound assignment on a dict subscript "
1431
+ f"(`{expr.op}`) is not yet supported",
1432
+ )
1433
+ dict_name = expr.target.target.name
1434
+ key_src = self._translate_expr(expr.target.index)
1435
+ value_src = self._translate_expr(expr.value)
1436
+ self._emit(
1437
+ f"{dict_name} = _VSA.hashmap_set({dict_name}, "
1438
+ f"{key_src}, {value_src})"
1439
+ )
1440
+ return
1441
+ # Slot-bound variable assignment: `x = expr;` where
1442
+ # x is a slot variable lowers to `_slot_state =
1443
+ # _VSA.slot_store(_slot_state, idx, value)`. Compound
1444
+ # assignment (+=, -=) on slot variables would need
1445
+ # to read-modify-write through the slot — left for a
1446
+ # follow-up since the imperative-reversible pattern
1447
+ # only needs plain `=` to demonstrate.
1448
+ if (isinstance(expr.target, ast.Identifier)
1449
+ and expr.target.name in self._slot_vars):
1450
+ if expr.op != "=":
1451
+ raise CodegenNotSupported(
1452
+ stmt,
1453
+ f"compound assignment on a slot variable "
1454
+ f"(`{expr.op}`) is not yet supported; use "
1455
+ "plain `=` for now",
1456
+ )
1457
+ idx = self._slot_vars[expr.target.name]
1458
+ value_src = self._translate_expr(expr.value)
1459
+ self._emit(
1460
+ f"_slot_state = _VSA.slot_store(_slot_state, "
1461
+ f"{idx}, {value_src})"
1462
+ )
1463
+ return
1464
+ target_src = self._translate_expr(expr.target)
1465
+ value_src = self._translate_expr(expr.value)
1466
+ self._emit(f"{target_src} {expr.op} {value_src}")
1467
+ return
1468
+ if isinstance(expr, ast.PostfixOp):
1469
+ # `i++` / `i--` as a statement. Lower to Python
1470
+ # `i += 1` / `i -= 1`. Used in expression position
1471
+ # (rare in Sutra; postfix's value is the OLD value of
1472
+ # i which Python can't express as an expression
1473
+ # without walrus + an extra binding) it remains
1474
+ # unsupported and the expression-translation path
1475
+ # below errors with a clear message.
1476
+ target_src = self._translate_expr(expr.operand)
1477
+ delta = "+= 1" if expr.op == "++" else "-= 1"
1478
+ self._emit(f"{target_src} {delta}")
1479
+ return
1480
+ self._emit(self._translate_expr(expr))
1481
+ return
1482
+ if isinstance(stmt, ast.Block):
1483
+ for inner in stmt.statements:
1484
+ self._translate_stmt(inner)
1485
+ return
1486
+ if isinstance(stmt, ast.LoopStmt):
1487
+ if stmt.count is not None:
1488
+ self._translate_bounded_loop(stmt)
1489
+ return
1490
+ # loop(cond) — old eigenrotation form. Rejected; superseded
1491
+ # by the function-declaration loop kinds.
1492
+ raise CodegenNotSupported(
1493
+ stmt,
1494
+ "`loop(cond) { body }` is no longer supported. The body-"
1495
+ "discard eigenrotation form is replaced by the function-"
1496
+ "declaration loop kinds (`do_while NAME(...)`, "
1497
+ "`while_loop NAME(...)`, `iterative_loop NAME(...)`, "
1498
+ "`foreach_loop NAME(...)` + `loop NAME(...);` call site). "
1499
+ "See planning/open-questions/loop-function-declarations.md.",
1500
+ )
1501
+ if isinstance(stmt, ast.WhileStmt):
1502
+ raise CodegenNotSupported(
1503
+ stmt,
1504
+ "C-style `while (cond) { body }` is no longer supported. "
1505
+ "Use a `while_loop NAME(cond, ...state) { ...; pass ...; }` "
1506
+ "function declaration + `loop NAME(cond, args);` call site. "
1507
+ "See planning/open-questions/loop-function-declarations.md.",
1508
+ )
1509
+ if isinstance(stmt, ast.ForStmt):
1510
+ raise CodegenNotSupported(
1511
+ stmt,
1512
+ "C-style `for (init; cond; step) { body }` is no longer "
1513
+ "supported. Use `iterative_loop NAME(count, ...state) { "
1514
+ "...; pass ...; }` for fixed-count iteration (with the "
1515
+ "`iterator` keyword for the tick number), or "
1516
+ "`while_loop NAME(cond, ...state) { ... }` for general "
1517
+ "data-dependent iteration. See "
1518
+ "planning/open-questions/loop-function-declarations.md.",
1519
+ )
1520
+ if isinstance(stmt, ast.DoWhileStmt):
1521
+ raise CodegenNotSupported(
1522
+ stmt,
1523
+ "C-style `do { body } while (cond);` is no longer "
1524
+ "supported. Use `do_while NAME(cond, ...state) { ...; "
1525
+ "pass ...; }` function declaration + "
1526
+ "`loop NAME(cond, args);` call site. See "
1527
+ "planning/open-questions/loop-function-declarations.md.",
1528
+ )
1529
+ if isinstance(stmt, ast.ForeachStmt):
1530
+ if isinstance(stmt.iterable, ast.ArrayLiteral):
1531
+ for element_expr in stmt.iterable.elements:
1532
+ element_src = self._translate_expr(element_expr)
1533
+ self._emit(f"{stmt.var_name} = {element_src}")
1534
+ for inner in stmt.body.statements:
1535
+ self._translate_stmt(inner)
1536
+ return
1537
+ raise CodegenNotSupported(
1538
+ stmt,
1539
+ f"`foreach` is only supported over compile-time-known "
1540
+ f"collections (array literals like `[a, b, c]`). The "
1541
+ f"iterable here is a "
1542
+ f"{type(stmt.iterable).__name__}, which would require "
1543
+ f"runtime iteration. Dynamic `foreach` over named "
1544
+ f"collections or computed expressions is future work. "
1545
+ f"Rewrite as `foreach (x in [a, b, c]) {{ ... }}` or "
1546
+ f"unroll by hand.",
1547
+ )
1548
+ if isinstance(stmt, ast.IfStmt):
1549
+ raise CodegenNotSupported(
1550
+ stmt,
1551
+ "if/else is not supported by the V1 codegen — the whole "
1552
+ "point is to compile it away into a prototype-table lookup",
1553
+ )
1554
+ raise CodegenNotSupported(
1555
+ stmt, f"unsupported statement: {type(stmt).__name__}"
1556
+ )
1557
+
1558
+ # -- loop compilation ---------------------------------------------------
1559
+ #
1560
+ # Sutra's `loop` construct has two forms:
1561
+ #
1562
+ # 1. Bounded: loop (N) { body } → unrolled at compile time
1563
+ # loop (N as i) { body } → unrolled with index
1564
+ # The body is emitted N times in sequence. No rotation, no
1565
+ # circuit iteration. Pure compile-time expansion.
1566
+ #
1567
+ # 2. Eigenrotation: loop (condition) { body } → geometric rotation
1568
+ # Compiles to _VSA.loop() — the brain iterates via rotation
1569
+ # in vector space with prototype matching for termination.
1570
+ #
1571
+ # The old while/for forms used to compile to geometric rotation too, but
1572
+ # `_translate_stmt` now rejects them (and `loop (condition)`) outright.
1573
+ #
1574
+ # -- geometric loop compilation ----------------------------------------
1575
+ #
1576
+ # Sutra loops compile to geometric rotation on the brain, not to
1577
+ # host-runtime Python loops. The loop body is a rotation matrix R
1578
+ # applied at each iteration; each rotated state is snapped through
1579
+ # the mushroom body circuit; termination is by prototype matching
1580
+ # in the brain's native KC space.
1581
+ #
1582
+ # The generated code:
1583
+ # 1. Builds a rotation matrix R (from loop body analysis or default)
1584
+ # 2. Compiles the target condition as a KC-space prototype
1585
+ # 3. Calls _VSA.loop(state, R, prototypes) — the brain iterates
1586
+ #
1587
+ # This is how the brain counts: N iterations of rotation by angle
1588
+ # theta accumulates N*theta total rotation, and the loop terminates
1589
+ # when the trajectory enters the target prototype's basin.
1590
+
1591
+ def _translate_bounded_loop(self, stmt: ast.LoopStmt) -> None:
1592
+ """Compile loop (N) { body } — unrolls at compile time.
1593
+
1594
+ The body is emitted N times. No rotation matrix, no circuit
1595
+ iteration. This is syntactic sugar, not eigenrotation.
1596
+
1597
+ loop (N as i) adds an index variable that counts 0..N-1.
1598
+ """
1599
+ count_src = self._translate_expr(stmt.count)
1600
+
1601
+ if stmt.index_var:
1602
+ # loop (N as i) { body } → for i in range(N): body
1603
+ self._emit(f"for {stmt.index_var} in range({count_src}):")
1604
+ self._indent += 1
1605
+ if not stmt.body.statements:
1606
+ self._emit("pass")
1607
+ else:
1608
+ for inner in stmt.body.statements:
1609
+ self._translate_stmt(inner)
1610
+ self._indent -= 1
1611
+ else:
1612
+ # loop (N) { body } → unroll body N times
1613
+ # For literal integers, actually unroll. For expressions, use range.
1614
+ if isinstance(stmt.count, ast.IntLiteral):
1615
+ n = stmt.count.value
1616
+ # Save and restore _iterator_value across the unroll —
1617
+ # nested unrolling loops save the outer value and pop
1618
+ # it back when this loop finishes. The keyword always
1619
+ # binds to the innermost surrounding unrolled loop.
1620
+ saved_iter = self._iterator_value
1621
+ for i in range(n):
1622
+ self._iterator_value = i + 1 # 1-based: 1..N
1623
+ for inner in stmt.body.statements:
1624
+ self._translate_stmt(inner)
1625
+ self._iterator_value = saved_iter
1626
+ else:
1627
+ self._emit(f"for _ in range({count_src}):")
1628
+ self._indent += 1
1629
+ if not stmt.body.statements:
1630
+ self._emit("pass")
1631
+ else:
1632
+ for inner in stmt.body.statements:
1633
+ self._translate_stmt(inner)
1634
+ self._indent -= 1
1635
+
1636
+ def _translate_eigenrotation_loop(self, stmt: ast.LoopStmt) -> None:
1637
+ """Compile loop (condition) { body } — eigenrotation on the brain.
1638
+
1639
+ The condition determines the target prototype. The loop body
1640
+ is replaced by a rotation matrix. The brain iterates via
1641
+ _VSA.loop().
1642
+ """
1643
+ lid = self._next_loop_id()
1644
+ state_var = self._extract_loop_state_var(stmt.body)
1645
+ target_expr = self._extract_loop_target(stmt.condition)
1646
+
1647
+ self._emit(f"{lid}_R = _VSA.make_random_rotation("
1648
+ f"angle=_np.pi / 4, n_planes=20, seed=_VSA.seed)")
1649
+ self._emit(f"{lid}_target = {target_expr}")
1650
+ self._emit(f"{lid}_protos = _VSA.compile_prototypes("
1651
+ f"{{\"target\": {lid}_target}})")
1652
+ self._emit(f"{lid}_name, {state_var}, {lid}_iters = _VSA.loop(")
1653
+ self._indent += 1
1654
+ self._emit(f"{state_var}, {lid}_R, {lid}_protos,")
1655
+ self._emit(f"target_name=\"target\", max_iters=50)")
1656
+ self._indent -= 1
1657
+
1658
# Counter behind the `_loopN` temporary names. It is read and written on
# `BaseCodegen` itself, so every instance and subclass shares one sequence.
_loop_counter = 0

def _next_loop_id(self) -> str:
    """Advance the shared loop counter and return the next `_loopN` prefix."""
    serial = BaseCodegen._loop_counter + 1
    BaseCodegen._loop_counter = serial
    return "_loop" + str(serial)
1663
+
1664
+ def _translate_while_as_geometric_loop(self, stmt: ast.WhileStmt) -> None:
1665
+ """Compile a while statement to a geometric loop on the brain.
1666
+
1667
+ The while condition determines the target prototype (what we're
1668
+ looping UNTIL), and the loop body determines the rotation (what
1669
+ each iteration does geometrically).
1670
+
1671
+ Generated code pattern:
1672
+ _loopN_R = _VSA.make_random_rotation(angle=pi/4, n_planes=20)
1673
+ _loopN_target = <condition target vector>
1674
+ _loopN_protos = _VSA.compile_prototypes({"target": _loopN_target})
1675
+ _loopN_name, <state_var>, _loopN_iters = _VSA.loop(
1676
+ <state_var>, _loopN_R, _loopN_protos, target_name="target")
1677
+ """
1678
+ lid = self._next_loop_id()
1679
+
1680
+ # Extract the state variable from the loop body.
1681
+ # Look for assignments of the form: state = <expr>
1682
+ # The assigned variable is the state being rotated.
1683
+ state_var = self._extract_loop_state_var(stmt.body)
1684
+
1685
+ # Extract the target from the condition.
1686
+ # The condition tells us what we're looping toward.
1687
+ target_expr = self._extract_loop_target(stmt.condition)
1688
+
1689
+ # Build rotation matrix — the geometric step per iteration.
1690
+ # Uses multi-plane rotation for good separation in high-D space.
1691
+ self._emit(f"{lid}_R = _VSA.make_random_rotation("
1692
+ f"angle=_np.pi / 4, n_planes=20, seed=_VSA.seed)")
1693
+
1694
+ # Compile the target as a KC-space prototype.
1695
+ self._emit(f"{lid}_target = {target_expr}")
1696
+ self._emit(f"{lid}_protos = _VSA.compile_prototypes("
1697
+ f"{{\"target\": {lid}_target}})")
1698
+
1699
+ # Execute the geometric loop on the brain.
1700
+ self._emit(f"{lid}_name, {state_var}, {lid}_iters = _VSA.loop(")
1701
+ self._indent += 1
1702
+ self._emit(f"{state_var}, {lid}_R, {lid}_protos,")
1703
+ self._emit(f"target_name=\"target\", max_iters=50)")
1704
+ self._indent -= 1
1705
+
1706
+ def _translate_for_as_geometric_loop(self, stmt: ast.ForStmt) -> None:
1707
+ """Compile a for statement to a bounded geometric loop.
1708
+
1709
+ A C-style for loop `for (init; cond; step)` compiles to N
1710
+ iterations of geometric rotation, where N is extracted from
1711
+ the condition bound when possible.
1712
+ """
1713
+ lid = self._next_loop_id()
1714
+
1715
+ # Emit the init statement (e.g., var i = 0)
1716
+ if stmt.init:
1717
+ self._translate_stmt(stmt.init)
1718
+
1719
+ # Extract loop bound from condition (e.g., i < 10 → 10 iterations)
1720
+ max_iters = self._extract_for_bound(stmt.condition)
1721
+
1722
+ # Extract state variable from body
1723
+ state_var = self._extract_loop_state_var(stmt.body)
1724
+
1725
+ # Build rotation and run
1726
+ self._emit(f"{lid}_R = _VSA.make_random_rotation("
1727
+ f"angle=_np.pi / {max_iters}, n_planes=20, seed=_VSA.seed)")
1728
+ self._emit(f"# Bounded geometric loop: {max_iters} rotation steps")
1729
+ self._emit(f"for {lid}_i in range({max_iters}):")
1730
+ self._indent += 1
1731
+ self._emit(f"{state_var} = {lid}_R @ {state_var}")
1732
+ self._emit(f"{state_var} = _VSA.snap({state_var})")
1733
+ self._indent -= 1
1734
+
1735
+ def _extract_loop_state_var(self, body: ast.Block) -> str:
1736
+ """Find the state variable being mutated in the loop body.
1737
+
1738
+ Looks for assignment statements like `current = snap(...)` or
1739
+ `state = bind(state, ...)` and returns the target variable name.
1740
+ Falls back to '_loop_state' if no assignment is found.
1741
+ """
1742
+ for stmt in body.statements:
1743
+ if isinstance(stmt, ast.ExprStmt) and isinstance(stmt.expr, ast.Assignment):
1744
+ if isinstance(stmt.expr.target, ast.Identifier):
1745
+ return stmt.expr.target.name
1746
+ if isinstance(stmt, ast.VarDecl):
1747
+ return stmt.name
1748
+ return "_loop_state"
1749
+
1750
def _extract_loop_target(self, condition: ast.Expr) -> str:
    """Return Python source for the target vector implied by a loop condition.

    When the condition is a binary operation and one operand (left is
    checked before right) is a direct call to `similarity` or `Cosine`
    with at least two arguments, the call's second argument is
    translated and returned, e.g. `similarity(current, target) < 0.9`
    yields the source for `target`. Any other condition is translated
    whole and returned as the target expression.
    """
    if isinstance(condition, ast.BinaryOp):
        for side_name in ("left", "right"):
            operand = getattr(condition, side_name)
            if not isinstance(operand, ast.Call):
                continue
            callee = operand.callee
            if (isinstance(callee, ast.Identifier)
                    and callee.name in ("similarity", "Cosine")
                    and len(operand.args) >= 2):
                return self._translate_expr(operand.args[1])
    # No recognisable similarity call: the condition itself is taken
    # to be the expression producing the target vector.
    return self._translate_expr(condition)
1778
+
1779
+ def _extract_for_bound(self, condition) -> int:
1780
+ """Extract the iteration count from a for-loop condition.
1781
+
1782
+ Handles `i < N` where N is an integer literal.
1783
+ Returns 20 as default if the bound can't be extracted.
1784
+ """
1785
+ if condition is None:
1786
+ return 20
1787
+ if isinstance(condition, ast.BinaryOp) and condition.op == "<":
1788
+ if isinstance(condition.right, ast.IntLiteral):
1789
+ return condition.right.value
1790
+ return 20
1791
+
1792
+ # -- expressions ------------------------------------------------------
1793
+
1794
def _char_literal_src(self, expr: ast.CharLiteral) -> str:
    """Backend hook: produce Python source for a character literal.

    This base implementation has no lowering and always raises
    `CodegenNotSupported` against `expr`; per the message, concrete
    backends are expected to override it.
    """
    reason = (
        "character literals require a number-axis runtime (extended-state "
        "layout) — overridden by the concrete backends"
    )
    raise CodegenNotSupported(expr, reason)
1806
+
1807
def _unknown_literal_src(self, expr: ast.UnknownLiteral) -> str:
    """Backend hook: produce Python source for the `unknown` keyword.

    This base implementation always raises `CodegenNotSupported`
    against `expr`. The original notes say concrete backends override
    this to emit `_VSA.make_truth(0.0)` (the truth-axis neutral value).
    """
    reason = (
        "`unknown` requires a truth-axis runtime (extended-state "
        "layout) — overridden by the concrete backends"
    )
    raise CodegenNotSupported(expr, reason)
1819
+
1820
def _imaginary_literal_src(self, expr: ast.ImaginaryLiteral) -> str:
    """Backend hook: produce Python source for a `5i`-style literal.

    This base implementation always raises `CodegenNotSupported`
    against `expr`. The original notes say concrete backends override
    this to emit `_VSA.make_complex(0.0, magnitude)`.
    """
    reason = (
        "imaginary literals require a complex-plane runtime (extended-state "
        "layout) — overridden by the concrete backends"
    )
    raise CodegenNotSupported(expr, reason)
1832
+
1833
def _complex_literal_src(self, expr: ast.ComplexLiteral) -> str:
    """Backend hook: produce Python source for a `ComplexLiteral(re, im)`.

    Per the original notes, these nodes only come from the simplifier
    folding `N + Mi` / `N - Mi`. This base implementation always
    raises `CodegenNotSupported` against `expr`.
    """
    reason = (
        "complex literals require a complex-plane runtime (extended-state "
        "layout) — overridden by the concrete backends"
    )
    raise CodegenNotSupported(expr, reason)
1844
+
1845
+ def _bool_literal_src(self, expr: ast.BoolLiteral) -> str:
1846
+ """Override point for `true` / `false` lowering.
1847
+
1848
+ Base emits the Python literals directly. Concrete backends
1849
+ override to emit `_VSA.make_truth(±1.0)` so the entire runtime
1850
+ is vector-native (no Python-bool / vector split).
1851
+ """
1852
+ return "True" if expr.value else "False"
1853
+
1854
def _logical_op_src(self, expr: ast.BinaryOp, op: str,
                    left_src: str, right_src: str) -> str:
    """Backend hook: produce Python source for `&&` / `||`.

    `op` is the name the dispatcher passes ("and" / "or"); `left_src`
    and `right_src` are the already-translated operands. The base
    implementation deliberately does not fall back to Python
    `and`/`or` and always raises `CodegenNotSupported` against `expr`.
    """
    reason = (
        f"logical `{op}` requires a truth-axis runtime "
        + "(extended-state layout) — overridden by the concrete backends"
    )
    raise CodegenNotSupported(expr, reason)
1868
+
1869
def _logical_not_src(self, expr: ast.UnaryOp, operand_src: str) -> str:
    """Backend hook: produce Python source for logical `!`.

    `operand_src` is the already-translated operand. The base
    implementation always raises `CodegenNotSupported` against `expr`;
    the message names truth-axis negation as the intended lowering.
    (The original notes record that an earlier permutation-based NOT
    was retired.)
    """
    reason = (
        "source-level `!` is not yet lowered by the V1 codegen base; the "
        "spec-aligned lowering is truth-axis negation, implemented by the "
        "concrete backends"
    )
    raise CodegenNotSupported(expr, reason)
1883
+
1884
def _equality_src(self, expr: ast.BinaryOp, op: str,
                  left_src: str, right_src: str) -> str:
    """Backend hook: produce Python source for `==` / `!=`.

    `op` is "eq" for `==`; any other value is reported as `!=` in the
    diagnostic. The base implementation refuses rather than emitting a
    plain Python `==`, and always raises `CodegenNotSupported` against
    `expr`.
    """
    symbol = "==" if op == "eq" else "!="
    reason = (
        f"source-level `{symbol}` on vectors is not supported by this "
        "backend; the spec-aligned lowering is cosine-similarity on the "
        "truth axis (numpy / pytorch only)"
    )
    raise CodegenNotSupported(expr, reason)
1901
+
1902
def _is_complex_expr(self, expr: ast.Expr) -> bool:
    """Report whether `expr` is recognisably complex-valued at compile time.

    True for complex / imaginary literals, identifiers whose entry in
    `self._var_type` is "complex", and for parenthesised, unary `+`/`-`
    or binary `+`/`-`/`*` expressions built on such operands (a binary
    expression counts if either side does). Everything else is False,
    which the `*` dispatch treats as "not complex".
    """
    if isinstance(expr, (ast.ComplexLiteral, ast.ImaginaryLiteral)):
        return True
    if isinstance(expr, ast.Identifier):
        declared = self._var_type.get(expr.name)
        return declared == "complex"
    if isinstance(expr, ast.Parenthesized):
        return self._is_complex_expr(expr.inner)
    # One complex operand makes the whole arithmetic expression complex.
    if isinstance(expr, ast.BinaryOp) and expr.op in ("+", "-", "*"):
        return any(
            self._is_complex_expr(getattr(expr, side))
            for side in ("left", "right")
        )
    if isinstance(expr, ast.UnaryOp) and expr.op in ("-", "+"):
        return self._is_complex_expr(expr.operand)
    return False
1925
+
1926
# Type names (as stored in `self._var_type`) that `_is_truth_expr`
# accepts as truth-axis values.
_TRUTH_TYPES = frozenset({"bool", "fuzzy", "trit"})
# Type names (as stored in `self._var_type`) that `_is_number_expr`
# accepts as number-axis values.
_NUMBER_TYPES = frozenset({"int", "float", "complex", "scalar", "char"})
1928
+
1929
def _is_number_expr(self, expr: ast.Expr) -> bool:
    """Report whether `expr` is recognisably a number-axis value.

    The ordered-comparison dispatch uses this to decide whether to go
    through the `_comparison_src` hook. True for int / float /
    imaginary / complex / char literals, identifiers whose
    `self._var_type` entry is in `_NUMBER_TYPES`, and parenthesised,
    unary `+`/`-` or binary `+ - * / %` expressions built on such
    operands (either side suffices for a binary expression).
    Anything else is False.
    """
    numeric_literals = (
        ast.IntLiteral, ast.FloatLiteral, ast.ImaginaryLiteral,
        ast.ComplexLiteral, ast.CharLiteral,
    )
    if isinstance(expr, numeric_literals):
        return True
    if isinstance(expr, ast.Identifier):
        declared = self._var_type.get(expr.name)
        return declared in self._NUMBER_TYPES
    if isinstance(expr, ast.Parenthesized):
        return self._is_number_expr(expr.inner)
    if isinstance(expr, ast.BinaryOp) and expr.op in ("+", "-", "*", "/", "%"):
        return any(
            self._is_number_expr(getattr(expr, side))
            for side in ("left", "right")
        )
    if isinstance(expr, ast.UnaryOp) and expr.op in ("-", "+"):
        return self._is_number_expr(expr.operand)
    return False
1952
+
1953
def _is_truth_expr(self, expr: ast.Expr) -> bool:
    """Report whether `expr` is recognisably a truth-axis value.

    The ordered-comparison dispatch in `_translate_expr` uses this to
    reject `<` / `>` / `<=` / `>=` on truth-family operands. True for
    bool literals, `unknown`, identifiers whose `self._var_type` entry
    is in `_TRUTH_TYPES`, any logical / equality / ordered-comparison
    binary operation, logical `!`, and parenthesised, unary `+`/`-` or
    binary `+`/`-` expressions built on truth-axis operands. Anything
    else is False.
    """
    if isinstance(expr, (ast.BoolLiteral, ast.UnknownLiteral)):
        return True
    if isinstance(expr, ast.Identifier):
        declared = self._var_type.get(expr.name)
        return declared in self._TRUTH_TYPES
    if isinstance(expr, ast.Parenthesized):
        return self._is_truth_expr(expr.inner)
    if isinstance(expr, ast.BinaryOp):
        # These operators always produce a truth-axis result.
        if expr.op in ("&&", "||", "==", "!=", "<", ">", "<=", ">="):
            return True
        # `fuzzy_a - fuzzy_b` and the like stay on the truth axis.
        if expr.op in ("+", "-"):
            return any(
                self._is_truth_expr(getattr(expr, side))
                for side in ("left", "right")
            )
    if isinstance(expr, ast.UnaryOp):
        if expr.op == "!":
            return True
        if expr.op in ("-", "+"):
            return self._is_truth_expr(expr.operand)
    return False
1984
+
1985
def _complex_mul_src(self, expr: ast.BinaryOp,
                     left_src: str, right_src: str) -> str:
    """Backend hook: produce Python source for `*` with a complex operand.

    Called by the `*` dispatch when either operand is recognised as
    complex; `left_src` / `right_src` are the translated operands. The
    base implementation always raises `CodegenNotSupported` against
    `expr`.
    """
    reason = (
        "complex multiplication is not supported by this backend (no "
        "real/imag-axis runtime); use the numpy or pytorch backend"
    )
    raise CodegenNotSupported(expr, reason)
1998
+
1999
def _comparison_src(self, expr: ast.BinaryOp, op: str,
                    left_src: str, right_src: str) -> str:
    """Backend hook: produce Python source for `<` / `>` / `<=` / `>=`.

    `op` is the name `_translate_expr` passes for the operator: "gt"
    for `>`, "lt" for `<`, "ge" for `>=`, "le" for `<=`. `left_src`
    and `right_src` are the translated operands. The base
    implementation always raises `CodegenNotSupported` against `expr`,
    quoting the source operator (`expr.op`) in the message.
    """
    reason = (
        f"ordered comparison `{expr.op}` is not supported by this "
        + "backend; use the numpy or pytorch backend"
    )
    raise CodegenNotSupported(expr, reason)
2015
+
2016
def _translate_expr(self, expr: ast.Expr, *, map_key_type: str | None = None) -> str:
    """Translate one expression node into Python source text.

    Dispatches on the node's type. String / int / float literals are
    emitted via `repr`; char, imaginary, complex, bool and `unknown`
    literals, logical / equality / ordered-comparison operators,
    complex multiplication, `embed(...)` and `defuzzy(...)` all go
    through the per-backend `_*_src` hook methods. The order of the
    checks matters (e.g. the `iterator` / `element` keywords are
    handled before slot variables, and `==` before the `*` dispatch).

    `map_key_type` only affects a map literal passed directly as
    `expr`: when it is "vector" the literal is emitted as a list of
    `(key, value)` tuples instead of a Python dict literal.

    Raises `CodegenNotSupported` for node types with no translation,
    for `iterator` / `element` used outside their loop contexts, and
    for ordered comparisons on truth-axis operands.
    """
    if isinstance(expr, ast.StringLiteral):
        return repr(expr.value)
    if isinstance(expr, ast.IntLiteral):
        return repr(expr.value)
    if isinstance(expr, ast.FloatLiteral):
        return repr(expr.value)
    if isinstance(expr, ast.CharLiteral):
        return self._char_literal_src(expr)
    if isinstance(expr, ast.ImaginaryLiteral):
        return self._imaginary_literal_src(expr)
    if isinstance(expr, ast.ComplexLiteral):
        return self._complex_literal_src(expr)
    if isinstance(expr, ast.BoolLiteral):
        return self._bool_literal_src(expr)
    if isinstance(expr, ast.UnknownLiteral):
        return self._unknown_literal_src(expr)
    if isinstance(expr, ast.Identifier):
        # `iterator`: contextual keyword inside an unrolling
        # `loop (N) { ... }` body. The bounded-loop translator
        # sets self._iterator_value to the current iteration's
        # constant (1..N) before translating each copy of the
        # body; here we substitute the literal. Outside an
        # unrolling context, the reference is a compile error.
        if expr.name == "iterator":
            # Two contexts where `iterator` is meaningful:
            #   1. Compile-time-unrolled `loop (N) { ... }`: substitute
            #      the literal int (1..N) — handled via _iterator_value.
            #   2. Runtime `iterative_loop NAME(N, ...) { ... }`: refer
            #      to the Python local `_iterator` (1-indexed tick count
            #      set by the cell). Handled via _iterator_runtime_in_scope.
            # The runtime form is checked first, so it wins when both
            # contexts are active.
            if self._iterator_runtime_in_scope:
                return "_iterator"
            if self._iterator_value is None:
                raise CodegenNotSupported(
                    expr,
                    "`iterator` is only valid inside an unrolling "
                    "`loop (N) { ... }` body or an `iterative_loop` "
                    "function body. Use `loop (N as i)` and reference "
                    "`i` for the compile-time named-index form.",
                )
            return repr(self._iterator_value)
        if expr.name == "element":
            # Contextual: only valid inside a foreach_loop function
            # body. Refers to the current array element on this tick
            # — the Python local `_element` set by the cell via
            # `_VSA.array_get(arr_param, _t)`.
            if not self._element_runtime_in_scope:
                raise CodegenNotSupported(
                    expr,
                    "`element` is only valid inside a `foreach_loop` "
                    "function body, where it binds to the current "
                    "array element each tick.",
                )
            return "_element"
        # If this identifier names a slot-bound variable, emit
        # the slot_load call instead of a bare name reference.
        # The slot table is per-function-scope.
        if expr.name in self._slot_vars:
            idx = self._slot_vars[expr.name]
            return f"_VSA.slot_load(_slot_state, {idx})"
        # Ordinary identifier: emitted under its own name.
        return expr.name
    if isinstance(expr, ast.Parenthesized):
        return f"({self._translate_expr(expr.inner)})"
    if isinstance(expr, ast.ArrayLiteral):
        inner = ", ".join(self._translate_expr(e) for e in expr.elements)
        return f"[{inner}]"
    if isinstance(expr, ast.MapLiteral):
        if map_key_type == "vector":
            # Vector keys: emit a list of (key, value) tuples rather
            # than a dict literal.
            pairs = ", ".join(
                f"({self._translate_expr(k)}, {self._translate_expr(v)})"
                for k, v in zip(expr.keys, expr.values)
            )
            return f"[{pairs}]"
        # Non-vector keys: real Python dict.
        pairs = ", ".join(
            f"{self._translate_expr(k)}: {self._translate_expr(v)}"
            for k, v in zip(expr.keys, expr.values)
        )
        return "{" + pairs + "}"
    if isinstance(expr, ast.Subscript):
        target_src = self._translate_expr(expr.target)
        index_src = self._translate_expr(expr.index)
        # dict<K, V> subscripts route through the rotation-hashmap.
        if (isinstance(expr.target, ast.Identifier)
                and expr.target.name in self._dict_declared):
            return f"_VSA.hashmap_get({target_src}, {index_src})"
        # Vector-keyed map lookups route through the identity-first helper.
        if (isinstance(expr.target, ast.Identifier)
                and self._map_key_type.get(expr.target.name) == "vector"):
            return f"_vector_map_lookup({target_src}, {index_src})"
        return f"{target_src}[{index_src}]"
    if isinstance(expr, ast.Call):
        return self._translate_call(expr)
    if isinstance(expr, ast.BinaryOp):
        # Both operands are translated up front, before any operator
        # dispatch, so errors inside an operand surface first.
        left = self._translate_expr(expr.left)
        right = self._translate_expr(expr.right)
        # Logical operators dispatch through the substrate so they
        # work uniformly on bool / fuzzy / trit / truth-axis-vector
        # inputs. Zadeh fuzzy logic — min for AND, max for OR — on
        # the truth axis. See _logical_op_src for the override hook.
        if expr.op == "&&":
            return self._logical_op_src(expr, "and", left, right)
        if expr.op == "||":
            return self._logical_op_src(expr, "or", left, right)
        # Vector equality / inequality — cosine similarity projected
        # onto the truth axis. `a == b` returns a truth-axis vector
        # (a fuzzy), not a Python bool. Hook dispatched so backends
        # without a truth-axis runtime can refuse instead of emitting
        # Python == which does the wrong thing on numpy arrays.
        if expr.op == "==":
            return self._equality_src(expr, "eq", left, right)
        if expr.op == "!=":
            return self._equality_src(expr, "neq", left, right)
        # Complex multiplication dispatch: if either operand is
        # provably a complex-plane value (literal or complex-typed
        # variable), route `*` through the substrate's complex_mul
        # rather than element-wise Python multiply. Real-only
        # multiplication (int * int, float * float) stays on the
        # Python scalar fast path — there's no need to box scalars
        # into d-dim vectors to compute 5 * 3.
        if expr.op == "*" and (self._is_complex_expr(expr.left)
                               or self._is_complex_expr(expr.right)):
            return self._complex_mul_src(expr, left, right)
        # Ordered comparison `>` / `<` / `>=` / `<=` is number-axis
        # only. Strict (>, <) give -1 on ties; non-strict (>=, <=)
        # give +1 on ties. Four distinct runtime methods — gt / lt
        # for strict, ge / le for non-strict. Truth-family operands
        # are rejected at compile time; plain Python scalars fall
        # through to Python's own comparison (which is fine for
        # int / float).
        _CMP_OP_NAMES = {">": "gt", "<": "lt", ">=": "ge", "<=": "le"}
        if expr.op in _CMP_OP_NAMES:
            # The truth-axis rejection is checked before the
            # number-axis routing.
            if (self._is_truth_expr(expr.left)
                    or self._is_truth_expr(expr.right)):
                raise CodegenNotSupported(
                    expr,
                    f"ordered comparison `{expr.op}` is not defined "
                    "on truth-axis values (bool / fuzzy / trit); "
                    "comparison is a number-axis operation. "
                    "Override the operator on a custom class if you "
                    "need comparison semantics for a truth-family type."
                )
            if (self._is_number_expr(expr.left)
                    or self._is_number_expr(expr.right)):
                return self._comparison_src(
                    expr, _CMP_OP_NAMES[expr.op], left, right
                )
        # Everything else: the operator is emitted verbatim between
        # the translated operands.
        return f"({left} {expr.op} {right})"
    if isinstance(expr, ast.UnaryOp):
        if expr.op == "!":
            return self._logical_not_src(expr, self._translate_expr(expr.operand))
        return f"({expr.op}{self._translate_expr(expr.operand)})"
    if isinstance(expr, ast.ThisExpr):
        return "this"
    if isinstance(expr, ast.MemberAccess):
        return f"{self._translate_expr(expr.obj)}.{expr.member}"
    if isinstance(expr, ast.EmbedExpr):
        return self._embed_expr_src(expr)
    if isinstance(expr, ast.DefuzzyExpr):
        return self._defuzzy_expr_src(expr)
    raise CodegenNotSupported(
        expr, f"unsupported expression: {type(expr).__name__}"
    )
2180
+
2181
def _embed_expr_src(self, expr: ast.EmbedExpr) -> str:
    """Backend hook: produce Python source for `embed(<expr>)`.

    This base implementation always raises `CodegenNotSupported`
    against `expr`. The original notes say concrete backends override
    this to emit `_VSA.embed(<inner>)`.
    """
    reason = (
        "embed(...) requires a frozen-LLM embedding runtime — overridden "
        "by the concrete backends"
    )
    raise CodegenNotSupported(expr, reason)
2192
+
2193
def _defuzzy_expr_src(self, expr: ast.DefuzzyExpr) -> str:
    """Backend hook: produce Python source for `defuzzy(<expr>)`.

    This base implementation always raises `CodegenNotSupported`
    against `expr`. The original notes say concrete backends override
    this to emit `_VSA.defuzzify(<inner>)`, which projects onto the
    truth axis and then iterates `eq(., true)` N times (default 10).
    """
    reason = (
        "defuzzy(...) requires a truth-axis runtime (extended-state "
        "layout) — overridden by the concrete backends"
    )
    raise CodegenNotSupported(expr, reason)
2207
+
2208
def _translate_call(self, call: ast.Call) -> str:
    """Lower a call expression to Python source text.

    Dispatch is first-match-wins, in the order below.

    Bare-identifier callee (`f(...)`):
      1. `bundle(...)` with two or more args that are all direct
         `bind(role, filler)` calls -> fused
         `_VSA.bundle_of_binds((role, filler), ...)`.
      2. `hasOrder` / `hasOrderOrEqual` with any call-valued argument
         -> rejected as reserved syntax.
      3. Name in `BUILTINS` -> arity check, then the table's emitter.
      4. Name in `_TRANSCENDENTALS_DISABLED` -> rejected.
      5. Stdlib intrinsic name -> `_VSA.<name>(...)`.
      6. Anything else -> plain `<name>(...)` (user-defined function).

    Member-access callee (`obj.member(...)`):
      1. `this.m(...)` while a class body is being emitted: instance
         method -> `Class_m(this, ...)`; static method ->
         `_VSA.m(...)` if also intrinsic, else `Class_m(...)`.
      2. `add` / `item` on an identifier listed in `_axon_declared`
         -> `_VSA.axon_add` / `_VSA.axon_item` with the receiver as
         first argument.
      3. `string_length` / `string_char_at` / `is_string` on any
         receiver expression -> `_VSA.<member>(receiver, ...)`.
      4. Identifier receiver with a recorded type in `_var_type`:
         intrinsic -> `_VSA.m(obj, ...)`; instance method ->
         `Type_m(obj, ...)`.
      5. Identifier receiver treated as a class name: intrinsic ->
         `_VSA.m(...)`; static or instance method -> `Class_m(...)`
         (for the instance form the caller passes the instance as
         the first argument, which becomes `this`).
      6. Otherwise the generic `<callee>(...)` form.

    Raises:
        CodegenNotSupported: for the rejected cases above, for a
            builtin arity mismatch, and for any callee that is
            neither an identifier nor a member access.
    """
    target = call.callee

    def lowered_args() -> List[str]:
        # Translated on demand (not up front) so argument
        # sub-expressions are only visited on the path that actually
        # emits them, and in the same order relative to other
        # translations as each dispatch arm requires.
        return [self._translate_expr(a) for a in call.args]

    def emit(fn_src: str, *leading: str) -> str:
        # Render `fn_src(leading..., translated args...)`.
        return f"{fn_src}({', '.join([*leading, *lowered_args()])})"

    def declares(table, owner, method) -> bool:
        # True when `table` maps `owner` to a collection holding `method`.
        return owner in table and method in table[owner]

    if isinstance(target, ast.Identifier):
        fn = target.name

        # Fused bundle-of-binds lowering.
        if (fn == "bundle"
                and len(call.args) >= 2
                and all(_is_bind_call(a) for a in call.args)):
            pairs = [
                f"({self._translate_expr(bound.args[0])}, "
                f"{self._translate_expr(bound.args[1])})"
                for bound in call.args
            ]
            return f"_VSA.bundle_of_binds({', '.join(pairs)})"

        # Mixed equality/ordering chains are parsed but not lowered yet.
        if (fn in ("hasOrder", "hasOrderOrEqual")
                and any(isinstance(a, ast.Call) for a in call.args)):
            raise CodegenNotSupported(
                call,
                f"`{fn}(...)` with a nested `Equals(...)` "
                "group arg is reserved syntax — produced by "
                "the parser for source like `a == b > c == "
                "d > e` (equality groups separated by "
                "ordering). The expansion (chain-AND with "
                "internal group equality plus cross-group "
                "ordering) is not yet wired in codegen. "
                "For now, rewrite the comparison without "
                "the mixed `==` and ordering pattern (e.g. "
                "as separate AND-joined comparisons).",
            )

        # Table-driven builtins; arity is validated before any
        # argument is translated.
        if fn in BUILTINS:
            emitter, arity = BUILTINS[fn]
            if not (arity is None or len(call.args) == arity):
                raise CodegenNotSupported(
                    call,
                    f"builtin `{fn}` expects {arity} argument(s), "
                    f"got {len(call.args)}",
                )
            return emitter(lowered_args())

        if fn in _TRANSCENDENTALS_DISABLED:
            raise CodegenNotSupported(
                call,
                f"transcendental intrinsic `{fn}` is not implemented. "
                "The 2026-04-29 implementation was withdrawn 2026-04-30 "
                "because it ran as host Python scalar arithmetic at "
                "runtime, violating the substrate-purity contract. "
                "See `sdk/sutra-compiler/sutra_compiler/stdlib/math.su` "
                "and `planning/findings/2026-04-30-runtime-substrate-purity-audit.md` "
                "for the rationale and the eigenrotation-as-modulus future direction.",
            )

        # Stdlib intrinsics go to the runtime class; a bare identifier
        # call would not resolve in the emitted Python.
        from .stdlib_loader import intrinsic_names
        if fn in intrinsic_names():
            return emit(f"_VSA.{fn}")

        # User-defined function: emitted as a plain Python call.
        return emit(fn)

    if isinstance(target, ast.MemberAccess):
        receiver = target.obj
        member = target.member

        # `this.method(...)` inside a class method body; the class
        # being emitted is tracked in `_current_class_name`.
        if (isinstance(receiver, ast.ThisExpr)
                and self._current_class_name is not None):
            owner = self._current_class_name
            if declares(self._class_instance_methods, owner, member):
                return emit(f"{owner}_{member}", "this")
            if declares(self._class_static_methods, owner, member):
                # Static methods do not take `this`; intrinsic-marked
                # ones go straight to the runtime.
                if declares(self._class_intrinsic_methods, owner, member):
                    return emit(f"_VSA.{member}")
                return emit(f"{owner}_{member}")

        # Axon-typed local: `a.add(k, v)` / `a.item(k)` become runtime
        # calls with `a` as the first argument. This expression path
        # always emits the call as a value.
        if (isinstance(receiver, ast.Identifier)
                and receiver.name in self._axon_declared):
            axon_fn = {"add": "axon_add", "item": "axon_item"}.get(member)
            if axon_fn is not None:
                return emit(f"_VSA.{axon_fn}", receiver.name)

        # String runtime methods apply to any receiver expression,
        # typed local or not. The receiver is translated before the
        # arguments.
        if member in {"string_length", "string_char_at", "is_string"}:
            return emit(f"_VSA.{member}", self._translate_expr(receiver))

        if isinstance(receiver, ast.Identifier):
            ident = receiver.name

            # Typed local: dispatch on its recorded class.
            if ident in self._var_type:
                declared = self._var_type[ident]
                if declares(self._class_intrinsic_methods, declared, member):
                    return emit(f"_VSA.{member}", ident)
                if declares(self._class_instance_methods, declared, member):
                    return emit(f"{declared}_{member}", ident)

            # Class-namespace call, e.g. `Math.log(x)`. Intrinsic
            # methods have no mangled wrapper and go to the runtime.
            if declares(self._class_intrinsic_methods, ident, member):
                return emit(f"_VSA.{member}")
            # Static methods and instance methods invoked through the
            # class name both lower to the mangled function; in the
            # instance form the first source argument is the instance.
            if (declares(self._class_static_methods, ident, member)
                    or declares(self._class_instance_methods, ident, member)):
                return emit(f"{ident}_{member}")

        # Generic `obj.member(args)`: arguments are translated first,
        # then the callee expression.
        arg_text = ", ".join(lowered_args())
        return f"{self._translate_expr(target)}({arg_text})"

    raise CodegenNotSupported(
        call, f"unsupported callee expression: {type(target).__name__}"
    )
2395
+
2396
+
2397
+ # ---------------------------------------------------------------------
2398
+ # Helpers
2399
+ # ---------------------------------------------------------------------
2400
+
2401
def _has_slot_decl(block: ast.Block) -> bool:
    """Report whether `block` contains a `slot TYPE name [= expr];` decl.

    Used by `_translate_function_decl` to decide whether to emit the
    `_slot_state = _VSA.zero_vector()` initializer at the top of the
    function body.

    The scan covers the block's own statement list and descends into
    nested containers: both branches of an `if`, nested blocks, and
    the bodies of loop / while statements. A slot declaration inside
    a branch or loop therefore still triggers the initializer.

    A `None` block is accepted and reports False.
    """
    if block is None:
        return False

    for node in block.statements:
        # Direct hit: a variable declaration flagged as a slot.
        if isinstance(node, ast.VarDecl) and node.is_slot:
            return True

        # Otherwise collect the nested bodies this statement owns.
        if isinstance(node, ast.IfStmt):
            nested = [node.then_branch]
            if node.else_branch is not None:
                nested.append(node.else_branch)
        elif isinstance(node, ast.Block):
            nested = [node]
        elif isinstance(node, (ast.LoopStmt, ast.WhileStmt)):
            nested = [node.body]
        else:
            continue

        if any(_has_slot_decl(inner) for inner in nested):
            return True

    return False
2435
+
2436
+