sutra-dev 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sutra_compiler/__init__.py +49 -0
- sutra_compiler/__main__.py +514 -0
- sutra_compiler/ast_nodes.py +553 -0
- sutra_compiler/codegen.py +1811 -0
- sutra_compiler/codegen_base.py +2436 -0
- sutra_compiler/codegen_pytorch.py +1472 -0
- sutra_compiler/diagnostics.py +145 -0
- sutra_compiler/inliner.py +581 -0
- sutra_compiler/lexer.py +821 -0
- sutra_compiler/parser.py +2112 -0
- sutra_compiler/review.py +322 -0
- sutra_compiler/simplify.py +1046 -0
- sutra_compiler/simplify_egglog.py +674 -0
- sutra_compiler/stdlib/axons.su +53 -0
- sutra_compiler/stdlib/embed.su +48 -0
- sutra_compiler/stdlib/javascript_object.su +18 -0
- sutra_compiler/stdlib/logic.su +202 -0
- sutra_compiler/stdlib/math.su +12 -0
- sutra_compiler/stdlib/memory.su +82 -0
- sutra_compiler/stdlib/numbers.su +99 -0
- sutra_compiler/stdlib/rotation.su +83 -0
- sutra_compiler/stdlib/similarity.su +97 -0
- sutra_compiler/stdlib/strings.su +56 -0
- sutra_compiler/stdlib/tensor.su +82 -0
- sutra_compiler/stdlib/vectors.su +119 -0
- sutra_compiler/stdlib_loader.py +219 -0
- sutra_compiler/sutradb_embedded.py +273 -0
- sutra_compiler/trace.py +135 -0
- sutra_compiler/validator.py +552 -0
- sutra_compiler/workspace.py +655 -0
- sutra_dev-0.2.0.dist-info/METADATA +80 -0
- sutra_dev-0.2.0.dist-info/RECORD +36 -0
- sutra_dev-0.2.0.dist-info/WHEEL +5 -0
- sutra_dev-0.2.0.dist-info/entry_points.txt +2 -0
- sutra_dev-0.2.0.dist-info/licenses/LICENSE +201 -0
- sutra_dev-0.2.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1811 @@
|
|
|
1
|
+
"""AST -> Python source translator — DEPRECATED numpy backend.
|
|
2
|
+
|
|
3
|
+
**STATUS: DEPRECATED.** PyTorch is the canonical codegen target.
|
|
4
|
+
This file emits a `_NumpyVSA` runtime class and is retained only
|
|
5
|
+
as an emit-shape reference for the tests; new code should use
|
|
6
|
+
`codegen_pytorch.PyTorchCodegen`.
|
|
7
|
+
|
|
8
|
+
It still provides:
|
|
9
|
+
|
|
10
|
+
- The literal-lowering hooks (`_char_literal_src`, `_embed_expr_src`,
|
|
11
|
+
`_bool_literal_src`, `_equality_src`, `_complex_mul_src`, etc.)
|
|
12
|
+
that `PyTorchCodegen` inherits from. These are backend-agnostic
|
|
13
|
+
(they emit `_VSA.X(...)` calls; both runtime classes implement
|
|
14
|
+
the same method names).
|
|
15
|
+
- The `_emit_prelude` numpy runtime emit, which a few tests
|
|
16
|
+
(`test_codegen.py`, `test_inliner.py`) still assert against for
|
|
17
|
+
emit-shape verification.
|
|
18
|
+
- `_translate_eigenrotation_loop` (numpy-specific).
|
|
19
|
+
|
|
20
|
+
**Migration path** (queue item 6): move literal hooks into
|
|
21
|
+
`BaseCodegen`, make `PyTorchCodegen` extend `BaseCodegen` directly,
|
|
22
|
+
then delete this file. Tests that assert on numpy-specific emit
|
|
23
|
+
shapes either move to PyTorch-equivalent assertions or get retired.
|
|
24
|
+
|
|
25
|
+
**For new test code:** import `PyTorchCodegen` from
|
|
26
|
+
`codegen_pytorch` and use `cg.translate(module)`. The runtime
|
|
27
|
+
emit (the `_TorchVSA` class) is the same shape as `_NumpyVSA`
|
|
28
|
+
with torch tensors instead of numpy ndarrays.
|
|
29
|
+
|
|
30
|
+
`snap` is not supported here (this substrate has no cleanup circuit).
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
from __future__ import annotations
|
|
34
|
+
|
|
35
|
+
from typing import List
|
|
36
|
+
|
|
37
|
+
from . import ast_nodes as ast
|
|
38
|
+
from .codegen_base import BaseCodegen, CodegenNotSupported
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class Codegen(BaseCodegen):
|
|
42
|
+
"""Emits a self-contained Sutra module against the default runtime.
|
|
43
|
+
|
|
44
|
+
Overrides the prelude and rejects `snap()` at codegen time. Everything
|
|
45
|
+
else (function bodies, bind/bundle/unbind/similarity/argmax_cosine,
|
|
46
|
+
map lookup, loop unrolling) is inherited unchanged.
|
|
47
|
+
"""
|
|
48
|
+
|
|
49
|
+
# Frozen-LLM substrate. The numpy backend runs on frozen LLM
|
|
50
|
+
# embeddings via Ollama — no random-vector fallback. If Ollama is
|
|
51
|
+
# unavailable or the model is missing, compiled programs raise.
|
|
52
|
+
# Default model: nomic-embed-text (768-dim). mxbai-embed-large
|
|
53
|
+
# has a documented attention-sink defect on diacritics and is
|
|
54
|
+
# used in the paper as a known-broken baseline rather than a
|
|
55
|
+
# default substrate.
|
|
56
|
+
DEFAULT_LLM_MODEL = "nomic-embed-text"
|
|
57
|
+
DEFAULT_LLM_DIM = 768
|
|
58
|
+
DEFAULT_SYNTHETIC_DIM = 100
|
|
59
|
+
|
|
60
|
+
def __init__(self, *, runtime_dim: int | None = None,
             runtime_seed: int = 42,
             llm_model: str | None = None,
             synthetic_dim: int | None = None,
             loop_max_iterations: int = 50) -> None:
    """Configure the numpy code generator.

    Any argument left as ``None`` is replaced by the matching
    class-level ``DEFAULT_*`` constant.

    Args:
        runtime_dim: Size of the semantic block (the part the LLM
            embedding fills). Defaults to ``DEFAULT_LLM_DIM``.
        runtime_seed: Forwarded unchanged to the base class.
        llm_model: Embedding model name. Defaults to
            ``DEFAULT_LLM_MODEL``.
        synthetic_dim: Size of the synthetic block appended after the
            semantic block. Defaults to ``DEFAULT_SYNTHETIC_DIM``.
        loop_max_iterations: Forwarded unchanged to the base class.
    """
    semantic = self.DEFAULT_LLM_DIM if runtime_dim is None else runtime_dim
    synthetic = (self.DEFAULT_SYNTHETIC_DIM if synthetic_dim is None
                 else synthetic_dim)
    model = self.DEFAULT_LLM_MODEL if llm_model is None else llm_model

    self._llm_model = model
    self._semantic_dim = semantic
    self._synthetic_dim = synthetic
    # Strings seen in `basis_vector("...")` calls. Starts empty here;
    # the original note says translate_module() fills it between the
    # simplify and codegen passes so one batched Ollama pre-fetch can
    # replace many sequential requests.
    self._prefetch_strings: list[str] = []

    # The base class is handed the TOTAL vector width (semantic plus
    # synthetic) under the name `runtime_dim`, with the hemibrain
    # options switched off.
    super().__init__(
        runtime_dim=semantic + synthetic,
        runtime_seed=runtime_seed,
        runtime_n_kc=0,
        runtime_use_hemibrain=False,
        loop_max_iterations=loop_max_iterations,
    )
|
|
89
|
+
|
|
90
|
+
# Ops not supported by the pure-numpy substrate. `snap` requires a
|
|
91
|
+
# cleanup circuit (MB spiking model or equivalent); rotation-based
|
|
92
|
+
# loop primitives need the same. These are spec'd ops without a
|
|
93
|
+
# runtime implementation here; programs that use them are rejected
|
|
94
|
+
# at codegen time.
|
|
95
|
+
_UNSUPPORTED_BUILTINS = frozenset({
|
|
96
|
+
"snap",
|
|
97
|
+
"make_rotation",
|
|
98
|
+
"compile_prototypes",
|
|
99
|
+
"geometric_loop",
|
|
100
|
+
})
|
|
101
|
+
|
|
102
|
+
def _char_literal_src(self, expr: ast.CharLiteral) -> str:
|
|
103
|
+
"""Lower `'a'` to a runtime make_char call with the code point."""
|
|
104
|
+
return f"_VSA.make_char({int(expr.value)})"
|
|
105
|
+
|
|
106
|
+
def _embed_expr_src(self, expr: ast.EmbedExpr) -> str:
|
|
107
|
+
"""Lower `embed(<inner>)` to a _VSA.embed runtime call.
|
|
108
|
+
|
|
109
|
+
Covers both explicit `embed("foo")` source-level calls and
|
|
110
|
+
implicit wrappings inserted by `_auto_embed_var_decl_init`
|
|
111
|
+
(`vector v = "foo"` → `vector v = embed("foo")`).
|
|
112
|
+
"""
|
|
113
|
+
inner_src = self._translate_expr(expr.expr)
|
|
114
|
+
return f"_VSA.embed({inner_src})"
|
|
115
|
+
|
|
116
|
+
def _defuzzy_expr_src(self, expr: ast.DefuzzyExpr) -> str:
|
|
117
|
+
"""Lower `defuzzy(<inner>)` by compile-time expansion of the
|
|
118
|
+
stdlib `defuzzy` body.
|
|
119
|
+
|
|
120
|
+
The canonical stdlib definition (stdlib/logic.su) is:
|
|
121
|
+
|
|
122
|
+
function fuzzy defuzzy(fuzzy v) {
|
|
123
|
+
loop (10) {
|
|
124
|
+
v = v == true;
|
|
125
|
+
}
|
|
126
|
+
return v;
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
We emit that unrolled inline as a nested expression: ten
|
|
130
|
+
`_VSA.eq(_, make_truth(1.0))` calls wrapping the truth-axis
|
|
131
|
+
projection of the input. Expressing it as one compound
|
|
132
|
+
expression lets the downstream fusion pass see the whole
|
|
133
|
+
chain — a runtime `for _ in range(10)` loop hides the
|
|
134
|
+
iteration from the compiler. When the fusion pass lands it
|
|
135
|
+
collapses this nested chain into a single cached matrix
|
|
136
|
+
applied in one matmul; until then it's ten eq calls
|
|
137
|
+
straight-line in the emitted Python.
|
|
138
|
+
"""
|
|
139
|
+
DEFUZZ_ITERS = 10
|
|
140
|
+
inner_src = self._translate_expr(expr.expr)
|
|
141
|
+
acc = (f"_VSA._truth_projector() @ "
|
|
142
|
+
f"_VSA._as_any_vector({inner_src})")
|
|
143
|
+
for _ in range(DEFUZZ_ITERS):
|
|
144
|
+
acc = f"_VSA.eq({acc}, _VSA.make_truth(1.0))"
|
|
145
|
+
return acc
|
|
146
|
+
|
|
147
|
+
def _unknown_literal_src(self, expr: ast.UnknownLiteral) -> str:
|
|
148
|
+
"""Lower `unknown` to the truth-axis neutral vector.
|
|
149
|
+
|
|
150
|
+
`unknown` is the explicit-neutrality literal — identical
|
|
151
|
+
runtime to `make_truth(0.0)` but named semantically. In a
|
|
152
|
+
trit-typed context the fold in _fuzzy_literal_init_src will
|
|
153
|
+
redirect through `make_trit(0.0)` for emitted-source
|
|
154
|
+
readability; in any other context this direct lowering is
|
|
155
|
+
used.
|
|
156
|
+
"""
|
|
157
|
+
return "_VSA.make_truth(0.0)"
|
|
158
|
+
|
|
159
|
+
def _imaginary_literal_src(self, expr: ast.ImaginaryLiteral) -> str:
|
|
160
|
+
"""Lower `5i` to `_VSA.make_complex(0.0, 5.0)`."""
|
|
161
|
+
return f"_VSA.make_complex(0.0, {float(expr.value)!r})"
|
|
162
|
+
|
|
163
|
+
def _bool_literal_src(self, expr: ast.BoolLiteral) -> str:
|
|
164
|
+
"""Lower `true` / `false` to truth-axis vectors unconditionally.
|
|
165
|
+
|
|
166
|
+
The base class emits Python `True` / `False`; numpy overrides
|
|
167
|
+
so the entire demo-path runtime operates on vectors, not on
|
|
168
|
+
Python bools. This is the prerequisite for the logical
|
|
169
|
+
operators being pure vector arithmetic — if `true` is a
|
|
170
|
+
Python bool there's no vector to operate on.
|
|
171
|
+
|
|
172
|
+
`true` → _VSA.make_truth( 1.0)
|
|
173
|
+
`false` → _VSA.make_truth(-1.0)
|
|
174
|
+
"""
|
|
175
|
+
return f"_VSA.make_truth({1.0 if expr.value else -1.0!r})"
|
|
176
|
+
|
|
177
|
+
def _logical_op_src(self, expr: ast.BinaryOp, op: str,
                    left_src: str, right_src: str) -> str:
    """Always raise: logical `&&` / `||` must not reach codegen.

    According to the original note, the operator-lowering pass in
    `inliner.py` rewrites these operators to stdlib `logical_and` /
    `logical_or` calls, which are then inlined. Reaching this hook
    therefore means that pass did not run, and failing loudly is
    preferred over emitting a call to a runtime method that no longer
    exists.

    Raises:
        CodegenNotSupported: unconditionally.
    """
    stdlib_fn = "logical_and" if op == "and" else "logical_or"
    message = (
        f"codegen saw a `{expr.op}` BinaryOp that the stdlib "
        "operator-lowering pass should have replaced with a "
        f"Call({stdlib_fn}, ...). "
        "Check that `inline_stdlib_calls` ran before codegen."
    )
    raise CodegenNotSupported(expr, message)
|
|
193
|
+
|
|
194
|
+
def _logical_not_src(self, expr: ast.UnaryOp, operand_src: str) -> str:
    """Always raise: logical `!` must not reach codegen.

    Same situation as the binary logical operators: per the original
    note, the stdlib operator-lowering pass is expected to have
    replaced the node with a `logical_not` call before this hook could
    fire.

    Raises:
        CodegenNotSupported: unconditionally.
    """
    message = (
        "codegen saw a `!` UnaryOp that the stdlib operator-lowering "
        "pass should have replaced with a Call(logical_not, ...). "
        "Check that `inline_stdlib_calls` ran before codegen."
    )
    raise CodegenNotSupported(expr, message)
|
|
203
|
+
|
|
204
|
+
def _equality_src(self, expr: ast.BinaryOp, op: str,
|
|
205
|
+
left_src: str, right_src: str) -> str:
|
|
206
|
+
"""Lower `==` / `!=` to _VSA.eq / _VSA.neq.
|
|
207
|
+
|
|
208
|
+
Vector cosine similarity projected onto the truth axis. The
|
|
209
|
+
runtime computes dot(a, b) / (||a|| · ||b||) via pure vector
|
|
210
|
+
arithmetic (element-wise multiplies, sums, sqrt), then places
|
|
211
|
+
the resulting scalar on the truth axis. Differentiable almost
|
|
212
|
+
everywhere; the only singularity is at a zero-norm input
|
|
213
|
+
which we guard with a truth=0 fallback.
|
|
214
|
+
"""
|
|
215
|
+
assert op in ("eq", "neq")
|
|
216
|
+
return f"_VSA.{op}({left_src}, {right_src})"
|
|
217
|
+
|
|
218
|
+
def _complex_mul_src(self, expr: ast.BinaryOp,
|
|
219
|
+
left_src: str, right_src: str) -> str:
|
|
220
|
+
"""Lower `complex * *` to _VSA.complex_mul.
|
|
221
|
+
|
|
222
|
+
The runtime reads the two relevant (real, imag) scalar pairs,
|
|
223
|
+
computes the complex product in 2D, and returns a fresh
|
|
224
|
+
make_complex vector. Scalar operands get auto-promoted via
|
|
225
|
+
make_real inside complex_mul, so `int_literal * complex_var`
|
|
226
|
+
and similar mixed forms work without additional codegen.
|
|
227
|
+
"""
|
|
228
|
+
return f"_VSA.complex_mul({left_src}, {right_src})"
|
|
229
|
+
|
|
230
|
+
def _comparison_src(self, expr: ast.BinaryOp, op: str,
|
|
231
|
+
left_src: str, right_src: str) -> str:
|
|
232
|
+
"""Lower `>` / `<` / `>=` / `<=` to _VSA.gt / _VSA.lt / _VSA.ge / _VSA.le.
|
|
233
|
+
|
|
234
|
+
All four runtime methods project both sides onto the real
|
|
235
|
+
axis, subtract, and map the sign componentwise onto the
|
|
236
|
+
truth axis. Strict (gt / lt) give -1 on ties, non-strict
|
|
237
|
+
(ge / le) give +1 on ties.
|
|
238
|
+
"""
|
|
239
|
+
assert op in ("gt", "lt", "ge", "le")
|
|
240
|
+
return f"_VSA.{op}({left_src}, {right_src})"
|
|
241
|
+
|
|
242
|
+
def _complex_literal_src(self, expr: ast.ComplexLiteral) -> str:
|
|
243
|
+
"""Lower the folded `N + Mi` form to `_VSA.make_complex(N, M)`."""
|
|
244
|
+
return f"_VSA.make_complex({float(expr.re)!r}, {float(expr.im)!r})"
|
|
245
|
+
|
|
246
|
+
# Three-valued primitive class — same truth-axis storage as
|
|
247
|
+
# `fuzzy`, but defuzzification polarizes toward {-1, 0, +1}
|
|
248
|
+
# instead of just {-1, +1}. The distinguishing runtime op is
|
|
249
|
+
# defuzzify_trit, not the storage layout.
|
|
250
|
+
_TRIT_TYPE_NAMES = frozenset({"trit"})
|
|
251
|
+
|
|
252
|
+
def _fuzzy_literal_init_src(self, decl: ast.VarDecl) -> str | None:
|
|
253
|
+
"""Compile-time fold of `fuzzy x = <literal>` to make_truth(value).
|
|
254
|
+
|
|
255
|
+
`fuzzy x = 0.7` is the 2026-04-23 design's implicit form for
|
|
256
|
+
`fuzzy x = true * 0.7` — a truth-axis vector scaled by 0.7. Since
|
|
257
|
+
`true` lives at +1 on the truth axis, this reduces at compile
|
|
258
|
+
time to a direct `_VSA.make_truth(0.7)` allocation with no
|
|
259
|
+
runtime scalar multiplication.
|
|
260
|
+
|
|
261
|
+
Bool literals use the truth-axis polarity: `true` → +1.0,
|
|
262
|
+
`false` → -1.0. Unary `-` on a numeric literal is folded too
|
|
263
|
+
so `fuzzy x = -0.3` works. Only triggers for literal initializers
|
|
264
|
+
— non-literal RHS expressions (e.g. `fuzzy x = compute()`) fall
|
|
265
|
+
through to normal codegen.
|
|
266
|
+
|
|
267
|
+
`trit x = 0.7` uses the same fold but emits `make_trit` —
|
|
268
|
+
same storage, different compile-time tag. The three-valued
|
|
269
|
+
distinguishing behavior lives in defuzzify_trit, not here.
|
|
270
|
+
"""
|
|
271
|
+
if decl.initializer is None:
|
|
272
|
+
return None
|
|
273
|
+
if decl.type_ref is None:
|
|
274
|
+
return None
|
|
275
|
+
type_name = decl.type_ref.name
|
|
276
|
+
# Complex-typed slot with a literal initializer: lift the
|
|
277
|
+
# real/imag scalar into a single make_complex call. Per user
|
|
278
|
+
# direction ("every number is on the complex plane"), a plain
|
|
279
|
+
# int or float in a `complex` slot coerces to (value, 0);
|
|
280
|
+
# `5i` → (0, 5), `5 + 5i` → (5, 5) via the simplify fold.
|
|
281
|
+
if type_name == "complex":
|
|
282
|
+
return self._complex_init_src(decl.initializer)
|
|
283
|
+
if type_name == "fuzzy":
|
|
284
|
+
ctor = "make_truth"
|
|
285
|
+
elif type_name in self._TRIT_TYPE_NAMES:
|
|
286
|
+
ctor = "make_trit"
|
|
287
|
+
else:
|
|
288
|
+
return None
|
|
289
|
+
scalar = self._fuzzy_constant_scalar(decl.initializer)
|
|
290
|
+
if scalar is None:
|
|
291
|
+
return None
|
|
292
|
+
return f"_VSA.{ctor}({scalar!r})"
|
|
293
|
+
|
|
294
|
+
def _complex_init_src(self, expr: ast.Expr) -> str | None:
    """Fold a literal initializer of a `complex`-typed slot.

    Recognized shapes:

    * ComplexLiteral: both parts.
    * ImaginaryLiteral: real part 0.0.
    * IntLiteral / FloatLiteral: imaginary part 0.0.
    * unary `+` on anything foldable.
    * unary `-` applied DIRECTLY to one of the three literal kinds.
    * a Parenthesized wrapper around anything foldable.

    Returns:
        A `_VSA.make_complex(re, im)` source string, or None when the
        initializer is not one of the shapes above. That includes a
        unary `-` whose operand is itself wrapped (e.g. parenthesized),
        which is left to normal codegen.
    """
    def emit(re_part: float, im_part: float) -> str:
        return f"_VSA.make_complex({re_part!r}, {im_part!r})"

    if isinstance(expr, ast.ComplexLiteral):
        return emit(float(expr.re), float(expr.im))
    if isinstance(expr, ast.ImaginaryLiteral):
        return emit(0.0, float(expr.value))
    if isinstance(expr, (ast.IntLiteral, ast.FloatLiteral)):
        return emit(float(expr.value), 0.0)

    if isinstance(expr, ast.UnaryOp) and expr.op in ("-", "+"):
        operand = expr.operand
        folded = self._complex_init_src(operand)
        if folded is None:
            return None
        if expr.op == "+":
            return folded
        # Unary minus: rebuild from the operand so that only the parts
        # the literal actually carries are negated (the implicit 0.0
        # part is emitted as plain 0.0).
        if isinstance(operand, ast.ComplexLiteral):
            return emit(-float(operand.re), -float(operand.im))
        if isinstance(operand, ast.ImaginaryLiteral):
            return emit(0.0, -float(operand.value))
        if isinstance(operand, (ast.IntLiteral, ast.FloatLiteral)):
            return emit(-float(operand.value), 0.0)
        # Any other negated operand falls through to the checks below.

    if isinstance(expr, ast.Parenthesized):
        return self._complex_init_src(expr.inner)
    return None
|
|
334
|
+
|
|
335
|
+
def _fuzzy_constant_scalar(self, expr: ast.Expr) -> float | None:
    """Reduce a literal expression to one truth-axis scalar.

    Handles int and float literals (coerced to float), bool literals
    (`true` -> 1.0, `false` -> -1.0), the `unknown` literal (0.0),
    unary `-` / `+` applied to anything foldable, and parenthesized
    wrappers.

    Returns:
        The folded float, or None when the expression cannot be
        reduced here and needs runtime evaluation.
    """
    if isinstance(expr, (ast.FloatLiteral, ast.IntLiteral)):
        return float(expr.value)
    if isinstance(expr, ast.BoolLiteral):
        if expr.value:
            return 1.0
        return -1.0
    if isinstance(expr, ast.UnknownLiteral):
        return 0.0

    if isinstance(expr, ast.UnaryOp):
        sign = expr.op
        if sign == "-":
            magnitude = self._fuzzy_constant_scalar(expr.operand)
            if magnitude is not None:
                return -magnitude
            # Unfoldable operand: fall through to the remaining checks.
        elif sign == "+":
            return self._fuzzy_constant_scalar(expr.operand)

    if isinstance(expr, ast.Parenthesized):
        return self._fuzzy_constant_scalar(expr.inner)
    return None
|
|
359
|
+
|
|
360
|
+
def _translate_eigenrotation_loop(self, stmt):
    """Emit the runtime call sequence for an eigenrotation loop.

    Four statements are written through `self._emit`, all prefixed
    with a fresh loop id from `_next_loop_id()`:

    1. `<lid>_R`: a rotation from `_VSA.make_random_rotation`.
    2. `<lid>_target`: the target expression taken from the loop
       condition.
    3. `<lid>_protos`: a one-entry prototype table from
       `_VSA.compile_prototypes`.
    4. the `_VSA.loop(...)` call, unpacked into `<lid>_name`, the loop
       state variable and `<lid>_iters`.

    The similarity threshold defaults to 0.9. When the condition is a
    BinaryOp with an int or float literal on either side (e.g.
    `similarity(state, target) < T`), that literal replaces the
    default; if both sides are literals the right-hand one wins.

    Args:
        stmt: Loop statement node; its `body` and `condition`
            attributes are read.
    """
    lid = self._next_loop_id()
    state_var = self._extract_loop_state_var(stmt.body)
    target_expr = self._extract_loop_target(stmt.condition)

    threshold = 0.9
    cond = stmt.condition
    if isinstance(cond, ast.BinaryOp):
        for side in (cond.left, cond.right):
            # Coerce both literal kinds with float(), matching every
            # other literal hook in this class, so the emitted
            # threshold is always a float literal.
            if isinstance(side, (ast.FloatLiteral, ast.IntLiteral)):
                threshold = float(side.value)

    self._emit(f"{lid}_R = _VSA.make_random_rotation("
               f"angle=1.0, n_planes=_VSA.dim // 2, seed=_VSA.seed)")
    self._emit(f"{lid}_target = {target_expr}")
    self._emit(f"{lid}_protos = _VSA.compile_prototypes("
               f"{{\"target\": {lid}_target}})")
    self._emit(f"{lid}_name, {state_var}, {lid}_iters = _VSA.loop(")
    # The call's arguments are emitted one indent level deeper.
    self._indent += 1
    self._emit(f"{state_var}, {lid}_R, {lid}_protos,")
    # NOTE(review): max_iters is fixed at 500 here and does not use the
    # constructor's `loop_max_iterations` - confirm this is intended.
    self._emit(f"target_name=\"target\", threshold={threshold!r}, max_iters=500)")
    self._indent -= 1
|
|
392
|
+
|
|
393
|
+
_VECTOR_ACCESSORS = frozenset({
|
|
394
|
+
"component", "semantic", "synthetic",
|
|
395
|
+
"real", "imag", "truth",
|
|
396
|
+
})
|
|
397
|
+
|
|
398
|
+
def _translate_call(self, call: ast.Call) -> str:
    """Translate a call, handling the numpy-specific cases first.

    * A call to a bare identifier listed in `_UNSUPPORTED_BUILTINS` is
      rejected.
    * A member call whose member name is in `_VECTOR_ACCESSORS`
      (`obj.real()`, `obj.component(i)`, ...) becomes
      `_VSA.<member>(obj, *args)`, with the receiver passed as the
      first argument.
    * Everything else is handled by the base class.

    Raises:
        CodegenNotSupported: for a call to an unsupported builtin.
    """
    target = call.callee
    if (isinstance(target, ast.Identifier)
            and target.name in self._UNSUPPORTED_BUILTINS):
        raise CodegenNotSupported(
            call,
            f"`{target.name}` is not supported on the pure-numpy "
            f"substrate (no cleanup circuit at runtime)",
        )

    is_accessor = (isinstance(target, ast.MemberAccess)
                   and target.member in self._VECTOR_ACCESSORS)
    if not is_accessor:
        return super()._translate_call(call)

    # Receiver first, then the explicit arguments in source order.
    pieces = [self._translate_expr(target.obj)]
    for arg in call.args:
        pieces.append(self._translate_expr(arg))
    return "_VSA.{}({})".format(target.member, ", ".join(pieces))
|
|
414
|
+
|
|
415
|
+
def _emit_prelude(self) -> None:
|
|
416
|
+
self._emit('"""Generated by sutra_compiler.codegen. Do not edit by hand."""')
|
|
417
|
+
self._emit("from __future__ import annotations")
|
|
418
|
+
self._emit()
|
|
419
|
+
self._emit("import numpy as _np")
|
|
420
|
+
self._emit()
|
|
421
|
+
self._emit()
|
|
422
|
+
self._emit("class _NumpyVSA:")
|
|
423
|
+
self._indent += 1
|
|
424
|
+
self._emit('"""Frozen-LLM-backed VSA runtime. Rotation binding, normalized bundle.')
|
|
425
|
+
self._emit('')
|
|
426
|
+
self._emit('State vectors carry an extended layout: each vector is')
|
|
427
|
+
self._emit('`[semantic (semantic_dim) | synthetic (synthetic_dim)]`. The')
|
|
428
|
+
self._emit('semantic block is filled by `embed()` from the frozen LLM; the')
|
|
429
|
+
self._emit('synthetic block is reserved computational/symbolic space that')
|
|
430
|
+
self._emit('starts zero and is touched only by operations that explicitly')
|
|
431
|
+
self._emit('write to it. See')
|
|
432
|
+
self._emit('planning/findings/2026-04-21-extended-state-and-rotation-binding.md.')
|
|
433
|
+
self._emit('')
|
|
434
|
+
self._emit('Bind is role-seeded Haar-random orthogonal rotation applied to')
|
|
435
|
+
self._emit('filler: bind(filler, role) = Q_role @ filler, with Q_role cached')
|
|
436
|
+
self._emit('by role-vector hash. The rotation is block-diagonal — Haar in')
|
|
437
|
+
self._emit('the semantic block, identity in the synthetic block — so rotation')
|
|
438
|
+
self._emit('acts only on semantic content and the synthetic block is')
|
|
439
|
+
self._emit('preserved through bind/unbind. Unbind is the transpose.')
|
|
440
|
+
self._emit('"""')
|
|
441
|
+
self._emit()
|
|
442
|
+
self._emit("def __init__(self, semantic_dim, synthetic_dim, seed, llm_model):")
|
|
443
|
+
self._indent += 1
|
|
444
|
+
self._emit("self.semantic_dim = semantic_dim")
|
|
445
|
+
self._emit("self.synthetic_dim = synthetic_dim")
|
|
446
|
+
self._emit("self.dim = semantic_dim + synthetic_dim")
|
|
447
|
+
self._emit("self.seed = seed")
|
|
448
|
+
self._emit("self.llm_model = llm_model")
|
|
449
|
+
self._emit("self._codebook = {}")
|
|
450
|
+
self._emit("# Rotation matrix cache: role-vector-hash -> orthogonal matrix.")
|
|
451
|
+
self._emit("# Generating a 768x768 Haar rotation is O(d^3); caching makes")
|
|
452
|
+
self._emit("# repeated bind/unbind with the same role O(d^2) lookup + matmul.")
|
|
453
|
+
self._emit("self._rot_cache = {}")
|
|
454
|
+
self._emit("# On-disk embedding cache. Second-and-later runs load every")
|
|
455
|
+
self._emit("# previously-seen basis_vector(...) string from disk instead of")
|
|
456
|
+
self._emit("# hitting Ollama. Cache is keyed by (model, dim) so changing")
|
|
457
|
+
self._emit("# either invalidates cleanly (different cache file).")
|
|
458
|
+
self._emit("import os as _os")
|
|
459
|
+
self._emit("self._cache_dir = _os.path.join(")
|
|
460
|
+
self._indent += 1
|
|
461
|
+
self._emit("_os.environ.get('XDG_CACHE_HOME', _os.path.expanduser('~/.cache')),")
|
|
462
|
+
self._emit("'sutra', 'embeddings')")
|
|
463
|
+
self._indent -= 1
|
|
464
|
+
self._emit("_os.makedirs(self._cache_dir, exist_ok=True)")
|
|
465
|
+
self._emit("# Sanitize model name for use as filename.")
|
|
466
|
+
self._emit("_safe_model = llm_model.replace('/', '_').replace(':', '_')")
|
|
467
|
+
self._emit("self._cache_path = _os.path.join(")
|
|
468
|
+
self._indent += 1
|
|
469
|
+
self._emit("self._cache_dir, f'{_safe_model}-d{self.dim}.npz')")
|
|
470
|
+
self._indent -= 1
|
|
471
|
+
self._emit("self._load_disk_cache()")
|
|
472
|
+
self._indent -= 1
|
|
473
|
+
self._emit()
|
|
474
|
+
self._emit("def _load_disk_cache(self):")
|
|
475
|
+
self._indent += 1
|
|
476
|
+
self._emit('"""Populate self._codebook from the on-disk embedding cache.')
|
|
477
|
+
self._emit('')
|
|
478
|
+
self._emit("Tolerant of a missing or corrupt cache file — a failed load")
|
|
479
|
+
self._emit("leaves self._codebook empty and lets Ollama fetches repopulate")
|
|
480
|
+
self._emit("it. The cache is performance, not correctness.")
|
|
481
|
+
self._emit('"""')
|
|
482
|
+
self._emit("import os as _os")
|
|
483
|
+
self._emit("if not _os.path.exists(self._cache_path):")
|
|
484
|
+
self._indent += 1
|
|
485
|
+
self._emit("return")
|
|
486
|
+
self._indent -= 1
|
|
487
|
+
self._emit("try:")
|
|
488
|
+
self._indent += 1
|
|
489
|
+
self._emit("with _np.load(self._cache_path, allow_pickle=False) as data:")
|
|
490
|
+
self._indent += 1
|
|
491
|
+
self._emit("for key in data.files:")
|
|
492
|
+
self._indent += 1
|
|
493
|
+
self._emit("self._codebook[key] = data[key].astype(_np.float64)")
|
|
494
|
+
self._indent -= 1
|
|
495
|
+
self._indent -= 1
|
|
496
|
+
self._indent -= 1
|
|
497
|
+
self._emit("except Exception:")
|
|
498
|
+
self._indent += 1
|
|
499
|
+
self._emit("# Corrupt cache: ignore and let Ollama repopulate.")
|
|
500
|
+
self._emit("self._codebook = {}")
|
|
501
|
+
self._indent -= 1
|
|
502
|
+
self._indent -= 1
|
|
503
|
+
self._emit()
|
|
504
|
+
self._emit("def _write_disk_cache(self):")
|
|
505
|
+
self._indent += 1
|
|
506
|
+
self._emit('"""Persist self._codebook atomically to disk.')
|
|
507
|
+
self._emit('')
|
|
508
|
+
self._emit("Writes to a tempfile then renames, so a partial write (crash,")
|
|
509
|
+
self._emit("SIGKILL) leaves the old cache intact rather than corrupted.")
|
|
510
|
+
self._emit("Called whenever embed / embed_batch fetches new vectors so")
|
|
511
|
+
self._emit("subsequent runs hit the cache on module init.")
|
|
512
|
+
self._emit('"""')
|
|
513
|
+
self._emit("import os as _os, tempfile as _tempfile")
|
|
514
|
+
self._emit("if not self._codebook:")
|
|
515
|
+
self._indent += 1
|
|
516
|
+
self._emit("return")
|
|
517
|
+
self._indent -= 1
|
|
518
|
+
self._emit("fd, tmp = _tempfile.mkstemp(")
|
|
519
|
+
self._indent += 1
|
|
520
|
+
self._emit("dir=self._cache_dir, prefix='.tmp-', suffix='.npz')")
|
|
521
|
+
self._indent -= 1
|
|
522
|
+
self._emit("_os.close(fd)")
|
|
523
|
+
self._emit("try:")
|
|
524
|
+
self._indent += 1
|
|
525
|
+
self._emit("_np.savez(tmp, **self._codebook)")
|
|
526
|
+
self._emit("# _np.savez writes tmp.npz, but tempfile handed us tmp ending")
|
|
527
|
+
self._emit("# in .npz already — reconcile: savez appends .npz only if the")
|
|
528
|
+
self._emit("# path does not already end in .npz. Python tempfile gives us")
|
|
529
|
+
self._emit("# a .npz path, so savez leaves it as-is.")
|
|
530
|
+
self._emit("_os.replace(tmp, self._cache_path)")
|
|
531
|
+
self._indent -= 1
|
|
532
|
+
self._emit("except Exception:")
|
|
533
|
+
self._indent += 1
|
|
534
|
+
self._emit("# Cache-write failure is non-fatal. Remove the tmp and continue.")
|
|
535
|
+
self._emit("try:")
|
|
536
|
+
self._indent += 1
|
|
537
|
+
self._emit("_os.unlink(tmp)")
|
|
538
|
+
self._indent -= 1
|
|
539
|
+
self._emit("except OSError:")
|
|
540
|
+
self._indent += 1
|
|
541
|
+
self._emit("pass")
|
|
542
|
+
self._indent -= 1
|
|
543
|
+
self._indent -= 1
|
|
544
|
+
self._indent -= 1
|
|
545
|
+
self._emit()
|
|
546
|
+
self._emit("def embed(self, name):")
|
|
547
|
+
self._indent += 1
|
|
548
|
+
self._emit('"""Frozen-LLM embedding via Ollama. No random fallback.')
|
|
549
|
+
self._emit("If Ollama is unavailable or the model is missing, this raises.")
|
|
550
|
+
self._emit("The numpy backend is defined as running on frozen LLM embeddings;")
|
|
551
|
+
self._emit("a random-vector fallback is not Sutra.")
|
|
552
|
+
self._emit("")
|
|
553
|
+
self._emit("Output is the extended-state-vector layout:")
|
|
554
|
+
self._emit("`[semantic (semantic_dim) | zeros (synthetic_dim)]`. The semantic")
|
|
555
|
+
self._emit("block is the LLM embedding (truncated or zero-padded to")
|
|
556
|
+
self._emit("semantic_dim as needed); the synthetic block is reserved and")
|
|
557
|
+
self._emit('starts at zero."""')
|
|
558
|
+
self._emit("if name not in self._codebook:")
|
|
559
|
+
self._indent += 1
|
|
560
|
+
self._emit("import ollama")
|
|
561
|
+
self._emit("r = ollama.embed(model=self.llm_model, input=name)")
|
|
562
|
+
self._emit("v = _np.array(r['embeddings'][0], dtype=_np.float64)")
|
|
563
|
+
self._emit("# Mean-center. Raw LLM embeddings cluster in a cone (all-")
|
|
564
|
+
self._emit("# positive-ish); centering keeps rotation/bind algebra")
|
|
565
|
+
self._emit("# well-behaved.")
|
|
566
|
+
self._emit("v = v - _np.mean(v)")
|
|
567
|
+
self._emit("n = _np.linalg.norm(v)")
|
|
568
|
+
self._emit("if n > 0: v = v / n")
|
|
569
|
+
self._emit("# Fit the LLM output to the semantic block. Truncate if the")
|
|
570
|
+
self._emit("# LLM is wider than semantic_dim, zero-pad if narrower.")
|
|
571
|
+
self._emit("if v.shape[0] > self.semantic_dim:")
|
|
572
|
+
self._indent += 1
|
|
573
|
+
self._emit("v = v[:self.semantic_dim]")
|
|
574
|
+
self._indent -= 1
|
|
575
|
+
self._emit("elif v.shape[0] < self.semantic_dim:")
|
|
576
|
+
self._indent += 1
|
|
577
|
+
self._emit("v = _np.concatenate([v, _np.zeros(self.semantic_dim - v.shape[0])])")
|
|
578
|
+
self._indent -= 1
|
|
579
|
+
self._emit("# Append the synthetic block — reserved, starts zero.")
|
|
580
|
+
self._emit("v = _np.concatenate([v, _np.zeros(self.synthetic_dim)])")
|
|
581
|
+
self._emit("n = _np.linalg.norm(v)")
|
|
582
|
+
self._emit("if n > 0: v = v / n")
|
|
583
|
+
self._emit("self._codebook[name] = v")
|
|
584
|
+
self._emit("self._write_disk_cache()")
|
|
585
|
+
self._indent -= 1
|
|
586
|
+
self._emit("return self._codebook[name].copy()")
|
|
587
|
+
self._indent -= 1
|
|
588
|
+
self._emit()
|
|
589
|
+
self._emit("def embed_batch(self, names):")
|
|
590
|
+
self._indent += 1
|
|
591
|
+
self._emit('"""Batched Ollama embed: one HTTP round-trip for many names.')
|
|
592
|
+
self._emit('')
|
|
593
|
+
self._emit("Populates self._codebook for every name in `names` that isn't")
|
|
594
|
+
self._emit("already cached. Subsequent embed(name) calls hit the cache in")
|
|
595
|
+
self._emit("memory with no network round-trip. Replaces N sequential")
|
|
596
|
+
self._emit("embed() calls at module init with one batched call; real")
|
|
597
|
+
self._emit("wall-clock win on programs with many basis_vector strings.")
|
|
598
|
+
self._emit('"""')
|
|
599
|
+
self._emit("missing = [n for n in names if n not in self._codebook]")
|
|
600
|
+
self._emit("if not missing:")
|
|
601
|
+
self._indent += 1
|
|
602
|
+
self._emit("return")
|
|
603
|
+
self._indent -= 1
|
|
604
|
+
self._emit("import ollama")
|
|
605
|
+
self._emit("r = ollama.embed(model=self.llm_model, input=missing)")
|
|
606
|
+
self._emit("for i, name in enumerate(missing):")
|
|
607
|
+
self._indent += 1
|
|
608
|
+
self._emit("v = _np.array(r['embeddings'][i], dtype=_np.float64)")
|
|
609
|
+
self._emit("v = v - _np.mean(v)")
|
|
610
|
+
self._emit("n = _np.linalg.norm(v)")
|
|
611
|
+
self._emit("if n > 0: v = v / n")
|
|
612
|
+
self._emit("# Fit to the semantic block, then append the zero-initialized")
|
|
613
|
+
self._emit("# synthetic block. Same layout as embed().")
|
|
614
|
+
self._emit("if v.shape[0] > self.semantic_dim:")
|
|
615
|
+
self._indent += 1
|
|
616
|
+
self._emit("v = v[:self.semantic_dim]")
|
|
617
|
+
self._indent -= 1
|
|
618
|
+
self._emit("elif v.shape[0] < self.semantic_dim:")
|
|
619
|
+
self._indent += 1
|
|
620
|
+
self._emit("v = _np.concatenate([v, _np.zeros(self.semantic_dim - v.shape[0])])")
|
|
621
|
+
self._indent -= 1
|
|
622
|
+
self._emit("v = _np.concatenate([v, _np.zeros(self.synthetic_dim)])")
|
|
623
|
+
self._emit("n = _np.linalg.norm(v)")
|
|
624
|
+
self._emit("if n > 0: v = v / n")
|
|
625
|
+
self._emit("self._codebook[name] = v")
|
|
626
|
+
self._indent -= 1
|
|
627
|
+
self._emit("# One batched write after all fetches in this call.")
|
|
628
|
+
self._emit("self._write_disk_cache()")
|
|
629
|
+
self._indent -= 1
|
|
630
|
+
self._emit()
|
|
631
|
+
self._emit("def _role_hash(self, role_vec):")
|
|
632
|
+
self._indent += 1
|
|
633
|
+
self._emit('"""Deterministic uint32 seed from a role vector.')
|
|
634
|
+
self._emit('')
|
|
635
|
+
self._emit("Uses the float64 bytes of the vector, so tiny numerical noise")
|
|
636
|
+
self._emit("produces the same seed as long as the vector is bit-identical.")
|
|
637
|
+
self._emit("Bit-level determinism is what we want here — callers should")
|
|
638
|
+
self._emit("not retrieve via a different-but-similar role; that's what")
|
|
639
|
+
self._emit("hashmap_get's continuous-projection path is for.")
|
|
640
|
+
self._emit('"""')
|
|
641
|
+
self._emit("import hashlib")
|
|
642
|
+
self._emit("h = hashlib.blake2b(role_vec.tobytes(), digest_size=8).digest()")
|
|
643
|
+
self._emit("return int.from_bytes(h, 'little') & 0xFFFFFFFF")
|
|
644
|
+
self._indent -= 1
|
|
645
|
+
self._emit()
|
|
646
|
+
self._emit("def _rotation_for(self, role_vec):")
|
|
647
|
+
self._indent += 1
|
|
648
|
+
self._emit('"""Block-diagonal Haar-random orthogonal matrix seeded by the role.')
|
|
649
|
+
self._emit('')
|
|
650
|
+
self._emit("Haar-uniform in the semantic block (top-left semantic_dim x")
|
|
651
|
+
self._emit("semantic_dim), identity in the synthetic block (bottom-right")
|
|
652
|
+
self._emit("synthetic_dim x synthetic_dim). Bind and unbind therefore rotate")
|
|
653
|
+
self._emit("only the semantic content and leave the synthetic block fixed —")
|
|
654
|
+
self._emit("which is what the extended-state-vector design requires: the")
|
|
655
|
+
self._emit("synthetic block is reserved for computational/symbolic state and")
|
|
656
|
+
self._emit("rotation bind must not mix semantic content into it.")
|
|
657
|
+
self._emit('')
|
|
658
|
+
self._emit("Cached per role-hash so the same role always produces the same")
|
|
659
|
+
self._emit("rotation — required for bind/unbind round-trip.")
|
|
660
|
+
self._emit('"""')
|
|
661
|
+
self._emit("key = self._role_hash(role_vec)")
|
|
662
|
+
self._emit("if key not in self._rot_cache:")
|
|
663
|
+
self._indent += 1
|
|
664
|
+
self._emit("rng = _np.random.RandomState(key)")
|
|
665
|
+
self._emit("A = rng.randn(self.semantic_dim, self.semantic_dim)")
|
|
666
|
+
self._emit("Q_sem, _R = _np.linalg.qr(A)")
|
|
667
|
+
self._emit("# Flip sign of rows where R's diagonal was negative, so the QR")
|
|
668
|
+
self._emit("# output is Haar-uniform rather than biased by the QR sign.")
|
|
669
|
+
self._emit("d = _np.sign(_np.diag(_R))")
|
|
670
|
+
self._emit("d[d == 0] = 1.0")
|
|
671
|
+
self._emit("Q_sem = Q_sem * d")
|
|
672
|
+
self._emit("# Block-diagonal: Q_sem on the semantic block, identity elsewhere.")
|
|
673
|
+
self._emit("Q = _np.eye(self.dim, dtype=_np.float64)")
|
|
674
|
+
self._emit("Q[:self.semantic_dim, :self.semantic_dim] = Q_sem")
|
|
675
|
+
self._emit("self._rot_cache[key] = Q")
|
|
676
|
+
self._indent -= 1
|
|
677
|
+
self._emit("return self._rot_cache[key]")
|
|
678
|
+
self._indent -= 1
|
|
679
|
+
self._emit()
|
|
680
|
+
self._emit("def bind(self, role, filler):")
|
|
681
|
+
self._indent += 1
|
|
682
|
+
self._emit("# Rotation binding. Role-first convention matches the majority")
|
|
683
|
+
self._emit("# of .su demos (analogy, fuzzy_dispatch, knowledge_graph, etc.):")
|
|
684
|
+
self._emit("# bind(role, filler) = Q_role @ filler")
|
|
685
|
+
self._emit("# Q_role is the Haar-random rotation seeded by the role vector.")
|
|
686
|
+
self._emit("Q = self._rotation_for(role)")
|
|
687
|
+
self._emit("return Q @ filler")
|
|
688
|
+
self._indent -= 1
|
|
689
|
+
self._emit()
|
|
690
|
+
self._emit("def unbind(self, role, record):")
|
|
691
|
+
self._indent += 1
|
|
692
|
+
self._emit("# Role-first, matching bind. Q is orthogonal so inverse = transpose:")
|
|
693
|
+
self._emit("# unbind(role, record) = Q_role^T @ record")
|
|
694
|
+
self._emit("# For the matched-pair term in the bundle,")
|
|
695
|
+
self._emit("# Q_role^T @ Q_role @ filler = filler exactly.")
|
|
696
|
+
self._emit("# Other bundled terms appear as Q_role^T @ Q_other @ ... which")
|
|
697
|
+
self._emit("# is random-ish noise with ~1/sqrt(d) magnitude per term.")
|
|
698
|
+
self._emit("Q = self._rotation_for(role)")
|
|
699
|
+
self._emit("return Q.T @ record")
|
|
700
|
+
self._indent -= 1
|
|
701
|
+
self._emit()
|
|
702
|
+
self._emit("def bundle(self, *vectors):")
|
|
703
|
+
self._indent += 1
|
|
704
|
+
self._emit("s = _np.sum(vectors, axis=0)")
|
|
705
|
+
self._emit("n = _np.linalg.norm(s)")
|
|
706
|
+
self._emit("return s / n if n > 0 else s")
|
|
707
|
+
self._indent -= 1
|
|
708
|
+
self._emit()
|
|
709
|
+
self._emit("def zero_vector(self):")
|
|
710
|
+
self._indent += 1
|
|
711
|
+
self._emit('"""Zero vector in the runtime dim.')
|
|
712
|
+
self._emit('')
|
|
713
|
+
self._emit("Emitted by the simplifier for identities that resolve to zero")
|
|
714
|
+
self._emit("(e.g. displacement(a, a) → zero, bundle(zero_vector()) absorbed).")
|
|
715
|
+
self._emit("Also the starting accumulator for hashmap_new; kept as its own")
|
|
716
|
+
self._emit("method so future substrates can override (e.g. a connectome")
|
|
717
|
+
self._emit("backend's no-spike state instead of numeric zero).")
|
|
718
|
+
self._emit('"""')
|
|
719
|
+
self._emit("return _np.zeros(self.dim, dtype=_np.float64)")
|
|
720
|
+
self._indent -= 1
|
|
721
|
+
self._emit()
|
|
722
|
+
self._emit("def bundle_of_binds(self, *role_filler_pairs):")
|
|
723
|
+
self._indent += 1
|
|
724
|
+
self._emit('"""Fused bind+sum+normalize over N role-filler pairs.')
|
|
725
|
+
self._emit('')
|
|
726
|
+
self._emit("Emitted by the compiler when every arg to bundle() is itself")
|
|
727
|
+
self._emit("a bind() call. The N binds are independent (no shared state),")
|
|
728
|
+
self._emit("so executing them as a batch instead of sequentially is")
|
|
729
|
+
self._emit("correct and ~Nx faster on GPU-class hardware.")
|
|
730
|
+
self._emit("")
|
|
731
|
+
self._emit("numpy implementation: stack the per-role rotation matrices")
|
|
732
|
+
self._emit("into (N, d, d), stack fillers into (N, d), batched einsum")
|
|
733
|
+
self._emit("for the bind, sum over N, normalize. Same result as sequential")
|
|
734
|
+
self._emit("bind+sum+normalize, in a single einsum + reduce.")
|
|
735
|
+
self._emit("")
|
|
736
|
+
self._emit("This is the independence-structure case that justified")
|
|
737
|
+
self._emit("the PyTorch/GPU backend: the fused form collapses N small")
|
|
738
|
+
self._emit("kernel launches into O(1) big ones.")
|
|
739
|
+
self._emit('"""')
|
|
740
|
+
self._emit("if not role_filler_pairs:")
|
|
741
|
+
self._indent += 1
|
|
742
|
+
self._emit("return self.zero_vector()")
|
|
743
|
+
self._indent -= 1
|
|
744
|
+
self._emit("roles = [rf[0] for rf in role_filler_pairs]")
|
|
745
|
+
self._emit("fillers = [rf[1] for rf in role_filler_pairs]")
|
|
746
|
+
self._emit("Q_stack = _np.stack([self._rotation_for(r) for r in roles]) # (N, d, d)")
|
|
747
|
+
self._emit("F_stack = _np.stack([_np.asarray(f, dtype=_np.float64) for f in fillers]) # (N, d)")
|
|
748
|
+
self._emit("# Batched bind: element-i is Q_i @ f_i; shape (N, d).")
|
|
749
|
+
self._emit("bound = _np.einsum('nij,nj->ni', Q_stack, F_stack)")
|
|
750
|
+
self._emit("s = bound.sum(axis=0)")
|
|
751
|
+
self._emit("n = _np.linalg.norm(s)")
|
|
752
|
+
self._emit("return s / n if n > 0 else s")
|
|
753
|
+
self._indent -= 1
|
|
754
|
+
self._emit()
|
|
755
|
+
self._emit("# ---- Rotation-hashmap (library pattern per open question) ----")
|
|
756
|
+
self._emit("#")
|
|
757
|
+
self._emit("# Prototype of the rotation-hashmap described in")
|
|
758
|
+
self._emit("# planning/open-questions/rotation-hashmap-as-language-feature.md.")
|
|
759
|
+
self._emit("# Implemented as runtime methods — accessed by test scripts, not")
|
|
760
|
+
self._emit("# wired into the .su surface syntax yet. If the mechanism works,")
|
|
761
|
+
self._emit("# this is evidence for Candidate A (first-class map<K,V>); if")
|
|
762
|
+
self._emit("# capacity is poor, evidence for Candidate B (library-only).")
|
|
763
|
+
self._emit()
|
|
764
|
+
self._emit("def hashmap_new(self):")
|
|
765
|
+
self._indent += 1
|
|
766
|
+
self._emit('"""Empty accumulator — a zero vector in the runtime dim."""')
|
|
767
|
+
self._emit("return _np.zeros(self.dim, dtype=_np.float64)")
|
|
768
|
+
self._indent -= 1
|
|
769
|
+
self._emit()
|
|
770
|
+
self._emit("def hashmap_set(self, acc, key_vec, val_vec):")
|
|
771
|
+
self._indent += 1
|
|
772
|
+
self._emit('"""Store val under key: acc + bind(key, val).')
|
|
773
|
+
self._emit('')
|
|
774
|
+
self._emit("Reuses the same role-seeded Haar rotation as bind itself, so")
|
|
775
|
+
self._emit("the hashmap has identical capacity / cross-talk behavior as a")
|
|
776
|
+
self._emit("bundle of role-filler pairs. The only difference from bind + ")
|
|
777
|
+
self._emit("bundle is the API — the caller doesn't have to construct the")
|
|
778
|
+
self._emit("bundle themselves; set() just accumulates additively.")
|
|
779
|
+
self._emit("")
|
|
780
|
+
self._emit("Storage is additive WITHOUT normalization. Normalizing after")
|
|
781
|
+
self._emit("every set would destroy the magnitude information downstream")
|
|
782
|
+
self._emit("retrieval depends on. Normalize at retrieval time if needed.")
|
|
783
|
+
self._emit("")
|
|
784
|
+
self._emit("LIMITATION: key lookup is by bit-identical hash of key_vec, so")
|
|
785
|
+
self._emit("soft lookup (noisy query key -> approximate recovery) does NOT")
|
|
786
|
+
self._emit("work with this prototype. A continuous-hash variant using")
|
|
787
|
+
self._emit("Householder reflections or learned projections would enable")
|
|
788
|
+
self._emit("soft lookup; future work per the open question.")
|
|
789
|
+
self._emit('"""')
|
|
790
|
+
self._emit("return acc + self.bind(key_vec, val_vec)")
|
|
791
|
+
self._indent -= 1
|
|
792
|
+
self._emit()
|
|
793
|
+
self._emit("def hashmap_get(self, acc, key_vec):")
|
|
794
|
+
self._indent += 1
|
|
795
|
+
self._emit('"""Retrieve val associated with key: unbind(key, acc).')
|
|
796
|
+
self._emit('')
|
|
797
|
+
self._emit("Returns the raw recovered vector; caller applies cleanup")
|
|
798
|
+
self._emit("(argmax_cosine against a codebook) or uses it directly.")
|
|
799
|
+
self._emit("Cross-talk from other stored entries appears as noise with")
|
|
800
|
+
self._emit("~1/sqrt(d) magnitude per other entry. For N stored entries")
|
|
801
|
+
self._emit("and a d-dim substrate, recovered signal-to-noise is ~1/sqrt(N).")
|
|
802
|
+
self._emit('"""')
|
|
803
|
+
self._emit("return self.unbind(key_vec, acc)")
|
|
804
|
+
self._indent -= 1
|
|
805
|
+
self._emit()
|
|
806
|
+
self._emit("# ---- Binding-array (substrate-stored ordered list) ----")
|
|
807
|
+
self._emit("#")
|
|
808
|
+
self._emit("# An array stores N scalar values in a single substrate vector,")
|
|
809
|
+
self._emit("# with a length prefix at index 0. Layout:")
|
|
810
|
+
self._emit("# arr[0] = length (number of valid elements)")
|
|
811
|
+
self._emit("# arr[1..length] = the elements (in order)")
|
|
812
|
+
self._emit("# Capacity is fixed at allocation time (the vector's full length")
|
|
813
|
+
self._emit("# minus 1). foreach_loop walks 0..length-1 and binds each element")
|
|
814
|
+
self._emit("# to the `element` keyword in the body.")
|
|
815
|
+
self._emit("#")
|
|
816
|
+
self._emit("# Used by foreach_loop. Pure tensor reads/writes; no Python list,")
|
|
817
|
+
self._emit("# no heap allocation beyond the initial vector.")
|
|
818
|
+
self._emit()
|
|
819
|
+
self._emit("def array_from_literal(self, *values):")
|
|
820
|
+
self._indent += 1
|
|
821
|
+
self._emit('"""Build an array from compile-time-known scalar values."""')
|
|
822
|
+
self._emit("arr = _np.zeros(len(values) + 1, dtype=_np.float64)")
|
|
823
|
+
self._emit("arr[0] = float(len(values))")
|
|
824
|
+
self._emit("for i, v in enumerate(values):")
|
|
825
|
+
self._indent += 1
|
|
826
|
+
self._emit("arr[1 + i] = float(v)")
|
|
827
|
+
self._indent -= 1
|
|
828
|
+
self._emit("return arr")
|
|
829
|
+
self._indent -= 1
|
|
830
|
+
self._emit()
|
|
831
|
+
self._emit("def array_length(self, arr):")
|
|
832
|
+
self._indent += 1
|
|
833
|
+
self._emit('"""Read the length prefix as an int."""')
|
|
834
|
+
self._emit("return int(arr[0])")
|
|
835
|
+
self._indent -= 1
|
|
836
|
+
self._emit()
|
|
837
|
+
self._emit("def array_get(self, arr, i):")
|
|
838
|
+
self._indent += 1
|
|
839
|
+
self._emit('"""Read element at index i (0-based). Returns substrate scalar.')
|
|
840
|
+
self._emit('')
|
|
841
|
+
self._emit("Returns the underlying numpy scalar rather than a Python float so")
|
|
842
|
+
self._emit("downstream arithmetic stays in substrate land. Indexing arithmetic")
|
|
843
|
+
self._emit("(`int(i)`) is the only Python crossing remaining; that's the loop")
|
|
844
|
+
self._emit("tick counter, removable only by full unroll (queue item 5).")
|
|
845
|
+
self._emit('"""')
|
|
846
|
+
self._emit("return arr[1 + int(i)]")
|
|
847
|
+
self._indent -= 1
|
|
848
|
+
self._emit()
|
|
849
|
+
self._emit("# ---- Substrate scalar primitives (boundary-leak reductions) ----")
|
|
850
|
+
self._emit("# Added 2026-04-30 to remove the Python-bool / Python-min crossings")
|
|
851
|
+
self._emit("# in the loop halt check. See planning/findings/")
|
|
852
|
+
self._emit("# 2026-04-30-substrate-purity-leak-enumeration.md.")
|
|
853
|
+
self._emit()
|
|
854
|
+
self._emit("def truth_axis(self, vec_or_scalar):")
|
|
855
|
+
self._indent += 1
|
|
856
|
+
self._emit('"""Read AXIS_TRUTH from a fuzzy-vector result, or pass scalars through.')
|
|
857
|
+
self._emit('')
|
|
858
|
+
self._emit("Returns a substrate scalar (numpy 0-dim) rather than a Python float;")
|
|
859
|
+
self._emit("substrate-pure loop halt checks consume the result without crossing")
|
|
860
|
+
self._emit("the Python boundary.")
|
|
861
|
+
self._emit('"""')
|
|
862
|
+
self._emit("if hasattr(vec_or_scalar, '__len__') and len(vec_or_scalar) > 1:")
|
|
863
|
+
self._indent += 1
|
|
864
|
+
self._emit("return vec_or_scalar[self.semantic_dim + self.AXIS_TRUTH]")
|
|
865
|
+
self._indent -= 1
|
|
866
|
+
self._emit("return _np.asarray(vec_or_scalar)")
|
|
867
|
+
self._indent -= 1
|
|
868
|
+
self._emit()
|
|
869
|
+
self._emit("def heaviside(self, x):")
|
|
870
|
+
self._indent += 1
|
|
871
|
+
self._emit('"""Step function: 1.0 where x > 0, else 0.0. Substrate scalar.')
|
|
872
|
+
self._emit('')
|
|
873
|
+
self._emit("Used by the loop halt check to convert a substrate truth scalar to")
|
|
874
|
+
self._emit("a substrate-resident keep-mask, without Python's `1.0 if x > 0 else")
|
|
875
|
+
self._emit("0.0` ternary.")
|
|
876
|
+
self._emit('"""')
|
|
877
|
+
self._emit("return (_np.asarray(x) > 0.0).astype(_np.float64)")
|
|
878
|
+
self._indent -= 1
|
|
879
|
+
self._emit()
|
|
880
|
+
self._emit("def saturate_unit(self, x):")
|
|
881
|
+
self._indent += 1
|
|
882
|
+
self._emit('"""min(x, 1.0) implemented as a substrate op rather than Python\'s min().')
|
|
883
|
+
self._emit('')
|
|
884
|
+
self._emit("Used by the halt accumulator: halted = saturate_unit(halted +")
|
|
885
|
+
self._emit("halt_term). Numpy minimum() preserves the substrate-scalar dtype")
|
|
886
|
+
self._emit("rather than coercing to Python float.")
|
|
887
|
+
self._emit('"""')
|
|
888
|
+
self._emit("return _np.minimum(_np.asarray(x), 1.0)")
|
|
889
|
+
self._indent -= 1
|
|
890
|
+
self._emit()
|
|
891
|
+
self._emit("# ---- 2D-Givens-per-slot rotation binding (synthetic subspace) ----")
|
|
892
|
+
self._emit("#")
|
|
893
|
+
self._emit("# Design:")
|
|
894
|
+
self._emit("# planning/findings/2026-04-21-extended-state-and-rotation-binding.md")
|
|
895
|
+
self._emit("# Validation:")
|
|
896
|
+
self._emit("# planning/findings/2026-04-24-synthetic-subspace-validation.md")
|
|
897
|
+
self._emit("#")
|
|
898
|
+
self._emit("# Each positional / variable slot gets one disjoint 2D plane in the")
|
|
899
|
+
self._emit("# synthetic subspace, starting after the canonical axes. Slot s uses")
|
|
900
|
+
self._emit("# plane (SLOT_BASE + 2*s, SLOT_BASE + 2*s + 1). A slot-rotation is a")
|
|
901
|
+
self._emit("# 2D Givens rotation in that plane; slots do not overlap (until")
|
|
902
|
+
self._emit("# capacity runs out at synthetic_dim - SLOT_BASE pairs), so retrieval")
|
|
903
|
+
self._emit("# from slot i is orthogonal to content at slot j by construction.")
|
|
904
|
+
self._emit("#")
|
|
905
|
+
self._emit("# Storage / retrieval convention (reversible imperative state):")
|
|
906
|
+
self._emit("# slot_store(state, s, scalar) = state + scalar * e_{SLOT_BASE+2s}")
|
|
907
|
+
self._emit("# (after zeroing the slot's plane)")
|
|
908
|
+
self._emit("# slot_load(state, s) = state[SLOT_BASE+2s] (cosine component)")
|
|
909
|
+
self._emit("#")
|
|
910
|
+
self._emit("# An explicit rotation angle `theta` applied via rotate_slot(s, theta)")
|
|
911
|
+
self._emit("# rotates the scalar by theta into the imaginary leg of the plane,")
|
|
912
|
+
self._emit("# which is what makes assignments reversible as exact inverses.")
|
|
913
|
+
self._emit()
|
|
914
|
+
self._emit("# First synthetic axis used for slot planes. Reserves the canonical")
|
|
915
|
+
self._emit("# axes (0..AXIS_LOOP_DONE) for int/complex/truth/char/loop-flag.")
|
|
916
|
+
self._emit("SLOT_BASE = 5")
|
|
917
|
+
self._emit()
|
|
918
|
+
self._emit("def _slot_plane(self, slot_idx):")
|
|
919
|
+
self._indent += 1
|
|
920
|
+
self._emit('"""Return (i, j) — the two synthetic-block indices for slot.')
|
|
921
|
+
self._emit('')
|
|
922
|
+
self._emit("Slot 0 -> (SLOT_BASE, SLOT_BASE+1); slot 1 -> (SLOT_BASE+2,")
|
|
923
|
+
self._emit("SLOT_BASE+3); etc. Wraps modulo (synthetic_dim - SLOT_BASE) // 2")
|
|
924
|
+
self._emit("so out-of-capacity slots share planes (capacity-experiment finding:")
|
|
925
|
+
self._emit("sharing beyond N/2 degrades accuracy past ~65% at k=N/2+8).")
|
|
926
|
+
self._emit('"""')
|
|
927
|
+
self._emit("n_planes = (self.synthetic_dim - self.SLOT_BASE) // 2")
|
|
928
|
+
self._emit("if n_planes <= 0:")
|
|
929
|
+
self._indent += 1
|
|
930
|
+
self._emit("raise RuntimeError(")
|
|
931
|
+
self._indent += 1
|
|
932
|
+
self._emit('"synthetic subspace has no room for slot planes; "')
|
|
933
|
+
self._emit('"increase synthetic_dim or SLOT_BASE budget")')
|
|
934
|
+
self._indent -= 1
|
|
935
|
+
self._indent -= 1
|
|
936
|
+
self._emit("s = int(slot_idx) % n_planes")
|
|
937
|
+
self._emit("base = self.semantic_dim + self.SLOT_BASE + 2 * s")
|
|
938
|
+
self._emit("return (base, base + 1)")
|
|
939
|
+
self._indent -= 1
|
|
940
|
+
self._emit()
|
|
941
|
+
self._emit("def slot_store(self, state, slot_idx, scalar):")
|
|
942
|
+
self._indent += 1
|
|
943
|
+
self._emit('"""Write scalar to slot slot_idx. Overwrites the slot\'s plane.')
|
|
944
|
+
self._emit('')
|
|
945
|
+
self._emit("The scalar lives on the real leg of the slot\'s 2D plane; the")
|
|
946
|
+
self._emit("imaginary leg is zeroed. A subsequent slot_load returns the scalar")
|
|
947
|
+
self._emit("exactly. This is the reversible-imperative-state primitive: a")
|
|
948
|
+
self._emit("variable assignment = one slot_store; the inverse is slot_store of")
|
|
949
|
+
self._emit("the previous value. State outside the slot\'s plane is unchanged.")
|
|
950
|
+
self._emit('"""')
|
|
951
|
+
self._emit("i, j = self._slot_plane(slot_idx)")
|
|
952
|
+
self._emit("new = state.copy() if hasattr(state, 'copy') else _np.asarray(state).copy()")
|
|
953
|
+
self._emit("new[i] = float(scalar)")
|
|
954
|
+
self._emit("new[j] = 0.0")
|
|
955
|
+
self._emit("return new")
|
|
956
|
+
self._indent -= 1
|
|
957
|
+
self._emit()
|
|
958
|
+
self._emit("def slot_load(self, state, slot_idx):")
|
|
959
|
+
self._indent += 1
|
|
960
|
+
self._emit('"""Read the scalar stored at slot slot_idx (the real leg).')
|
|
961
|
+
self._emit('')
|
|
962
|
+
self._emit("Returns a substrate scalar (numpy 0-dim from state[i]) rather than a")
|
|
963
|
+
self._emit("Python float — downstream arithmetic stays in substrate land. Other")
|
|
964
|
+
self._emit("slots and semantic content do not contribute; the disjoint-plane")
|
|
965
|
+
self._emit("allocation makes this a projection, not a noisy readout.")
|
|
966
|
+
self._emit('"""')
|
|
967
|
+
self._emit("i, _j = self._slot_plane(slot_idx)")
|
|
968
|
+
self._emit("return state[i]")
|
|
969
|
+
self._indent -= 1
|
|
970
|
+
self._emit()
|
|
971
|
+
self._emit("def rotate_slot(self, state, slot_idx, angle):")
|
|
972
|
+
self._indent += 1
|
|
973
|
+
self._emit('"""Apply a 2D Givens rotation by `angle` in slot slot_idx\'s plane.')
|
|
974
|
+
self._emit('')
|
|
975
|
+
self._emit("Pure rotation in the slot\'s 2D plane; content in other slots, in")
|
|
976
|
+
self._emit("canonical axes, and in the semantic block is untouched. The")
|
|
977
|
+
self._emit("inverse is rotate_slot(state, slot_idx, -angle); applying rotate")
|
|
978
|
+
self._emit("forward and backward on any sequence returns to the starting state")
|
|
979
|
+
self._emit("within floating-point roundoff (validated empirically 2026-04-24,")
|
|
980
|
+
self._emit("100-op sequence: 6e-16 roundtrip error).")
|
|
981
|
+
self._emit('"""')
|
|
982
|
+
self._emit("i, j = self._slot_plane(slot_idx)")
|
|
983
|
+
self._emit("c, s = _np.cos(float(angle)), _np.sin(float(angle))")
|
|
984
|
+
self._emit("new = state.copy() if hasattr(state, 'copy') else _np.asarray(state).copy()")
|
|
985
|
+
self._emit("xi, xj = state[i], state[j]")
|
|
986
|
+
self._emit("new[i] = c * xi - s * xj")
|
|
987
|
+
self._emit("new[j] = s * xi + c * xj")
|
|
988
|
+
self._emit("return new")
|
|
989
|
+
self._indent -= 1
|
|
990
|
+
self._emit()
|
|
991
|
+
self._emit("def similarity(self, a, b):")
|
|
992
|
+
self._indent += 1
|
|
993
|
+
self._emit("na = _np.linalg.norm(a)")
|
|
994
|
+
self._emit("nb = _np.linalg.norm(b)")
|
|
995
|
+
self._emit("# eps-guarded divide — zero-norm case evaluates to 0 without branch.")
|
|
996
|
+
self._emit("return float(_np.dot(a, b) / (na * nb + _np.finfo(_np.float64).tiny))")
|
|
997
|
+
self._indent -= 1
|
|
998
|
+
self._emit()
|
|
999
|
+
# General-purpose tensor operations on vectors and matrices.
|
|
1000
|
+
# The Sutra language exposes these via the `Tensor` namespace
|
|
1001
|
+
# (stdlib/tensor.su):
|
|
1002
|
+
# Tensor.MatrixMul(A, B) -> _VSA.matmul
|
|
1003
|
+
# Tensor.TensorProduct(a, b) -> _VSA.tensor_product
|
|
1004
|
+
# Tensor.Outer(a, b) -> _VSA.outer
|
|
1005
|
+
# Tensor.Dot(a, b) -> _VSA.dot (scalar)
|
|
1006
|
+
# Tensor.Transpose(M) -> _VSA.transpose
|
|
1007
|
+
# Each is a thin wrapper over numpy; the linear-algebra
|
|
1008
|
+
# behavior is whatever numpy does. These are general
|
|
1009
|
+
# tensor-algebra primitives, not VSA primitives — bind /
|
|
1010
|
+
# unbind / bundle remain the canonical VSA operations.
|
|
1011
|
+
self._emit("def matmul(self, a, b):")
|
|
1012
|
+
self._indent += 1
|
|
1013
|
+
self._emit('"""Matrix multiplication (numpy `a @ b`). Works on 1-D, 2-D, or higher-rank arrays per numpy semantics."""')
|
|
1014
|
+
self._emit("return _np.matmul(a, b)")
|
|
1015
|
+
self._indent -= 1
|
|
1016
|
+
self._emit()
|
|
1017
|
+
self._emit("def tensor_product(self, a, b):")
|
|
1018
|
+
self._indent += 1
|
|
1019
|
+
self._emit('"""Tensor / Kronecker product (numpy `kron`)."""')
|
|
1020
|
+
self._emit("return _np.kron(a, b)")
|
|
1021
|
+
self._indent -= 1
|
|
1022
|
+
self._emit()
|
|
1023
|
+
self._emit("def outer(self, a, b):")
|
|
1024
|
+
self._indent += 1
|
|
1025
|
+
self._emit('"""Vector outer product → rank-2 array."""')
|
|
1026
|
+
self._emit("return _np.outer(a, b)")
|
|
1027
|
+
self._indent -= 1
|
|
1028
|
+
self._emit()
|
|
1029
|
+
self._emit("def dot(self, a, b):")
|
|
1030
|
+
self._indent += 1
|
|
1031
|
+
self._emit('"""Inner / dot product → scalar."""')
|
|
1032
|
+
self._emit("return float(_np.dot(a, b))")
|
|
1033
|
+
self._indent -= 1
|
|
1034
|
+
self._emit()
|
|
1035
|
+
self._emit("def transpose(self, m):")
|
|
1036
|
+
self._indent += 1
|
|
1037
|
+
self._emit('"""Transpose. For 1-D inputs, returns the input unchanged (numpy convention)."""')
|
|
1038
|
+
self._emit("return _np.transpose(m)")
|
|
1039
|
+
self._indent -= 1
|
|
1040
|
+
self._emit()
|
|
1041
|
+
self._emit("def norm(self, v):")
|
|
1042
|
+
self._indent += 1
|
|
1043
|
+
self._emit('"""L2 norm. Scalar result."""')
|
|
1044
|
+
self._emit("return float(_np.linalg.norm(v))")
|
|
1045
|
+
self._indent -= 1
|
|
1046
|
+
self._emit()
|
|
1047
|
+
self._emit("def normalize(self, v):")
|
|
1048
|
+
self._indent += 1
|
|
1049
|
+
self._emit('"""L2-normalize with an eps-guard so zero-norm input returns zero."""')
|
|
1050
|
+
self._emit("n = _np.linalg.norm(v)")
|
|
1051
|
+
self._emit("return v / (n + _np.finfo(_np.float64).tiny)")
|
|
1052
|
+
self._indent -= 1
|
|
1053
|
+
self._emit()
|
|
1054
|
+
self._emit("def rotation_for(self, role):")
|
|
1055
|
+
self._indent += 1
|
|
1056
|
+
self._emit('"""Cached Haar-random orthogonal rotation matrix for the role vector."""')
|
|
1057
|
+
self._emit("return self._rotation_for(role)")
|
|
1058
|
+
self._indent -= 1
|
|
1059
|
+
self._emit()
|
|
1060
|
+
# PascalCase aliases — the preferred Sutra-side spelling.
|
|
1061
|
+
# Bound at the class level so `_VSA.MatrixMul(a, b)` resolves
|
|
1062
|
+
# via Python's descriptor protocol and binds self correctly.
|
|
1063
|
+
self._emit("MatrixMul = matmul")
|
|
1064
|
+
self._emit("TensorProduct = tensor_product")
|
|
1065
|
+
self._emit("Outer = outer")
|
|
1066
|
+
self._emit("Dot = dot")
|
|
1067
|
+
self._emit("Transpose = transpose")
|
|
1068
|
+
self._emit("Norm = norm")
|
|
1069
|
+
self._emit("Normalize = normalize")
|
|
1070
|
+
self._emit("RotationFor = rotation_for")
|
|
1071
|
+
self._emit()
|
|
1072
|
+
self._emit("# ---- Vector component accessors (debugging / teaching) ----")
|
|
1073
|
+
self._emit("#")
|
|
1074
|
+
self._emit("# Lowered from the surface-level method calls `v.component(i)`,")
|
|
1075
|
+
self._emit("# `v.semantic(i)`, `v.synthetic(i)`. Zero-indexed. Return a Python")
|
|
1076
|
+
self._emit("# float so the value can be printed, compared, or fed back into")
|
|
1077
|
+
self._emit("# Sutra as a scalar. Not part of the substrate's algebra — these")
|
|
1078
|
+
self._emit("# only exist to make the [semantic | synthetic] layout legible.")
|
|
1079
|
+
self._emit()
|
|
1080
|
+
self._emit("def component(self, v, i):")
|
|
1081
|
+
self._indent += 1
|
|
1082
|
+
self._emit('"""Return element i of v over the full extended state vector."""')
|
|
1083
|
+
self._emit("return float(v[int(i)])")
|
|
1084
|
+
self._indent -= 1
|
|
1085
|
+
self._emit()
|
|
1086
|
+
self._emit("def semantic(self, v, i):")
|
|
1087
|
+
self._indent += 1
|
|
1088
|
+
self._emit('"""Return element i of v within the semantic block (0..semantic_dim).')
|
|
1089
|
+
self._emit('')
|
|
1090
|
+
self._emit("Equivalent to `v.component(i)` while i < semantic_dim, but named")
|
|
1091
|
+
self._emit("so the reader can see which subspace is being addressed.")
|
|
1092
|
+
self._emit('"""')
|
|
1093
|
+
self._emit("idx = int(i)")
|
|
1094
|
+
self._emit("if idx < 0 or idx >= self.semantic_dim:")
|
|
1095
|
+
self._indent += 1
|
|
1096
|
+
self._emit("raise IndexError(")
|
|
1097
|
+
self._indent += 1
|
|
1098
|
+
self._emit('f"semantic index {idx} out of range [0, {self.semantic_dim})")')
|
|
1099
|
+
self._indent -= 1
|
|
1100
|
+
self._indent -= 1
|
|
1101
|
+
self._emit("return float(v[idx])")
|
|
1102
|
+
self._indent -= 1
|
|
1103
|
+
self._emit()
|
|
1104
|
+
self._emit("def synthetic(self, v, i):")
|
|
1105
|
+
self._indent += 1
|
|
1106
|
+
self._emit('"""Return element i of v within the synthetic block (0..synthetic_dim).')
|
|
1107
|
+
self._emit('')
|
|
1108
|
+
self._emit("Equivalent to `v.component(semantic_dim + i)` — the synthetic block")
|
|
1109
|
+
self._emit("starts right after the semantic block in the extended state vector.")
|
|
1110
|
+
self._emit("Iterating `i` from 0 to synthetic_dim-1 walks the reserved")
|
|
1111
|
+
self._emit("computational-state slots.")
|
|
1112
|
+
self._emit('"""')
|
|
1113
|
+
self._emit("idx = int(i)")
|
|
1114
|
+
self._emit("if idx < 0 or idx >= self.synthetic_dim:")
|
|
1115
|
+
self._indent += 1
|
|
1116
|
+
self._emit("raise IndexError(")
|
|
1117
|
+
self._indent += 1
|
|
1118
|
+
self._emit('f"synthetic index {idx} out of range [0, {self.synthetic_dim})")')
|
|
1119
|
+
self._indent -= 1
|
|
1120
|
+
self._indent -= 1
|
|
1121
|
+
self._emit("return float(v[self.semantic_dim + idx])")
|
|
1122
|
+
self._indent -= 1
|
|
1123
|
+
self._emit()
|
|
1124
|
+
self._emit("# ---- Canonical synthetic-axis allocation ----")
|
|
1125
|
+
self._emit("#")
|
|
1126
|
+
self._emit("# First five synthetic axes have designated semantics (per")
|
|
1127
|
+
self._emit("# 2026-04-23 + 2026-04-30 design; see")
|
|
1128
|
+
self._emit("# planning/findings/2026-04-21-extended-state-and-rotation-binding.md):")
|
|
1129
|
+
self._emit("#")
|
|
1130
|
+
self._emit("# synthetic[0] = real component of a number")
|
|
1131
|
+
self._emit("# synthetic[1] = imaginary component of a number")
|
|
1132
|
+
self._emit("# synthetic[2] = truth axis (higher = more true)")
|
|
1133
|
+
self._emit("# synthetic[3] = char-vs-int discriminator flag")
|
|
1134
|
+
self._emit("# synthetic[4] = loop-completion flag (0 = not done, 1 = converged)")
|
|
1135
|
+
self._emit("#")
|
|
1136
|
+
self._emit("# Pinning the allocation to named class attributes so the layout")
|
|
1137
|
+
self._emit("# is legible at runtime and from the REPL.")
|
|
1138
|
+
self._emit("#")
|
|
1139
|
+
self._emit("# AXIS_LOOP_DONE is the substrate-side completion flag set by the")
|
|
1140
|
+
self._emit("# RNN-style branchless loop. It carries the cumulative soft-halt")
|
|
1141
|
+
self._emit("# value (in [0, 1]); programs that read this can detect non-")
|
|
1142
|
+
self._emit("# convergence without host-side conditionals. Output-gating multiplies")
|
|
1143
|
+
self._emit("# value axes by this flag so an incomplete loop emits a zero-vector.")
|
|
1144
|
+
self._emit("# Same shape as the broader exception-channel pattern used for")
|
|
1145
|
+
self._emit("# divide-by-zero and NaN propagation elsewhere in the runtime.")
|
|
1146
|
+
self._emit("AXIS_REAL = 0")
|
|
1147
|
+
self._emit("AXIS_IMAG = 1")
|
|
1148
|
+
self._emit("AXIS_TRUTH = 2")
|
|
1149
|
+
self._emit("AXIS_CHAR_FLAG = 3")
|
|
1150
|
+
self._emit("AXIS_LOOP_DONE = 4")
|
|
1151
|
+
self._emit()
|
|
1152
|
+
self._emit("def real(self, v):")
|
|
1153
|
+
self._indent += 1
|
|
1154
|
+
self._emit('"""Real component of v — synthetic[AXIS_REAL]."""')
|
|
1155
|
+
self._emit("return float(v[self.semantic_dim + self.AXIS_REAL])")
|
|
1156
|
+
self._indent -= 1
|
|
1157
|
+
self._emit()
|
|
1158
|
+
self._emit("def imag(self, v):")
|
|
1159
|
+
self._indent += 1
|
|
1160
|
+
self._emit('"""Imaginary component of v — synthetic[AXIS_IMAG].')
|
|
1161
|
+
self._emit('')
|
|
1162
|
+
self._emit("Zero for a purely real number; nonzero for complex. Sutra's")
|
|
1163
|
+
self._emit("commitment is first-class complex numbers sharing the allocator")
|
|
1164
|
+
self._emit("with int/float — a complex number is just a vector with both")
|
|
1165
|
+
self._emit("the real and imaginary synthetic axes populated.")
|
|
1166
|
+
self._emit('"""')
|
|
1167
|
+
self._emit("return float(v[self.semantic_dim + self.AXIS_IMAG])")
|
|
1168
|
+
self._indent -= 1
|
|
1169
|
+
self._emit()
|
|
1170
|
+
self._emit("def truth(self, v):")
|
|
1171
|
+
self._indent += 1
|
|
1172
|
+
self._emit('"""Truth value carried by v — synthetic[AXIS_TRUTH].')
|
|
1173
|
+
self._emit('')
|
|
1174
|
+
self._emit("Higher scalar → more true; lower (including negative) → more")
|
|
1175
|
+
self._emit("false. Orthogonal to semantic content and to the real/imag")
|
|
1176
|
+
self._emit("axes by construction, so a number's value does not bleed into")
|
|
1177
|
+
self._emit("its truth and vice versa.")
|
|
1178
|
+
self._emit('"""')
|
|
1179
|
+
self._emit("return float(v[self.semantic_dim + self.AXIS_TRUTH])")
|
|
1180
|
+
self._indent -= 1
|
|
1181
|
+
self._emit()
|
|
1182
|
+
self._emit("def make_real(self, x):")
|
|
1183
|
+
self._indent += 1
|
|
1184
|
+
self._emit('"""Extended-state vector carrying x at synthetic[AXIS_REAL].')
|
|
1185
|
+
self._emit('')
|
|
1186
|
+
self._emit("The rest of the vector is zero — no semantic content, no")
|
|
1187
|
+
self._emit("imaginary component, no truth. Analog of a bare float or int")
|
|
1188
|
+
self._emit("literal in the Sutra runtime.")
|
|
1189
|
+
self._emit('"""')
|
|
1190
|
+
self._emit("v = _np.zeros(self.dim, dtype=_np.float64)")
|
|
1191
|
+
self._emit("v[self.semantic_dim + self.AXIS_REAL] = float(x)")
|
|
1192
|
+
self._emit("return v")
|
|
1193
|
+
self._indent -= 1
|
|
1194
|
+
self._emit()
|
|
1195
|
+
self._emit("def make_complex(self, re, im):")
|
|
1196
|
+
self._indent += 1
|
|
1197
|
+
self._emit('"""Extended-state vector carrying (re, im) on the real/imag axes.')
|
|
1198
|
+
self._emit('')
|
|
1199
|
+
self._emit("A complex number is a vector with synthetic[0] = Re(z) and")
|
|
1200
|
+
self._emit("synthetic[1] = Im(z). No separate wrapper type, no parallel")
|
|
1201
|
+
self._emit("storage — the extended state vector carries the whole number.")
|
|
1202
|
+
self._emit('"""')
|
|
1203
|
+
self._emit("v = _np.zeros(self.dim, dtype=_np.float64)")
|
|
1204
|
+
self._emit("v[self.semantic_dim + self.AXIS_REAL] = float(re)")
|
|
1205
|
+
self._emit("v[self.semantic_dim + self.AXIS_IMAG] = float(im)")
|
|
1206
|
+
self._emit("return v")
|
|
1207
|
+
self._indent -= 1
|
|
1208
|
+
self._emit()
|
|
1209
|
+
self._emit("# Three cached matrices for the pure-matmul complex-product form:")
|
|
1210
|
+
self._emit("# _swap_ri — swaps real and imag axes, zeroes elsewhere")
|
|
1211
|
+
self._emit("# _cm_real — picks (input.REAL − input.IMAG) into REAL slot")
|
|
1212
|
+
self._emit("# _cm_imag — picks (input.REAL + input.IMAG) into IMAG slot")
|
|
1213
|
+
self._emit("# Combined with one element-wise multiply, they compute the")
|
|
1214
|
+
self._emit("# complex product with no scalar extraction — preserving the")
|
|
1215
|
+
self._emit("# invariant that matrix operations stay matrix operations all")
|
|
1216
|
+
self._emit("# the way down, so chains of complex multiplications can be")
|
|
1217
|
+
self._emit("# compile-time-fused into a single matrix.")
|
|
1218
|
+
self._emit()
|
|
1219
|
+
self._emit("def _swap_ri_matrix(self):")
|
|
1220
|
+
self._indent += 1
|
|
1221
|
+
self._emit("if not hasattr(self, '_swap_ri_cache') or self._swap_ri_cache is None:")
|
|
1222
|
+
self._indent += 1
|
|
1223
|
+
self._emit("M = _np.zeros((self.dim, self.dim), dtype=_np.float64)")
|
|
1224
|
+
self._emit("r = self.semantic_dim + self.AXIS_REAL")
|
|
1225
|
+
self._emit("i = self.semantic_dim + self.AXIS_IMAG")
|
|
1226
|
+
self._emit("M[r, i] = 1.0; M[i, r] = 1.0")
|
|
1227
|
+
self._emit("self._swap_ri_cache = M")
|
|
1228
|
+
self._indent -= 1
|
|
1229
|
+
self._emit("return self._swap_ri_cache")
|
|
1230
|
+
self._indent -= 1
|
|
1231
|
+
self._emit()
|
|
1232
|
+
self._emit("def _cm_real_matrix(self):")
|
|
1233
|
+
self._indent += 1
|
|
1234
|
+
self._emit("if not hasattr(self, '_cm_real_cache') or self._cm_real_cache is None:")
|
|
1235
|
+
self._indent += 1
|
|
1236
|
+
self._emit("M = _np.zeros((self.dim, self.dim), dtype=_np.float64)")
|
|
1237
|
+
self._emit("r = self.semantic_dim + self.AXIS_REAL")
|
|
1238
|
+
self._emit("i = self.semantic_dim + self.AXIS_IMAG")
|
|
1239
|
+
self._emit("M[r, r] = 1.0; M[r, i] = -1.0")
|
|
1240
|
+
self._emit("self._cm_real_cache = M")
|
|
1241
|
+
self._indent -= 1
|
|
1242
|
+
self._emit("return self._cm_real_cache")
|
|
1243
|
+
self._indent -= 1
|
|
1244
|
+
self._emit()
|
|
1245
|
+
self._emit("def _cm_imag_matrix(self):")
|
|
1246
|
+
self._indent += 1
|
|
1247
|
+
self._emit("if not hasattr(self, '_cm_imag_cache') or self._cm_imag_cache is None:")
|
|
1248
|
+
self._indent += 1
|
|
1249
|
+
self._emit("M = _np.zeros((self.dim, self.dim), dtype=_np.float64)")
|
|
1250
|
+
self._emit("r = self.semantic_dim + self.AXIS_REAL")
|
|
1251
|
+
self._emit("i = self.semantic_dim + self.AXIS_IMAG")
|
|
1252
|
+
self._emit("M[i, r] = 1.0; M[i, i] = 1.0")
|
|
1253
|
+
self._emit("self._cm_imag_cache = M")
|
|
1254
|
+
self._indent -= 1
|
|
1255
|
+
self._emit("return self._cm_imag_cache")
|
|
1256
|
+
self._indent -= 1
|
|
1257
|
+
self._emit()
|
|
1258
|
+
self._emit("def complex_mul(self, a, b):")
|
|
1259
|
+
self._indent += 1
|
|
1260
|
+
self._emit('"""Complex multiplication via pure matmul + element-wise.')
|
|
1261
|
+
self._emit('')
|
|
1262
|
+
self._emit("Given a = (r1 + i1·i) and b = (r2 + i2·i) encoded as vectors")
|
|
1263
|
+
self._emit("with their scalar parts on the real/imag axes, the complex")
|
|
1264
|
+
self._emit("product is:")
|
|
1265
|
+
self._emit("")
|
|
1266
|
+
self._emit(" c = _cm_real @ (a ⊙ b) + _cm_imag @ ((_swap_ri @ a) ⊙ b)")
|
|
1267
|
+
self._emit("")
|
|
1268
|
+
self._emit("where ⊙ is element-wise multiply. No scalar extraction; the")
|
|
1269
|
+
self._emit("operation stays in vector space throughout, so a compile-time")
|
|
1270
|
+
self._emit("simplifier can fuse chains of complex multiplications (by")
|
|
1271
|
+
self._emit("constants) into a single cached matrix.")
|
|
1272
|
+
self._emit('"""')
|
|
1273
|
+
self._emit("av = self._as_complex_vector(a)")
|
|
1274
|
+
self._emit("bv = self._as_complex_vector(b)")
|
|
1275
|
+
self._emit("ab = av * bv")
|
|
1276
|
+
self._emit("swapped_ab = (self._swap_ri_matrix() @ av) * bv")
|
|
1277
|
+
self._emit("return self._cm_real_matrix() @ ab + self._cm_imag_matrix() @ swapped_ab")
|
|
1278
|
+
self._indent -= 1
|
|
1279
|
+
self._emit()
|
|
1280
|
+
self._emit("def _as_complex_vector(self, x):")
|
|
1281
|
+
self._indent += 1
|
|
1282
|
+
self._emit('"""Coerce a Python scalar / vector to complex-plane form."""')
|
|
1283
|
+
self._emit("if isinstance(x, _np.ndarray):")
|
|
1284
|
+
self._indent += 1
|
|
1285
|
+
self._emit("return x")
|
|
1286
|
+
self._indent -= 1
|
|
1287
|
+
self._emit("if isinstance(x, bool):")
|
|
1288
|
+
self._indent += 1
|
|
1289
|
+
self._emit("return self.make_real(1.0 if x else 0.0)")
|
|
1290
|
+
self._indent -= 1
|
|
1291
|
+
self._emit("return self.make_real(float(x))")
|
|
1292
|
+
self._indent -= 1
|
|
1293
|
+
self._emit()
|
|
1294
|
+
self._emit("def make_truth(self, t):")
|
|
1295
|
+
self._indent += 1
|
|
1296
|
+
self._emit('"""Extended-state vector carrying truth value t at synthetic[AXIS_TRUTH]."""')
|
|
1297
|
+
self._emit("v = _np.zeros(self.dim, dtype=_np.float64)")
|
|
1298
|
+
self._emit("v[self.semantic_dim + self.AXIS_TRUTH] = float(t)")
|
|
1299
|
+
self._emit("return v")
|
|
1300
|
+
self._indent -= 1
|
|
1301
|
+
self._emit()
|
|
1302
|
+
self._emit("def make_char(self, codepoint):")
|
|
1303
|
+
self._indent += 1
|
|
1304
|
+
self._emit('"""Extended-state vector for a character literal.')
|
|
1305
|
+
self._emit('')
|
|
1306
|
+
self._emit("Unicode code point at synthetic[AXIS_REAL] (same slot as")
|
|
1307
|
+
self._emit("int/float); synthetic[AXIS_CHAR_FLAG] set to 1.0 to")
|
|
1308
|
+
self._emit("distinguish `'a'` (97 with flag) from `97` (97 without).")
|
|
1309
|
+
self._emit("Arithmetic on chars works the same as on ints — both")
|
|
1310
|
+
self._emit("live on the number axis. Downstream code that cares")
|
|
1311
|
+
self._emit("about the distinction can read the flag via `is_char`.")
|
|
1312
|
+
self._emit('"""')
|
|
1313
|
+
self._emit("v = _np.zeros(self.dim, dtype=_np.float64)")
|
|
1314
|
+
self._emit("v[self.semantic_dim + self.AXIS_REAL] = float(codepoint)")
|
|
1315
|
+
self._emit("v[self.semantic_dim + self.AXIS_CHAR_FLAG] = 1.0")
|
|
1316
|
+
self._emit("return v")
|
|
1317
|
+
self._indent -= 1
|
|
1318
|
+
self._emit()
|
|
1319
|
+
self._emit("def is_char(self, v):")
|
|
1320
|
+
self._indent += 1
|
|
1321
|
+
self._emit('"""True iff v was produced as a character literal."""')
|
|
1322
|
+
self._emit("return bool(v[self.semantic_dim + self.AXIS_CHAR_FLAG] >= 0.5)")
|
|
1323
|
+
self._indent -= 1
|
|
1324
|
+
self._emit()
|
|
1325
|
+
self._emit("def make_trit(self, t):")
|
|
1326
|
+
self._indent += 1
|
|
1327
|
+
self._emit('"""Three-valued primitive class allocated on the truth axis.')
|
|
1328
|
+
self._emit('')
|
|
1329
|
+
self._emit("Shares storage with `make_truth` — a trit is a truth-axis")
|
|
1330
|
+
self._emit("scalar, same as a fuzzy. The difference is compile-time: trit")
|
|
1331
|
+
self._emit("values polarize to {-1, 0, +1} under `defuzzify_trit`, whereas")
|
|
1332
|
+
self._emit("fuzzy values polarize to {-1, +1}. Use `trit` when the")
|
|
1333
|
+
self._emit('"explicitly neutral" case is a first-class meaning you want')
|
|
1334
|
+
self._emit("the defuzzifier to preserve, rather than collapse to a pole.")
|
|
1335
|
+
self._emit('"""')
|
|
1336
|
+
self._emit("return self.make_truth(t)")
|
|
1337
|
+
self._indent -= 1
|
|
1338
|
+
self._emit()
|
|
1339
|
+
self._emit("def defuzzify_trit(self, v, iters=10, beta=2.0):")
|
|
1340
|
+
self._indent += 1
|
|
1341
|
+
self._emit('"""Three-way differentiable polarizer toward {-1, 0, +1}.')
|
|
1342
|
+
self._emit('')
|
|
1343
|
+
self._emit("Softmax over exp(-β · (x - pole)²) with poles at -1, 0, +1;")
|
|
1344
|
+
self._emit("output is the weighted-mean position. As β grows the weight")
|
|
1345
|
+
self._emit("concentrates on the nearest pole, so iterating with β doubling")
|
|
1346
|
+
self._emit("each pass sharpens toward a pole without ever binarizing. The")
|
|
1347
|
+
self._emit("output stays in [-1, +1] and differentiable — no hard commit.")
|
|
1348
|
+
self._emit('')
|
|
1349
|
+
self._emit("Semantic mirror of the binary `defuzzify` but with the neutral")
|
|
1350
|
+
self._emit("point preserved as a first-class attractor. A trit near zero")
|
|
1351
|
+
self._emit("stays near zero; a trit biased toward one of the poles sharpens")
|
|
1352
|
+
self._emit("toward that pole.")
|
|
1353
|
+
self._emit('"""')
|
|
1354
|
+
self._emit("x = float(v[self.semantic_dim + self.AXIS_TRUTH])")
|
|
1355
|
+
self._emit("b = float(beta)")
|
|
1356
|
+
self._emit("for _ in range(int(iters)):")
|
|
1357
|
+
self._indent += 1
|
|
1358
|
+
self._emit("w_neg = _np.exp(-b * (x + 1.0) ** 2)")
|
|
1359
|
+
self._emit("w_zero = _np.exp(-b * x ** 2)")
|
|
1360
|
+
self._emit("w_pos = _np.exp(-b * (x - 1.0) ** 2)")
|
|
1361
|
+
self._emit("s = w_neg + w_zero + w_pos")
|
|
1362
|
+
self._emit("x = float((-w_neg + w_pos) / s)")
|
|
1363
|
+
self._emit("b *= 2.0")
|
|
1364
|
+
self._indent -= 1
|
|
1365
|
+
self._emit("out = v.copy()")
|
|
1366
|
+
self._emit("out[self.semantic_dim + self.AXIS_TRUTH] = x")
|
|
1367
|
+
self._emit("return out")
|
|
1368
|
+
self._indent -= 1
|
|
1369
|
+
self._emit()
|
|
1370
|
+
|
|
1371
|
+
self._emit("# ---- Logical operators — smooth polynomial form ----")
|
|
1372
|
+
self._emit("#")
|
|
1373
|
+
self._emit("# min and max expressed as degree-4 polynomials derived by")
|
|
1374
|
+
self._emit("# Lagrange interpolation on the three-valued grid")
|
|
1375
|
+
self._emit("# {-1, 0, +1}². Exact on the grid (all 9 points match min /")
|
|
1376
|
+
self._emit("# max) and C^∞ everywhere — no |.|, no kink, no subgradient")
|
|
1377
|
+
self._emit("# dispatch. Compile-time simplification passes can apply")
|
|
1378
|
+
self._emit("# standard polynomial rewrites without special-casing the")
|
|
1379
|
+
self._emit("# absolute-value branches.")
|
|
1380
|
+
self._emit("#")
|
|
1381
|
+
self._emit("# min(a, b) = (a + b + ab - a² - b² + a²b²) / 2 elem-wise")
|
|
1382
|
+
self._emit("# max(a, b) = (a + b - ab + a² + b² - a²b²) / 2 elem-wise")
|
|
1383
|
+
self._emit("# not(x) = -x elem-wise")
|
|
1384
|
+
self._emit("#")
|
|
1385
|
+
self._emit("# Identities on {-1, 0, +1}:")
|
|
1386
|
+
self._emit("# min(1, 1) = 1 min(0, 0) = 0 min(-1, -1) = -1")
|
|
1387
|
+
self._emit("# min(1, -1) = -1 min(1, 0) = 0 min(-1, 0) = -1")
|
|
1388
|
+
self._emit("# (max is the symmetric mirror — swap sign on the odd terms.)")
|
|
1389
|
+
self._emit("#")
|
|
1390
|
+
self._emit("# For continuous fuzzy values in (-1, +1) these are polynomial")
|
|
1391
|
+
self._emit("# approximations to true min / max rather than exact equals —")
|
|
1392
|
+
self._emit("# e.g. min(0.7, 0.3) → 0.342 vs true 0.3. Functional")
|
|
1393
|
+
self._emit("# completeness on the three-valued set holds regardless.")
|
|
1394
|
+
self._emit("#")
|
|
1395
|
+
self._emit("# `true` and `false` are vectors too — the _bool_literal_src")
|
|
1396
|
+
self._emit("# override emits make_truth(±1) for bool literals, so the")
|
|
1397
|
+
self._emit("# entire numpy demo path is vector-native.")
|
|
1398
|
+
self._emit("#")
|
|
1399
|
+
self._emit("# Unlike JavaScript / TypeScript / C#, these do NOT short-")
|
|
1400
|
+
self._emit("# circuit — both sides evaluate because min / max need both.")
|
|
1401
|
+
self._emit()
|
|
1402
|
+
self._emit("def _as_truth_vector(self, x):")
|
|
1403
|
+
self._indent += 1
|
|
1404
|
+
self._emit('"""Return x as a vector. Already-a-vector passes through;')
|
|
1405
|
+
self._emit("a Python scalar / bool is lifted to make_truth(scalar).")
|
|
1406
|
+
self._emit('"""')
|
|
1407
|
+
self._emit("if isinstance(x, _np.ndarray):")
|
|
1408
|
+
self._indent += 1
|
|
1409
|
+
self._emit("return x")
|
|
1410
|
+
self._indent -= 1
|
|
1411
|
+
self._emit("if isinstance(x, bool):")
|
|
1412
|
+
self._indent += 1
|
|
1413
|
+
self._emit("return self.make_truth(1.0 if x else -1.0)")
|
|
1414
|
+
self._indent -= 1
|
|
1415
|
+
self._emit("return self.make_truth(float(x))")
|
|
1416
|
+
self._indent -= 1
|
|
1417
|
+
self._emit()
|
|
1418
|
+
# logical_and / logical_or / logical_not runtime methods were
|
|
1419
|
+
# deleted in v0.3 step 4. The operator-lowering pass in
|
|
1420
|
+
# `inliner.py` rewrites `&&`, `||`, `!` as Call nodes targeting
|
|
1421
|
+
# the stdlib `logical_and` / `logical_or` / `logical_not`
|
|
1422
|
+
# functions defined in `stdlib/logic.su`, and the inliner
|
|
1423
|
+
# expands those to the inline polynomial forms before codegen
|
|
1424
|
+
# runs. No runtime method is needed.
|
|
1425
|
+
|
|
1426
|
+
self._emit("# ---- Ordered comparison — differentiable, no predicate ----")
|
|
1427
|
+
self._emit("#")
|
|
1428
|
+
self._emit("# `>`, `<`, `>=`, `<=` operate on number-family values by")
|
|
1429
|
+
self._emit("# projecting onto the real axis and applying a steep tanh")
|
|
1430
|
+
self._emit("# to the difference. Pure tensor arithmetic — no componentwise")
|
|
1431
|
+
self._emit("# predicate, no branch, differentiable everywhere. The steep")
|
|
1432
|
+
self._emit("# slope at zero means integer differences saturate at ±1;")
|
|
1433
|
+
self._emit("# near-ties get a smoothly-varying truth value.")
|
|
1434
|
+
self._emit("#")
|
|
1435
|
+
self._emit("# Pipeline:")
|
|
1436
|
+
self._emit("# diff = a - b element-wise vec sub")
|
|
1437
|
+
self._emit("# diff_r = _real_projector @ diff matmul projection")
|
|
1438
|
+
self._emit("# signed = tanh(CMP_SLOPE * diff_r) componentwise smooth sign")
|
|
1439
|
+
self._emit("# result = _truth_from_real @ signed matmul placement")
|
|
1440
|
+
self._emit("#")
|
|
1441
|
+
self._emit("# Strict (`>`, `<`) and non-strict (`>=`, `<=`) collapse on")
|
|
1442
|
+
self._emit("# this scheme — the tie case gives tanh(0) = 0 in all four.")
|
|
1443
|
+
self._emit("# Programs that need to distinguish strict from tie compose")
|
|
1444
|
+
self._emit("# with `==` (cosine similarity, crisp on identical operands).")
|
|
1445
|
+
self._emit("#")
|
|
1446
|
+
self._emit("# Truth-family operands (bool, fuzzy, trit) are rejected at")
|
|
1447
|
+
self._emit("# codegen time — ordered comparison has no natural meaning")
|
|
1448
|
+
self._emit("# on the truth axis. Custom classes can override.")
|
|
1449
|
+
self._emit()
|
|
1450
|
+
self._emit("def _real_projector(self):")
|
|
1451
|
+
self._indent += 1
|
|
1452
|
+
self._emit('"""Diagonal dim×dim projector onto the real axis. Cached."""')
|
|
1453
|
+
self._emit("if not hasattr(self, '_real_proj_cache') or self._real_proj_cache is None:")
|
|
1454
|
+
self._indent += 1
|
|
1455
|
+
self._emit("M = _np.zeros((self.dim, self.dim), dtype=_np.float64)")
|
|
1456
|
+
self._emit("idx = self.semantic_dim + self.AXIS_REAL")
|
|
1457
|
+
self._emit("M[idx, idx] = 1.0")
|
|
1458
|
+
self._emit("self._real_proj_cache = M")
|
|
1459
|
+
self._indent -= 1
|
|
1460
|
+
self._emit("return self._real_proj_cache")
|
|
1461
|
+
self._indent -= 1
|
|
1462
|
+
self._emit()
|
|
1463
|
+
self._emit("def _truth_from_real(self):")
|
|
1464
|
+
self._indent += 1
|
|
1465
|
+
self._emit('"""Matrix that moves the real-axis entry to the truth axis.')
|
|
1466
|
+
self._emit('')
|
|
1467
|
+
self._emit("Has a single nonzero entry: M[TRUTH, REAL] = 1. Applied to")
|
|
1468
|
+
self._emit("a vector with content only at the real axis (the post-sign")
|
|
1469
|
+
self._emit("result from a comparison), it places that content at the")
|
|
1470
|
+
self._emit("truth axis and zeros everywhere else.")
|
|
1471
|
+
self._emit('"""')
|
|
1472
|
+
self._emit("if not hasattr(self, '_t_from_r_cache') or self._t_from_r_cache is None:")
|
|
1473
|
+
self._indent += 1
|
|
1474
|
+
self._emit("M = _np.zeros((self.dim, self.dim), dtype=_np.float64)")
|
|
1475
|
+
self._emit("M[self.semantic_dim + self.AXIS_TRUTH,")
|
|
1476
|
+
self._indent += 1
|
|
1477
|
+
self._emit("self.semantic_dim + self.AXIS_REAL] = 1.0")
|
|
1478
|
+
self._indent -= 1
|
|
1479
|
+
self._emit("self._t_from_r_cache = M")
|
|
1480
|
+
self._indent -= 1
|
|
1481
|
+
self._emit("return self._t_from_r_cache")
|
|
1482
|
+
self._indent -= 1
|
|
1483
|
+
self._emit()
|
|
1484
|
+
self._emit("# Slope on the tanh — high enough that integer differences")
|
|
1485
|
+
self._emit("# saturate (tanh(100) ≈ 1 to double precision), near-zero")
|
|
1486
|
+
self._emit("# differences traverse the smooth region.")
|
|
1487
|
+
self._emit("CMP_SLOPE = 100.0")
|
|
1488
|
+
self._emit()
|
|
1489
|
+
self._emit("def gt(self, a, b):")
|
|
1490
|
+
self._indent += 1
|
|
1491
|
+
self._emit('"""a > b — differentiable smooth sign on the real-axis difference."""')
|
|
1492
|
+
self._emit("av = self._as_complex_vector(a)")
|
|
1493
|
+
self._emit("bv = self._as_complex_vector(b)")
|
|
1494
|
+
self._emit("diff_r = self._real_projector() @ (av - bv)")
|
|
1495
|
+
self._emit("signed = _np.tanh(self.CMP_SLOPE * diff_r)")
|
|
1496
|
+
self._emit("return self._truth_from_real() @ signed")
|
|
1497
|
+
self._indent -= 1
|
|
1498
|
+
self._emit()
|
|
1499
|
+
# lt / ge / le runtime methods were deleted in v0.3 step 4.
|
|
1500
|
+
# The operator-lowering pass rewrites `<`, `>=`, `<=` as
|
|
1501
|
+
# Call nodes targeting stdlib `lt` / `ge` / `le`, and the
|
|
1502
|
+
# inliner expands them to `b > a`, `a > b`, `b > a` before
|
|
1503
|
+
# codegen — `gt` stays as the single runtime method for the
|
|
1504
|
+
# comparison family until gt's own stdlib body unblocks.
|
|
1505
|
+
|
|
1506
|
+
self._emit("# ---- Equality and inequality — vector cosine similarity ----")
|
|
1507
|
+
self._emit("#")
|
|
1508
|
+
self._emit("# a == b produces a truth-axis vector whose truth coordinate")
|
|
1509
|
+
self._emit("# is cos(a, b). Identical vectors → truth +1 (true); opposite")
|
|
1510
|
+
self._emit("# vectors → truth -1 (false); orthogonal vectors → truth 0")
|
|
1511
|
+
self._emit("# (unknown). Differentiable almost everywhere — the only")
|
|
1512
|
+
self._emit("# singularity is at a zero input vector, which we guard with")
|
|
1513
|
+
self._emit("# an explicit fallback to truth 0.")
|
|
1514
|
+
self._emit("#")
|
|
1515
|
+
self._emit("# The reduction (dot product + norms) is the natural shape of")
|
|
1516
|
+
self._emit("# the semantic question — 'how similar are these two vectors'")
|
|
1517
|
+
self._emit("# — not a scalar-extraction cheat on top of what should have")
|
|
1518
|
+
self._emit("# been a vector op. The math lives in vector arithmetic up to")
|
|
1519
|
+
self._emit("# the reduction, then places the answer on the truth axis.")
|
|
1520
|
+
self._emit()
|
|
1521
|
+
self._emit("def eq(self, a, b):")
|
|
1522
|
+
self._indent += 1
|
|
1523
|
+
self._emit('"""Vector equality — cosine similarity projected onto truth axis.')
|
|
1524
|
+
self._emit('')
|
|
1525
|
+
self._emit("Pure tensor ops: dot products (matmul), sqrt (tensor), add,")
|
|
1526
|
+
self._emit("divide. An eps is added to the denominator so the zero-norm")
|
|
1527
|
+
self._emit("case evaluates to 0/eps = 0 (the neutral) without a predicate.")
|
|
1528
|
+
self._emit('"""')
|
|
1529
|
+
self._emit("av = self._as_any_vector(a)")
|
|
1530
|
+
self._emit("bv = self._as_any_vector(b)")
|
|
1531
|
+
self._emit("na = _np.sqrt(_np.dot(av, av))")
|
|
1532
|
+
self._emit("nb = _np.sqrt(_np.dot(bv, bv))")
|
|
1533
|
+
self._emit("# tiny eps (~2.2e-308) guards the divide without branching;")
|
|
1534
|
+
self._emit("# at normal norms it's lost in roundoff, at zero norms it")
|
|
1535
|
+
self._emit("# makes the result exactly 0 (neutral).")
|
|
1536
|
+
self._emit("return self.make_truth(float(_np.dot(av, bv) / (na * nb + _np.finfo(_np.float64).tiny)))")
|
|
1537
|
+
self._indent -= 1
|
|
1538
|
+
self._emit()
|
|
1539
|
+
# neq runtime method was deleted in v0.3 step 4. `!=` lowers
|
|
1540
|
+
# to Call(neq, ...) which inlines to `!(a == b)`; the `!` then
|
|
1541
|
+
# lowers to Call(logical_not, ...) and inlines to `0 - _`.
|
|
1542
|
+
# Final form: `0 - _VSA.eq(a, b)`. No runtime method needed.
|
|
1543
|
+
|
|
1544
|
+
self._emit("def _as_any_vector(self, x):")
|
|
1545
|
+
self._indent += 1
|
|
1546
|
+
self._emit('"""Coerce any runtime value to a d-dim vector for comparison.')
|
|
1547
|
+
self._emit('')
|
|
1548
|
+
self._emit("Vectors pass through. Bool → make_truth(±1). Other scalars →")
|
|
1549
|
+
self._emit("make_real(x) (on the number axis, not the truth axis — the")
|
|
1550
|
+
self._emit("semantic question 'is 3 == 3.0' is about the number, not the")
|
|
1551
|
+
self._emit("truth value). A string falls back to embed() so `s == embed`")
|
|
1552
|
+
self._emit("works consistently.")
|
|
1553
|
+
self._emit('"""')
|
|
1554
|
+
self._emit("if isinstance(x, _np.ndarray):")
|
|
1555
|
+
self._indent += 1
|
|
1556
|
+
self._emit("return x")
|
|
1557
|
+
self._indent -= 1
|
|
1558
|
+
self._emit("if isinstance(x, bool):")
|
|
1559
|
+
self._indent += 1
|
|
1560
|
+
self._emit("return self.make_truth(1.0 if x else -1.0)")
|
|
1561
|
+
self._indent -= 1
|
|
1562
|
+
self._emit("if isinstance(x, (int, float)):")
|
|
1563
|
+
self._indent += 1
|
|
1564
|
+
self._emit("return self.make_real(float(x))")
|
|
1565
|
+
self._indent -= 1
|
|
1566
|
+
self._emit("if isinstance(x, str):")
|
|
1567
|
+
self._indent += 1
|
|
1568
|
+
self._emit("return self.embed(x)")
|
|
1569
|
+
self._indent -= 1
|
|
1570
|
+
self._emit("raise TypeError(f'cannot coerce {type(x).__name__} to a vector for comparison')")
|
|
1571
|
+
self._indent -= 1
|
|
1572
|
+
self._emit()
|
|
1573
|
+
self._emit("# ---- Defuzzification — matrix projection + iterated eq ----")
|
|
1574
|
+
self._emit("#")
|
|
1575
|
+
self._emit("# defuzzify(x, iters=10):")
|
|
1576
|
+
self._emit("# 1. Matrix-multiply by the truth-axis projector — a dim×dim")
|
|
1577
|
+
self._emit("# diagonal matrix with a single 1 at the truth axis.")
|
|
1578
|
+
self._emit("# Zeroes every other coordinate, including real/imag/")
|
|
1579
|
+
self._emit("# semantic. Non-truth-axis inputs (int, semantic")
|
|
1580
|
+
self._emit("# vector, char, etc.) go to truth=0 → unknown.")
|
|
1581
|
+
self._emit("# 2. Iterate `f = f == true` N times. Under cosine equality")
|
|
1582
|
+
self._emit("# on a truth-axis vector this snaps to ±1 in one pass if")
|
|
1583
|
+
self._emit("# truth≠0, or stays at 0 (the zero-norm guard in eq)")
|
|
1584
|
+
self._emit("# if truth==0. The iteration is kept at 10 for the")
|
|
1585
|
+
self._emit("# user-specified semantics — even though one pass is")
|
|
1586
|
+
self._emit("# enough mathematically, the loop is the definition.")
|
|
1587
|
+
self._emit("#")
|
|
1588
|
+
self._emit("# Output is a truth-axis vector — a three-valued bool. Identical")
|
|
1589
|
+
self._emit("# inputs of type bool/fuzzy/trit will defuzzify to true, false,")
|
|
1590
|
+
self._emit("# or unknown depending on the sign of their truth coordinate.")
|
|
1591
|
+
self._emit()
|
|
1592
|
+
self._emit("def _truth_projector(self):")
|
|
1593
|
+
self._indent += 1
|
|
1594
|
+
self._emit('"""Diagonal dim×dim projector onto the truth axis. Cached."""')
|
|
1595
|
+
self._emit("if not hasattr(self, '_truth_proj_cache') or self._truth_proj_cache is None:")
|
|
1596
|
+
self._indent += 1
|
|
1597
|
+
self._emit("M = _np.zeros((self.dim, self.dim), dtype=_np.float64)")
|
|
1598
|
+
self._emit("idx = self.semantic_dim + self.AXIS_TRUTH")
|
|
1599
|
+
self._emit("M[idx, idx] = 1.0")
|
|
1600
|
+
self._emit("self._truth_proj_cache = M")
|
|
1601
|
+
self._indent -= 1
|
|
1602
|
+
self._emit("return self._truth_proj_cache")
|
|
1603
|
+
self._indent -= 1
|
|
1604
|
+
self._emit()
|
|
1605
|
+
# defuzzify runtime method was deleted in v0.3 step 4. The
|
|
1606
|
+
# `defuzzy(x)` source form is expanded inline by
|
|
1607
|
+
# `_defuzzy_expr_src` above into ten nested `_VSA.eq(...)`
|
|
1608
|
+
# calls wrapping the truth-axis projection of the input —
|
|
1609
|
+
# matching the stdlib definition in `stdlib/logic.su`.
|
|
1610
|
+
|
|
1611
|
+
self._emit("def make_random_rotation(self, angle, n_planes=1, seed=None):")
|
|
1612
|
+
self._indent += 1
|
|
1613
|
+
self._emit('"""Block-diagonal Haar rotation, scaled so its largest eigenphase ~= angle.')
|
|
1614
|
+
self._emit('')
|
|
1615
|
+
self._emit('Haar-uniform in the semantic block, identity in the synthetic')
|
|
1616
|
+
self._emit('block — matches the binding-rotation layout so eigenrotation')
|
|
1617
|
+
self._emit('loops walk the semantic subspace while the synthetic subspace')
|
|
1618
|
+
self._emit('stays untouched.')
|
|
1619
|
+
self._emit('')
|
|
1620
|
+
self._emit('Uniform-angle Givens composition makes every plane orbit at the')
|
|
1621
|
+
self._emit('same frequency, so any trajectory is near-periodic and never')
|
|
1622
|
+
self._emit('explores the hypersphere. A Haar-random orthogonal matrix has a')
|
|
1623
|
+
self._emit('spectrum of eigenphases and produces quasi-periodic trajectories')
|
|
1624
|
+
self._emit('that actually sample the sphere. `angle` and `n_planes` are kept')
|
|
1625
|
+
self._emit('in the signature for cross-backend API compatibility.')
|
|
1626
|
+
self._emit('"""')
|
|
1627
|
+
self._emit("rng = _np.random.RandomState(seed if seed is not None else self.seed)")
|
|
1628
|
+
self._emit("A = rng.randn(self.semantic_dim, self.semantic_dim)")
|
|
1629
|
+
self._emit("Q_sem, _ = _np.linalg.qr(A)")
|
|
1630
|
+
self._emit("# Fractional matrix power via eigendecomposition so the caller")
|
|
1631
|
+
self._emit("# can still dial rotation magnitude via `angle`. Q^(angle/pi)")
|
|
1632
|
+
self._emit("# interpolates between identity (angle=0) and full Q (angle=pi).")
|
|
1633
|
+
self._emit("w, V = _np.linalg.eig(Q_sem)")
|
|
1634
|
+
self._emit("phases = _np.angle(w) * (angle / _np.pi)")
|
|
1635
|
+
self._emit("R_sem = _np.real((V * _np.exp(1j * phases)) @ _np.linalg.inv(V))")
|
|
1636
|
+
self._emit("R = _np.eye(self.dim, dtype=_np.float64)")
|
|
1637
|
+
self._emit("R[:self.semantic_dim, :self.semantic_dim] = R_sem")
|
|
1638
|
+
self._emit("return R")
|
|
1639
|
+
self._indent -= 1
|
|
1640
|
+
self._emit()
|
|
1641
|
+
self._emit("def compile_prototypes(self, prototype_vectors, frame_seed=None):")
|
|
1642
|
+
self._indent += 1
|
|
1643
|
+
self._emit('"""Pass-through on the numpy substrate: no KC sparsification here."""')
|
|
1644
|
+
self._emit("return dict(prototype_vectors)")
|
|
1645
|
+
self._indent -= 1
|
|
1646
|
+
self._emit()
|
|
1647
|
+
self._emit("def _step(self, state, R, target, halted, k, threshold, eps=1e-12):")
|
|
1648
|
+
self._indent += 1
|
|
1649
|
+
self._emit('"""RNN cell: one branchless eigenrotation step with soft halt.')
|
|
1650
|
+
self._emit('')
|
|
1651
|
+
self._emit("Pure tensor ops — multiply, add, divide, exp, minimum. No `if`, no")
|
|
1652
|
+
self._emit("control flow. The soft halt indicator (sigmoid) plus monotone")
|
|
1653
|
+
self._emit("cumulative halt (clamped at 1) freezes state once convergence is")
|
|
1654
|
+
self._emit("reached, without any host-side branch.")
|
|
1655
|
+
self._emit('"""')
|
|
1656
|
+
self._emit("cand = R @ state")
|
|
1657
|
+
self._emit("cand = cand / (_np.linalg.norm(cand) + eps)")
|
|
1658
|
+
self._emit("sim = _np.dot(cand, target) / (_np.linalg.norm(target) + eps)")
|
|
1659
|
+
self._emit("halt = 1.0 / (1.0 + _np.exp(-k * (sim - threshold)))")
|
|
1660
|
+
self._emit("halted = _np.minimum(halted + halt, 1.0)")
|
|
1661
|
+
self._emit("state = (1.0 - halted) * cand + halted * state")
|
|
1662
|
+
self._emit("return state, halted")
|
|
1663
|
+
self._indent -= 1
|
|
1664
|
+
self._emit()
|
|
1665
|
+
self._emit("def loop(self, initial_state, rotation, compiled_prototypes,")
|
|
1666
|
+
self._indent += 1
|
|
1667
|
+
self._emit("target_name=None, threshold=0.5, max_iters=50, k=20.0, frame_seed=None):")
|
|
1668
|
+
self._emit('"""Branchless RNN-style eigenrotation loop.')
|
|
1669
|
+
self._emit('')
|
|
1670
|
+
self._emit("Runs `max_iters` cell steps unconditionally — no early exit, no")
|
|
1671
|
+
self._emit("host-side `for iters` count, no `if best_score >= threshold`. Soft")
|
|
1672
|
+
self._emit("halt freezes state once convergence; output gating zeroes value")
|
|
1673
|
+
self._emit("axes if convergence never fires (incomplete output → exception")
|
|
1674
|
+
self._emit('channel via AXIS_LOOP_DONE).')
|
|
1675
|
+
self._emit('')
|
|
1676
|
+
self._emit("Returns (target_name, state, iters_est) where iters_est is a tensor")
|
|
1677
|
+
self._emit("scalar approximating the step at which convergence happened.")
|
|
1678
|
+
self._emit('"""')
|
|
1679
|
+
self._emit("state = initial_state.copy()")
|
|
1680
|
+
self._emit("halted = 0.0")
|
|
1681
|
+
self._emit("# iters_active accumulates (1 - halted) each step — counts the steps")
|
|
1682
|
+
self._emit("# the cell was 'active' (not yet saturated). Approximates the step at")
|
|
1683
|
+
self._emit("# which convergence happened, as a tensor scalar (no Python int counter).")
|
|
1684
|
+
self._emit("iters_active = 0.0")
|
|
1685
|
+
self._emit("# Pick the target: named target if provided, else the single proto.")
|
|
1686
|
+
self._emit("if target_name is not None:")
|
|
1687
|
+
self._indent += 1
|
|
1688
|
+
self._emit("target = compiled_prototypes[target_name]")
|
|
1689
|
+
self._indent -= 1
|
|
1690
|
+
self._emit("else:")
|
|
1691
|
+
self._indent += 1
|
|
1692
|
+
self._emit("target = next(iter(compiled_prototypes.values()))")
|
|
1693
|
+
self._indent -= 1
|
|
1694
|
+
self._emit("# T-step unroll. The Python `for` is meta-iteration over compile-")
|
|
1695
|
+
self._emit("# time-fixed steps; each iteration is a tensor-op cell with no")
|
|
1696
|
+
self._emit("# data-dependent branches.")
|
|
1697
|
+
self._emit("for _t in range(max_iters):")
|
|
1698
|
+
self._indent += 1
|
|
1699
|
+
self._emit("iters_active = iters_active + (1.0 - float(halted))")
|
|
1700
|
+
self._emit("state, halted = self._step(state, rotation, target, halted, k, threshold)")
|
|
1701
|
+
self._indent -= 1
|
|
1702
|
+
self._emit("# Output gating: multiply value-bearing axes by halted so an")
|
|
1703
|
+
self._emit("# incomplete loop emits a near-zero output. AXIS_LOOP_DONE itself")
|
|
1704
|
+
self._emit("# carries the cumulative halt as a tensor scalar for downstream")
|
|
1705
|
+
self._emit("# code that wants to read the convergence confidence.")
|
|
1706
|
+
self._emit("gated = state * float(halted)")
|
|
1707
|
+
self._emit("gated[self.semantic_dim + self.AXIS_LOOP_DONE] = float(halted)")
|
|
1708
|
+
self._emit("return target_name, gated, iters_active")
|
|
1709
|
+
self._indent -= 1
|
|
1710
|
+
self._indent -= 1
|
|
1711
|
+
self._emit()
|
|
1712
|
+
self._emit()
|
|
1713
|
+
self._emit(
|
|
1714
|
+
f"_VSA = _NumpyVSA("
|
|
1715
|
+
f"semantic_dim={self._semantic_dim}, "
|
|
1716
|
+
f"synthetic_dim={self._synthetic_dim}, "
|
|
1717
|
+
f"seed={self.runtime_seed}, "
|
|
1718
|
+
f"llm_model={self._llm_model!r})"
|
|
1719
|
+
)
|
|
1720
|
+
# Batched pre-fetch of every basis_vector("...") string argument
|
|
1721
|
+
# the program uses. One Ollama round-trip instead of N sequential
|
|
1722
|
+
# ones. Collected by the simplify pass (see translate_module).
|
|
1723
|
+
if self._prefetch_strings:
|
|
1724
|
+
self._emit(f"_VSA.embed_batch({self._prefetch_strings!r})")
|
|
1725
|
+
self._emit()
|
|
1726
|
+
self._emit()
|
|
1727
|
+
self._emit("def _argmax_cosine(query, candidates):")
|
|
1728
|
+
self._indent += 1
|
|
1729
|
+
self._emit('"""Candidate with the largest cosine similarity to query.')
|
|
1730
|
+
self._emit('')
|
|
1731
|
+
self._emit("Vectorized: stacks `candidates` into a (N, d) matrix and")
|
|
1732
|
+
self._emit("computes all N cosines in a single matmul. Equivalent to the")
|
|
1733
|
+
self._emit("old Python for-loop over _VSA.similarity, but ~Nx faster on")
|
|
1734
|
+
self._emit("CPU and the shape the PyTorch/GPU backend will reuse without")
|
|
1735
|
+
self._emit("any further rewriting. N small-kernel launches becomes 1 big one.")
|
|
1736
|
+
self._emit('"""')
|
|
1737
|
+
self._emit("if not candidates:")
|
|
1738
|
+
self._indent += 1
|
|
1739
|
+
self._emit("return None")
|
|
1740
|
+
self._indent -= 1
|
|
1741
|
+
self._emit("M = _np.stack([_np.asarray(c, dtype=_np.float64) for c in candidates])")
|
|
1742
|
+
self._emit("q = _np.asarray(query, dtype=_np.float64)")
|
|
1743
|
+
self._emit("row_norms = _np.linalg.norm(M, axis=1)")
|
|
1744
|
+
self._emit("q_norm = _np.linalg.norm(q)")
|
|
1745
|
+
self._emit("if q_norm == 0:")
|
|
1746
|
+
self._indent += 1
|
|
1747
|
+
self._emit("return candidates[0]")
|
|
1748
|
+
self._indent -= 1
|
|
1749
|
+
self._emit("safe_rn = _np.where(row_norms > 0, row_norms, 1.0)")
|
|
1750
|
+
self._emit("scores = (M @ q) / (safe_rn * q_norm)")
|
|
1751
|
+
self._emit("scores = _np.where(row_norms > 0, scores, -_np.inf)")
|
|
1752
|
+
self._emit("return candidates[int(_np.argmax(scores))]")
|
|
1753
|
+
self._indent -= 1
|
|
1754
|
+
self._emit()
|
|
1755
|
+
self._emit()
|
|
1756
|
+
self._emit_select_helper()
|
|
1757
|
+
self._emit()
|
|
1758
|
+
self._emit("def _vector_map_lookup(pairs, key):")
|
|
1759
|
+
self._indent += 1
|
|
1760
|
+
self._emit('"""Identity-first lookup for vector-keyed maps, cosine fallback.')
|
|
1761
|
+
self._emit('')
|
|
1762
|
+
self._emit("Identity-hit short-circuits before any matmul (the common case")
|
|
1763
|
+
self._emit("for literal vector keys). The cosine fallback stacks and matmuls.")
|
|
1764
|
+
self._emit('"""')
|
|
1765
|
+
self._emit("for k, v in pairs:")
|
|
1766
|
+
self._indent += 1
|
|
1767
|
+
self._emit("if k is key:")
|
|
1768
|
+
self._indent += 1
|
|
1769
|
+
self._emit("return v")
|
|
1770
|
+
self._indent -= 1
|
|
1771
|
+
self._indent -= 1
|
|
1772
|
+
self._emit("if not pairs:")
|
|
1773
|
+
self._indent += 1
|
|
1774
|
+
self._emit("return None")
|
|
1775
|
+
self._indent -= 1
|
|
1776
|
+
self._emit("keys = _np.stack([_np.asarray(k, dtype=_np.float64) for k, _ in pairs])")
|
|
1777
|
+
self._emit("q = _np.asarray(key, dtype=_np.float64)")
|
|
1778
|
+
self._emit("row_norms = _np.linalg.norm(keys, axis=1)")
|
|
1779
|
+
self._emit("q_norm = _np.linalg.norm(q)")
|
|
1780
|
+
self._emit("if q_norm == 0:")
|
|
1781
|
+
self._indent += 1
|
|
1782
|
+
self._emit("return pairs[0][1]")
|
|
1783
|
+
self._indent -= 1
|
|
1784
|
+
self._emit("safe_rn = _np.where(row_norms > 0, row_norms, 1.0)")
|
|
1785
|
+
self._emit("scores = (keys @ q) / (safe_rn * q_norm)")
|
|
1786
|
+
self._emit("scores = _np.where(row_norms > 0, scores, -_np.inf)")
|
|
1787
|
+
self._emit("return pairs[int(_np.argmax(scores))][1]")
|
|
1788
|
+
self._indent -= 1
|
|
1789
|
+
|
|
1790
|
+
|
|
1791
|
+
def translate_module(module: ast.Module, **kwargs) -> str:
    """Compile a parsed Sutra module into self-contained Python source.

    The AST is preprocessed before code generation, in this order:

    1. ``inline_stdlib_calls`` runs first, so the bodies it inlines are
       still visible to the simplifier (constant folding / zero
       absorption can then fold parts of the inlined form).
    2. ``simplify_module`` applies identity rewrites such as
       ``bundle(v) -> v`` and bundle flattening at the AST level rather
       than leaving them to the emitted Python.
    3. ``collect_basis_vector_strings`` gathers every
       ``basis_vector("...")`` string literal so the generator can emit
       one batched embedding pre-fetch at module init instead of one
       request per literal.

    Both passes are invoked for their effect on ``module``; their return
    values are not used here.

    Args:
        module: The parsed Sutra module to translate.
        **kwargs: Forwarded unchanged to the ``Codegen`` constructor.

    Returns:
        The result of ``Codegen.translate`` for the preprocessed module.
    """
    # Function-scope imports, as in the original.
    # NOTE(review): presumably deferred to avoid an import cycle — confirm.
    from .simplify import simplify_module, collect_basis_vector_strings
    from .inliner import inline_stdlib_calls

    # Order matters: inlining must precede simplification so the
    # simplifier sees the inlined stdlib bodies.
    inline_stdlib_calls(module)
    simplify_module(module)

    # Collected only after both passes, i.e. from the final AST.
    prefetch_literals = collect_basis_vector_strings(module)

    generator = Codegen(**kwargs)
    # The generator emits `_VSA.embed_batch(...)` only when this is non-empty.
    generator._prefetch_strings = prefetch_literals
    return generator.translate(module)
|