spiralthink-core 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- encoder/__init__.py +39 -0
- encoder/baseline.py +205 -0
- reference/__init__.py +38 -0
- reference/decoder.py +92 -0
- spiral_format/__init__.py +49 -0
- spiral_format/codec.py +133 -0
- spiral_format/constants.py +55 -0
- spiral_format/container.py +354 -0
- spiral_format/errors.py +43 -0
- spiral_format/io.py +325 -0
- spiral_format/report.py +132 -0
- spiralthink_core-0.9.0.dist-info/METADATA +128 -0
- spiralthink_core-0.9.0.dist-info/RECORD +15 -0
- spiralthink_core-0.9.0.dist-info/WHEEL +5 -0
- spiralthink_core-0.9.0.dist-info/top_level.txt +3 -0
encoder/__init__.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
# SpiralThink — Baseline encoder package (L1)
|
|
3
|
+
# (c) 2026 pfreig-art. Apache License 2.0.
|
|
4
|
+
"""
|
|
5
|
+
L1 baseline encoder package.
|
|
6
|
+
|
|
7
|
+
Re-exports the baseline (CPU-only) encoder API so downstream code can do:
|
|
8
|
+
|
|
9
|
+
from encoder import Params, encode, coord_descent, hamming
|
|
10
|
+
|
|
11
|
+
The baseline encoder searches a 4-parameter helical SpiralProgram by random
|
|
12
|
+
restart + coordinate descent on Hamming distance, then records the XOR residual
|
|
13
|
+
to guarantee bit-exact (zero-error) reconstruction.
|
|
14
|
+
|
|
15
|
+
This package is deliberately pure-Python + stdlib. GPU experiments live under
|
|
16
|
+
`encoder.experimental` (not yet present); the industrial encoder lives under
|
|
17
|
+
the proprietary L2 (`spiralcore/`).
|
|
18
|
+
"""
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
from .baseline import (
|
|
22
|
+
Params,
|
|
23
|
+
coord_descent,
|
|
24
|
+
encode,
|
|
25
|
+
encode_from_params,
|
|
26
|
+
encode_with_params,
|
|
27
|
+
hamming,
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
__all__ = [
|
|
31
|
+
"Params",
|
|
32
|
+
"coord_descent",
|
|
33
|
+
"encode",
|
|
34
|
+
"encode_from_params",
|
|
35
|
+
"encode_with_params",
|
|
36
|
+
"hamming",
|
|
37
|
+
]
|
|
38
|
+
|
|
39
|
+
__version__ = "0.9.0"
|
encoder/baseline.py
ADDED
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
# SpiralThink — Baseline Encoder (L1)
|
|
3
|
+
# (c) 2026 pfreig-art. Apache License 2.0.
|
|
4
|
+
"""
|
|
5
|
+
Baseline encoder: searches a SpiralProgram pi = (a, b, c, d) parametrizing
|
|
6
|
+
the canonical 4-parameter helix family
|
|
7
|
+
|
|
8
|
+
r(t) = a
|
|
9
|
+
theta(t) = b * t
|
|
10
|
+
z(t) = c * t
|
|
11
|
+
phi(x,y,z) = int((x + a) * d) mod 256   (int() truncates toward zero)
|
|
12
|
+
|
|
13
|
+
over a target byte stream x. We optimize (a, b, c, d) by random restart +
|
|
14
|
+
coordinate descent on Hamming distance. After convergence, residual bytes
|
|
15
|
+
are recorded in pi.residual to guarantee zero-error reconstruction.
|
|
16
|
+
|
|
17
|
+
This is a *baseline* encoder — deliberately simple, CPU only, no learned
|
|
18
|
+
priors, no GPU. The L2 industrial encoder (SpiralCore™) lives in the
|
|
19
|
+
proprietary tree and uses neural priors + ASIC-friendly kernels.
|
|
20
|
+
"""
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import math
|
|
24
|
+
import random
|
|
25
|
+
from dataclasses import dataclass
|
|
26
|
+
|
|
27
|
+
from reference import SpiralProgram, decode, verify
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass
class Params:
    """Raw ``(a, b, c, d)`` parameters of the 4-parameter helix family.

    ``a`` is the constant radius, ``b`` the angular rate, ``c`` the axial
    rate and ``d`` the scale applied inside the byte-valued decode map.
    """

    a: float
    b: float
    c: float
    d: float

    def to_program(self, n: int) -> SpiralProgram:
        """Build the residual-free ``SpiralProgram`` of chain length ``n``.

        The four callables are nested ``def``s rather than lambdas so that
        their signatures are fully annotated. Each one receives its
        parameter through a default argument, which freezes the value at
        construction time: later mutation of this ``Params`` instance does
        not affect a program that was already built.
        """
        radius, omega, pitch, scale = self.a, self.b, self.c, self.d

        def r_fn(t: float, a: float = radius) -> float:
            # r(t) = a
            return a

        def theta_fn(t: float, b: float = omega) -> float:
            # theta(t) = b * t
            return b * t

        def z_fn(t: float, c: float = pitch) -> float:
            # z(t) = c * t
            return c * t

        def phi_fn(
            x: float,
            y: float,
            z: float,
            a: float = radius,
            d: float = scale,
        ) -> int:
            # int() truncates toward zero; the mask keeps the low 8 bits.
            return int((x + a) * d) & 0xFF

        return SpiralProgram(r=r_fn, theta=theta_fn, z=z_fn, phi=phi_fn, n=n)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def hamming(a: bytes, b: bytes) -> int:
|
|
68
|
+
# zip(strict=False) is intentional: the length-mismatch term `abs(len(a)-len(b))`
|
|
69
|
+
# below counts the unpaired tail bytes as full mismatches. Using strict=True
|
|
70
|
+
# would forbid the common case where decode() output is shorter than the target.
|
|
71
|
+
return sum(x ^ y != 0 for x, y in zip(a, b, strict=False)) + abs(len(a) - len(b))
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def coord_descent(
    x: bytes,
    p: Params,
    *,
    steps: int = 200,
    sigma: float = 0.05,
    rng: random.Random | None = None,
) -> Params:
    """Locally refine ``p`` to reduce the Hamming error against ``x``.

    Each step perturbs all four parameters of the current best candidate
    with Gaussian noise and keeps the candidate only if it strictly lowers
    ``hamming(decode(program), x)``.

    Args:
        x: Target byte stream.
        p: Starting parameters.
        steps: Maximum number of candidates to evaluate.
        sigma: Noise scale for ``a`` and ``d``; ``b`` and ``c`` use
            ``sigma * 0.1``.
        rng: Random source. When omitted, a fresh ``random.Random(0xC0FFEE)``
            is used so the search is reproducible.

    Returns:
        The best parameters seen, which is ``p`` itself when no candidate
        improved on it.
    """
    if rng is None:
        rng = random.Random(0xC0FFEE)
    n = len(x)
    best = p
    best_err = hamming(decode(best.to_program(n)), x)
    if best_err == 0:
        # Already an exact fit: no candidate can be strictly better, so skip
        # the search (and its `steps` full decodes). No draws are taken from
        # `rng` in this case.
        return best
    for _ in range(steps):
        cand = Params(
            a=best.a + rng.gauss(0, sigma),
            b=best.b + rng.gauss(0, sigma * 0.1),
            c=best.c + rng.gauss(0, sigma * 0.1),
            # d is clamped so the decode-map scale never drops below 1.
            d=max(1.0, best.d + rng.gauss(0, sigma)),
        )
        err = hamming(decode(cand.to_program(n)), x)
        if err < best_err:
            best, best_err = cand, err
            if err == 0:
                break
    return best
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _finalize_with_residual(
    x: bytes, params: Params, label: str
) -> SpiralProgram:
    """Build the final program for ``x`` from ``params`` plus an XOR residual.

    The helix-only program is decoded, XOR-ed against ``x`` to obtain the
    residual, and a new ``SpiralProgram`` carrying that residual is
    returned. When the helix-only decode already equals ``x`` the residual
    is all zeros and is stored as ``b""`` instead, so ``pi.bitlen()`` does
    not grow with ``len(x)`` for exactly-fitted inputs.

    Args:
        x: Target byte stream.
        params: Helix parameters to finalize.
        label: Encoder name used in the failure message.

    Returns:
        A program for which ``verify(pi, x)`` holds.

    Raises:
        ValueError: If the helix-only decode is not exactly ``len(x)`` bytes
            (raised by ``zip(..., strict=True)``).
        AssertionError: If the finalized program does not reproduce ``x``.
    """
    pi_no_residual = params.to_program(len(x))
    y = decode(pi_no_residual)
    # strict=True: a length mismatch must fail loudly rather than silently
    # truncate the residual.
    raw_residual = bytes(a ^ b for a, b in zip(x, y, strict=True))
    residual = raw_residual if any(raw_residual) else b""
    pi = SpiralProgram(
        r=pi_no_residual.r,
        theta=pi_no_residual.theta,
        z=pi_no_residual.z,
        phi=pi_no_residual.phi,
        n=pi_no_residual.n,
        residual=residual,
    )
    # Explicit raise instead of an `assert` statement: `python -O` strips
    # asserts, which would silently disable the zero-error check.
    if not verify(pi, x):
        raise AssertionError(f"{label} encoder failed zero-error invariant")
    return pi
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def encode_from_params(
    x: bytes,
    params: Params,
) -> tuple[SpiralProgram, Params]:
    """Oracle encode: finalize ``x`` with caller-supplied ``params``, no search.

    Intended for callers that already hold a helix fit, such as synthetic
    structured inputs in scaling benchmarks or an offline pre-fit pipeline.

    The XOR residual is still computed and applied, so the zero-error
    invariant holds however poor ``params`` are. With an exact fit the
    residual is ``b""`` and ``pi.bitlen()`` does not depend on ``len(x)``,
    so ``rho_pi = 8 * n / pi.bitlen()`` grows linearly with n.

    Returns:
        ``(pi, params)``, where ``params`` is the argument passed through
        unchanged.
    """
    program = _finalize_with_residual(x, params, label="oracle")
    return program, params
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def encode_with_params(
    x: bytes,
    *,
    restarts: int = 8,
    steps_per_restart: int = 200,
    seed: int = 0xC0FFEE,
) -> tuple[SpiralProgram, Params]:
    """Same as ``encode`` but also return the underlying 4-parameter ``Params``.

    The container layer (``spiral_format``) needs the raw ``(a, b, c, d)``
    to serialize the program without pickling closures. The returned
    ``SpiralProgram`` carries the residual; the returned ``Params`` carries
    the helix family parameters.

    Args:
        x: Target byte stream.
        restarts: Number of random starting points to try; must be >= 1.
        steps_per_restart: ``steps`` passed to ``coord_descent`` per restart.
        seed: Seed of the single ``random.Random`` shared by all restarts.

    Returns:
        ``(pi, params)`` for the restart with the lowest Hamming error.

    Raises:
        ValueError: If ``restarts`` is less than 1.
    """
    # Validate explicitly: with no restart there is no candidate to finalize,
    # and an `assert` would be stripped under `python -O`.
    if restarts < 1:
        raise ValueError(f"restarts must be >= 1, got {restarts}")
    rng = random.Random(seed)
    n = len(x)
    best: Params | None = None
    best_err = math.inf
    for _ in range(restarts):
        p0 = Params(
            a=rng.uniform(0.5, 1.5),
            b=rng.uniform(0.05, 0.2),
            c=rng.uniform(0.005, 0.02),
            d=rng.uniform(64, 192),
        )
        p = coord_descent(x, p0, steps=steps_per_restart, rng=rng)
        err = hamming(decode(p.to_program(n)), x)
        if err < best_err:
            best, best_err = p, err
            if err == 0:
                # Exact fit found; further restarts cannot improve on it.
                break
    # Type narrowing only: restarts >= 1 guarantees the first restart set it.
    assert best is not None
    pi = _finalize_with_residual(x, best, label="baseline")
    return pi, best
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def encode(
    x: bytes,
    *,
    restarts: int = 8,
    steps_per_restart: int = 200,
    seed: int = 0xC0FFEE,
) -> SpiralProgram:
    """Search for pi minimizing Hamming(U(pi), x); finalize with residual XOR.

    Thin wrapper over ``encode_with_params`` that forwards every argument
    and discards the fitted ``Params``.
    """
    program, _fitted = encode_with_params(
        x, restarts=restarts, steps_per_restart=steps_per_restart, seed=seed
    )
    return program
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
if __name__ == "__main__":
    # Smoke demo: encode a 2048-byte arithmetic ramp and report the program
    # size together with the zero-error check.
    demo_input = bytes((7 * i + 13) % 256 for i in range(2048))
    program = encode(demo_input, restarts=4, steps_per_restart=100)
    print(f"|x| = {len(demo_input)} B, |pi| ~= {program.bitlen()} bits")
    print(f"verify = {verify(program, demo_input)} (zero-error)")
|
reference/__init__.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# SPDX-License-Identifier: CC-BY-4.0
|
|
2
|
+
# SpiralThink — Reference package (L0)
|
|
3
|
+
# (c) 2026 pfreig-art. See LICENSES/CC-BY-4.0.txt.
|
|
4
|
+
"""
|
|
5
|
+
L0 reference package.
|
|
6
|
+
|
|
7
|
+
Re-exports the canonical SpiralProgram contract and the reference decoder so that
|
|
8
|
+
downstream packages (encoder/, demo/, tests/, spiralcore/) can simply do:
|
|
9
|
+
|
|
10
|
+
from reference import SpiralProgram, decode, verify, ratio, helix
|
|
11
|
+
|
|
12
|
+
This module is part of the open-science tier (CC-BY-4.0). Treat it as the
|
|
13
|
+
authoritative specification of the SpiralProgram schema. Any divergence between
|
|
14
|
+
an implementation and this module is, by definition, a bug in the implementation.
|
|
15
|
+
"""
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
from .decoder import (
|
|
19
|
+
DecodeMap,
|
|
20
|
+
Scalar,
|
|
21
|
+
SpiralProgram,
|
|
22
|
+
decode,
|
|
23
|
+
helix,
|
|
24
|
+
ratio,
|
|
25
|
+
verify,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
__all__ = [
|
|
29
|
+
"DecodeMap",
|
|
30
|
+
"Scalar",
|
|
31
|
+
"SpiralProgram",
|
|
32
|
+
"decode",
|
|
33
|
+
"helix",
|
|
34
|
+
"ratio",
|
|
35
|
+
"verify",
|
|
36
|
+
]
|
|
37
|
+
|
|
38
|
+
__version__ = "0.9.0"
|
reference/decoder.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# SPDX-License-Identifier: CC-BY-4.0
|
|
2
|
+
# SpiralThink — Reference Decoder (L0)
|
|
3
|
+
# (c) 2026 pfreig-art. Universal geometric decoder over a parametric helix.
|
|
4
|
+
"""
|
|
5
|
+
Reference decoder for SpiralThink programs.
|
|
6
|
+
|
|
7
|
+
A SpiralThink program pi = (r, theta, z, phi) is a 4-tuple of callables
|
|
8
|
+
that parametrize a helical trajectory H(t) and a deterministic decoding
|
|
9
|
+
map phi: H -> Sigma^*. Reconstruction is bit-exact (zero-error).
|
|
10
|
+
|
|
11
|
+
This module is the *minimal canonical* decoder — prioritizes clarity over
|
|
12
|
+
speed. The L1 baseline encoder and the L2 SpiralCore industrial encoder
|
|
13
|
+
live in sibling packages.
|
|
14
|
+
"""
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import hashlib
|
|
18
|
+
import math
|
|
19
|
+
from collections.abc import Callable
|
|
20
|
+
from dataclasses import dataclass
|
|
21
|
+
|
|
22
|
+
Scalar = Callable[[float], float]
|
|
23
|
+
DecodeMap = Callable[[float, float, float], int] # (x, y, z) -> symbol
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass(frozen=True)
class SpiralProgram:
    """A SpiralThink program pi = (r, theta, z, phi).

    ``r``, ``theta`` and ``z`` are scalar callables of the helix parameter
    ``t``; ``phi`` maps a point ``(x, y, z)`` to a symbol. Instances are
    immutable.
    """

    r: Scalar
    theta: Scalar
    z: Scalar
    phi: DecodeMap
    n: int  # chain length (number of symbols produced by decode)
    dt: float = 1.0  # sampling step on the helix parameter t
    residual: bytes = b""  # optional XOR patch applied after the helix pass

    def bitlen(self) -> int:
        """Approximate program length |pi| in bits (parameters + residual).

        This is a nominal figure, not a minimal description length: each of
        the four callables is counted as 32 bits, and every residual byte
        adds 8 bits.
        """
        scalar_count, bits_per_scalar = 4, 32
        residual_bits = len(self.residual) * 8
        return scalar_count * bits_per_scalar + residual_bits
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def helix(pi: SpiralProgram, t: float) -> tuple[float, float, float]:
    """Evaluate the helical trajectory H(t) = (r cos(theta), r sin(theta), z).

    ``pi.r``, ``pi.theta`` and ``pi.z`` are each called once with ``t``, in
    that order.
    """
    radius = pi.r(t)
    angle = pi.theta(t)
    height = pi.z(t)
    return (radius * math.cos(angle), radius * math.sin(angle), height)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def decode(pi: SpiralProgram) -> bytes:
    """Run U(pi) and return the reconstructed byte stream x.

    Symbol ``i`` is ``pi.phi`` evaluated at the helix point for
    ``t = i * pi.dt``, masked to 8 bits. If ``pi.residual`` is non-empty it
    is then XOR-ed over the leading ``len(pi.residual)`` output bytes as a
    final correction pass; this is what provides the zero-error guarantee.
    A residual longer than ``pi.n`` raises ``IndexError``.
    """
    out = bytearray(pi.n)
    for idx in range(pi.n):
        point = helix(pi, idx * pi.dt)
        out[idx] = pi.phi(*point) & 0xFF
    patch = pi.residual
    if not patch:
        # No correction pass when the residual is empty.
        return bytes(out)
    for pos, delta in enumerate(patch):
        out[pos] ^= delta
    return bytes(out)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def verify(pi: SpiralProgram, x: bytes) -> bool:
    """Return whether U(pi) and the target ``x`` have equal SHA-256 digests."""
    reconstructed_digest = hashlib.sha256(decode(pi)).digest()
    target_digest = hashlib.sha256(x).digest()
    return reconstructed_digest == target_digest
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def ratio(pi: SpiralProgram) -> float:
    """Effective compression ratio rho = |x| / |pi|, both measured in bits.

    The denominator is clamped to at least 1 so the division is always
    defined.
    """
    payload_bits = 8 * pi.n
    program_bits = pi.bitlen()
    if program_bits < 1:
        program_bits = 1
    return payload_bits / program_bits
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
# --------------------------------------------------------------------------
|
|
78
|
+
# Demo: a periodic byte stream reconstructed from a 4-parameter helix.
|
|
79
|
+
# --------------------------------------------------------------------------
|
|
80
|
+
if __name__ == "__main__":
    # Build a fixed 4-parameter helix, decode 10,000 symbols from it and
    # report size, ratio and the self-check.
    demo = SpiralProgram(
        r=lambda t: 1.0,
        theta=lambda t: 0.1 * t,
        z=lambda t: 0.01 * t,
        phi=lambda x, y, z: int((x + 1.0) * 127) & 0xFF,
        n=10_000,
    )
    stream = decode(demo)
    print(f"|x| = {len(stream)} bytes")
    print(f"|pi| = {demo.bitlen()} bits")
    print(f"rho = {ratio(demo):.1f}x")
    print(f"verify(pi, x) = {verify(demo, stream)}")
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
# SpiralThink — .spiral on-disk artifact format (L1)
|
|
3
|
+
# (c) 2026 pfreig-art. Apache License 2.0.
|
|
4
|
+
"""
|
|
5
|
+
`spiral_format` — the on-disk `.spiral` artifact container.
|
|
6
|
+
|
|
7
|
+
This package is a thin, tier-L1 wrapper that turns the in-memory SpiralProgram
|
|
8
|
+
(from `reference/`) plus the L1 baseline encoder (from `encoder/`) into a
|
|
9
|
+
self-describing file format with the canonical extension `.spiral`.
|
|
10
|
+
|
|
11
|
+
Design constraints (mirrored by the contract tests in `tests/`):
|
|
12
|
+
|
|
13
|
+
* The format is self-describing: magic bytes + format version + JSON manifest.
|
|
14
|
+
* Integrity is enforced by an explicit SHA-256 of the original payload stored
|
|
15
|
+
in the artifact trailer. Bit-flip / truncation / wrong-magic / wrong-version
|
|
16
|
+
MUST cause `decompress`/`inspect` to fail closed, never to silently succeed.
|
|
17
|
+
* Roundtrip is bit-exact: for every input file `x`, `decompress(compress(x))`
|
|
18
|
+
reproduces `x` byte-for-byte, matching the zero-error invariant defined in
|
|
19
|
+
`reference/decoder.py`.
|
|
20
|
+
* The file extension is `.spiral`.
|
|
21
|
+
|
|
22
|
+
PASS 1 ships the public API surface only — implementations raise
|
|
23
|
+
`NotImplementedError` and the contract tests gate further work (PASS 2).
|
|
24
|
+
"""
|
|
25
|
+
from __future__ import annotations
|
|
26
|
+
|
|
27
|
+
from .constants import MAGIC, MAGIC_LEN, MIME_TYPE, SUPPORTED_VERSIONS, VERSION
|
|
28
|
+
from .errors import (
|
|
29
|
+
BadMagicError,
|
|
30
|
+
IntegrityError,
|
|
31
|
+
SpiralFormatError,
|
|
32
|
+
TruncatedArtifactError,
|
|
33
|
+
UnsupportedVersionError,
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
__all__ = [
|
|
37
|
+
"MAGIC",
|
|
38
|
+
"MAGIC_LEN",
|
|
39
|
+
"MIME_TYPE",
|
|
40
|
+
"SUPPORTED_VERSIONS",
|
|
41
|
+
"VERSION",
|
|
42
|
+
"BadMagicError",
|
|
43
|
+
"IntegrityError",
|
|
44
|
+
"SpiralFormatError",
|
|
45
|
+
"TruncatedArtifactError",
|
|
46
|
+
"UnsupportedVersionError",
|
|
47
|
+
]
|
|
48
|
+
|
|
49
|
+
__version__ = "0.1.0"
|
spiral_format/codec.py
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
# SpiralThink — SpiralProgram <-> bytes codec (L1)
|
|
3
|
+
# (c) 2026 pfreig-art. Apache License 2.0.
|
|
4
|
+
"""
|
|
5
|
+
Small-struct codec for the L1 baseline helix-4 SpiralProgram.
|
|
6
|
+
|
|
7
|
+
Wire layout (big-endian):
|
|
8
|
+
|
|
9
|
+
+--------+--------+--------+--------+--------+--------+--------+--------+
|
|
10
|
+
| n (uint64, chain length) |
|
|
11
|
+
+-----------------------------------------------------------------------+
|
|
12
|
+
| a (float64, r-parameter) |
|
|
13
|
+
+-----------------------------------------------------------------------+
|
|
14
|
+
| b (float64, theta-parameter) |
|
|
15
|
+
+-----------------------------------------------------------------------+
|
|
16
|
+
| c (float64, z-parameter) |
|
|
17
|
+
+-----------------------------------------------------------------------+
|
|
18
|
+
| d (float64, phi-scale) |
|
|
19
|
+
+-----------------------------------------------------------------------+
|
|
20
|
+
| residual_len (uint32) |
|
|
21
|
+
+-----------------------------------------------------------------------+
|
|
22
|
+
| residual_len bytes of XOR residual |
|
|
23
|
+
+-----------------------------------------------------------------------+
|
|
24
|
+
|
|
25
|
+
Fixed prefix: 8 + 4*8 + 4 = 44 bytes. Total = 44 + residual_len.
|
|
26
|
+
|
|
27
|
+
The encoding tag stored in the manifest is ``PAYLOAD_ENCODING``. Any future
|
|
28
|
+
encoder family (e.g. a 6-parameter helix) MUST use a different tag so older
|
|
29
|
+
readers fail with ManifestError instead of silently decoding into the wrong
|
|
30
|
+
family.
|
|
31
|
+
|
|
32
|
+
This codec is deliberately closed-form: no pickle, no eval, no dynamic
|
|
33
|
+
imports. Auditors can verify it by inspection.
|
|
34
|
+
"""
|
|
35
|
+
from __future__ import annotations
|
|
36
|
+
|
|
37
|
+
import struct
|
|
38
|
+
from typing import Final
|
|
39
|
+
|
|
40
|
+
from encoder import Params # type: ignore[import-not-found]
|
|
41
|
+
from reference import SpiralProgram # type: ignore[import-not-found]
|
|
42
|
+
|
|
43
|
+
from .errors import SpiralFormatError
|
|
44
|
+
|
|
45
|
+
#: Manifest tag identifying this payload codec.
|
|
46
|
+
PAYLOAD_ENCODING: Final[str] = "spiralprogram-helix4-v1"
|
|
47
|
+
|
|
48
|
+
#: struct format: n (u64), a, b, c, d (f64 x 4), residual_len (u32). All big-endian.
|
|
49
|
+
_HEADER_FMT = "!QddddI"
|
|
50
|
+
_HEADER_LEN = struct.calcsize(_HEADER_FMT) # 44
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def encode_program(pi: SpiralProgram, params: Params) -> bytes:
    """Serialize a helix-4 SpiralProgram to bytes.

    ``params`` carries the raw (a, b, c, d) used by the encoder; ``pi``
    contributes only ``n`` and the residual. Callables are never serialized.

    Args:
        pi: Program whose ``n``, ``dt`` and ``residual`` are read.
        params: The ``encoder.Params`` that produced ``pi``.

    Returns:
        The fixed-size big-endian header followed by the residual bytes.

    Raises:
        SpiralFormatError: If ``params`` is not a ``Params``; if ``n`` is
            negative, exceeds the uint64 field, or cannot be packed; if
            ``dt`` is not 1.0; or if the residual exceeds the uint32 length
            field.
    """
    if not isinstance(params, Params):
        raise SpiralFormatError(
            f"params must be encoder.Params, got {type(params).__name__}"
        )
    if pi.n < 0:
        raise SpiralFormatError(f"n must be >= 0, got {pi.n}")
    if pi.n > 0xFFFFFFFFFFFFFFFF:
        # n is a uint64 field; report the overflow as a format error instead
        # of letting struct.error escape.
        raise SpiralFormatError(f"n too large for uint64 field: {pi.n}")
    if pi.dt != 1.0:
        # The helix4-v1 layout has no dt field and the reader always rebuilds
        # dt=1.0, so any other value could not roundtrip.
        raise SpiralFormatError(
            f"helix4-v1 codec requires dt=1.0, got dt={pi.dt}"
        )

    residual = bytes(pi.residual)
    if len(residual) > 0xFFFFFFFF:  # pragma: no cover
        # Defensive: residual_len is a uint32 field, so this needs a ~4 GiB
        # residual. Untested by design.
        raise SpiralFormatError(
            f"residual too large for uint32 length field: {len(residual)}"
        )

    try:
        header = struct.pack(
            _HEADER_FMT,
            pi.n,
            float(params.a),
            float(params.b),
            float(params.c),
            float(params.d),
            len(residual),
        )
    except struct.error as err:
        # Remaining pack failures (e.g. a non-integer n) are surfaced through
        # the package's own exception hierarchy.
        raise SpiralFormatError(
            f"cannot pack helix4-v1 header: {err}"
        ) from err
    return header + residual
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def decode_program(blob: bytes) -> SpiralProgram:
    """Reconstruct the SpiralProgram from a helix-4 byte blob.

    Args:
        blob: Header plus residual, as produced by ``encode_program``.

    Returns:
        A ``SpiralProgram`` with ``dt=1.0`` whose callables are rebuilt from
        the stored ``(a, b, c, d)``.

    Raises:
        SpiralFormatError: If ``blob`` is not ``bytes``/``bytearray``, is
            shorter than the header, has a length that disagrees with the
            declared ``residual_len``, or declares a residual longer than
            ``n``.
    """
    if not isinstance(blob, (bytes, bytearray)):
        raise SpiralFormatError(
            f"blob must be bytes, got {type(blob).__name__}"
        )
    if len(blob) < _HEADER_LEN:
        raise SpiralFormatError(
            f"helix4-v1 payload truncated: need at least {_HEADER_LEN} bytes, "
            f"got {len(blob)}"
        )

    n, a, b, c, d, residual_len = struct.unpack(_HEADER_FMT, blob[:_HEADER_LEN])
    expected = _HEADER_LEN + residual_len
    if len(blob) != expected:
        raise SpiralFormatError(
            f"helix4-v1 payload length mismatch: header declares "
            f"{expected} bytes, blob has {len(blob)}"
        )
    if residual_len > n:
        # The residual is XOR-ed index-by-index over an n-byte buffer, so a
        # longer residual can never be decoded. Reject it here as a format
        # error instead of returning a program that fails later at decode.
        raise SpiralFormatError(
            f"helix4-v1 residual longer than chain: "
            f"residual_len={residual_len}, n={n}"
        )
    residual = bytes(blob[_HEADER_LEN:])

    # Rebuild the same callables the L1 baseline encoder produces. This MUST
    # stay in lock-step with encoder.baseline.Params.to_program. Values are
    # bound through default arguments so each callable is self-contained.
    return SpiralProgram(
        r=lambda t, a=a: a,
        theta=lambda t, b=b: b * t,
        z=lambda t, c=c: c * t,
        phi=lambda x, y, z, a=a, d=d: int((x + a) * d) & 0xFF,
        n=int(n),
        dt=1.0,
        residual=residual,
    )
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
__all__ = [
|
|
130
|
+
"PAYLOAD_ENCODING",
|
|
131
|
+
"decode_program",
|
|
132
|
+
"encode_program",
|
|
133
|
+
]
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
# SpiralThink — .spiral format constants (L1)
|
|
3
|
+
# (c) 2026 pfreig-art. Apache License 2.0.
|
|
4
|
+
"""
|
|
5
|
+
Constants that define the on-disk `.spiral` artifact.
|
|
6
|
+
|
|
7
|
+
Stability policy
|
|
8
|
+
----------------
|
|
9
|
+
The values in this module are **the** canonical format identifiers. Changing
|
|
10
|
+
``MAGIC`` is a hard incompatibility and is never allowed. Bumping ``VERSION``
|
|
11
|
+
is allowed only together with an entry in ``SUPPORTED_VERSIONS`` and a
|
|
12
|
+
documented migration note in ``docs/spiral_format.md`` (added in a later pass).
|
|
13
|
+
|
|
14
|
+
Wire layout (big-endian, see ``docs/spiral_format.md`` once written):
|
|
15
|
+
|
|
16
|
+
+--------+--------+--------+--------+--------+--------+
|
|
17
|
+
| 'S' | 'P' | 'R' | 'L' | VERSION (u16) |
|
|
18
|
+
+--------+--------+--------+--------+--------+--------+
|
|
19
|
+
| manifest length (uint32, big-endian) |
|
|
20
|
+
+--------+--------+--------+--------+--------+--------+
|
|
21
|
+
| manifest bytes (UTF-8 JSON) |
|
|
22
|
+
+-----------------------------------------------------+
|
|
23
|
+
| payload length (uint64, big-endian) |
|
|
24
|
+
+-----------------------------------------------------+
|
|
25
|
+
| payload bytes (opaque) |
|
|
26
|
+
+-----------------------------------------------------+
|
|
27
|
+
| SHA-256 of original input (32 bytes, trailer) |
|
|
28
|
+
+-----------------------------------------------------+
|
|
29
|
+
|
|
30
|
+
The trailer SHA-256 is the bit-exact integrity field. It is computed over the
|
|
31
|
+
**uncompressed input**, not over the artifact, and is the value that
|
|
32
|
+
``verify_roundtrip`` compares against the decoded output.
|
|
33
|
+
"""
|
|
34
|
+
from __future__ import annotations
|
|
35
|
+
|
|
36
|
+
from typing import Final
|
|
37
|
+
|
|
38
|
+
MAGIC: Final[bytes] = b"SPRL"
|
|
39
|
+
MAGIC_LEN: Final[int] = len(MAGIC)
|
|
40
|
+
|
|
41
|
+
#: Current format version written by ``pack_artifact``.
|
|
42
|
+
VERSION: Final[int] = 1
|
|
43
|
+
|
|
44
|
+
#: Format versions ``unpack_artifact`` knows how to read. Backward-compatible
|
|
45
|
+
#: readers may add older versions here; never remove a version once shipped.
|
|
46
|
+
SUPPORTED_VERSIONS: Final[frozenset[int]] = frozenset({1})
|
|
47
|
+
|
|
48
|
+
#: Suggested MIME type for tooling (mailers, browsers). Not yet IANA-registered.
|
|
49
|
+
MIME_TYPE: Final[str] = "application/vnd.spiralthink.spiral"
|
|
50
|
+
|
|
51
|
+
#: Canonical filename extension, including the leading dot.
|
|
52
|
+
FILE_EXTENSION: Final[str] = ".spiral"
|
|
53
|
+
|
|
54
|
+
#: SHA-256 digest size in bytes.
|
|
55
|
+
TRAILER_HASH_LEN: Final[int] = 32
|