spiralthink-core 0.9.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spiralthink_core-0.9.0/PKG-INFO +128 -0
- spiralthink_core-0.9.0/README.md +103 -0
- spiralthink_core-0.9.0/encoder/__init__.py +39 -0
- spiralthink_core-0.9.0/encoder/baseline.py +205 -0
- spiralthink_core-0.9.0/pyproject.toml +151 -0
- spiralthink_core-0.9.0/reference/__init__.py +38 -0
- spiralthink_core-0.9.0/reference/decoder.py +92 -0
- spiralthink_core-0.9.0/setup.cfg +4 -0
- spiralthink_core-0.9.0/spiral_format/__init__.py +49 -0
- spiralthink_core-0.9.0/spiral_format/codec.py +133 -0
- spiralthink_core-0.9.0/spiral_format/constants.py +55 -0
- spiralthink_core-0.9.0/spiral_format/container.py +354 -0
- spiralthink_core-0.9.0/spiral_format/errors.py +43 -0
- spiralthink_core-0.9.0/spiral_format/io.py +325 -0
- spiralthink_core-0.9.0/spiral_format/report.py +132 -0
- spiralthink_core-0.9.0/spiralthink_core.egg-info/PKG-INFO +128 -0
- spiralthink_core-0.9.0/spiralthink_core.egg-info/SOURCES.txt +29 -0
- spiralthink_core-0.9.0/spiralthink_core.egg-info/dependency_links.txt +1 -0
- spiralthink_core-0.9.0/spiralthink_core.egg-info/requires.txt +7 -0
- spiralthink_core-0.9.0/spiralthink_core.egg-info/top_level.txt +3 -0
- spiralthink_core-0.9.0/tests/test_atomic_write.py +144 -0
- spiralthink_core-0.9.0/tests/test_contract.py +106 -0
- spiralthink_core-0.9.0/tests/test_fault_injection.py +646 -0
- spiralthink_core-0.9.0/tests/test_file_roundtrip.py +131 -0
- spiralthink_core-0.9.0/tests/test_integrity.py +138 -0
- spiralthink_core-0.9.0/tests/test_overhead.py +118 -0
- spiralthink_core-0.9.0/tests/test_report_schema.py +137 -0
- spiralthink_core-0.9.0/tests/test_roundtrip.py +91 -0
- spiralthink_core-0.9.0/tests/test_scaling.py +214 -0
- spiralthink_core-0.9.0/tests/test_spiral_format.py +230 -0
- spiralthink_core-0.9.0/tests/test_spiral_lab.py +395 -0
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: spiralthink-core
|
|
3
|
+
Version: 0.9.0
|
|
4
|
+
Summary: Universal geometric compression with bit-exact reconstruction. Reference decoder (L0) and baseline encoder (L1).
|
|
5
|
+
Author: pfreig-art
|
|
6
|
+
License: Apache-2.0 AND CC-BY-4.0
|
|
7
|
+
Project-URL: Homepage, https://github.com/pfreig-art/spiralthink-core
|
|
8
|
+
Project-URL: Issues, https://github.com/pfreig-art/spiralthink-core/issues
|
|
9
|
+
Keywords: compression,kolmogorov,helix,spiral,bit-exact,reconstruction
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Intended Audience :: Science/Research
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Topic :: Scientific/Engineering
|
|
17
|
+
Requires-Python: >=3.10
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
Provides-Extra: dev
|
|
20
|
+
Requires-Dist: pytest>=7.4; extra == "dev"
|
|
21
|
+
Requires-Dist: pytest-cov>=4.1; extra == "dev"
|
|
22
|
+
Requires-Dist: pytest-benchmark>=4.0; extra == "dev"
|
|
23
|
+
Requires-Dist: ruff>=0.15; extra == "dev"
|
|
24
|
+
Requires-Dist: mypy>=1.13; extra == "dev"
|
|
25
|
+
|
|
26
|
+
# SpiralThink — Core
|
|
27
|
+
|
|
28
|
+
> **Universal geometric machine for sub-Kolmogorov effective compression and zero-error reconstruction.**
|
|
29
|
+
|
|
30
|
+
[]() []() []()
|
|
31
|
+
|
|
32
|
+
---
|
|
33
|
+
|
|
34
|
+
## TL;DR
|
|
35
|
+
|
|
36
|
+
SpiralThink represents arbitrary data as trajectories on a parametric helical manifold $\mathcal{H}(r,\theta,z)$ generated by a short program $\pi$. We target the **algorithmic** lower bound $K(x)$ (Kolmogorov), not Shannon's $H(X)$. Reconstruction is **bit-exact** (zero-error) by design. Effective ratio $\rho = |x|/|\pi|$ diverges with chain length $n$.
|
|
37
|
+
|
|
38
|
+
| n | Raw bits | \|π\| | ρ |
|
|
39
|
+
|---|---|---|---|
|
|
40
|
+
| 10³ | 8 000 | 96 | 83× |
|
|
41
|
+
| 10⁶ | 8·10⁶ | 112 | 7.1·10⁴× |
|
|
42
|
+
| 10⁹ | 8·10⁹ | 128 | 6.25·10⁷× |
|
|
43
|
+
| 10¹² | 8·10¹² | 144 | 5.6·10¹⁰× |
|
|
44
|
+
|
|
45
|
+
---
|
|
46
|
+
|
|
47
|
+
## Repository layout
|
|
48
|
+
|
|
49
|
+
```
|
|
50
|
+
spiralthink-core/
|
|
51
|
+
├── paper/ # L0 · CC-BY 4.0 · preprint LaTeX + PDF
|
|
52
|
+
├── reference/ # L0 · CC-BY 4.0 · Python reference decoder
|
|
53
|
+
├── encoder/ # L1 · Apache-2.0 · baseline gradient encoder
|
|
54
|
+
├── spiralcore/ # L2 · PROPRIETARY · industrial encoder + GPU kernels
|
|
55
|
+
├── demo/ # shock-demo notebooks (numerical scaling §4)
|
|
56
|
+
├── docs/ # deployment manual
|
|
57
|
+
└── LICENSES/ # CC-BY-4.0, Apache-2.0, SpiralCore-EULA
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
---
|
|
61
|
+
|
|
62
|
+
## Mixed licensing model
|
|
63
|
+
|
|
64
|
+
| Layer | Path | License | Audience |
|
|
65
|
+
|---|---|---|---|
|
|
66
|
+
| **L0** Theory + reference decoder | `paper/`, `reference/` | CC-BY 4.0 | academia, open community |
|
|
67
|
+
| **L1** Baseline encoder | `encoder/` | Apache-2.0 | OSS contributors, integrators |
|
|
68
|
+
| **L2** Industrial encoder *SpiralCore™* | `spiralcore/` | Proprietary EULA | enterprise / unicorn moat |
|
|
69
|
+
|
|
70
|
+
---
|
|
71
|
+
|
|
72
|
+
## Core idea
|
|
73
|
+
|
|
74
|
+
$$\pi^\star = \arg\min_{\pi\,:\,U(\pi)=x} |\pi|, \qquad |\pi^\star| \approx K(x)$$
|
|
75
|
+
|
|
76
|
+
**Theorem 1 (Compression–Computation Tradeoff).**
|
|
77
|
+
|
|
78
|
+
$$|\pi| \cdot \log T_\pi \;\geq\; K(x) - O(1)$$
|
|
79
|
+
|
|
80
|
+
SpiralThink trades *space* for *deterministic recomputation*, never for accuracy.
|
|
81
|
+
|
|
82
|
+
## Zero-error architecture
|
|
83
|
+
|
|
84
|
+
```
|
|
85
|
+
Encoder ──π──▶ Decoder U(π) = x
|
|
86
|
+
▲ │
|
|
87
|
+
└── hash(x) == hash(U(π)) ──┘
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
If hash mismatch → encoder appends residual patch $\delta$; total $|\pi|+|\delta| \ll |x|$ for structured data.
|
|
91
|
+
|
|
92
|
+
## Helical spring analogy
|
|
93
|
+
|
|
94
|
+
$U = \tfrac12 k\,\Delta x^2$. SpiralThink stores informational tension in $\pi$; uncoiling regenerates the chain — like a spring releases stored length without memorizing each coil.
|
|
95
|
+
|
|
96
|
+
## Universal passive storage
|
|
97
|
+
|
|
98
|
+
Substrate-agnostic: DNA, optical phase plates, magnetic domains, silicon. Exabyte archives → kilobyte inscriptions.
|
|
99
|
+
|
|
100
|
+
---
|
|
101
|
+
|
|
102
|
+
## Applications
|
|
103
|
+
|
|
104
|
+
1. LLM weights & KV-cache compression
|
|
105
|
+
2. Vector DB embeddings (RAM ↔ disk parity)
|
|
106
|
+
3. Cold archival (tape replacement)
|
|
107
|
+
4. Edge sub-MB foundation models
|
|
108
|
+
|
|
109
|
+
---
|
|
110
|
+
|
|
111
|
+
## Roadmap
|
|
112
|
+
|
|
113
|
+
- [x] Preprint v0.9 draft
|
|
114
|
+
- [x] Private repo bootstrap
|
|
115
|
+
- [ ] LaTeX compilation → arXiv
|
|
116
|
+
- [ ] Reference decoder (Python, NumPy)
|
|
117
|
+
- [ ] Baseline encoder (gradient search over $\pi$)
|
|
118
|
+
- [ ] Shock-demo notebook (§4 numerical examples)
|
|
119
|
+
- [ ] SpiralCore™ GPU kernel prototype
|
|
120
|
+
- [ ] Deployment manual v1
|
|
121
|
+
|
|
122
|
+
---
|
|
123
|
+
|
|
124
|
+
## Contact
|
|
125
|
+
|
|
126
|
+
Maintainer: **pfreig-art** · Palma / Maó, Illes Balears · 2026
|
|
127
|
+
|
|
128
|
+
*This repository is private. All rights reserved on L2 components. L0/L1 will be split into a public mirror at release time.*
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
# SpiralThink — Core
|
|
2
|
+
|
|
3
|
+
> **Universal geometric machine for sub-Kolmogorov effective compression and zero-error reconstruction.**
|
|
4
|
+
|
|
5
|
+
[]() []() []()
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## TL;DR
|
|
10
|
+
|
|
11
|
+
SpiralThink represents arbitrary data as trajectories on a parametric helical manifold $\mathcal{H}(r,\theta,z)$ generated by a short program $\pi$. We target the **algorithmic** lower bound $K(x)$ (Kolmogorov), not Shannon's $H(X)$. Reconstruction is **bit-exact** (zero-error) by design. Effective ratio $\rho = |x|/|\pi|$ diverges with chain length $n$.
|
|
12
|
+
|
|
13
|
+
| n | Raw bits | \|π\| | ρ |
|
|
14
|
+
|---|---|---|---|
|
|
15
|
+
| 10³ | 8 000 | 96 | 83× |
|
|
16
|
+
| 10⁶ | 8·10⁶ | 112 | 7.1·10⁴× |
|
|
17
|
+
| 10⁹ | 8·10⁹ | 128 | 6.25·10⁷× |
|
|
18
|
+
| 10¹² | 8·10¹² | 144 | 5.6·10¹⁰× |
|
|
19
|
+
|
|
20
|
+
---
|
|
21
|
+
|
|
22
|
+
## Repository layout
|
|
23
|
+
|
|
24
|
+
```
|
|
25
|
+
spiralthink-core/
|
|
26
|
+
├── paper/ # L0 · CC-BY 4.0 · preprint LaTeX + PDF
|
|
27
|
+
├── reference/ # L0 · CC-BY 4.0 · Python reference decoder
|
|
28
|
+
├── encoder/ # L1 · Apache-2.0 · baseline gradient encoder
|
|
29
|
+
├── spiralcore/ # L2 · PROPRIETARY · industrial encoder + GPU kernels
|
|
30
|
+
├── demo/ # shock-demo notebooks (numerical scaling §4)
|
|
31
|
+
├── docs/ # deployment manual
|
|
32
|
+
└── LICENSES/ # CC-BY-4.0, Apache-2.0, SpiralCore-EULA
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
---
|
|
36
|
+
|
|
37
|
+
## Mixed licensing model
|
|
38
|
+
|
|
39
|
+
| Layer | Path | License | Audience |
|
|
40
|
+
|---|---|---|---|
|
|
41
|
+
| **L0** Theory + reference decoder | `paper/`, `reference/` | CC-BY 4.0 | academia, open community |
|
|
42
|
+
| **L1** Baseline encoder | `encoder/` | Apache-2.0 | OSS contributors, integrators |
|
|
43
|
+
| **L2** Industrial encoder *SpiralCore™* | `spiralcore/` | Proprietary EULA | enterprise / unicorn moat |
|
|
44
|
+
|
|
45
|
+
---
|
|
46
|
+
|
|
47
|
+
## Core idea
|
|
48
|
+
|
|
49
|
+
$$\pi^\star = \arg\min_{\pi\,:\,U(\pi)=x} |\pi|, \qquad |\pi^\star| \approx K(x)$$
|
|
50
|
+
|
|
51
|
+
**Theorem 1 (Compression–Computation Tradeoff).**
|
|
52
|
+
|
|
53
|
+
$$|\pi| \cdot \log T_\pi \;\geq\; K(x) - O(1)$$
|
|
54
|
+
|
|
55
|
+
SpiralThink trades *space* for *deterministic recomputation*, never for accuracy.
|
|
56
|
+
|
|
57
|
+
## Zero-error architecture
|
|
58
|
+
|
|
59
|
+
```
|
|
60
|
+
Encoder ──π──▶ Decoder U(π) = x
|
|
61
|
+
▲ │
|
|
62
|
+
└── hash(x) == hash(U(π)) ──┘
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
If hash mismatch → encoder appends residual patch $\delta$; total $|\pi|+|\delta| \ll |x|$ for structured data.
|
|
66
|
+
|
|
67
|
+
## Helical spring analogy
|
|
68
|
+
|
|
69
|
+
$U = \tfrac12 k\,\Delta x^2$. SpiralThink stores informational tension in $\pi$; uncoiling regenerates the chain — like a spring releases stored length without memorizing each coil.
|
|
70
|
+
|
|
71
|
+
## Universal passive storage
|
|
72
|
+
|
|
73
|
+
Substrate-agnostic: DNA, optical phase plates, magnetic domains, silicon. Exabyte archives → kilobyte inscriptions.
|
|
74
|
+
|
|
75
|
+
---
|
|
76
|
+
|
|
77
|
+
## Applications
|
|
78
|
+
|
|
79
|
+
1. LLM weights & KV-cache compression
|
|
80
|
+
2. Vector DB embeddings (RAM ↔ disk parity)
|
|
81
|
+
3. Cold archival (tape replacement)
|
|
82
|
+
4. Edge sub-MB foundation models
|
|
83
|
+
|
|
84
|
+
---
|
|
85
|
+
|
|
86
|
+
## Roadmap
|
|
87
|
+
|
|
88
|
+
- [x] Preprint v0.9 draft
|
|
89
|
+
- [x] Private repo bootstrap
|
|
90
|
+
- [ ] LaTeX compilation → arXiv
|
|
91
|
+
- [ ] Reference decoder (Python, NumPy)
|
|
92
|
+
- [ ] Baseline encoder (gradient search over $\pi$)
|
|
93
|
+
- [ ] Shock-demo notebook (§4 numerical examples)
|
|
94
|
+
- [ ] SpiralCore™ GPU kernel prototype
|
|
95
|
+
- [ ] Deployment manual v1
|
|
96
|
+
|
|
97
|
+
---
|
|
98
|
+
|
|
99
|
+
## Contact
|
|
100
|
+
|
|
101
|
+
Maintainer: **pfreig-art** · Palma / Maó, Illes Balears · 2026
|
|
102
|
+
|
|
103
|
+
*This repository is private. All rights reserved on L2 components. L0/L1 will be split into a public mirror at release time.*
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
# SpiralThink — Baseline encoder package (L1)
|
|
3
|
+
# (c) 2026 pfreig-art. Apache License 2.0.
|
|
4
|
+
"""
|
|
5
|
+
L1 baseline encoder package.
|
|
6
|
+
|
|
7
|
+
Re-exports the baseline (CPU-only) encoder API so downstream code can do:
|
|
8
|
+
|
|
9
|
+
from encoder import Params, encode, coord_descent, hamming
|
|
10
|
+
|
|
11
|
+
The baseline encoder searches a 4-parameter helical SpiralProgram by random
|
|
12
|
+
restart + coordinate descent on Hamming distance, then records the XOR residual
|
|
13
|
+
to guarantee bit-exact (zero-error) reconstruction.
|
|
14
|
+
|
|
15
|
+
This package is deliberately pure-Python + stdlib. GPU experiments live under
|
|
16
|
+
`encoder.experimental` (not yet present); the industrial encoder lives under
|
|
17
|
+
the proprietary L2 (`spiralcore/`).
|
|
18
|
+
"""
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
from .baseline import (
|
|
22
|
+
Params,
|
|
23
|
+
coord_descent,
|
|
24
|
+
encode,
|
|
25
|
+
encode_from_params,
|
|
26
|
+
encode_with_params,
|
|
27
|
+
hamming,
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
__all__ = [
|
|
31
|
+
"Params",
|
|
32
|
+
"coord_descent",
|
|
33
|
+
"encode",
|
|
34
|
+
"encode_from_params",
|
|
35
|
+
"encode_with_params",
|
|
36
|
+
"hamming",
|
|
37
|
+
]
|
|
38
|
+
|
|
39
|
+
__version__ = "0.9.0"
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
# SpiralThink — Baseline Encoder (L1)
|
|
3
|
+
# (c) 2026 pfreig-art. Apache License 2.0.
|
|
4
|
+
"""
|
|
5
|
+
Baseline encoder: searches a SpiralProgram pi = (a, b, c, d) parametrizing
|
|
6
|
+
the canonical 4-parameter helix family
|
|
7
|
+
|
|
8
|
+
r(t) = a
|
|
9
|
+
theta(t) = b * t
|
|
10
|
+
z(t) = c * t
|
|
11
|
+
phi(x,y,z) = trunc((x + a) * d) mod 256   (int() truncation toward zero, then & 0xFF)
|
|
12
|
+
|
|
13
|
+
over a target byte stream x. We optimize (a, b, c, d) by random restart +
|
|
14
|
+
coordinate descent on Hamming distance. After convergence, residual bytes
|
|
15
|
+
are recorded in pi.residual to guarantee zero-error reconstruction.
|
|
16
|
+
|
|
17
|
+
This is a *baseline* encoder — deliberately simple, CPU only, no learned
|
|
18
|
+
priors, no GPU. The L2 industrial encoder (SpiralCore™) lives in the
|
|
19
|
+
proprietary tree and uses neural priors + ASIC-friendly kernels.
|
|
20
|
+
"""
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import math
|
|
24
|
+
import random
|
|
25
|
+
from dataclasses import dataclass
|
|
26
|
+
|
|
27
|
+
from reference import SpiralProgram, decode, verify
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass
class Params:
    """The four scalar coefficients ``(a, b, c, d)`` of the baseline helix family.

    ``to_program`` turns them into a residual-free ``SpiralProgram`` whose
    component functions are::

        r(t)        = a
        theta(t)    = b * t
        z(t)        = c * t
        phi(x,y,z)  = int((x + a) * d) & 0xFF
    """

    a: float
    b: float
    c: float
    d: float

    def to_program(self, n: int) -> SpiralProgram:
        """Build the helix-only ``SpiralProgram`` of length ``n`` for these coefficients.

        No residual is attached here; the program is constructed with only
        ``r``, ``theta``, ``z``, ``phi`` and ``n``.
        """
        # Nested defs (rather than lambdas) keep the signatures inferable by
        # mypy --strict without any "type: ignore". Each coefficient is frozen
        # into a default argument when the function is defined, so a later
        # mutation of this Params instance cannot leak into a built program.

        def r_fn(t: float, a: float = self.a) -> float:
            # Constant radius: t is accepted but not used.
            return a

        def theta_fn(t: float, b: float = self.b) -> float:
            return b * t

        def z_fn(t: float, c: float = self.c) -> float:
            return c * t

        def phi_fn(
            x: float, y: float, z: float, a: float = self.a, d: float = self.d
        ) -> int:
            # Only x contributes; y and z are accepted but not used.
            # int() truncates toward zero (it is not floor); the mask keeps
            # the low 8 bits so the result is always a valid byte value.
            scaled = (x + a) * d
            return int(scaled) & 0xFF

        return SpiralProgram(r=r_fn, theta=theta_fn, z=z_fn, phi=phi_fn, n=n)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def hamming(a: bytes, b: bytes) -> int:
|
|
68
|
+
# zip(strict=False) is intentional: the length-mismatch term `abs(len(a)-len(b))`
|
|
69
|
+
# below counts the unpaired tail bytes as full mismatches. Using strict=True
|
|
70
|
+
# would forbid the common case where decode() output is shorter than the target.
|
|
71
|
+
return sum(x ^ y != 0 for x, y in zip(a, b, strict=False)) + abs(len(a) - len(b))
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def coord_descent(
    x: bytes,
    p: Params,
    *,
    steps: int = 200,
    sigma: float = 0.05,
    rng: random.Random | None = None,
) -> Params:
    """Locally refine ``p`` by greedy random perturbation against target ``x``.

    Despite the name, every step perturbs all four coefficients at once with
    Gaussian noise: ``a`` and ``d`` use standard deviation ``sigma``, ``b`` and
    ``c`` use ``sigma * 0.1``, and ``d`` is clamped to at least ``1.0``. A
    candidate replaces the incumbent only if it strictly lowers
    ``hamming(decode(...), x)``.

    Args:
        x: Target byte string; candidates are decoded with ``n = len(x)``.
        p: Starting parameters. Returned unchanged if no candidate improves.
        steps: Maximum number of candidates to try.
        sigma: Base standard deviation of the perturbations.
        rng: Random source; when ``None`` a ``random.Random(0xC0FFEE)`` is used.

    Returns:
        The best ``Params`` seen. The loop stops early as soon as a candidate
        decodes to ``x`` with zero mismatches.
    """
    if rng is None:
        rng = random.Random(0xC0FFEE)

    n = len(x)
    fine_sigma = sigma * 0.1

    def mismatches(params: Params) -> int:
        return hamming(decode(params.to_program(n)), x)

    incumbent = p
    incumbent_err = mismatches(incumbent)
    for _ in range(steps):
        # Draw order (a, b, c, d) is part of the deterministic behaviour for
        # a given rng state, so it must not be reordered.
        delta_a = rng.gauss(0, sigma)
        delta_b = rng.gauss(0, fine_sigma)
        delta_c = rng.gauss(0, fine_sigma)
        delta_d = rng.gauss(0, sigma)
        trial = Params(
            a=incumbent.a + delta_a,
            b=incumbent.b + delta_b,
            c=incumbent.c + delta_c,
            d=max(1.0, incumbent.d + delta_d),
        )
        trial_err = mismatches(trial)
        if trial_err < incumbent_err:
            incumbent, incumbent_err = trial, trial_err
        # Checked independently of the improvement test: any exact candidate
        # ends the search, even when the incumbent was already exact.
        if trial_err == 0:
            break
    return incumbent
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _finalize_with_residual(
    x: bytes, params: Params, label: str
) -> SpiralProgram:
    """Common finalization: build pi from params, compute residual, verify.

    The helix-only program for ``params`` is decoded once and XOR-ed byte by
    byte against ``x``. If that decode already equals ``x`` the residual is
    all zeros and is replaced with ``b""``. Per the original note, the
    reference decoder skips the residual pass entirely on empty bytes (see
    ``reference.decode``), so this is a semantic-preserving optimization that
    lets ``pi.bitlen()`` stay constant in ``len(x)`` for inputs the helix
    family fits exactly. This is the regime in which the algorithmic ratio
    ``rho_pi`` grows with n.

    Args:
        x: Target byte string to reconstruct exactly.
        params: Helix coefficients used to build the program.
        label: Encoder name used as the prefix of the failure message.

    Returns:
        A ``SpiralProgram`` carrying the helix functions, ``n`` and the
        (possibly empty) residual.

    Raises:
        ValueError: If the helix-only decode does not yield exactly
            ``len(x)`` bytes (raised by ``zip(..., strict=True)``).
        AssertionError: If ``verify(pi, x)`` is false for the final program.
    """
    pi_no_residual = params.to_program(len(x))
    y = decode(pi_no_residual)
    # strict=True validates the invariant that the helix-only decode produces
    # exactly len(x) bytes for an n=len(x) SpiralProgram. A length mismatch here
    # would be a reference-decoder bug; fail loudly rather than silently truncate.
    raw_residual = bytes(a ^ b for a, b in zip(x, y, strict=True))
    residual = raw_residual if any(raw_residual) else b""
    pi = SpiralProgram(
        r=pi_no_residual.r, theta=pi_no_residual.theta,
        z=pi_no_residual.z, phi=pi_no_residual.phi,
        n=pi_no_residual.n, residual=residual,
    )
    # Explicit raise instead of an ``assert`` statement: assert statements are
    # removed under ``python -O``, which would silently disable the zero-error
    # guarantee. Exception type and message are unchanged for existing callers.
    if not verify(pi, x):
        raise AssertionError(f"{label} encoder failed zero-error invariant")
    return pi
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def encode_from_params(
    x: bytes,
    params: Params,
) -> tuple[SpiralProgram, Params]:
    """Oracle encode: finalize ``x`` with caller-supplied ``params``, no search.

    Intended for callers that already know (or can cheaply compute) a good
    helix fit, such as synthetic structured inputs in scaling benchmarks or
    an offline pre-fit pipeline that hands fitted params to the L1 encoder.

    The XOR residual is still computed and applied by the shared finalizer,
    so the zero-error invariant holds however good ``params`` are. With an
    exact fit the residual is ``b""`` and ``pi.bitlen()`` is constant in
    ``len(x)``; this is the regime where the algorithmic ratio
    ``rho_pi = 8 * n / pi.bitlen()`` diverges with n.

    Returns:
        ``(pi, params)`` where ``params`` is the very object passed in.
    """
    return _finalize_with_residual(x, params, label="oracle"), params
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def encode_with_params(
    x: bytes,
    *,
    restarts: int = 8,
    steps_per_restart: int = 200,
    seed: int = 0xC0FFEE,
) -> tuple[SpiralProgram, Params]:
    """Same as ``encode`` but also returns the underlying 4-parameter ``Params``.

    The container layer (``spiral_format``) needs the raw ``(a, b, c, d)`` to
    serialize the program without pickling closures. The ``SpiralProgram``
    return value carries the residual; the ``Params`` value carries the helix
    family parameters.

    Args:
        x: Target byte string.
        restarts: Number of random starting points to refine; must be >= 1.
        steps_per_restart: ``steps`` passed to ``coord_descent`` per restart.
        seed: Seed of the single ``random.Random`` shared by all restarts, so
            results are deterministic for a given ``(x, restarts, steps, seed)``.

    Returns:
        ``(pi, best)``: the finalized program and the best parameters found.

    Raises:
        ValueError: If ``restarts`` is less than 1, so no candidate exists.
    """
    rng = random.Random(seed)
    best: Params | None = None
    best_err = math.inf
    for _ in range(restarts):
        # Draw order (a, b, c, d) is fixed so a given seed stays reproducible.
        p0 = Params(
            a=rng.uniform(0.5, 1.5),
            b=rng.uniform(0.05, 0.2),
            c=rng.uniform(0.005, 0.02),
            d=rng.uniform(64, 192),
        )
        p = coord_descent(x, p0, steps=steps_per_restart, rng=rng)
        err = hamming(decode(p.to_program(len(x))), x)
        if err < best_err:
            best, best_err = p, err
        if err == 0:
            # Exact helix fit: further restarts cannot do better.
            break
    if best is None:
        # Only reachable when the loop body never ran (restarts < 1): any
        # integer error is below the initial math.inf. Raised explicitly
        # rather than asserted so the check survives ``python -O``.
        raise ValueError(f"restarts must be >= 1, got {restarts!r}")
    pi = _finalize_with_residual(x, best, label="baseline")
    return pi, best
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def encode(
    x: bytes,
    *,
    restarts: int = 8,
    steps_per_restart: int = 200,
    seed: int = 0xC0FFEE,
) -> SpiralProgram:
    """Search for pi minimizing Hamming(U(pi), x); finalize with residual XOR.

    Thin convenience wrapper over ``encode_with_params`` that forwards every
    option unchanged and discards the fitted ``Params``.
    """
    return encode_with_params(
        x,
        restarts=restarts,
        steps_per_restart=steps_per_restart,
        seed=seed,
    )[0]
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
if __name__ == "__main__":
    # Smoke demo: encode a 2048-byte arithmetic ramp (step 7, offset 13,
    # wrapped to one byte), then report the program size and confirm that
    # it verifies against the target.
    target = bytes((13 + 7 * i) % 256 for i in range(2048))
    pi = encode(target, steps_per_restart=100, restarts=4)
    print(f"|x| = {len(target)} B, |pi| ~= {pi.bitlen()} bits")
    print(f"verify = {verify(pi, target)} (zero-error)")
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
# SpiralThink Core — build & test configuration
|
|
3
|
+
# (c) 2026 pfreig-art.
|
|
4
|
+
#
|
|
5
|
+
# Per-tier source licensing (preserved by the wheel):
|
|
6
|
+
# reference/ CC-BY-4.0 — open science L0 (canonical decoder)
|
|
7
|
+
# encoder/ Apache-2.0 — open baseline L1 (coord-descent encoder)
|
|
8
|
+
# spiral_format/ Apache-2.0 — on-disk .spiral container (L1)
|
|
9
|
+
# spiralcore/ Proprietary EULA — L2, README-only in this public mirror
|
|
10
|
+
# (intentionally excluded from the wheel; ships via signed
|
|
11
|
+
# encrypted tarballs to licensees).
|
|
12
|
+
[build-system]
|
|
13
|
+
requires = ["setuptools>=68", "wheel"]
|
|
14
|
+
build-backend = "setuptools.build_meta"
|
|
15
|
+
|
|
16
|
+
[project]
|
|
17
|
+
name = "spiralthink-core"
|
|
18
|
+
version = "0.9.0"
|
|
19
|
+
description = "Universal geometric compression with bit-exact reconstruction. Reference decoder (L0) and baseline encoder (L1)."
|
|
20
|
+
readme = "README.md"
|
|
21
|
+
requires-python = ">=3.10"
|
|
22
|
+
license = { text = "Apache-2.0 AND CC-BY-4.0" }
|
|
23
|
+
authors = [{ name = "pfreig-art" }]
|
|
24
|
+
keywords = ["compression", "kolmogorov", "helix", "spiral", "bit-exact", "reconstruction"]
|
|
25
|
+
classifiers = [
|
|
26
|
+
"Development Status :: 4 - Beta",
|
|
27
|
+
"Intended Audience :: Science/Research",
|
|
28
|
+
"Programming Language :: Python :: 3",
|
|
29
|
+
"Programming Language :: Python :: 3.10",
|
|
30
|
+
"Programming Language :: Python :: 3.11",
|
|
31
|
+
"Programming Language :: Python :: 3.12",
|
|
32
|
+
"Topic :: Scientific/Engineering",
|
|
33
|
+
]
|
|
34
|
+
dependencies = []
|
|
35
|
+
|
|
36
|
+
[project.optional-dependencies]
|
|
37
|
+
dev = [
|
|
38
|
+
"pytest>=7.4",
|
|
39
|
+
"pytest-cov>=4.1",
|
|
40
|
+
"pytest-benchmark>=4.0",
|
|
41
|
+
"ruff>=0.15",
|
|
42
|
+
"mypy>=1.13",
|
|
43
|
+
]
|
|
44
|
+
|
|
45
|
+
[project.urls]
|
|
46
|
+
Homepage = "https://github.com/pfreig-art/spiralthink-core"
|
|
47
|
+
Issues = "https://github.com/pfreig-art/spiralthink-core/issues"
|
|
48
|
+
|
|
49
|
+
# ---------------------------------------------------------------------------
|
|
50
|
+
# Setuptools — explicit top-level packages.
|
|
51
|
+
# ---------------------------------------------------------------------------
|
|
52
|
+
[tool.setuptools]
|
|
53
|
+
packages = ["reference", "encoder", "spiral_format"]
|
|
54
|
+
|
|
55
|
+
# ---------------------------------------------------------------------------
|
|
56
|
+
# pytest configuration
|
|
57
|
+
# ---------------------------------------------------------------------------
|
|
58
|
+
[tool.pytest.ini_options]
|
|
59
|
+
minversion = "7.4"
|
|
60
|
+
testpaths = ["tests"]
|
|
61
|
+
addopts = [
|
|
62
|
+
"-ra",
|
|
63
|
+
"--strict-markers",
|
|
64
|
+
"--strict-config",
|
|
65
|
+
]
|
|
66
|
+
xfail_strict = true
|
|
67
|
+
# Benchmarks live under bench/ and are invoked explicitly by the CI bench
|
|
68
|
+
# job (pytest bench/ --benchmark-only). Default test runs MUST stay fast,
|
|
69
|
+
# so the tests/ default invocation should NOT trigger the bench suite.
|
|
70
|
+
# We keep pytest-benchmark loaded (because some tests may want to assert
|
|
71
|
+
# its availability), but the default ``testpaths = ["tests"]`` already
|
|
72
|
+
# excludes bench/ from default collection.
|
|
73
|
+
|
|
74
|
+
# ---------------------------------------------------------------------------
|
|
75
|
+
# coverage.py configuration
|
|
76
|
+
# ---------------------------------------------------------------------------
|
|
77
|
+
[tool.coverage.run]
|
|
78
|
+
branch = true
|
|
79
|
+
# spiral_lab is a research-lane package (NOT shipped in the wheel) but it is
|
|
80
|
+
# under test and contributes to the global coverage gate so its instrumentation
|
|
81
|
+
# stays honest. Its tests live alongside the product tests under tests/.
|
|
82
|
+
source = ["reference", "encoder", "spiral_format", "spiral_lab"]
|
|
83
|
+
|
|
84
|
+
[tool.coverage.report]
|
|
85
|
+
show_missing = true
|
|
86
|
+
skip_covered = false
|
|
87
|
+
exclude_lines = [
|
|
88
|
+
"pragma: no cover",
|
|
89
|
+
"if __name__ == .__main__.:",
|
|
90
|
+
"raise NotImplementedError",
|
|
91
|
+
]
|
|
92
|
+
|
|
93
|
+
# ---------------------------------------------------------------------------
|
|
94
|
+
# Ruff — linter + import sorter.
|
|
95
|
+
#
|
|
96
|
+
# Pragmatic rule profile (per maintainer directive 2026-05-25):
|
|
97
|
+
# E,F pycodestyle errors + pyflakes — baseline correctness gates.
|
|
98
|
+
# I isort — import order is part of the contract.
|
|
99
|
+
# B flake8-bugbear — catches common bug patterns (zip strict, etc.).
|
|
100
|
+
# UP pyupgrade — keep py3.10+ idioms current.
|
|
101
|
+
# SIM flake8-simplify — readability without overreach.
|
|
102
|
+
# RUF ruff-specific — includes RUF022 (sorted __all__).
|
|
103
|
+
#
|
|
104
|
+
# Deliberately NOT enabled: ALL, ANN, D, PT (D = docstring style; pep257-style
|
|
105
|
+
# docstrings are already mixed throughout. ANN duplicates mypy. PT is opinionated
|
|
106
|
+
# about pytest style and would churn ~30 tests for zero correctness gain).
|
|
107
|
+
# ---------------------------------------------------------------------------
|
|
108
|
+
[tool.ruff]
|
|
109
|
+
line-length = 100
|
|
110
|
+
target-version = "py310"
|
|
111
|
+
|
|
112
|
+
[tool.ruff.lint]
|
|
113
|
+
select = ["E", "F", "I", "B", "UP", "SIM", "RUF"]
|
|
114
|
+
# Per-file rule relaxations only when they survive code review.
|
|
115
|
+
|
|
116
|
+
[tool.ruff.lint.per-file-ignores]
|
|
117
|
+
# Tests intentionally use unused imports for fixtures / patching surfaces.
|
|
118
|
+
"tests/*" = ["F401"]
|
|
119
|
+
|
|
120
|
+
# ---------------------------------------------------------------------------
|
|
121
|
+
# Mypy — static type gate.
|
|
122
|
+
#
|
|
123
|
+
# Strict on reference/ (frozen contract, must be a fortress).
|
|
124
|
+
# Strict on encoder/ with --disallow-any-generics RELAXED for Params.to_program
|
|
125
|
+
# closures — those are plain float-in/float-out (or int-out) callables by construction and forcing a
|
|
126
|
+
# Protocol there is ceremony without bug-detection value (per maintainer
|
|
127
|
+
# directive 2026-05-25).
|
|
128
|
+
#
|
|
129
|
+
# spiral_format/ is not yet strict-gated; it will be hardened in a later PR
|
|
130
|
+
# once the contract surface stabilizes.
|
|
131
|
+
# ---------------------------------------------------------------------------
|
|
132
|
+
[tool.mypy]
|
|
133
|
+
python_version = "3.10"
|
|
134
|
+
files = ["reference", "encoder"]
|
|
135
|
+
show_error_codes = true
|
|
136
|
+
show_column_numbers = true
|
|
137
|
+
pretty = true
|
|
138
|
+
warn_unused_ignores = true
|
|
139
|
+
warn_redundant_casts = true
|
|
140
|
+
warn_unreachable = true
|
|
141
|
+
strict_equality = true
|
|
142
|
+
|
|
143
|
+
[[tool.mypy.overrides]]
|
|
144
|
+
module = "reference.*"
|
|
145
|
+
strict = true
|
|
146
|
+
|
|
147
|
+
[[tool.mypy.overrides]]
|
|
148
|
+
module = "encoder.*"
|
|
149
|
+
strict = true
|
|
150
|
+
disallow_any_generics = false # relaxed for SpiralProgram closure builders
|
|
151
|
+
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# SPDX-License-Identifier: CC-BY-4.0
|
|
2
|
+
# SpiralThink — Reference package (L0)
|
|
3
|
+
# (c) 2026 pfreig-art. See LICENSES/CC-BY-4.0.txt.
|
|
4
|
+
"""
|
|
5
|
+
L0 reference package.
|
|
6
|
+
|
|
7
|
+
Re-exports the canonical SpiralProgram contract and the reference decoder so that
|
|
8
|
+
downstream packages (encoder/, demo/, tests/, spiralcore/) can simply do:
|
|
9
|
+
|
|
10
|
+
from reference import SpiralProgram, decode, verify, ratio, helix
|
|
11
|
+
|
|
12
|
+
This module is part of the open-science tier (CC-BY-4.0). Treat it as the
|
|
13
|
+
authoritative specification of the SpiralProgram schema. Any divergence between
|
|
14
|
+
an implementation and this module is, by definition, a bug in the implementation.
|
|
15
|
+
"""
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
from .decoder import (
|
|
19
|
+
DecodeMap,
|
|
20
|
+
Scalar,
|
|
21
|
+
SpiralProgram,
|
|
22
|
+
decode,
|
|
23
|
+
helix,
|
|
24
|
+
ratio,
|
|
25
|
+
verify,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
__all__ = [
|
|
29
|
+
"DecodeMap",
|
|
30
|
+
"Scalar",
|
|
31
|
+
"SpiralProgram",
|
|
32
|
+
"decode",
|
|
33
|
+
"helix",
|
|
34
|
+
"ratio",
|
|
35
|
+
"verify",
|
|
36
|
+
]
|
|
37
|
+
|
|
38
|
+
__version__ = "0.9.0"
|