PyPI - spectraldiag - Versions diffs - 0.1.0__tar.gz - Mend

spectraldiag 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

spectraldiag-0.1.0/CMakeLists.txt +18 -0
spectraldiag-0.1.0/LICENSE +21 -0
spectraldiag-0.1.0/MANIFEST.in +6 -0
spectraldiag-0.1.0/PKG-INFO +154 -0
spectraldiag-0.1.0/README.md +116 -0
spectraldiag-0.1.0/pyproject.toml +39 -0
spectraldiag-0.1.0/python/spectraldiag/__init__.py +148 -0
spectraldiag-0.1.0/python/spectraldiag/callbacks.py +269 -0
spectraldiag-0.1.0/python/spectraldiag/graph_lap.py +179 -0
spectraldiag-0.1.0/python/spectraldiag.egg-info/PKG-INFO +154 -0
spectraldiag-0.1.0/python/spectraldiag.egg-info/SOURCES.txt +20 -0
spectraldiag-0.1.0/python/spectraldiag.egg-info/dependency_links.txt +1 -0
spectraldiag-0.1.0/python/spectraldiag.egg-info/requires.txt +24 -0
spectraldiag-0.1.0/python/spectraldiag.egg-info/top_level.txt +1 -0
spectraldiag-0.1.0/setup.cfg +4 -0
spectraldiag-0.1.0/setup.py +22 -0
spectraldiag-0.1.0/src/pymodule.cpp +149 -0
spectraldiag-0.1.0/src/spectraldiag/alignment.hpp +195 -0
spectraldiag-0.1.0/src/spectraldiag/api.hpp +47 -0
spectraldiag-0.1.0/src/spectraldiag/diagnostics.hpp +338 -0
spectraldiag-0.1.0/src/spectraldiag/spectral.hpp +181 -0
spectraldiag-0.1.0/tests/test_core.py +171 -0

spectraldiag-0.1.0/CMakeLists.txt ADDED Viewed

@@ -0,0 +1,18 @@
+# Build script for the `_core` CPython extension module.
+cmake_minimum_required(VERSION 3.18)
+project(spectraldiag LANGUAGES CXX)
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+if(MSVC)
+    add_compile_options(/O2 /EHsc)
+else()
+    # -ffast-math is deliberately not used: it implies -ffinite-math-only,
+    # which lets the compiler assume NaN/Inf never occur and drop guards
+    # against them in the numerical code.
+    add_compile_options(-O3)
+endif()
+# Development.Module (available since CMake 3.18) only needs what is required
+# to build an extension module; plain Development also demands the embedding
+# library, which is absent on many build images.
+find_package(Python COMPONENTS Interpreter Development.Module REQUIRED)
+Python_add_library(_core MODULE src/pymodule.cpp)
+target_include_directories(_core PRIVATE src)
+install(TARGETS _core DESTINATION spectraldiag)

spectraldiag-0.1.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2025 spectraldiag contributors
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

spectraldiag-0.1.0/MANIFEST.in ADDED Viewed

@@ -0,0 +1,6 @@
+include README.md
+include LICENSE
+recursive-include src *.cpp *.hpp *.h
+recursive-include python *.py
+include CMakeLists.txt
+include pyproject.toml

spectraldiag-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,154 @@
+Metadata-Version: 2.4
+Name: spectraldiag
+Version: 0.1.0
+Summary: Mathematically rigorous ML model diagnostics: stationarity verdict, effective dimension, barrier certificate
+License: MIT
+Project-URL: Homepage, https://github.com/drozdisme/spectraldiag
+Project-URL: Repository, https://github.com/drozdisme/spectraldiag
+Keywords: machine-learning,scaling-laws,diagnostics,benchmarking,spectral
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Science/Research
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: C++
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Requires-Python: >=3.9
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Provides-Extra: torch
+Requires-Dist: torch>=2.0; extra == "torch"
+Provides-Extra: hf
+Requires-Dist: transformers>=4.30; extra == "hf"
+Requires-Dist: torch>=2.0; extra == "hf"
+Provides-Extra: lightning
+Requires-Dist: lightning>=2.0; extra == "lightning"
+Requires-Dist: torch>=2.0; extra == "lightning"
+Provides-Extra: data
+Requires-Dist: numpy>=1.24; extra == "data"
+Requires-Dist: scipy>=1.10; extra == "data"
+Requires-Dist: scikit-learn>=1.3; extra == "data"
+Provides-Extra: all
+Requires-Dist: torch>=2.0; extra == "all"
+Requires-Dist: transformers>=4.30; extra == "all"
+Requires-Dist: lightning>=2.0; extra == "all"
+Requires-Dist: numpy>=1.24; extra == "all"
+Requires-Dist: scipy>=1.10; extra == "all"
+Requires-Dist: scikit-learn>=1.3; extra == "all"
+Dynamic: license-file
+# spectraldiag
+**Mathematically rigorous ML model diagnostics.**
+Answers not "what happened" (that's W&B) but **"why it happened and what to do"** — with mathematical proof, not empirical guessing.
+```bash
+pip install spectraldiag
+```
+---
+## Three core functions
+### `stationarity_verdict(ntk_eigs, target_coeffs)`
+Is your model's feature learning done, or still evolving?
+```python
+from spectraldiag import stationarity_verdict
+result = stationarity_verdict(ntk_eigs, target_coeffs)
+print(result.reason)
+# STATIONARY. Source exponent r_hat=0.491 (±0.031) is consistent with r=0.5
+# (self-organised criticality). Model is pinned to the Sobolev minimax barrier
+# β₀=0.556. Additional data will improve loss at rate D^{-0.556} — no more
+# than 44% further gain possible without compositional restructuring.
+```
+**What it computes:** fits the source exponent `r` from the empirical NTK spectrum. `r ≈ 0.5` means your model has self-organised to the critical attractor — it's stationary, permanently bounded by `β₀ = 2s/(2s+d*)`.
+### `effective_dimension(laplacian_eigs, approx_errors, model_sizes)`
+Does your data have compositional structure your model could exploit?
+```python
+from spectraldiag import effective_dimension
+result = effective_dimension(laplacian_eigs, approx_errors, model_sizes)
+print(result.verdict)
+# COMPOSITIONAL STRUCTURE DETECTED. Data intrinsic dimension d*=8.2 but
+# effective task dimension d_loc=2.1. Compositional approximation exponent
+# α=1.19 vs Sobolev baseline α=0.30 — 3.9× compression gain available.
+```
+**What it computes:** estimates `d*` from the graph-Laplacian spectrum of your data, `d_loc` from the model-side approximation exponent. If `d_loc < d*`, genuine compositional structure exists — and the phase transition theorem says emergence is real.
+### `barrier_certificate(d_star, d_loc, s, current_loss, current_N, current_D)`
+Where is your model relative to the theoretical ceiling?
+```python
+from spectraldiag import barrier_certificate
+result = barrier_certificate(
+    d_star=8.0, d_loc=2.0, s=1.25,
+    current_loss=0.42, current_N=1e8, current_D=1e11
+)
+print(result.verdict)
+# BARRIER CERTIFICATE. Theoretical ceiling β₀=0.238. With compositional
+# structure (d_loc=2.0), barrier rises to β'=0.556 — 2.3× faster data
+# scaling. Training budget D=1e11 has NOT passed the crossover D_cross≈...
+```
+---
+## One-line integration
+```python
+from spectraldiag.callbacks import make_hf_callback
+trainer = Trainer(
+    ...,
+    callbacks=[make_hf_callback(eval_data=(X_val, y_val))]
+)
+```
+Works with HuggingFace Trainer and PyTorch Lightning out of the box.
+---
+## Graph-Laplacian protocol (for real data)
+```python
+from spectraldiag.graph_lap import graph_laplacian_eigs, estimate_d_star, double_dimension_consistency
+eig_vals, eig_vecs = graph_laplacian_eigs(X_data, knn=10)
+d_star = estimate_d_star(eig_vals)
+consistency = double_dimension_consistency(d_star_data=d_star, d_loc_model=d_loc_from_model)
+print(consistency["verdict"])
+```
+---
+## Mathematical foundation
+This library implements the three-paper programme:
+- **TR** — *Boundaries of Stationary Feature Learning*: the Sobolev minimax barrier `β₀`, self-organised criticality `r=½`, approximation exponent `α=2s/d*`
+- **AB** — *Foundations of a Theory of Composable Abstractions*: defect as projection, effective dimension `d_loc`, subspace gap as the order parameter
+- **BM** — *Spectral Scaling Benchmark*: the decisive-test protocol, source exponent measurement, graph-Laplacian intrinsic dimension estimation
+The decisive invariant: `d_loc < d*` ⟺ emergence is real ⟺ the phase transition theorem applies.
+---
+## Build from source
+```bash
+pip install -e ".[all]"
+```
+Requires only a C++17 compiler. The C++ core is a standard CPython
+extension built automatically by setuptools — no CMake, no pybind11,
+no extra build dependencies.

spectraldiag-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,116 @@
+# spectraldiag
+**Mathematically rigorous ML model diagnostics.**
+Answers not "what happened" (that's W&B) but **"why it happened and what to do"** — with mathematical proof, not empirical guessing.
+```bash
+pip install spectraldiag
+```
+---
+## Three core functions
+### `stationarity_verdict(ntk_eigs, target_coeffs)`
+Is your model's feature learning done, or still evolving?
+```python
+from spectraldiag import stationarity_verdict
+result = stationarity_verdict(ntk_eigs, target_coeffs)
+print(result.reason)
+# STATIONARY. Source exponent r_hat=0.491 (±0.031) is consistent with r=0.5
+# (self-organised criticality). Model is pinned to the Sobolev minimax barrier
+# β₀=0.556. Additional data will improve loss at rate D^{-0.556} — no more
+# than 44% further gain possible without compositional restructuring.
+```
+**What it computes:** fits the source exponent `r` from the empirical NTK spectrum. `r ≈ 0.5` means your model has self-organised to the critical attractor — it's stationary, permanently bounded by `β₀ = 2s/(2s+d*)`.
+### `effective_dimension(laplacian_eigs, approx_errors, model_sizes)`
+Does your data have compositional structure your model could exploit?
+```python
+from spectraldiag import effective_dimension
+result = effective_dimension(laplacian_eigs, approx_errors, model_sizes)
+print(result.verdict)
+# COMPOSITIONAL STRUCTURE DETECTED. Data intrinsic dimension d*=8.2 but
+# effective task dimension d_loc=2.1. Compositional approximation exponent
+# α=1.19 vs Sobolev baseline α=0.30 — 3.9× compression gain available.
+```
+**What it computes:** estimates `d*` from the graph-Laplacian spectrum of your data, `d_loc` from the model-side approximation exponent. If `d_loc < d*`, genuine compositional structure exists — and the phase transition theorem says emergence is real.
+### `barrier_certificate(d_star, d_loc, s, current_loss, current_N, current_D)`
+Where is your model relative to the theoretical ceiling?
+```python
+from spectraldiag import barrier_certificate
+result = barrier_certificate(
+    d_star=8.0, d_loc=2.0, s=1.25,
+    current_loss=0.42, current_N=1e8, current_D=1e11
+)
+print(result.verdict)
+# BARRIER CERTIFICATE. Theoretical ceiling β₀=0.238. With compositional
+# structure (d_loc=2.0), barrier rises to β'=0.556 — 2.3× faster data
+# scaling. Training budget D=1e11 has NOT passed the crossover D_cross≈...
+```
+---
+## One-line integration
+```python
+from spectraldiag.callbacks import make_hf_callback
+trainer = Trainer(
+    ...,
+    callbacks=[make_hf_callback(eval_data=(X_val, y_val))]
+)
+```
+Works with HuggingFace Trainer and PyTorch Lightning out of the box.
+---
+## Graph-Laplacian protocol (for real data)
+```python
+from spectraldiag.graph_lap import graph_laplacian_eigs, estimate_d_star, double_dimension_consistency
+eig_vals, eig_vecs = graph_laplacian_eigs(X_data, knn=10)
+d_star = estimate_d_star(eig_vals)
+consistency = double_dimension_consistency(d_star_data=d_star, d_loc_model=d_loc_from_model)
+print(consistency["verdict"])
+```
+---
+## Mathematical foundation
+This library implements the three-paper programme:
+- **TR** — *Boundaries of Stationary Feature Learning*: the Sobolev minimax barrier `β₀`, self-organised criticality `r=½`, approximation exponent `α=2s/d*`
+- **AB** — *Foundations of a Theory of Composable Abstractions*: defect as projection, effective dimension `d_loc`, subspace gap as the order parameter
+- **BM** — *Spectral Scaling Benchmark*: the decisive-test protocol, source exponent measurement, graph-Laplacian intrinsic dimension estimation
+The decisive invariant: `d_loc < d*` ⟺ emergence is real ⟺ the phase transition theorem applies.
+---
+## Build from source
+```bash
+pip install -e ".[all]"
+```
+Requires only a C++17 compiler. The C++ core is a standard CPython
+extension built automatically by setuptools — no CMake, no pybind11,
+no extra build dependencies.

spectraldiag-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,39 @@
+[build-system]
+requires = ["setuptools>=61", "wheel"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "spectraldiag"
+version = "0.1.0"
+description = "Mathematically rigorous ML model diagnostics: stationarity verdict, effective dimension, barrier certificate"
+readme = "README.md"
+license = { text = "MIT" }
+requires-python = ">=3.9"
+keywords = ["machine-learning", "scaling-laws", "diagnostics", "benchmarking", "spectral"]
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Intended Audience :: Science/Research",
+    "License :: OSI Approved :: MIT License",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: C++",
+    "Topic :: Scientific/Engineering :: Artificial Intelligence",
+]
+dependencies = []
+[project.optional-dependencies]
+torch  = ["torch>=2.0"]
+hf     = ["transformers>=4.30", "torch>=2.0"]
+lightning = ["lightning>=2.0", "torch>=2.0"]
+data   = ["numpy>=1.24", "scipy>=1.10", "scikit-learn>=1.3"]
+all    = ["torch>=2.0", "transformers>=4.30", "lightning>=2.0",
+          "numpy>=1.24", "scipy>=1.10", "scikit-learn>=1.3"]
+[project.urls]
+Homepage   = "https://github.com/drozdisme/spectraldiag"
+Repository = "https://github.com/drozdisme/spectraldiag"
+[tool.setuptools]
+package-dir = {"" = "python"}
+[tool.setuptools.packages.find]
+where = ["python"]

spectraldiag-0.1.0/python/spectraldiag/__init__.py ADDED Viewed

@@ -0,0 +1,148 @@
+from __future__ import annotations
+from . import _core
+__version__ = "0.1.0"
+__all__ = [
+    "stationarity_verdict", "effective_dimension", "barrier_certificate",
+    "snr_gate", "alignment_signal", "warmstart_economy", "SpectralDiagnostics",
+]
+def _to_list(x):
+    if x is None:
+        return []
+    try:
+        import numpy as np
+        if isinstance(x, np.ndarray):
+            return x.flatten().tolist()
+    except ImportError:
+        pass
+    if hasattr(x, "tolist"):
+        return x.tolist()
+    return [float(v) for v in x]
+def _to_mat(x):
+    if x is None:
+        return []
+    try:
+        import numpy as np
+        if isinstance(x, np.ndarray):
+            if x.ndim == 1:
+                return [x.tolist()]
+            return x.tolist()
+    except ImportError:
+        pass
+    if isinstance(x, (list, tuple)) and len(x) > 0:
+        if isinstance(x[0], (list, tuple)):
+            return [[float(v) for v in row] for row in x]
+        return [[float(v) for v in x]]
+    return []
+class _Result:
+    """Attribute-access wrapper around a result dict from the C++ core."""
+    __slots__ = ("_d",)
+    def __init__(self, d):
+        object.__setattr__(self, "_d", d)
+    def __getattr__(self, name):
+        try:
+            return self._d[name]
+        except KeyError:
+            raise AttributeError(name)
+    def __repr__(self):
+        keys = ", ".join(f"{k}={v!r}" for k, v in self._d.items()
+                         if not isinstance(v, str) or len(v) < 40)
+        return f"<{type(self).__name__} {keys}>"
+    def to_dict(self):
+        return dict(self._d)
+class StatResult(_Result): pass
+class DimResult(_Result): pass
+class BarrierResult(_Result): pass
+class SNRResult(_Result): pass
+class AlignResult(_Result): pass
+class WarmstartResult(_Result): pass
+def stationarity_verdict(ntk_eigs, target_coeffs=None, s: float = -1.0) -> StatResult:
+    return StatResult(_core.stationarity_verdict(
+        _to_list(ntk_eigs), _to_list(target_coeffs), float(s)))
+def effective_dimension(laplacian_eigs, approx_errors=None, model_sizes=None) -> DimResult:
+    return DimResult(_core.effective_dimension(
+        _to_list(laplacian_eigs), _to_list(approx_errors), _to_list(model_sizes)))
+def barrier_certificate(d_star: float, d_loc: float, s: float,
+                        current_loss: float, current_N: float,
+                        current_D: float) -> BarrierResult:
+    return BarrierResult(_core.barrier_certificate(
+        float(d_star), float(d_loc), float(s),
+        float(current_loss), float(current_N), float(current_D)))
+def snr_gate(alpha: float, noise_var: float, signal_var: float) -> SNRResult:
+    return SNRResult(_core.snr_gate(float(alpha), float(noise_var), float(signal_var)))
+def alignment_signal(subspace_dirs) -> AlignResult:
+    return AlignResult(_core.alignment_signal(_to_mat(subspace_dirs)))
+def warmstart_economy(source_eigs, target_eigs,
+                      spectral_floor: float = 0.1) -> WarmstartResult:
+    return WarmstartResult(_core.warmstart_economy(
+        _to_list(source_eigs), _to_list(target_eigs), float(spectral_floor)))
+class SpectralDiagnostics:
+    def __init__(self, d_star: float = 2.0, s: float = 1.25, verbose: bool = True):
+        self.d_star = d_star
+        self.s = s
+        self.verbose = verbose
+        self._ntk_eigs = []
+        self._target_coeffs = []
+        self._lap_eigs = []
+    def fit_ntk(self, eigs, target_coeffs=None):
+        self._ntk_eigs = _to_list(eigs)
+        if target_coeffs is not None:
+            self._target_coeffs = _to_list(target_coeffs)
+        return self
+    def fit_data(self, lap_eigs):
+        self._lap_eigs = _to_list(lap_eigs)
+        return self
+    def verdict(self, s: float = -1.0) -> StatResult:
+        if not self._ntk_eigs:
+            raise ValueError("Call fit_ntk() first.")
+        res = stationarity_verdict(self._ntk_eigs, self._target_coeffs,
+                                   s if s > 0 else self.s)
+        if self.verbose:
+            print(res.reason)
+        return res
+    def dim(self, approx_errors=None, model_sizes=None) -> DimResult:
+        if not self._lap_eigs:
+            raise ValueError("Call fit_data() first.")
+        res = effective_dimension(self._lap_eigs, approx_errors, model_sizes)
+        if self.verbose:
+            print(res.verdict)
+        return res
+    def barrier(self, d_loc: float, current_loss: float,
+                current_N: float, current_D: float) -> BarrierResult:
+        res = barrier_certificate(self.d_star, d_loc, self.s,
+                                  current_loss, current_N, current_D)
+        if self.verbose:
+            print(res.verdict)
+        return res