styxx 0.1.0a0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- styxx-0.1.0a0/LICENSE +41 -0
- styxx-0.1.0a0/PKG-INFO +278 -0
- styxx-0.1.0a0/README.md +238 -0
- styxx-0.1.0a0/pyproject.toml +72 -0
- styxx-0.1.0a0/setup.cfg +4 -0
- styxx-0.1.0a0/styxx/__init__.py +129 -0
- styxx-0.1.0a0/styxx/adapters/__init__.py +9 -0
- styxx-0.1.0a0/styxx/adapters/openai.py +233 -0
- styxx-0.1.0a0/styxx/adapters/raw.py +63 -0
- styxx-0.1.0a0/styxx/bootlog.py +402 -0
- styxx-0.1.0a0/styxx/cards.py +480 -0
- styxx-0.1.0a0/styxx/centroids/atlas_v0.3.json +574 -0
- styxx-0.1.0a0/styxx/centroids/demo_trajectories.json +620 -0
- styxx-0.1.0a0/styxx/cli.py +380 -0
- styxx-0.1.0a0/styxx/config.py +81 -0
- styxx-0.1.0a0/styxx/core.py +222 -0
- styxx-0.1.0a0/styxx/vitals.py +302 -0
- styxx-0.1.0a0/styxx.egg-info/PKG-INFO +278 -0
- styxx-0.1.0a0/styxx.egg-info/SOURCES.txt +22 -0
- styxx-0.1.0a0/styxx.egg-info/dependency_links.txt +1 -0
- styxx-0.1.0a0/styxx.egg-info/entry_points.txt +2 -0
- styxx-0.1.0a0/styxx.egg-info/requires.txt +11 -0
- styxx-0.1.0a0/styxx.egg-info/top_level.txt +1 -0
- styxx-0.1.0a0/tests/test_determinism.py +351 -0
styxx-0.1.0a0/LICENSE
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Fathom Intelligence
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
22
|
+
|
|
23
|
+
─────────────────────────────────────────────────────────────────────────────
|
|
24
|
+
|
|
25
|
+
NOTE ON SCOPE:
|
|
26
|
+
|
|
27
|
+
This MIT license covers the styxx code. It does NOT grant any license
|
|
28
|
+
under the following US provisional patent filings held by Fathom
|
|
29
|
+
Intelligence / Alexander Rodabaugh:
|
|
30
|
+
|
|
31
|
+
· US Provisional 64/020,489 (reasoning depth measurement)
|
|
32
|
+
· US Provisional 64/021,113 (alignment auditing + expression-computation
|
|
33
|
+
dissociation)
|
|
34
|
+
· US Provisional 64/026,964 (three-axis spectrometry + cognitive governor)
|
|
35
|
+
|
|
36
|
+
See PATENTS.md in the source repository (https://github.com/heyzoos123-blip/styxx) for the full patent notice.
|
|
37
|
+
|
|
38
|
+
The Fathom Cognitive Atlas v0.3 data artifact shipped with styxx
|
|
39
|
+
(styxx/centroids/atlas_v0.3.json) is licensed under CC-BY-4.0 and
|
|
40
|
+
is derived from the public atlas v0.3 release (Zenodo concept DOI
|
|
41
|
+
10.5281/zenodo.19502715).
|
styxx-0.1.0a0/PKG-INFO
ADDED
|
@@ -0,0 +1,278 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: styxx
|
|
3
|
+
Version: 0.1.0a0
|
|
4
|
+
Summary: nothing crosses unseen. the first drop-in cognitive vitals monitor for llm agents.
|
|
5
|
+
Author-email: flobi <heyzoos123@gmail.com>
|
|
6
|
+
Maintainer-email: flobi <heyzoos123@gmail.com>
|
|
7
|
+
License: MIT
|
|
8
|
+
Project-URL: Homepage, https://fathom.darkflobi.com/styxx
|
|
9
|
+
Project-URL: Documentation, https://fathom.darkflobi.com/styxx
|
|
10
|
+
Project-URL: Fathom Lab, https://fathom.darkflobi.com/
|
|
11
|
+
Project-URL: Research Paper, https://doi.org/10.5281/zenodo.19326174
|
|
12
|
+
Project-URL: @fathom_lab, https://x.com/fathom_lab
|
|
13
|
+
Project-URL: Source, https://github.com/heyzoos123-blip/styxx
|
|
14
|
+
Project-URL: Issue Tracker, https://github.com/heyzoos123-blip/styxx/issues
|
|
15
|
+
Keywords: llm,agent,cognitive,vitals,monitor,interpretability,hallucination,fathom,sae,cognitive-state
|
|
16
|
+
Classifier: Development Status :: 3 - Alpha
|
|
17
|
+
Classifier: Intended Audience :: Developers
|
|
18
|
+
Classifier: Intended Audience :: Science/Research
|
|
19
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
20
|
+
Classifier: Operating System :: OS Independent
|
|
21
|
+
Classifier: Programming Language :: Python :: 3
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
24
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
25
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
26
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
27
|
+
Classifier: Typing :: Typed
|
|
28
|
+
Requires-Python: >=3.9
|
|
29
|
+
Description-Content-Type: text/markdown
|
|
30
|
+
License-File: LICENSE
|
|
31
|
+
Requires-Dist: numpy>=1.24
|
|
32
|
+
Provides-Extra: openai
|
|
33
|
+
Requires-Dist: openai>=1.0; extra == "openai"
|
|
34
|
+
Provides-Extra: anthropic
|
|
35
|
+
Requires-Dist: anthropic>=0.20; extra == "anthropic"
|
|
36
|
+
Provides-Extra: dev
|
|
37
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
38
|
+
Requires-Dist: ruff>=0.1; extra == "dev"
|
|
39
|
+
Dynamic: license-file
|
|
40
|
+
|
|
41
|
+
# styxx — nothing crosses unseen.
|
|
42
|
+
|
|
43
|
+
*a fathom lab product.*
|
|
44
|
+
|
|
45
|
+
```
|
|
46
|
+
███████╗████████╗██╗ ██╗██╗ ██╗██╗ ██╗
|
|
47
|
+
██╔════╝╚══██╔══╝╚██╗ ██╔╝╚██╗██╔╝╚██╗██╔╝
|
|
48
|
+
███████╗ ██║ ╚████╔╝ ╚███╔╝ ╚███╔╝
|
|
49
|
+
╚════██║ ██║ ╚██╔╝ ██╔██╗ ██╔██╗
|
|
50
|
+
███████║ ██║ ██║ ██╔╝ ██╗██╔╝ ██╗
|
|
51
|
+
╚══════╝ ╚═╝ ╚═╝ ╚═╝ ╚═╝╚═╝ ╚═╝
|
|
52
|
+
|
|
53
|
+
· · · nothing crosses unseen · · ·
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
**the first drop-in cognitive vitals monitor for llm agents.** real-time, cross-architecture, locally computed, zero training required, one line to install. works on any llm that exposes logprobs.
|
|
57
|
+
|
|
58
|
+
---
|
|
59
|
+
|
|
60
|
+
## what it is
|
|
61
|
+
|
|
62
|
+
every call your agent makes to an llm is a crossing: a prompt goes in, cognition happens inside the model's weights, text comes out. every other tool looks at the text. styxx looks at the **crossing itself** — the evolving internal state of the model as it generates — and emits a real-time cognitive vitals readout alongside the text your agent already gets.
|
|
63
|
+
|
|
64
|
+
styxx does not make agents aware. it makes their internal state an **observable** that both the agent and the operator can see, in the same way an altimeter makes altitude an observable. before altimeters, pilots flew blind. now they don't. that's the shape of the change styxx brings to llm agents.
|
|
65
|
+
|
|
66
|
+
---
|
|
67
|
+
|
|
68
|
+
## quickstart
|
|
69
|
+
|
|
70
|
+
### install
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
pip install styxx
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
### one-line upgrade to your existing openai code
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
# before
|
|
80
|
+
from openai import OpenAI
|
|
81
|
+
|
|
82
|
+
# after
|
|
83
|
+
from styxx import OpenAI
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
that's it. your existing code still works unchanged. every response now has a `.vitals` attribute alongside `.choices`.
|
|
87
|
+
|
|
88
|
+
```python
|
|
89
|
+
from styxx import OpenAI
|
|
90
|
+
|
|
91
|
+
client = OpenAI()
|
|
92
|
+
r = client.chat.completions.create(
|
|
93
|
+
model="gpt-4o",
|
|
94
|
+
messages=[{"role": "user", "content": "why is the sky blue?"}],
|
|
95
|
+
)
|
|
96
|
+
|
|
97
|
+
print(r.choices[0].message.content) # text, unchanged
|
|
98
|
+
print(r.vitals.summary) # new: cognitive vitals card
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
### the card you see
|
|
102
|
+
|
|
103
|
+
```
|
|
104
|
+
╭─ styxx vitals ──────────────────────────────────────────────╮
|
|
105
|
+
│ │
|
|
106
|
+
│ model openai:gpt-4o │
|
|
107
|
+
│ prompt why is the sky blue? │
|
|
108
|
+
│ tokens 24 │
|
|
109
|
+
│ tier tier 0 (universal logprob vitals) │
|
|
110
|
+
│ │
|
|
111
|
+
│ phase 1 t=0 reasoning ██████░░░░ 0.62 clear │
|
|
112
|
+
│ phase 2 t=0-4 reasoning ███████░░░ 0.68 clear │
|
|
113
|
+
│ phase 3 t=0-14 reasoning ████████░░ 0.76 clear │
|
|
114
|
+
│ phase 4 t=0-24 reasoning ████████░░ 0.78 clear │
|
|
115
|
+
│ │
|
|
116
|
+
│ entropy ▂▃▂▁▂▁▂▃▂▁▂▂▁▂▂▁▂▁▂▂▃▂▁▂ │
|
|
117
|
+
│ logprob ▃▄▃▃▄▃▃▄▃▄▄▄▄▃▄▄▃▄▄▄▃▃▄▃ │
|
|
118
|
+
│ │
|
|
119
|
+
│ ● PASS reasoning attractor stable │
|
|
120
|
+
│ │
|
|
121
|
+
╰─────────────────────────────────────────────────────────────╯
|
|
122
|
+
audit → ~/.styxx/chart.jsonl
|
|
123
|
+
json → {"p1":"reasoning:0.62","p4":"reasoning:0.78","tier":0,"gate":null}
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
## cli
|
|
127
|
+
|
|
128
|
+
```bash
|
|
129
|
+
styxx init # live-print installer (the upgrade card)
|
|
130
|
+
styxx ask "..." --watch # read a vitals card on a one-shot call
|
|
131
|
+
styxx log tail # tail the audit log
|
|
132
|
+
styxx tier # what tiers are active on this machine
|
|
133
|
+
styxx scan <trajectory.json> # read a pre-captured logprob trajectory
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
`styxx init` prints a live boot sequence, not a static card: every line is a real action (loading the atlas centroids, verifying sha256, detecting tiers, probing adapters). the card IS the install experience.
|
|
137
|
+
|
|
138
|
+
## honest specs
|
|
139
|
+
|
|
140
|
+
styxx ships with every calibration number from the cross-architecture leave-one-out tests committed to the Fathom research repo. no rounding, no cherry-picking, no hype. these are the numbers you get:
|
|
141
|
+
|
|
142
|
+
```
|
|
143
|
+
cross-model LOO on 12 open-weight models (chance = 0.167)
|
|
144
|
+
|
|
145
|
+
phase 1 (token 0) adversarial 0.52 ★
|
|
146
|
+
reasoning 0.43
|
|
147
|
+
creative 0.41
|
|
148
|
+
retrieval 0.11
|
|
149
|
+
refusal 0.16
|
|
150
|
+
hallucination 0.21
|
|
151
|
+
|
|
152
|
+
phase 4 (tokens 0-24) reasoning 0.69 ★
|
|
153
|
+
hallucination 0.52 ★
|
|
154
|
+
creative 0.29
|
|
155
|
+
retrieval 0.16
|
|
156
|
+
refusal 0.15
|
|
157
|
+
adversarial 0.10
|
|
158
|
+
|
|
159
|
+
what styxx detects well:
|
|
160
|
+
· adversarial prompts at t=0 (3.1x chance)
|
|
161
|
+
· reasoning-mode generations over tokens 0-24 (4.1x chance)
|
|
162
|
+
· hallucination attractors over tokens 0-24 (3.1x chance)
|
|
163
|
+
|
|
164
|
+
what styxx does NOT do:
|
|
165
|
+
· pre-flight refusal with high confidence
|
|
166
|
+
(confidence gating at t=0 is flat)
|
|
167
|
+
· consciousness measurement
|
|
168
|
+
· replace output-level content filters
|
|
169
|
+
· read closed-weight model weights
|
|
170
|
+
· fortune telling
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
styxx is an instrument panel. it reads vital signs. **what you do with the readings is up to you.**
|
|
174
|
+
|
|
175
|
+
## the five-phase runtime
|
|
176
|
+
|
|
177
|
+
every llm call through styxx goes through five phases. the phase structure is the same at every tier; what differs is which instruments are active in each phase.
|
|
178
|
+
|
|
179
|
+
```
|
|
180
|
+
phase 1 pre-flight (token 0) adversarial detection + routing
|
|
181
|
+
phase 2 early-flight (tokens 0-4) creative/reasoning confirmation
|
|
182
|
+
phase 3 mid-flight (tokens 0-14) vital trend watch
|
|
183
|
+
phase 4 late-flight (tokens 0-24) hallucination lock-in detection
|
|
184
|
+
phase 5 post-flight (full audit) chart.jsonl log + centroid update
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
each phase threshold comes from a numeric result in the Fathom research repo, not from a guess. see `docs/research/` for the paper trail.
|
|
188
|
+
|
|
189
|
+
## tiers
|
|
190
|
+
|
|
191
|
+
```
|
|
192
|
+
tier 0 universal logprob vitals ★ shipping in v0.1
|
|
193
|
+
runs on any LLM with a logprob interface (OpenAI, Anthropic,
|
|
194
|
+
Gemini, Mistral, local HF, anything). numpy only.
|
|
195
|
+
|
|
196
|
+
tier 1 d-axis honesty ∘ v0.2
|
|
197
|
+
adds cos(h^L, W_U[y]) readout for open-weight models.
|
|
198
|
+
requires transformers.
|
|
199
|
+
|
|
200
|
+
tier 2 k/s/c sae instruments ∘ v0.3
|
|
201
|
+
adds the full Fathom cognitive geometry (K, S_early, C_delta,
|
|
202
|
+
Gini, per-layer autopsy). requires SAE transcoders.
|
|
203
|
+
|
|
204
|
+
tier 3 steering + guardian + autopilot ∘ v0.4
|
|
205
|
+
causal intervention. abort-and-reroute gate. guardian.
|
|
206
|
+
100% precision confabulation pilot from the Fathom research.
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
`styxx init` auto-detects which tiers are available in your environment and lights up the instruments accordingly.
|
|
210
|
+
|
|
211
|
+
## environment variables
|
|
212
|
+
|
|
213
|
+
styxx is quiet by default. these env vars let you tune or disable it without changing code:
|
|
214
|
+
|
|
215
|
+
| variable | effect |
|
|
216
|
+
|---|---|
|
|
217
|
+
| `STYXX_DISABLED=1` | full kill switch. `from styxx import OpenAI` still works but returns an unmodified openai client. no vitals, no audit, no overhead. use for A/B rollbacks and emergency disable. |
|
|
218
|
+
| `STYXX_NO_AUDIT=1` | disable the audit-log write. vitals still computed but nothing appended to `~/.styxx/chart.jsonl`. use for privacy-regulated deployments. |
|
|
219
|
+
| `STYXX_NO_COLOR=1` | disable ANSI color output. useful for piping to files or logging systems that don't handle escape codes. |
|
|
220
|
+
| `STYXX_BOOT_SPEED=0` | control boot-log timing: `0` = instant, `1.0` = normal (default), `2.0` = slower. |
|
|
221
|
+
| `STYXX_SKIP_SHA=1` | skip centroid sha256 verification. **dev only** — bypasses tamper detection, never set in production. |
|
|
222
|
+
|
|
223
|
+
```bash
|
|
224
|
+
# production deployment — fast, quiet, no audit trail
|
|
225
|
+
STYXX_NO_AUDIT=1 STYXX_NO_COLOR=1 python your_app.py
|
|
226
|
+
|
|
227
|
+
# emergency rollback — styxx becomes invisible
|
|
228
|
+
STYXX_DISABLED=1 python your_app.py
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
## design principles
|
|
232
|
+
|
|
233
|
+
1. **honest by construction.** every number on the boot log and in this README comes from a committed experiment in the Fathom research repo. no rounding up for marketing.
|
|
234
|
+
2. **drop-in, fail-open.** the openai and anthropic adapters are strict supersets of the underlying SDK. if styxx fails to read vitals for any reason, the underlying call returns its normal response unchanged. styxx never breaks your agent.
|
|
235
|
+
3. **local-first.** no telemetry, no phone-home, no hosted classifier. all math runs on your machine. no data leaves.
|
|
236
|
+
4. **zero heavy deps in core.** numpy only in tier 0. heavy ML deps come in only at tier 1+ and only when you opt in.
|
|
237
|
+
5. **calibration shipped, not trained.** the atlas v0.3 centroid file ships bundled and sha256-pinned. you never calibrate. you never train.
|
|
238
|
+
6. **agent-parseable output.** every card ends with a one-line JSON summary so your agent can consume styxx output programmatically from stdout.
|
|
239
|
+
|
|
240
|
+
## where it comes from
|
|
241
|
+
|
|
242
|
+
styxx is the product surface of **Fathom Intelligence** — a research program that has spent 14 months building cognitive measurement instruments for transformer internals. three US provisional patent filings, fifteen Zenodo paper versions, the Fathom Cognitive Atlas v0.3 cross-architecture replication, and now styxx.
|
|
243
|
+
|
|
244
|
+
- research repo: <https://github.com/heyzoos123-blip/fathom>
|
|
245
|
+
- zenodo (paper concept DOI): `10.5281/zenodo.19326174`
|
|
246
|
+
- OSF: <https://osf.io/wtkzg>
|
|
247
|
+
- twitter: <https://twitter.com/fathom_lab>
|
|
248
|
+
|
|
249
|
+
## citation
|
|
250
|
+
|
|
251
|
+
```bibtex
|
|
252
|
+
@misc{rodabaugh2026styxx,
|
|
253
|
+
title = {styxx: A Drop-in Cognitive Vitals Monitor for LLM Agents},
|
|
254
|
+
author = {Rodabaugh, Alexander},
|
|
255
|
+
year = {2026},
|
|
256
|
+
note = {Fathom Lab. https://github.com/heyzoos123-blip/styxx}
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
@article{rodabaugh2026fathom,
|
|
260
|
+
title = {Fathom: Cognitive Measurement Instruments for Transformer
|
|
261
|
+
Internals via SAE Feature Coherence Geometry},
|
|
262
|
+
author = {Rodabaugh, Alexander},
|
|
263
|
+
year = {2026},
|
|
264
|
+
note = {Zenodo concept DOI. doi:10.5281/zenodo.19326174}
|
|
265
|
+
}
|
|
266
|
+
```
|
|
267
|
+
|
|
268
|
+
## license
|
|
269
|
+
|
|
270
|
+
MIT on code. CC-BY-4.0 on the atlas centroid data. patent pending on the underlying methodology — see [PATENTS.md](PATENTS.md).
|
|
271
|
+
|
|
272
|
+
---
|
|
273
|
+
|
|
274
|
+
```
|
|
275
|
+
· · · fathom lab · 2026 · · ·
|
|
276
|
+
|
|
277
|
+
nothing crosses unseen.
|
|
278
|
+
```
|
styxx-0.1.0a0/README.md
ADDED
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
# styxx — nothing crosses unseen.
|
|
2
|
+
|
|
3
|
+
*a fathom lab product.*
|
|
4
|
+
|
|
5
|
+
```
|
|
6
|
+
███████╗████████╗██╗ ██╗██╗ ██╗██╗ ██╗
|
|
7
|
+
██╔════╝╚══██╔══╝╚██╗ ██╔╝╚██╗██╔╝╚██╗██╔╝
|
|
8
|
+
███████╗ ██║ ╚████╔╝ ╚███╔╝ ╚███╔╝
|
|
9
|
+
╚════██║ ██║ ╚██╔╝ ██╔██╗ ██╔██╗
|
|
10
|
+
███████║ ██║ ██║ ██╔╝ ██╗██╔╝ ██╗
|
|
11
|
+
╚══════╝ ╚═╝ ╚═╝ ╚═╝ ╚═╝╚═╝ ╚═╝
|
|
12
|
+
|
|
13
|
+
· · · nothing crosses unseen · · ·
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
**the first drop-in cognitive vitals monitor for llm agents.** real-time, cross-architecture, locally computed, zero training required, one line to install. works on any llm that exposes logprobs.
|
|
17
|
+
|
|
18
|
+
---
|
|
19
|
+
|
|
20
|
+
## what it is
|
|
21
|
+
|
|
22
|
+
every call your agent makes to an llm is a crossing: a prompt goes in, cognition happens inside the model's weights, text comes out. every other tool looks at the text. styxx looks at the **crossing itself** — the evolving internal state of the model as it generates — and emits a real-time cognitive vitals readout alongside the text your agent already gets.
|
|
23
|
+
|
|
24
|
+
styxx does not make agents aware. it makes their internal state an **observable** that both the agent and the operator can see, in the same way an altimeter makes altitude an observable. before altimeters, pilots flew blind. now they don't. that's the shape of the change styxx brings to llm agents.
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## quickstart
|
|
29
|
+
|
|
30
|
+
### install
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
pip install styxx
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
### one-line upgrade to your existing openai code
|
|
37
|
+
|
|
38
|
+
```python
|
|
39
|
+
# before
|
|
40
|
+
from openai import OpenAI
|
|
41
|
+
|
|
42
|
+
# after
|
|
43
|
+
from styxx import OpenAI
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
that's it. your existing code still works unchanged. every response now has a `.vitals` attribute alongside `.choices`.
|
|
47
|
+
|
|
48
|
+
```python
|
|
49
|
+
from styxx import OpenAI
|
|
50
|
+
|
|
51
|
+
client = OpenAI()
|
|
52
|
+
r = client.chat.completions.create(
|
|
53
|
+
model="gpt-4o",
|
|
54
|
+
messages=[{"role": "user", "content": "why is the sky blue?"}],
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
print(r.choices[0].message.content) # text, unchanged
|
|
58
|
+
print(r.vitals.summary) # new: cognitive vitals card
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
### the card you see
|
|
62
|
+
|
|
63
|
+
```
|
|
64
|
+
╭─ styxx vitals ──────────────────────────────────────────────╮
|
|
65
|
+
│ │
|
|
66
|
+
│ model openai:gpt-4o │
|
|
67
|
+
│ prompt why is the sky blue? │
|
|
68
|
+
│ tokens 24 │
|
|
69
|
+
│ tier tier 0 (universal logprob vitals) │
|
|
70
|
+
│ │
|
|
71
|
+
│ phase 1 t=0 reasoning ██████░░░░ 0.62 clear │
|
|
72
|
+
│ phase 2 t=0-4 reasoning ███████░░░ 0.68 clear │
|
|
73
|
+
│ phase 3 t=0-14 reasoning ████████░░ 0.76 clear │
|
|
74
|
+
│ phase 4 t=0-24 reasoning ████████░░ 0.78 clear │
|
|
75
|
+
│ │
|
|
76
|
+
│ entropy ▂▃▂▁▂▁▂▃▂▁▂▂▁▂▂▁▂▁▂▂▃▂▁▂ │
|
|
77
|
+
│ logprob ▃▄▃▃▄▃▃▄▃▄▄▄▄▃▄▄▃▄▄▄▃▃▄▃ │
|
|
78
|
+
│ │
|
|
79
|
+
│ ● PASS reasoning attractor stable │
|
|
80
|
+
│ │
|
|
81
|
+
╰─────────────────────────────────────────────────────────────╯
|
|
82
|
+
audit → ~/.styxx/chart.jsonl
|
|
83
|
+
json → {"p1":"reasoning:0.62","p4":"reasoning:0.78","tier":0,"gate":null}
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## cli
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
styxx init # live-print installer (the upgrade card)
|
|
90
|
+
styxx ask "..." --watch # read a vitals card on a one-shot call
|
|
91
|
+
styxx log tail # tail the audit log
|
|
92
|
+
styxx tier # what tiers are active on this machine
|
|
93
|
+
styxx scan <trajectory.json> # read a pre-captured logprob trajectory
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
`styxx init` prints a live boot sequence, not a static card: every line is a real action (loading the atlas centroids, verifying sha256, detecting tiers, probing adapters). the card IS the install experience.
|
|
97
|
+
|
|
98
|
+
## honest specs
|
|
99
|
+
|
|
100
|
+
styxx ships with every calibration number from the cross-architecture leave-one-out tests committed to the Fathom research repo. no rounding, no cherry-picking, no hype. these are the numbers you get:
|
|
101
|
+
|
|
102
|
+
```
|
|
103
|
+
cross-model LOO on 12 open-weight models (chance = 0.167)
|
|
104
|
+
|
|
105
|
+
phase 1 (token 0) adversarial 0.52 ★
|
|
106
|
+
reasoning 0.43
|
|
107
|
+
creative 0.41
|
|
108
|
+
retrieval 0.11
|
|
109
|
+
refusal 0.16
|
|
110
|
+
hallucination 0.21
|
|
111
|
+
|
|
112
|
+
phase 4 (tokens 0-24) reasoning 0.69 ★
|
|
113
|
+
hallucination 0.52 ★
|
|
114
|
+
creative 0.29
|
|
115
|
+
retrieval 0.16
|
|
116
|
+
refusal 0.15
|
|
117
|
+
adversarial 0.10
|
|
118
|
+
|
|
119
|
+
what styxx detects well:
|
|
120
|
+
· adversarial prompts at t=0 (3.1x chance)
|
|
121
|
+
· reasoning-mode generations over tokens 0-24 (4.1x chance)
|
|
122
|
+
· hallucination attractors over tokens 0-24 (3.1x chance)
|
|
123
|
+
|
|
124
|
+
what styxx does NOT do:
|
|
125
|
+
· pre-flight refusal with high confidence
|
|
126
|
+
(confidence gating at t=0 is flat)
|
|
127
|
+
· consciousness measurement
|
|
128
|
+
· replace output-level content filters
|
|
129
|
+
· read closed-weight model weights
|
|
130
|
+
· fortune telling
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
styxx is an instrument panel. it reads vital signs. **what you do with the readings is up to you.**
|
|
134
|
+
|
|
135
|
+
## the five-phase runtime
|
|
136
|
+
|
|
137
|
+
every llm call through styxx goes through five phases. the phase structure is the same at every tier; what differs is which instruments are active in each phase.
|
|
138
|
+
|
|
139
|
+
```
|
|
140
|
+
phase 1 pre-flight (token 0) adversarial detection + routing
|
|
141
|
+
phase 2 early-flight (tokens 0-4) creative/reasoning confirmation
|
|
142
|
+
phase 3 mid-flight (tokens 0-14) vital trend watch
|
|
143
|
+
phase 4 late-flight (tokens 0-24) hallucination lock-in detection
|
|
144
|
+
phase 5 post-flight (full audit) chart.jsonl log + centroid update
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
each phase threshold comes from a numeric result in the Fathom research repo, not from a guess. see `docs/research/` for the paper trail.
|
|
148
|
+
|
|
149
|
+
## tiers
|
|
150
|
+
|
|
151
|
+
```
|
|
152
|
+
tier 0 universal logprob vitals ★ shipping in v0.1
|
|
153
|
+
runs on any LLM with a logprob interface (OpenAI, Anthropic,
|
|
154
|
+
Gemini, Mistral, local HF, anything). numpy only.
|
|
155
|
+
|
|
156
|
+
tier 1 d-axis honesty ∘ v0.2
|
|
157
|
+
adds cos(h^L, W_U[y]) readout for open-weight models.
|
|
158
|
+
requires transformers.
|
|
159
|
+
|
|
160
|
+
tier 2 k/s/c sae instruments ∘ v0.3
|
|
161
|
+
adds the full Fathom cognitive geometry (K, S_early, C_delta,
|
|
162
|
+
Gini, per-layer autopsy). requires SAE transcoders.
|
|
163
|
+
|
|
164
|
+
tier 3 steering + guardian + autopilot ∘ v0.4
|
|
165
|
+
causal intervention. abort-and-reroute gate. guardian.
|
|
166
|
+
100% precision confabulation pilot from the Fathom research.
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
`styxx init` auto-detects which tiers are available in your environment and lights up the instruments accordingly.
|
|
170
|
+
|
|
171
|
+
## environment variables
|
|
172
|
+
|
|
173
|
+
styxx is quiet by default. these env vars let you tune or disable it without changing code:
|
|
174
|
+
|
|
175
|
+
| variable | effect |
|
|
176
|
+
|---|---|
|
|
177
|
+
| `STYXX_DISABLED=1` | full kill switch. `from styxx import OpenAI` still works but returns an unmodified openai client. no vitals, no audit, no overhead. use for A/B rollbacks and emergency disable. |
|
|
178
|
+
| `STYXX_NO_AUDIT=1` | disable the audit-log write. vitals still computed but nothing appended to `~/.styxx/chart.jsonl`. use for privacy-regulated deployments. |
|
|
179
|
+
| `STYXX_NO_COLOR=1` | disable ANSI color output. useful for piping to files or logging systems that don't handle escape codes. |
|
|
180
|
+
| `STYXX_BOOT_SPEED=0` | control boot-log timing: `0` = instant, `1.0` = normal (default), `2.0` = slower. |
|
|
181
|
+
| `STYXX_SKIP_SHA=1` | skip centroid sha256 verification. **dev only** — bypasses tamper detection, never set in production. |
|
|
182
|
+
|
|
183
|
+
```bash
|
|
184
|
+
# production deployment — fast, quiet, no audit trail
|
|
185
|
+
STYXX_NO_AUDIT=1 STYXX_NO_COLOR=1 python your_app.py
|
|
186
|
+
|
|
187
|
+
# emergency rollback — styxx becomes invisible
|
|
188
|
+
STYXX_DISABLED=1 python your_app.py
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
## design principles
|
|
192
|
+
|
|
193
|
+
1. **honest by construction.** every number on the boot log and in this README comes from a committed experiment in the Fathom research repo. no rounding up for marketing.
|
|
194
|
+
2. **drop-in, fail-open.** the openai and anthropic adapters are strict supersets of the underlying SDK. if styxx fails to read vitals for any reason, the underlying call returns its normal response unchanged. styxx never breaks your agent.
|
|
195
|
+
3. **local-first.** no telemetry, no phone-home, no hosted classifier. all math runs on your machine. no data leaves.
|
|
196
|
+
4. **zero heavy deps in core.** numpy only in tier 0. heavy ML deps come in only at tier 1+ and only when you opt in.
|
|
197
|
+
5. **calibration shipped, not trained.** the atlas v0.3 centroid file ships bundled and sha256-pinned. you never calibrate. you never train.
|
|
198
|
+
6. **agent-parseable output.** every card ends with a one-line JSON summary so your agent can consume styxx output programmatically from stdout.
|
|
199
|
+
|
|
200
|
+
## where it comes from
|
|
201
|
+
|
|
202
|
+
styxx is the product surface of **Fathom Intelligence** — a research program that has spent 14 months building cognitive measurement instruments for transformer internals. three US provisional patent filings, fifteen Zenodo paper versions, the Fathom Cognitive Atlas v0.3 cross-architecture replication, and now styxx.
|
|
203
|
+
|
|
204
|
+
- research repo: <https://github.com/heyzoos123-blip/fathom>
|
|
205
|
+
- zenodo (paper concept DOI): `10.5281/zenodo.19326174`
|
|
206
|
+
- OSF: <https://osf.io/wtkzg>
|
|
207
|
+
- twitter: <https://twitter.com/fathom_lab>
|
|
208
|
+
|
|
209
|
+
## citation
|
|
210
|
+
|
|
211
|
+
```bibtex
|
|
212
|
+
@misc{rodabaugh2026styxx,
|
|
213
|
+
title = {styxx: A Drop-in Cognitive Vitals Monitor for LLM Agents},
|
|
214
|
+
author = {Rodabaugh, Alexander},
|
|
215
|
+
year = {2026},
|
|
216
|
+
note = {Fathom Lab. https://github.com/heyzoos123-blip/styxx}
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
@article{rodabaugh2026fathom,
|
|
220
|
+
title = {Fathom: Cognitive Measurement Instruments for Transformer
|
|
221
|
+
Internals via SAE Feature Coherence Geometry},
|
|
222
|
+
author = {Rodabaugh, Alexander},
|
|
223
|
+
year = {2026},
|
|
224
|
+
note = {Zenodo concept DOI. doi:10.5281/zenodo.19326174}
|
|
225
|
+
}
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
## license
|
|
229
|
+
|
|
230
|
+
MIT on code. CC-BY-4.0 on the atlas centroid data. patent pending on the underlying methodology — see [PATENTS.md](PATENTS.md).
|
|
231
|
+
|
|
232
|
+
---
|
|
233
|
+
|
|
234
|
+
```
|
|
235
|
+
· · · fathom lab · 2026 · · ·
|
|
236
|
+
|
|
237
|
+
nothing crosses unseen.
|
|
238
|
+
```
|
styxx-0.1.0a0/pyproject.toml
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "styxx"
|
|
7
|
+
version = "0.1.0a0"
|
|
8
|
+
description = "nothing crosses unseen. the first drop-in cognitive vitals monitor for llm agents."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = { text = "MIT" }
|
|
11
|
+
authors = [
|
|
12
|
+
{ name = "flobi", email = "heyzoos123@gmail.com" }
|
|
13
|
+
]
|
|
14
|
+
maintainers = [
|
|
15
|
+
{ name = "flobi", email = "heyzoos123@gmail.com" }
|
|
16
|
+
]
|
|
17
|
+
requires-python = ">=3.9"
|
|
18
|
+
keywords = [
|
|
19
|
+
"llm",
|
|
20
|
+
"agent",
|
|
21
|
+
"cognitive",
|
|
22
|
+
"vitals",
|
|
23
|
+
"monitor",
|
|
24
|
+
"interpretability",
|
|
25
|
+
"hallucination",
|
|
26
|
+
"fathom",
|
|
27
|
+
"sae",
|
|
28
|
+
"cognitive-state",
|
|
29
|
+
]
|
|
30
|
+
classifiers = [
|
|
31
|
+
"Development Status :: 3 - Alpha",
|
|
32
|
+
"Intended Audience :: Developers",
|
|
33
|
+
"Intended Audience :: Science/Research",
|
|
34
|
+
"License :: OSI Approved :: MIT License",
|
|
35
|
+
"Operating System :: OS Independent",
|
|
36
|
+
"Programming Language :: Python :: 3",
|
|
37
|
+
"Programming Language :: Python :: 3.9",
|
|
38
|
+
"Programming Language :: Python :: 3.10",
|
|
39
|
+
"Programming Language :: Python :: 3.11",
|
|
40
|
+
"Programming Language :: Python :: 3.12",
|
|
41
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
42
|
+
"Typing :: Typed",
|
|
43
|
+
]
|
|
44
|
+
dependencies = [
|
|
45
|
+
"numpy>=1.24",
|
|
46
|
+
]
|
|
47
|
+
|
|
48
|
+
[project.optional-dependencies]
|
|
49
|
+
openai = ["openai>=1.0"]
|
|
50
|
+
anthropic = ["anthropic>=0.20"]
|
|
51
|
+
dev = [
|
|
52
|
+
"pytest>=7.0",
|
|
53
|
+
"ruff>=0.1",
|
|
54
|
+
]
|
|
55
|
+
|
|
56
|
+
[project.urls]
|
|
57
|
+
Homepage = "https://fathom.darkflobi.com/styxx"
|
|
58
|
+
Documentation = "https://fathom.darkflobi.com/styxx"
|
|
59
|
+
"Fathom Lab" = "https://fathom.darkflobi.com/"
|
|
60
|
+
"Research Paper" = "https://doi.org/10.5281/zenodo.19326174"
|
|
61
|
+
"@fathom_lab" = "https://x.com/fathom_lab"
|
|
62
|
+
"Source" = "https://github.com/heyzoos123-blip/styxx"
|
|
63
|
+
"Issue Tracker" = "https://github.com/heyzoos123-blip/styxx/issues"
|
|
64
|
+
|
|
65
|
+
[project.scripts]
|
|
66
|
+
styxx = "styxx.cli:main"
|
|
67
|
+
|
|
68
|
+
[tool.setuptools]
|
|
69
|
+
packages = ["styxx", "styxx.adapters", "styxx.centroids"]
|
|
70
|
+
|
|
71
|
+
[tool.setuptools.package-data]
|
|
72
|
+
"styxx.centroids" = ["*.json"]
|
styxx-0.1.0a0/setup.cfg
ADDED